You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by pt...@apache.org on 2020/01/05 08:07:20 UTC
[incubator-mxnet] branch v1.6.x updated: Backport #16980 #17031
#17018 #17019 to 1.6 branch (#17213)
This is an automated email from the ASF dual-hosted git repository.
ptrendx pushed a commit to branch v1.6.x
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/v1.6.x by this push:
new 0015fc3 Backport #16980 #17031 #17018 #17019 to 1.6 branch (#17213)
0015fc3 is described below
commit 0015fc3728ced8198d0ca93f6d54a165f465f302
Author: Hu Shiwen <ya...@gmail.com>
AuthorDate: Sun Jan 5 16:06:37 2020 +0800
Backport #16980 #17031 #17018 #17019 to 1.6 branch (#17213)
Fix CUDNN detection for CMake build (#17019)
Replace mxnet_option macro with standard CMAKE_DEPENDENT_OPTION (#17018)
Switch to modern CMake CUDA handling (#17031)
Introduce a unified MXNET_CUDA_ARCH option to specify CUDA architectures.
Previously, the CUDA architecture setting was partially broken, and different options
were applied to different parts of the build (CUDA_ARCH_NAME, CUDA_ARCH_BIN,
CUDA_ARCH_PTX and CUDA_ARCH_LIST).
Include FindCUDAToolkit from CMake 3.17, which replaces the deprecated FindCUDA
functionality for finding the CUDA toolkit include directories and libraries.
Workaround for DLL size limitation on Windows (#16980)
* Change the Windows build system:
add a C++ version of gen_warp,
add an add_custom_command step to run gen_warp,
add a CMake download step,
add a build option,
change the option,
add dynamic loading of the MXNet DLL.
Co-authored-by: Leonard Lausen <le...@lausen.nl>
---
3rdparty/mshadow/cmake/Cuda.cmake | 324 ------------
3rdparty/mshadow/cmake/Utils.cmake | 398 --------------
3rdparty/mshadow/cmake/mshadow.cmake | 91 ----
3rdparty/mshadow/cmake/mshadowUtils.cmake | 2 -
CMakeLists.txt | 389 +++++++-------
ci/build_windows.py | 8 +-
ci/docker/install/ubuntu_core.sh | 2 +-
ci/docker/runtime_functions.sh | 17 +-
cmake/BuildTVM.cmake | 23 +-
cmake/FirstClassLangCuda.cmake | 277 ----------
cmake/Modules/FindCUDAToolkit.cmake | 833 ++++++++++++++++++++++++++++++
cmake/Modules/FindCUDNN.cmake | 33 ++
cmake/Modules/FindMKL.cmake | 10 +-
cmake/Utils.cmake | 41 --
contrib/tvmop/compile.py | 5 +
tools/windowsbuild/README.md | 19 +
tools/windowsbuild/gen_warp.cpp | 209 ++++++++
tools/windowsbuild/warp_dll.cpp | 151 ++++++
18 files changed, 1478 insertions(+), 1354 deletions(-)
diff --git a/3rdparty/mshadow/cmake/Cuda.cmake b/3rdparty/mshadow/cmake/Cuda.cmake
deleted file mode 100644
index bc09a39..0000000
--- a/3rdparty/mshadow/cmake/Cuda.cmake
+++ /dev/null
@@ -1,324 +0,0 @@
-if(NOT USE_CUDA)
- return()
-endif()
-
-include(CheckCXXCompilerFlag)
-check_cxx_compiler_flag("-std=c++11" SUPPORT_CXX11)
-
-################################################################################################
-# A function for automatic detection of GPUs installed (if autodetection is enabled)
-# Usage:
-# mshadow_detect_installed_gpus(out_variable)
-function(mshadow_detect_installed_gpus out_variable)
-set(CUDA_gpu_detect_output "")
- if(NOT CUDA_gpu_detect_output)
- message(STATUS "Running GPU architecture autodetection")
- set(__cufile ${PROJECT_BINARY_DIR}/detect_cuda_archs.cu)
-
- file(WRITE ${__cufile} ""
- "#include <cstdio>\n"
- "#include <iostream>\n"
- "using namespace std;\n"
- "int main()\n"
- "{\n"
- " int count = 0;\n"
- " if (cudaSuccess != cudaGetDeviceCount(&count)) { return -1; }\n"
- " if (count == 0) { cerr << \"No cuda devices detected\" << endl; return -1; }\n"
- " for (int device = 0; device < count; ++device)\n"
- " {\n"
- " cudaDeviceProp prop;\n"
- " if (cudaSuccess == cudaGetDeviceProperties(&prop, device))\n"
- " std::printf(\"%d.%d \", prop.major, prop.minor);\n"
- " }\n"
- " return 0;\n"
- "}\n")
- if(MSVC)
- #find vcvarsall.bat and run it building msvc environment
- get_filename_component(MY_COMPILER_DIR ${CMAKE_CXX_COMPILER} DIRECTORY)
- find_file(MY_VCVARSALL_BAT vcvarsall.bat "${MY_COMPILER_DIR}/.." "${MY_COMPILER_DIR}/../..")
- execute_process(COMMAND ${MY_VCVARSALL_BAT} && ${CUDA_NVCC_EXECUTABLE} -arch sm_30 --run ${__cufile}
- WORKING_DIRECTORY "${PROJECT_BINARY_DIR}/CMakeFiles/"
- RESULT_VARIABLE __nvcc_res OUTPUT_VARIABLE __nvcc_out
- OUTPUT_STRIP_TRAILING_WHITESPACE)
- else()
- if(CUDA_LIBRARY_PATH)
- set(CUDA_LINK_LIBRARY_PATH "-L${CUDA_LIBRARY_PATH}")
- endif()
- execute_process(COMMAND ${CUDA_NVCC_EXECUTABLE} -arch sm_30 --run ${__cufile} ${CUDA_LINK_LIBRARY_PATH}
- WORKING_DIRECTORY "${PROJECT_BINARY_DIR}/CMakeFiles/"
- RESULT_VARIABLE __nvcc_res OUTPUT_VARIABLE __nvcc_out
- OUTPUT_STRIP_TRAILING_WHITESPACE)
- endif()
- if(__nvcc_res EQUAL 0)
- # nvcc outputs text containing line breaks when building with MSVC.
- # The line below prevents CMake from inserting a variable with line
- # breaks in the cache
- message(STATUS "Found CUDA arch ${__nvcc_out}")
- string(REGEX MATCH "([1-9].[0-9])" __nvcc_out "${__nvcc_out}")
- string(REPLACE "2.1" "2.1(2.0)" __nvcc_out "${__nvcc_out}")
- set(CUDA_gpu_detect_output ${__nvcc_out} CACHE INTERNAL "Returned GPU architetures from mshadow_detect_gpus tool" FORCE)
- else()
- message(WARNING "Running GPU detection script with nvcc failed: ${__nvcc_out}")
- endif()
- endif()
-
- if(NOT CUDA_gpu_detect_output)
- message(WARNING "Automatic GPU detection failed. Building for all known architectures (${mshadow_known_gpu_archs}).")
- set(${out_variable} ${mshadow_known_gpu_archs} PARENT_SCOPE)
- else()
- set(${out_variable} ${CUDA_gpu_detect_output} PARENT_SCOPE)
- endif()
-endfunction()
-
-
-################################################################################################
-# Function for selecting GPU arch flags for nvcc based on CUDA_ARCH_NAME
-# Usage:
-# mshadow_select_nvcc_arch_flags(out_variable)
-function(mshadow_select_nvcc_arch_flags out_variable)
- # List of arch names
- set(__archs_names "Fermi" "Kepler" "Maxwell" "Pascal" "Volta" "All" "Manual")
- set(__archs_name_default "All")
- if(NOT CMAKE_CROSSCOMPILING)
- list(APPEND __archs_names "Auto")
- set(__archs_name_default "Auto")
- endif()
-
- # set CUDA_ARCH_NAME strings (so it will be seen as dropbox in CMake-Gui)
- set(CUDA_ARCH_NAME ${__archs_name_default} CACHE STRING "Select target NVIDIA GPU achitecture.")
- set_property( CACHE CUDA_ARCH_NAME PROPERTY STRINGS "" ${__archs_names} )
- mark_as_advanced(CUDA_ARCH_NAME)
-
- # verify CUDA_ARCH_NAME value
- if(NOT ";${__archs_names};" MATCHES ";${CUDA_ARCH_NAME};")
- string(REPLACE ";" ", " __archs_names "${__archs_names}")
- message(FATAL_ERROR "Only ${__archs_names} architeture names are supported.")
- endif()
-
- if(${CUDA_ARCH_NAME} STREQUAL "Manual")
- set(CUDA_ARCH_BIN ${mshadow_known_gpu_archs} CACHE STRING "Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported")
- set(CUDA_ARCH_PTX "50" CACHE STRING "Specify 'virtual' PTX architectures to build PTX intermediate code for")
- mark_as_advanced(CUDA_ARCH_BIN CUDA_ARCH_PTX)
- else()
- unset(CUDA_ARCH_BIN CACHE)
- unset(CUDA_ARCH_PTX CACHE)
- endif()
-
- if(${CUDA_ARCH_NAME} STREQUAL "Fermi")
- set(__cuda_arch_bin "20 21(20)")
- elseif(${CUDA_ARCH_NAME} STREQUAL "Kepler")
- set(__cuda_arch_bin "30 35")
- elseif(${CUDA_ARCH_NAME} STREQUAL "Maxwell")
- set(__cuda_arch_bin "50")
- elseif(${CUDA_ARCH_NAME} STREQUAL "Pascal")
- set(__cuda_arch_bin "60 61")
- elseif(${CUDA_ARCH_NAME} STREQUAL "Volta")
- set(__cuda_arch_bin "70")
- elseif(${CUDA_ARCH_NAME} STREQUAL "All")
- set(__cuda_arch_bin ${mshadow_known_gpu_archs})
- elseif(${CUDA_ARCH_NAME} STREQUAL "Auto")
- mshadow_detect_installed_gpus(__cuda_arch_bin)
- else() # (${CUDA_ARCH_NAME} STREQUAL "Manual")
- set(__cuda_arch_bin ${CUDA_ARCH_BIN})
- endif()
-
- # remove dots and convert to lists
- string(REGEX REPLACE "\\." "" __cuda_arch_bin "${__cuda_arch_bin}")
- string(REGEX REPLACE "\\." "" __cuda_arch_ptx "${CUDA_ARCH_PTX}")
- string(REGEX MATCHALL "[0-9()]+" __cuda_arch_bin "${__cuda_arch_bin}")
- string(REGEX MATCHALL "[0-9]+" __cuda_arch_ptx "${__cuda_arch_ptx}")
- mshadow_list_unique(__cuda_arch_bin __cuda_arch_ptx)
-
- set(__nvcc_flags "")
- set(__nvcc_archs_readable "")
-
- # Tell NVCC to add binaries for the specified GPUs
- foreach(__arch ${__cuda_arch_bin})
- if(__arch MATCHES "([0-9]+)\\(([0-9]+)\\)")
- # User explicitly specified PTX for the concrete BIN
- list(APPEND __nvcc_flags -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1})
- list(APPEND __nvcc_archs_readable sm_${CMAKE_MATCH_1})
- else()
- # User didn't explicitly specify PTX for the concrete BIN, we assume PTX=BIN
- list(APPEND __nvcc_flags -gencode arch=compute_${__arch},code=sm_${__arch})
- list(APPEND __nvcc_archs_readable sm_${__arch})
- endif()
- endforeach()
-
- # Tell NVCC to add PTX intermediate code for the specified architectures
- foreach(__arch ${__cuda_arch_ptx})
- list(APPEND __nvcc_flags -gencode arch=compute_${__arch},code=compute_${__arch})
- list(APPEND __nvcc_archs_readable compute_${__arch})
- endforeach()
-
- string(REPLACE ";" " " __nvcc_archs_readable "${__nvcc_archs_readable}")
- set(${out_variable} ${__nvcc_flags} PARENT_SCOPE)
- set(${out_variable}_readable ${__nvcc_archs_readable} PARENT_SCOPE)
-endfunction()
-
-################################################################################################
-# Short command for cuda comnpilation
-# Usage:
-# mshadow_cuda_compile(<objlist_variable> <cuda_files>)
-macro(mshadow_cuda_compile objlist_variable)
- foreach(var CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG)
- set(${var}_backup_in_cuda_compile_ "${${var}}")
-
- # we remove /EHa as it generates warnings under windows
- string(REPLACE "/EHa" "" ${var} "${${var}}")
-
- endforeach()
- if(UNIX OR APPLE)
- list(APPEND CUDA_NVCC_FLAGS -Xcompiler -fPIC)
- endif()
-
- if(APPLE)
- list(APPEND CUDA_NVCC_FLAGS -Xcompiler -Wno-unused-function)
- endif()
-
- set(CUDA_NVCC_FLAGS_DEBUG "${CUDA_NVCC_FLAGS_DEBUG} -G")
-
- if(MSVC)
- # disable noisy warnings:
- # 4819: The file contains a character that cannot be represented in the current code page (number).
- list(APPEND CUDA_NVCC_FLAGS -Xcompiler "/wd4819")
- foreach(flag_var
- CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
- CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO)
- if(${flag_var} MATCHES "/MD")
- string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}")
- endif(${flag_var} MATCHES "/MD")
- endforeach(flag_var)
- endif()
-
- # If the build system is a container, make sure the nvcc intermediate files
- # go into the build output area rather than in /tmp, which may run out of space
- if(IS_CONTAINER_BUILD)
- set(CUDA_NVCC_INTERMEDIATE_DIR "${CMAKE_CURRENT_BINARY_DIR}")
- message(STATUS "Container build enabled, so nvcc intermediate files in: ${CUDA_NVCC_INTERMEDIATE_DIR}")
- list(APPEND CUDA_NVCC_FLAGS "--keep --keep-dir ${CUDA_NVCC_INTERMEDIATE_DIR}")
- endif()
-
- cuda_compile(cuda_objcs ${ARGN})
-
- foreach(var CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG)
- set(${var} "${${var}_backup_in_cuda_compile_}")
- unset(${var}_backup_in_cuda_compile_)
- endforeach()
-
- set(${objlist_variable} ${cuda_objcs})
-endmacro()
-
-################################################################################################
-# Short command for cuDNN detection. Believe it soon will be a part of CUDA toolkit distribution.
-# That's why not FindcuDNN.cmake file, but just the macro
-# Usage:
-# detect_cuDNN()
-function(detect_cuDNN)
- set(CUDNN_ROOT "" CACHE PATH "CUDNN root folder")
-
- find_path(CUDNN_INCLUDE cudnn.h
- PATHS ${CUDNN_ROOT} $ENV{CUDNN_ROOT} ${CUDA_TOOLKIT_INCLUDE}
- DOC "Path to cuDNN include directory." )
-
- get_filename_component(__libpath_hist ${CUDA_CUDART_LIBRARY} PATH)
- find_library(CUDNN_LIBRARY NAMES libcudnn.so cudnn.lib # libcudnn_static.a
- PATHS ${CUDNN_ROOT} $ENV{CUDNN_ROOT} ${CUDNN_INCLUDE} ${__libpath_hist}
- DOC "Path to cuDNN library.")
-
- if(CUDNN_INCLUDE AND CUDNN_LIBRARY)
- set(HAVE_CUDNN TRUE PARENT_SCOPE)
- set(CUDNN_FOUND TRUE PARENT_SCOPE)
-
- mark_as_advanced(CUDNN_INCLUDE CUDNN_LIBRARY CUDNN_ROOT)
- message(STATUS "Found cuDNN (include: ${CUDNN_INCLUDE}, library: ${CUDNN_LIBRARY})")
- endif()
-endfunction()
-
-
-################################################################################################
-### Non macro section
-################################################################################################
-
-# Try to prime CUDA_TOOLKIT_ROOT_DIR by looking for libcudart.so
-if(NOT CUDA_TOOLKIT_ROOT_DIR)
- find_library(CUDA_LIBRARY_PATH libcudart.so PATHS ENV LD_LIBRARY_PATH PATH_SUFFIXES lib lib64)
- if(CUDA_LIBRARY_PATH)
- get_filename_component(CUDA_LIBRARY_PATH ${CUDA_LIBRARY_PATH} DIRECTORY)
- set(CUDA_TOOLKIT_ROOT_DIR "${CUDA_LIBRARY_PATH}/..")
- endif()
-endif()
-
-find_package(CUDA 5.5 QUIET REQUIRED)
-find_cuda_helper_libs(curand) # cmake 2.8.7 compartibility which doesn't search for curand
-
-if(NOT CUDA_FOUND)
- return()
-endif()
-
-set(HAVE_CUDA TRUE)
-message(STATUS "CUDA detected: " ${CUDA_VERSION})
-include_directories(SYSTEM ${CUDA_INCLUDE_DIRS})
-list(APPEND mshadow_LINKER_LIBS ${CUDA_CUDART_LIBRARY}
- ${CUDA_curand_LIBRARY} ${CUDA_CUBLAS_LIBRARIES})
-
-# Known NVIDIA GPU achitectures mshadow can be compiled for.
-# This list will be used for CUDA_ARCH_NAME = All option
-if(CUDA_ARCH_ALL)
- set(mshadow_known_gpu_archs "${CUDA_ARCH_ALL}")
-else()
- if(${CUDA_VERSION} EQUAL 9.0 OR ${CUDA_VERSION} GREATER 9.0)
- set(mshadow_known_gpu_archs "30 35 50 52 60 61 70")
- elseif(${CUDA_VERSION} EQUAL 8.0 OR ${CUDA_VERSION} GREATER 8.0)
- set(mshadow_known_gpu_archs "30 35 50 52 60 61")
- else()
- set(mshadow_known_gpu_archs "30 35 50 52")
- endif()
-endif()
-
-# cudnn detection
-if(USE_CUDNN)
- detect_cuDNN()
- if(HAVE_CUDNN)
- add_definitions(-DUSE_CUDNN)
- include_directories(SYSTEM ${CUDNN_INCLUDE})
- list(APPEND mshadow_LINKER_LIBS ${CUDNN_LIBRARY})
- endif()
-endif()
-
-# setting nvcc arch flags
-mshadow_select_nvcc_arch_flags(NVCC_FLAGS_EXTRA)
-list(APPEND CUDA_NVCC_FLAGS ${NVCC_FLAGS_EXTRA})
-message(STATUS "Added CUDA NVCC flags for: ${NVCC_FLAGS_EXTRA_readable}")
-
-# Boost 1.55 workaround, see https://svn.boost.org/trac/boost/ticket/9392 or
-# https://github.com/ComputationalRadiationPhysics/picongpu/blob/master/src/picongpu/CMakeLists.txt
-if(Boost_VERSION EQUAL 105500)
- message(STATUS "Cuda + Boost 1.55: Applying noinline work around")
- # avoid warning for CMake >= 2.8.12
- set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} \"-DBOOST_NOINLINE=__attribute__((noinline))\" ")
-endif()
-
-# disable some nvcc diagnostic that apears in boost, glog, glags, opencv, etc.
-foreach(diag cc_clobber_ignored integer_sign_change useless_using_declaration set_but_not_used)
- list(APPEND CUDA_NVCC_FLAGS -Xcudafe --diag_suppress=${diag})
-endforeach()
-
-# setting default testing device
-if(NOT CUDA_TEST_DEVICE)
- set(CUDA_TEST_DEVICE -1)
-endif()
-
-mark_as_advanced(CUDA_BUILD_CUBIN CUDA_BUILD_EMULATION CUDA_VERBOSE_BUILD)
-mark_as_advanced(CUDA_SDK_ROOT_DIR CUDA_SEPARABLE_COMPILATION)
-
-# Handle clang/libc++ issue
-if(APPLE)
- mshadow_detect_darwin_version(OSX_VERSION)
-
- # OSX 10.9 and higher uses clang/libc++ by default which is incompartible with old CUDA toolkits
- if(OSX_VERSION VERSION_GREATER 10.8)
- # enabled by default if and only if CUDA version is less than 7.0
- mshadow_option(USE_libstdcpp "Use libstdc++ instead of libc++" (CUDA_VERSION VERSION_LESS 7.0))
- endif()
-endif()
diff --git a/3rdparty/mshadow/cmake/Utils.cmake b/3rdparty/mshadow/cmake/Utils.cmake
deleted file mode 100644
index dc464f0..0000000
--- a/3rdparty/mshadow/cmake/Utils.cmake
+++ /dev/null
@@ -1,398 +0,0 @@
-################################################################################################
-# Command alias for debugging messages
-# Usage:
-# dmsg(<message>)
-function(dmsg)
- message(STATUS ${ARGN})
-endfunction()
-
-################################################################################################
-# Removes duplicates from list(s)
-# Usage:
-# mshadow_list_unique(<list_variable> [<list_variable>] [...])
-macro(mshadow_list_unique)
- foreach(__lst ${ARGN})
- if(${__lst})
- list(REMOVE_DUPLICATES ${__lst})
- endif()
- endforeach()
-endmacro()
-
-################################################################################################
-# Clears variables from list
-# Usage:
-# mshadow_clear_vars(<variables_list>)
-macro(mshadow_clear_vars)
- foreach(_var ${ARGN})
- unset(${_var})
- endforeach()
-endmacro()
-
-################################################################################################
-# Removes duplicates from string
-# Usage:
-# mshadow_string_unique(<string_variable>)
-function(mshadow_string_unique __string)
- if(${__string})
- set(__list ${${__string}})
- separate_arguments(__list)
- list(REMOVE_DUPLICATES __list)
- foreach(__e ${__list})
- set(__str "${__str} ${__e}")
- endforeach()
- set(${__string} ${__str} PARENT_SCOPE)
- endif()
-endfunction()
-
-################################################################################################
-# Prints list element per line
-# Usage:
-# mshadow_print_list(<list>)
-function(mshadow_print_list)
- foreach(e ${ARGN})
- message(STATUS ${e})
- endforeach()
-endfunction()
-
-################################################################################################
-# Function merging lists of compiler flags to single string.
-# Usage:
-# mshadow_merge_flag_lists(out_variable <list1> [<list2>] [<list3>] ...)
-function(mshadow_merge_flag_lists out_var)
- set(__result "")
- foreach(__list ${ARGN})
- foreach(__flag ${${__list}})
- string(STRIP ${__flag} __flag)
- set(__result "${__result} ${__flag}")
- endforeach()
- endforeach()
- string(STRIP ${__result} __result)
- set(${out_var} ${__result} PARENT_SCOPE)
-endfunction()
-
-################################################################################################
-# Converts all paths in list to absolute
-# Usage:
-# mshadow_convert_absolute_paths(<list_variable>)
-function(mshadow_convert_absolute_paths variable)
- set(__dlist "")
- foreach(__s ${${variable}})
- get_filename_component(__abspath ${__s} ABSOLUTE)
- list(APPEND __list ${__abspath})
- endforeach()
- set(${variable} ${__list} PARENT_SCOPE)
-endfunction()
-
-################################################################################################
-# Reads set of version defines from the header file
-# Usage:
-# mshadow_parse_header(<file> <define1> <define2> <define3> ..)
-macro(mshadow_parse_header FILENAME FILE_VAR)
- set(vars_regex "")
- set(__parnet_scope OFF)
- set(__add_cache OFF)
- foreach(name ${ARGN})
- if("${name}" STREQUAL "PARENT_SCOPE")
- set(__parnet_scope ON)
- elseif("${name}" STREQUAL "CACHE")
- set(__add_cache ON)
- elseif(vars_regex)
- set(vars_regex "${vars_regex}|${name}")
- else()
- set(vars_regex "${name}")
- endif()
- endforeach()
- if(EXISTS "${FILENAME}")
- file(STRINGS "${FILENAME}" ${FILE_VAR} REGEX "#define[ \t]+(${vars_regex})[ \t]+[0-9]+" )
- else()
- unset(${FILE_VAR})
- endif()
- foreach(name ${ARGN})
- if(NOT "${name}" STREQUAL "PARENT_SCOPE" AND NOT "${name}" STREQUAL "CACHE")
- if(${FILE_VAR})
- if(${FILE_VAR} MATCHES ".+[ \t]${name}[ \t]+([0-9]+).*")
- string(REGEX REPLACE ".+[ \t]${name}[ \t]+([0-9]+).*" "\\1" ${name} "${${FILE_VAR}}")
- else()
- set(${name} "")
- endif()
- if(__add_cache)
- set(${name} ${${name}} CACHE INTERNAL "${name} parsed from ${FILENAME}" FORCE)
- elseif(__parnet_scope)
- set(${name} "${${name}}" PARENT_SCOPE)
- endif()
- else()
- unset(${name} CACHE)
- endif()
- endif()
- endforeach()
-endmacro()
-
-################################################################################################
-# Reads single version define from the header file and parses it
-# Usage:
-# mshadow_parse_header_single_define(<library_name> <file> <define_name>)
-function(mshadow_parse_header_single_define LIBNAME HDR_PATH VARNAME)
- set(${LIBNAME}_H "")
- if(EXISTS "${HDR_PATH}")
- file(STRINGS "${HDR_PATH}" ${LIBNAME}_H REGEX "^#define[ \t]+${VARNAME}[ \t]+\"[^\"]*\".*$" LIMIT_COUNT 1)
- endif()
-
- if(${LIBNAME}_H)
- string(REGEX REPLACE "^.*[ \t]${VARNAME}[ \t]+\"([0-9]+).*$" "\\1" ${LIBNAME}_VERSION_MAJOR "${${LIBNAME}_H}")
- string(REGEX REPLACE "^.*[ \t]${VARNAME}[ \t]+\"[0-9]+\\.([0-9]+).*$" "\\1" ${LIBNAME}_VERSION_MINOR "${${LIBNAME}_H}")
- string(REGEX REPLACE "^.*[ \t]${VARNAME}[ \t]+\"[0-9]+\\.[0-9]+\\.([0-9]+).*$" "\\1" ${LIBNAME}_VERSION_PATCH "${${LIBNAME}_H}")
- set(${LIBNAME}_VERSION_MAJOR ${${LIBNAME}_VERSION_MAJOR} ${ARGN} PARENT_SCOPE)
- set(${LIBNAME}_VERSION_MINOR ${${LIBNAME}_VERSION_MINOR} ${ARGN} PARENT_SCOPE)
- set(${LIBNAME}_VERSION_PATCH ${${LIBNAME}_VERSION_PATCH} ${ARGN} PARENT_SCOPE)
- set(${LIBNAME}_VERSION_STRING "${${LIBNAME}_VERSION_MAJOR}.${${LIBNAME}_VERSION_MINOR}.${${LIBNAME}_VERSION_PATCH}" PARENT_SCOPE)
-
- # append a TWEAK version if it exists:
- set(${LIBNAME}_VERSION_TWEAK "")
- if("${${LIBNAME}_H}" MATCHES "^.*[ \t]${VARNAME}[ \t]+\"[0-9]+\\.[0-9]+\\.[0-9]+\\.([0-9]+).*$")
- set(${LIBNAME}_VERSION_TWEAK "${CMAKE_MATCH_1}" ${ARGN} PARENT_SCOPE)
- endif()
- if(${LIBNAME}_VERSION_TWEAK)
- set(${LIBNAME}_VERSION_STRING "${${LIBNAME}_VERSION_STRING}.${${LIBNAME}_VERSION_TWEAK}" ${ARGN} PARENT_SCOPE)
- else()
- set(${LIBNAME}_VERSION_STRING "${${LIBNAME}_VERSION_STRING}" ${ARGN} PARENT_SCOPE)
- endif()
- endif()
-endfunction()
-
-########################################################################################################
-# An option that the user can select. Can accept condition to control when option is available for user.
-# Usage:
-# mshadow_option(<option_variable> "doc string" <initial value or boolean expression> [IF <condition>])
-function(mshadow_option variable description value)
- set(__value ${value})
- set(__condition "")
- set(__varname "__value")
- foreach(arg ${ARGN})
- if(arg STREQUAL "IF" OR arg STREQUAL "if")
- set(__varname "__condition")
- else()
- list(APPEND ${__varname} ${arg})
- endif()
- endforeach()
- unset(__varname)
- if("${__condition}" STREQUAL "")
- set(__condition 2 GREATER 1)
- endif()
-
- if(${__condition})
- if("${__value}" MATCHES ";")
- if(${__value})
- option(${variable} "${description}" ON)
- else()
- option(${variable} "${description}" OFF)
- endif()
- elseif(DEFINED ${__value})
- if(${__value})
- option(${variable} "${description}" ON)
- else()
- option(${variable} "${description}" OFF)
- endif()
- else()
- option(${variable} "${description}" ${__value})
- endif()
- else()
- unset(${variable} CACHE)
- endif()
-endfunction()
-
-################################################################################################
-# Utility macro for comparing two lists. Used for CMake debugging purposes
-# Usage:
-# mshadow_compare_lists(<list_variable> <list2_variable> [description])
-function(mshadow_compare_lists list1 list2 desc)
- set(__list1 ${${list1}})
- set(__list2 ${${list2}})
- list(SORT __list1)
- list(SORT __list2)
- list(LENGTH __list1 __len1)
- list(LENGTH __list2 __len2)
-
- if(NOT ${__len1} EQUAL ${__len2})
- message(FATAL_ERROR "Lists are not equal. ${__len1} != ${__len2}. ${desc}")
- endif()
-
- foreach(__i RANGE 1 ${__len1})
- math(EXPR __index "${__i}- 1")
- list(GET __list1 ${__index} __item1)
- list(GET __list2 ${__index} __item2)
- if(NOT ${__item1} STREQUAL ${__item2})
- message(FATAL_ERROR "Lists are not equal. Differ at element ${__index}. ${desc}")
- endif()
- endforeach()
-endfunction()
-
-################################################################################################
-# Command for disabling warnings for different platforms (see below for gcc and VisualStudio)
-# Usage:
-# mshadow_warnings_disable(<CMAKE_[C|CXX]_FLAGS[_CONFIGURATION]> -Wshadow /wd4996 ..,)
-macro(mshadow_warnings_disable)
- set(_flag_vars "")
- set(_msvc_warnings "")
- set(_gxx_warnings "")
-
- foreach(arg ${ARGN})
- if(arg MATCHES "^CMAKE_")
- list(APPEND _flag_vars ${arg})
- elseif(arg MATCHES "^/wd")
- list(APPEND _msvc_warnings ${arg})
- elseif(arg MATCHES "^-W")
- list(APPEND _gxx_warnings ${arg})
- endif()
- endforeach()
-
- if(NOT _flag_vars)
- set(_flag_vars CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
- endif()
-
- if(MSVC AND _msvc_warnings)
- foreach(var ${_flag_vars})
- foreach(warning ${_msvc_warnings})
- set(${var} "${${var}} ${warning}")
- endforeach()
- endforeach()
- elseif((CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANGXX) AND _gxx_warnings)
- foreach(var ${_flag_vars})
- foreach(warning ${_gxx_warnings})
- if(NOT warning MATCHES "^-Wno-")
- string(REPLACE "${warning}" "" ${var} "${${var}}")
- string(REPLACE "-W" "-Wno-" warning "${warning}")
- endif()
- set(${var} "${${var}} ${warning}")
- endforeach()
- endforeach()
- endif()
- mshadow_clear_vars(_flag_vars _msvc_warnings _gxx_warnings)
-endmacro()
-
-################################################################################################
-# Helper function get current definitions
-# Usage:
-# mshadow_get_current_definitions(<definitions_variable>)
-function(mshadow_get_current_definitions definitions_var)
- get_property(current_definitions DIRECTORY PROPERTY COMPILE_DEFINITIONS)
- set(result "")
-
- foreach(d ${current_definitions})
- list(APPEND result -D${d})
- endforeach()
-
- mshadow_list_unique(result)
- set(${definitions_var} ${result} PARENT_SCOPE)
-endfunction()
-
-################################################################################################
-# Helper function get current includes/definitions
-# Usage:
-# mshadow_get_current_cflags(<cflagslist_variable>)
-function(mshadow_get_current_cflags cflags_var)
- get_property(current_includes DIRECTORY PROPERTY INCLUDE_DIRECTORIES)
- mshadow_convert_absolute_paths(current_includes)
- mshadow_get_current_definitions(cflags)
-
- foreach(i ${current_includes})
- list(APPEND cflags "-I${i}")
- endforeach()
-
- mshadow_list_unique(cflags)
- set(${cflags_var} ${cflags} PARENT_SCOPE)
-endfunction()
-
-################################################################################################
-# Helper function to parse current linker libs into link directories, libflags and osx frameworks
-# Usage:
-# mshadow_parse_linker_libs(<mshadow_LINKER_LIBS_var> <directories_var> <libflags_var> <frameworks_var>)
-function(mshadow_parse_linker_libs mshadow_LINKER_LIBS_variable folders_var flags_var frameworks_var)
-
- set(__unspec "")
- set(__debug "")
- set(__optimized "")
- set(__framework "")
- set(__varname "__unspec")
-
- # split libs into debug, optimized, unspecified and frameworks
- foreach(list_elem ${${mshadow_LINKER_LIBS_variable}})
- if(list_elem STREQUAL "debug")
- set(__varname "__debug")
- elseif(list_elem STREQUAL "optimized")
- set(__varname "__optimized")
- elseif(list_elem MATCHES "^-framework[ \t]+([^ \t].*)")
- list(APPEND __framework -framework ${CMAKE_MATCH_1})
- else()
- list(APPEND ${__varname} ${list_elem})
- set(__varname "__unspec")
- endif()
- endforeach()
-
- # attach debug or optimized libs to unspecified according to current configuration
- if(CMAKE_BUILD_TYPE MATCHES "Debug")
- set(__libs ${__unspec} ${__debug})
- else()
- set(__libs ${__unspec} ${__optimized})
- endif()
-
- set(libflags "")
- set(folders "")
-
- # convert linker libraries list to link flags
- foreach(lib ${__libs})
- if(TARGET ${lib})
- list(APPEND folders $<TARGET_LINKER_FILE_DIR:${lib}>)
- list(APPEND libflags -l${lib})
- elseif(lib MATCHES "^-l.*")
- list(APPEND libflags ${lib})
- elseif(IS_ABSOLUTE ${lib})
- get_filename_component(name_we ${lib} NAME_WE)
- get_filename_component(folder ${lib} PATH)
-
- string(REGEX MATCH "^lib(.*)" __match ${name_we})
- list(APPEND libflags -l${CMAKE_MATCH_1})
- list(APPEND folders ${folder})
- else()
- message(FATAL_ERROR "Logic error. Need to update cmake script")
- endif()
- endforeach()
-
- mshadow_list_unique(libflags folders)
-
- set(${folders_var} ${folders} PARENT_SCOPE)
- set(${flags_var} ${libflags} PARENT_SCOPE)
- set(${frameworks_var} ${__framework} PARENT_SCOPE)
-endfunction()
-
-################################################################################################
-# Helper function to detect Darwin version, i.e. 10.8, 10.9, 10.10, ....
-# Usage:
-# mshadow_detect_darwin_version(<version_variable>)
-function(mshadow_detect_darwin_version output_var)
- if(APPLE)
- execute_process(COMMAND /usr/bin/sw_vers -productVersion
- RESULT_VARIABLE __sw_vers OUTPUT_VARIABLE __sw_vers_out
- ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
-
- set(${output_var} ${__sw_vers_out} PARENT_SCOPE)
- else()
- set(${output_var} "" PARENT_SCOPE)
- endif()
-endfunction()
-
-################################################################################################
-# Convenient command to setup source group for IDEs that support this feature (VS, XCode)
-# Usage:
-# caffe_source_group(<group> GLOB[_RECURSE] <globbing_expression>)
-function(mshadow_source_group group)
- cmake_parse_arguments(CAFFE_SOURCE_GROUP "" "" "GLOB;GLOB_RECURSE" ${ARGN})
- if(CAFFE_SOURCE_GROUP_GLOB)
- file(GLOB srcs1 ${CAFFE_SOURCE_GROUP_GLOB})
- source_group(${group} FILES ${srcs1})
- endif()
-
- if(CAFFE_SOURCE_GROUP_GLOB_RECURSE)
- file(GLOB_RECURSE srcs2 ${CAFFE_SOURCE_GROUP_GLOB_RECURSE})
- source_group(${group} FILES ${srcs2})
- endif()
-endfunction()
\ No newline at end of file
diff --git a/3rdparty/mshadow/cmake/mshadow.cmake b/3rdparty/mshadow/cmake/mshadow.cmake
deleted file mode 100644
index 1ef7698..0000000
--- a/3rdparty/mshadow/cmake/mshadow.cmake
+++ /dev/null
@@ -1,91 +0,0 @@
-set(mshadow_LINKER_LIBS "")
-
-set(BLAS "Open" CACHE STRING "Selected BLAS library")
-set_property(CACHE BLAS PROPERTY STRINGS "Atlas;Open;MKL")
-
-if(DEFINED USE_BLAS)
- set(BLAS "${USE_BLAS}")
-else()
- if(USE_MKL_IF_AVAILABLE)
- if(NOT MKL_FOUND)
- find_package(MKL)
- endif()
- if(MKL_FOUND)
- set(BLAS "MKL")
- endif()
- endif()
-endif()
-
-if(BLAS STREQUAL "Atlas" OR BLAS STREQUAL "atlas")
- find_package(Atlas REQUIRED)
- include_directories(SYSTEM ${Atlas_INCLUDE_DIR})
- list(APPEND mshadow_LINKER_LIBS ${Atlas_LIBRARIES})
- add_definitions(-DMSHADOW_USE_CBLAS=1)
- add_definitions(-DMSHADOW_USE_MKL=0)
-elseif(BLAS STREQUAL "Open" OR BLAS STREQUAL "open")
- find_package(OpenBLAS REQUIRED)
- include_directories(SYSTEM ${OpenBLAS_INCLUDE_DIR})
- list(APPEND mshadow_LINKER_LIBS ${OpenBLAS_LIB})
- add_definitions(-DMSHADOW_USE_CBLAS=1)
- add_definitions(-DMSHADOW_USE_MKL=0)
-elseif(BLAS STREQUAL "MKL" OR BLAS STREQUAL "mkl")
- find_package(MKL REQUIRED)
- include_directories(SYSTEM ${MKL_INCLUDE_DIR})
- list(APPEND mshadow_LINKER_LIBS ${MKL_LIBRARIES})
- add_definitions(-DMSHADOW_USE_CBLAS=0)
- add_definitions(-DMSHADOW_USE_MKL=1)
-elseif(BLAS STREQUAL "apple")
- find_package(Accelerate REQUIRED)
- include_directories(SYSTEM ${Accelerate_INCLUDE_DIR})
- list(APPEND mshadow_LINKER_LIBS ${Accelerate_LIBRARIES})
- add_definitions(-DMSHADOW_USE_MKL=0)
- add_definitions(-DMSHADOW_USE_CBLAS=1)
-endif()
-
-if(SUPPORT_MSSE2)
- add_definitions(-DMSHADOW_USE_SSE=1)
-else()
- add_definitions(-DMSHADOW_USE_SSE=0)
-endif()
-
-if(NOT DEFINED SUPPORT_F16C AND NOT MSVC)
- check_cxx_compiler_flag("-mf16c" COMPILER_SUPPORT_MF16C)
- if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
- execute_process(COMMAND cat /proc/cpuinfo
- COMMAND grep flags
- COMMAND grep f16c
- OUTPUT_VARIABLE CPU_SUPPORT_F16C)
- elseif(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
- execute_process(COMMAND sysctl -a
- COMMAND grep machdep.cpu.features
- COMMAND grep F16C
- OUTPUT_VARIABLE CPU_SUPPORT_F16C)
- endif()
- if(NOT CPU_SUPPORT_F16C)
- message("CPU does not support F16C instructions")
- endif()
- if(CPU_SUPPORT_F16C AND COMPILER_SUPPORT_MF16C)
- set(SUPPORT_F16C TRUE)
- endif()
-endif()
-
-if(SUPPORT_F16C)
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mf16c")
-else()
- add_definitions(-DMSHADOW_USE_F16C=0)
-endif()
-
-if(USE_CUDA)
- find_package(CUDA 5.5 QUIET)
- find_cuda_helper_libs(curand)
- if(NOT CUDA_FOUND)
- message(FATAL_ERROR "-- CUDA is disabled.")
- endif()
- add_definitions(-DMSHADOW_USE_CUDA=1)
- add_definitions(-DMSHADOW_FORCE_STREAM)
- include_directories(SYSTEM ${CUDA_INCLUDE_DIRS})
- list(APPEND mshadow_LINKER_LIBS ${CUDA_CUDART_LIBRARY}
- ${CUDA_curand_LIBRARY} ${CUDA_CUBLAS_LIBRARIES})
-else()
- add_definitions(-DMSHADOW_USE_CUDA=0)
-endif()
diff --git a/3rdparty/mshadow/cmake/mshadowUtils.cmake b/3rdparty/mshadow/cmake/mshadowUtils.cmake
deleted file mode 100644
index d4b8bfc..0000000
--- a/3rdparty/mshadow/cmake/mshadowUtils.cmake
+++ /dev/null
@@ -1,2 +0,0 @@
-include("${CMAKE_CURRENT_LIST_DIR}/Utils.cmake")
-
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8f0974d..12bc195 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -18,39 +18,57 @@ endif()
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/Utils.cmake)
+include(CMakeDependentOption)
#Some things have order. This must be put in front alone
-mxnet_option(USE_CUDA "Build with CUDA support" ON)
-mxnet_option(USE_OLDCMAKECUDA "Build with old cmake cuda" OFF)
-mxnet_option(USE_NCCL "Use NVidia NCCL with CUDA" OFF)
-mxnet_option(USE_OPENCV "Build with OpenCV support" ON)
-mxnet_option(USE_OPENMP "Build with Openmp support" ON)
-mxnet_option(USE_CUDNN "Build with cudnn support" ON) # one could set CUDNN_ROOT for search path
-mxnet_option(USE_SSE "Build with x86 SSE instruction support" ON IF NOT ARM)
-mxnet_option(USE_F16C "Build with x86 F16C instruction support" ON) # autodetects support if ON
-mxnet_option(USE_LAPACK "Build with lapack support" ON)
-mxnet_option(USE_MKL_IF_AVAILABLE "Use MKL if found" ON)
-mxnet_option(USE_MKLDNN "Build with MKL-DNN support" ON IF USE_MKL_IF_AVAILABLE AND (NOT APPLE) AND (NOT MSVC) AND (CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "x86_64") AND (NOT CMAKE_CROSSCOMPILING))
-mxnet_option(USE_OPERATOR_TUNING "Enable auto-tuning of operators" ON IF NOT MSVC)
-mxnet_option(USE_GPERFTOOLS "Build with GPerfTools support" OFF)
-mxnet_option(USE_JEMALLOC "Build with Jemalloc support" ON)
-mxnet_option(USE_DIST_KVSTORE "Build with DIST_KVSTORE support" OFF)
-mxnet_option(USE_PLUGINS_WARPCTC "Use WARPCTC Plugins" OFF)
-mxnet_option(USE_PLUGIN_CAFFE "Use Caffe Plugin" OFF)
-mxnet_option(USE_CPP_PACKAGE "Build C++ Package" OFF)
-mxnet_option(USE_MXNET_LIB_NAMING "Use MXNet library naming conventions." ON)
-mxnet_option(USE_GPROF "Compile with gprof (profiling) flag" OFF)
-mxnet_option(USE_CXX14_IF_AVAILABLE "Build with C++14 if the compiler supports it" OFF)
-mxnet_option(USE_VTUNE "Enable use of Intel Amplifier XE (VTune)" OFF) # one could set VTUNE_ROOT for search path
-mxnet_option(USE_TVM_OP "Enable use of TVM operator build system." OFF)
-mxnet_option(ENABLE_CUDA_RTC "Build with CUDA runtime compilation support" ON)
-mxnet_option(BUILD_CPP_EXAMPLES "Build cpp examples" ON)
-mxnet_option(INSTALL_EXAMPLES "Install the example source files." OFF)
-mxnet_option(USE_SIGNAL_HANDLER "Print stack traces on segfaults." ON)
-mxnet_option(USE_TENSORRT "Enable inference optimization with TensorRT." OFF)
-mxnet_option(USE_ASAN "Enable Clang/GCC ASAN sanitizers." OFF)
-mxnet_option(ENABLE_TESTCOVERAGE "Enable compilation with test coverage metric output" OFF)
-mxnet_option(USE_INT64_TENSOR_SIZE "Use int64_t to represent the total number of elements in a tensor" OFF)
-mxnet_option(BUILD_CYTHON_MODULES "Build cython modules." OFF)
+option(USE_CUDA "Build with CUDA support" ON)
+set(MXNET_CUDA_ARCH "Auto" CACHE STRING "Target NVIDIA GPU architecture.
+Format: Auto | Common | All | LIST(ARCH_AND_PTX ...)
+- \"Auto\" detects local machine GPU compute arch at runtime.
+- \"Common\" and \"All\" cover common and entire subsets of architectures
+- ARCH_AND_PTX : NAME | NUM.NUM | NUM.NUM(NUM.NUM) | NUM.NUM+PTX
+- NAME: Fermi Kepler Maxwell Kepler+Tegra Kepler+Tesla Maxwell+Tegra Pascal Volta Turing
+- NUM: Any number. Only those pairs are currently accepted by NVCC though:
+ 2.0 2.1 3.0 3.2 3.5 3.7 5.0 5.2 5.3 6.0 6.2 7.0 7.2 7.5")
+option(USE_NCCL "Use NVIDIA NCCL with CUDA" OFF)
+option(USE_OPENCV "Build with OpenCV support" ON)
+option(USE_OPENMP "Build with OpenMP support" ON)
+cmake_dependent_option(USE_CUDNN "Build with cudnn support" ON "USE_CUDA" OFF) # one could set CUDNN_ROOT for search path
+cmake_dependent_option(USE_SSE "Build with x86 SSE instruction support" ON "NOT ARM" OFF)
+option(USE_F16C "Build with x86 F16C instruction support" ON) # autodetects support if ON
+option(USE_LAPACK "Build with lapack support" ON)
+option(USE_MKL_IF_AVAILABLE "Use MKL if found" ON)
+if(USE_MKL_IF_AVAILABLE AND (NOT APPLE) AND (NOT MSVC) AND (CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "x86_64") AND (NOT CMAKE_CROSSCOMPILING))
+ option(USE_MKLDNN "Build with MKL-DNN support" ON)
+else()
+ option(USE_MKLDNN "Build with MKL-DNN support" OFF)
+endif()
+if(NOT MSVC)
+ option(USE_OPERATOR_TUNING "Enable auto-tuning of operators" ON)
+else()
+ option(USE_OPERATOR_TUNING "Enable auto-tuning of operators" OFF)
+endif()
+option(USE_GPERFTOOLS "Build with GPerfTools support" OFF)
+option(USE_JEMALLOC "Build with Jemalloc support" ON)
+option(USE_DIST_KVSTORE "Build with DIST_KVSTORE support" OFF)
+option(USE_PLUGINS_WARPCTC "Use WARPCTC Plugins" OFF)
+option(USE_PLUGIN_CAFFE "Use Caffe Plugin" OFF)
+option(USE_CPP_PACKAGE "Build C++ Package" OFF)
+option(USE_MXNET_LIB_NAMING "Use MXNet library naming conventions." ON)
+option(USE_GPROF "Compile with gprof (profiling) flag" OFF)
+option(USE_CXX14_IF_AVAILABLE "Build with C++14 if the compiler supports it" OFF)
+option(USE_VTUNE "Enable use of Intel Amplifier XE (VTune)" OFF) # one could set VTUNE_ROOT for search path
+option(USE_TVM_OP "Enable use of TVM operator build system." OFF)
+option(ENABLE_CUDA_RTC "Build with CUDA runtime compilation support" ON)
+option(BUILD_CPP_EXAMPLES "Build cpp examples" ON)
+option(INSTALL_EXAMPLES "Install the example source files." OFF)
+option(USE_SIGNAL_HANDLER "Print stack traces on segfaults." ON)
+option(USE_TENSORRT "Enable inference optimization with TensorRT." OFF)
+option(USE_ASAN "Enable Clang/GCC ASAN sanitizers." OFF)
+option(ENABLE_TESTCOVERAGE "Enable compilation with test coverage metric output" OFF)
+option(USE_INT64_TENSOR_SIZE "Use int64_t to represent the total number of elements in a tensor" OFF)
+option(BUILD_CYTHON_MODULES "Build cython modules." OFF)
+cmake_dependent_option(USE_SPLIT_ARCH_DLL "Build a separate DLL for each Cuda arch (Windows only)." ON "MSVC" OFF)
+
message(STATUS "CMAKE_CROSSCOMPILING ${CMAKE_CROSSCOMPILING}")
message(STATUS "CMAKE_HOST_SYSTEM_PROCESSOR ${CMAKE_HOST_SYSTEM_PROCESSOR}")
@@ -62,31 +80,29 @@ if(USE_TVM_OP)
add_definitions(-DMXNET_USE_TVM_OP=1)
endif()
-if(USE_CUDA AND NOT USE_OLDCMAKECUDA)
- message(STATUS "CMake version '${CMAKE_VERSION}' using generator '${CMAKE_GENERATOR}'")
- if(
- (
- (${CMAKE_GENERATOR} MATCHES "Visual Studio.*")
- OR (${CMAKE_GENERATOR} MATCHES "Xcode.*")
- OR (${CMAKE_GENERATOR} STREQUAL "Unix Makefiles")
- ) AND (
- (${CMAKE_VERSION} VERSION_GREATER "3.9.0") OR (${CMAKE_VERSION} VERSION_EQUAL "3.9.0")
- )
- )
- set(FIRST_CUDA TRUE)
- project(mxnet C CXX CUDA)
- else()
- set(FIRST_CUDA FALSE)
- set(USE_OLDCMAKECUDA TRUE)
- project(mxnet C CXX)
+message(STATUS "CMake version '${CMAKE_VERSION}' using generator '${CMAKE_GENERATOR}'")
+project(mxnet C CXX)
+if(USE_CUDA)
+ cmake_minimum_required(VERSION 3.13.2) # CUDA 10 (Turing) detection available starting 3.13.2
+ enable_language(CUDA)
+ set(CMAKE_CUDA_STANDARD 11)
+ include(CheckCXXCompilerFlag)
+ if(USE_CXX14_IF_AVAILABLE)
+ check_cxx_compiler_flag("-std=c++14" SUPPORT_CXX14)
+ if (SUPPORT_CXX14)
+ set(CMAKE_CUDA_STANDARD 14)
+ endif()
endif()
-else()
- project(mxnet C CXX)
+ set(CMAKE_CUDA_STANDARD_REQUIRED ON)
endif()
+if(UNIX)
+ set(CMAKE_POSITION_INDEPENDENT_CODE ON)
+endif()
if(MSVC)
set(SYSTEM_ARCHITECTURE x86_64)
+ enable_language(ASM_MASM)
else()
execute_process(COMMAND uname -m COMMAND tr -d '\n' OUTPUT_VARIABLE SYSTEM_ARCHITECTURE)
endif()
@@ -119,7 +135,7 @@ if(MSVC)
endif()
set(CMAKE_C_FLAGS "/MP")
set(CMAKE_CXX_FLAGS "${CMAKE_C_FLAGS} /bigobj")
-else(MSVC)
+else()
include(CheckCXXCompilerFlag)
if(USE_CXX14_IF_AVAILABLE)
check_cxx_compiler_flag("-std=c++14" SUPPORT_CXX14)
@@ -132,6 +148,7 @@ else(MSVC)
check_cxx_compiler_flag("-msse3" SUPPORT_MSSE3)
check_cxx_compiler_flag("-msse2" SUPPORT_MSSE2)
else()
+ set(SUPPORT_MSSE3 FALSE)
set(SUPPORT_MSSE2 FALSE)
endif()
# For cross complication, turn off flag if target device does not support it
@@ -148,7 +165,6 @@ else(MSVC)
else()
add_definitions(-DMSHADOW_USE_F16C=0)
endif()
- set(CMAKE_POSITION_INDEPENDENT_CODE ON)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wno-unknown-pragmas -Wno-sign-compare")
if ("${CMAKE_CXX_COMPILER_ID}" MATCHES ".*Clang$")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-braced-scalar-init")
@@ -166,8 +182,12 @@ else(MSVC)
endif()
if(SUPPORT_MSSE3)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse3")
+ add_definitions(-DMSHADOW_USE_SSE=1)
elseif(SUPPORT_MSSE2)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse2")
+ add_definitions(-DMSHADOW_USE_SSE=1)
+ else()
+ add_definitions(-DMSHADOW_USE_SSE=0)
endif()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_C_FLAGS}")
if(SUPPORT_CXX14)
@@ -280,45 +300,6 @@ endif()
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src)
-if(USE_CUDA)
- find_package(CUDA REQUIRED)
- add_definitions(-DMSHADOW_USE_CUDA=1)
- if(FIRST_CUDA AND (NOT USE_OLDCMAKECUDA))
- if(NOT CUDA_TOOLSET)
- set(CUDA_TOOLSET "${CUDA_VERSION_STRING}")
- endif()
- else()
- set(FIRST_CUDA FALSE)
- endif()
- if(USE_NCCL)
- find_package(NCCL)
- if(NCCL_FOUND)
- include_directories(${NCCL_INCLUDE_DIRS})
- list(APPEND mxnet_LINKER_LIBS ${NCCL_LIBRARIES})
- else()
- message(WARNING "Could not find NCCL libraries")
- endif()
- endif()
- if(UNIX)
- find_package(NVTX)
- if(NVTX_FOUND)
- include_directories(${NVTX_INCLUDE_DIRS})
- list(APPEND mxnet_LINKER_LIBS ${NVTX_LIBRARIES})
- add_definitions(-DMXNET_USE_NVTX=1)
- else()
- message(WARNING "Could not find NVTX libraries")
- endif()
- endif()
-else()
- add_definitions(-DMSHADOW_USE_CUDA=0)
-endif()
-
-if(NCCL_FOUND)
- add_definitions(-DMXNET_USE_NCCL=1)
-else()
- add_definitions(-DMXNET_USE_NCCL=0)
-endif()
-
if (USE_INT64_TENSOR_SIZE)
message(STATUS "Using 64-bit integer for tensor size")
add_definitions(-DMSHADOW_INT64_TENSOR_SIZE=1)
@@ -327,21 +308,6 @@ else()
endif()
include(cmake/ChooseBlas.cmake)
-if(USE_CUDA AND FIRST_CUDA)
- include(3rdparty/mshadow/cmake/Utils.cmake)
- include(cmake/FirstClassLangCuda.cmake)
- include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
-else()
- if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/mshadow/cmake)
- include(3rdparty/mshadow/cmake/mshadow.cmake)
- include(3rdparty/mshadow/cmake/Utils.cmake)
- include(3rdparty/mshadow/cmake/Cuda.cmake)
- else()
- include(mshadowUtils)
- include(Cuda)
- include(mshadow)
- endif()
-endif()
if(USE_ASAN)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-omit-frame-pointer -fsanitize=address")
@@ -508,13 +474,15 @@ add_subdirectory(${GTEST_ROOT})
find_package(GTest REQUIRED)
# cudnn detection
-if(USE_CUDNN AND USE_CUDA)
- detect_cuDNN()
- if(HAVE_CUDNN)
+if(USE_CUDNN)
+ find_package(CUDNN)
+ if(CUDNN_FOUND)
add_definitions(-DUSE_CUDNN)
include_directories(SYSTEM ${CUDNN_INCLUDE})
list(APPEND mxnet_LINKER_LIBS ${CUDNN_LIBRARY})
- add_definitions(-DMSHADOW_USE_CUDNN=1)
+ add_definitions(-DMSHADOW_USE_CUDNN=1)
+ else()
+ set(USE_CUDNN OFF)
endif()
endif()
@@ -522,9 +490,7 @@ if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/dmlc-core/cmake)
add_subdirectory("3rdparty/dmlc-core")
endif()
-if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/mshadow/cmake)
- add_subdirectory("3rdparty/mshadow")
-endif()
+add_subdirectory("3rdparty/mshadow")
FILE(GLOB_RECURSE SOURCE "src/*.cc" "src/*.h" "include/*.h")
FILE(GLOB_RECURSE CUDA "src/*.cu" "src/*.cuh")
@@ -624,61 +590,63 @@ if(MSVC)
endif()
if(USE_CUDA)
- if(FIRST_CUDA)
- mshadow_select_nvcc_arch_flags(NVCC_FLAGS_ARCH)
- string(REPLACE ";" " " NVCC_FLAGS_ARCH "${NVCC_FLAGS_ARCH}")
- set(CMAKE_CUDA_FLAGS "${NVCC_FLAGS_ARCH}")
- list(APPEND mxnet_LINKER_LIBS cublas cufft cusolver curand)
- if(ENABLE_CUDA_RTC)
- list(APPEND mxnet_LINKER_LIBS nvrtc cuda)
- add_definitions(-DMXNET_ENABLE_CUDA_RTC=1)
+ # CUDA_SELECT_NVCC_ARCH_FLAGS is not deprecated, though part of deprecated
+ # FindCUDA https://gitlab.kitware.com/cmake/cmake/issues/19199
+ include(${CMAKE_ROOT}/Modules/FindCUDA/select_compute_arch.cmake)
+ CUDA_SELECT_NVCC_ARCH_FLAGS(CUDA_ARCH_FLAGS ${MXNET_CUDA_ARCH})
+ message("-- CUDA: Using the following NVCC architecture flags ${CUDA_ARCH_FLAGS}")
+ set(arch_code_list)
+ foreach(arch_str ${CUDA_ARCH_FLAGS})
+ if((arch_str MATCHES ".*sm_[0-9]+"))
+ string( REGEX REPLACE ".*sm_([0-9]+)" "\\1" arch_code ${arch_str} )
+ list(APPEND arch_code_list ${arch_code})
endif()
- list(APPEND SOURCE ${CUDA})
- add_definitions(-DMXNET_USE_CUDA=1)
- link_directories(${CUDA_TOOLKIT_ROOT_DIR}/lib64)
- else()
- list(APPEND CUDA_INCLUDE_DIRS ${INCLUDE_DIRECTORIES})
- # define preprocessor macro so that we will not include the generated forcelink header
- if(ENABLE_CUDA_RTC)
+ endforeach()
+
+ string(REPLACE ";" " " CUDA_ARCH_FLAGS_SPACES "${CUDA_ARCH_FLAGS}")
+
+
+ find_package(CUDAToolkit REQUIRED cublas cufft cusolver curand
+ OPTIONAL_COMPONENTS nvToolsExt nvrtc)
+
+ list(APPEND mxnet_LINKER_LIBS CUDA::cudart CUDA::cublas CUDA::cufft CUDA::cusolver CUDA::curand)
+ if(ENABLE_CUDA_RTC)
+ if(CUDA_nvrtc_LIBRARY)
+ list(APPEND mxnet_LINKER_LIBS CUDA::nvrtc cuda)
add_definitions(-DMXNET_ENABLE_CUDA_RTC=1)
+ else()
+ message(FATAL_ERROR "ENABLE_CUDA_RTC=ON, but failed to find NVRTC. CMake will exit." )
endif()
- # Create '.cmake' files for cuda compiles given definitions added thus far
- mshadow_cuda_compile(cuda_objs ${CUDA})
- if(MSVC)
- if(ENABLE_CUDA_RTC)
- FIND_LIBRARY(CUDA_nvrtc_LIBRARY nvrtc "${CUDA_TOOLKIT_ROOT_DIR}/lib/x64" "${CUDA_TOOLKIT_ROOT_DIR}/lib/win32")
- list(APPEND mxnet_LINKER_LIBS ${CUDA_nvrtc_LIBRARY})
- set(CUDA_cuda_LIBRARY "${CUDA_nvrtc_LIBRARY}/../cuda.lib")
- list(APPEND mxnet_LINKER_LIBS ${CUDA_cuda_LIBRARY})
- endif()
- FIND_LIBRARY(CUDA_cufft_LIBRARY nvrtc "${CUDA_TOOLKIT_ROOT_DIR}/lib/x64" "${CUDA_TOOLKIT_ROOT_DIR}/lib/win32")
- list(APPEND mxnet_LINKER_LIBS "${CUDA_cufft_LIBRARY}/../cufft.lib") # For fft operator
- FIND_LIBRARY(CUDA_cusolver_LIBRARY nvrtc "${CUDA_TOOLKIT_ROOT_DIR}/lib/x64" "${CUDA_TOOLKIT_ROOT_DIR}/lib/win32")
- list(APPEND mxnet_LINKER_LIBS "${CUDA_cusolver_LIBRARY}/../cusolver.lib") # For cusolver
- link_directories(${CUDA_TOOLKIT_ROOT_DIR}/lib/win32)
- link_directories(${CUDA_TOOLKIT_ROOT_DIR}/lib/x64)
- else(MSVC)
- list(APPEND mxnet_LINKER_LIBS cufft cusolver)
- if(ENABLE_CUDA_RTC)
- list(APPEND mxnet_LINKER_LIBS nvrtc cuda)
- endif()
- link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib64")
+ endif()
+ list(APPEND SOURCE ${CUDA})
+ add_definitions(-DMXNET_USE_CUDA=1)
+ add_definitions(-DMSHADOW_USE_CUDA=1)
+ add_definitions(-DMSHADOW_FORCE_STREAM)
+
+ if(USE_NCCL)
+ find_package(NCCL)
+ if(NCCL_FOUND)
+ include_directories(${NCCL_INCLUDE_DIRS})
+ list(APPEND mxnet_LINKER_LIBS ${NCCL_LIBRARIES})
+ add_definitions(-DMXNET_USE_NCCL=1)
+ else()
+ add_definitions(-DMXNET_USE_NCCL=0)
+ message(WARNING "Could not find NCCL libraries")
endif()
- list(APPEND SOURCE ${cuda_objs} ${CUDA})
- add_definitions(-DMXNET_USE_CUDA=1)
- if(CUDA_LIBRARY_PATH)
- if(IS_CONTAINER_BUILD)
- # In case of building on a production-like build container which may not have Cuda installed
- if(NOT CMAKE_SYSTEM_HAS_CUDA)
- # Assuming building in a container that doesn't have CUDA installed (ie CPU-only build machine)
- # so use the stub cuda driver shared library
- if(EXISTS ${CUDA_LIBRARY_PATH}/stubs/libcuda.so)
- link_directories(${CUDA_LIBRARY_PATH}/stubs)
- endif()
- endif()
- endif()
+ endif()
+ if(UNIX)
+ if(CUDA_nvToolsExt_LIBRARY)
+ list(APPEND mxnet_LINKER_LIBS CUDA::nvToolsExt)
+ add_definitions(-DMXNET_USE_NVTX=1)
+ else()
+ message("Building without NVTX support.")
endif()
- endif()
+ endif()
+
+ include_directories(${CUDAToolkit_INCLUDE_DIRS})
+ link_directories(${CUDAToolkit_LIBRARY_DIR})
+else()
+ add_definitions(-DMSHADOW_USE_CUDA=0)
endif()
# unsupported: if caffe is a subdirectory of mxnet, load its CMakeLists.txt as well
@@ -709,6 +677,7 @@ add_library(sample_lib SHARED ${CMAKE_CURRENT_SOURCE_DIR}/example/lib_api/mylib.
target_include_directories(sample_lib PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include/mxnet)
set(MXNET_INSTALL_TARGETS mxnet)
if(UNIX)
+ string(APPEND CMAKE_CUDA_FLAGS "${CUDA_ARCH_FLAGS_SPACES}")
# Create dummy file since we want an empty shared library before linking
set(DUMMY_SOURCE ${CMAKE_BINARY_DIR}/dummy.c)
file(WRITE ${DUMMY_SOURCE} "")
@@ -720,32 +689,66 @@ if(UNIX)
target_link_libraries(mxnet_static PUBLIC ${CMAKE_DL_LIBS})
target_compile_options(sample_lib PUBLIC -shared)
set_target_properties(mxnet_static PROPERTIES OUTPUT_NAME mxnet)
-else()
- add_library(mxnet SHARED ${SOURCE})
+elseif(MSVC)
target_compile_options(sample_lib PUBLIC /LD)
set_target_properties(sample_lib PROPERTIES PREFIX "lib")
-endif()
-if(USE_CUDA)
- if(FIRST_CUDA AND MSVC)
- target_compile_options(mxnet PUBLIC "$<$<CONFIG:DEBUG>:-Xcompiler=-MTd -Gy>")
- target_compile_options(mxnet PUBLIC "$<$<CONFIG:RELEASE>:-Xcompiler=-MT -Gy>")
+ if(USE_CUDA)
+ if(MSVC)
+ if(USE_SPLIT_ARCH_DLL)
+ add_executable(gen_warp tools/windowsbuild/gen_warp.cpp)
+ add_library(mxnet SHARED tools/windowsbuild/warp_dll.cpp ${CMAKE_BINARY_DIR}/warp_gen_cpp.cpp
+ ${CMAKE_BINARY_DIR}/warp_gen.asm)
+ target_link_libraries(mxnet PRIVATE cudart Shlwapi)
+ list(GET arch_code_list 0 mxnet_first_arch)
+ foreach(arch ${arch_code_list})
+ add_library(mxnet_${arch} SHARED ${SOURCE})
+ target_compile_options(
+ mxnet_${arch}
+ PRIVATE
+ "$<$<COMPILE_LANGUAGE:CUDA>:--gpu-architecture=compute_${arch}>"
+ )
+ target_compile_options(
+ mxnet_${arch}
+ PRIVATE
+ "$<$<COMPILE_LANGUAGE:CUDA>:--gpu-code=sm_${arch},compute_${arch}>"
+ )
+ target_compile_options(
+ mxnet_${arch}
+ PRIVATE "$<$<AND:$<CONFIG:DEBUG>,$<COMPILE_LANGUAGE:CUDA>>:-Xcompiler=-MTd -Gy /bigobj>")
+ target_compile_options(
+ mxnet_${arch}
+ PRIVATE "$<$<AND:$<CONFIG:RELEASE>,$<COMPILE_LANGUAGE:CUDA>>:-Xcompiler=-MT -Gy /bigobj>")
+ endforeach()
+
+ add_custom_command(
+ OUTPUT ${CMAKE_BINARY_DIR}/warp_gen_cpp.cpp ${CMAKE_BINARY_DIR}/warp_gen.asm
+ COMMAND gen_warp $<TARGET_FILE:mxnet_${mxnet_first_arch}> WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/ DEPENDS $<TARGET_FILE:mxnet_${mxnet_first_arch}>)
+ else(USE_SPLIT_ARCH_DLL)
+ string(REPLACE ";" " " NVCC_FLAGS_ARCH "${NVCC_FLAGS_ARCH}")
+ set(CMAKE_CUDA_FLAGS "${CUDA_ARCH_FLAGS_SPACES}")
+ add_library(mxnet SHARED ${SOURCE})
+ target_compile_options(
+ mxnet
+ PRIVATE "$<$<AND:$<CONFIG:DEBUG>,$<COMPILE_LANGUAGE:CUDA>>:-Xcompiler=-MTd -Gy /bigobj>")
+ target_compile_options(
+ mxnet
+ PRIVATE "$<$<AND:$<CONFIG:RELEASE>,$<COMPILE_LANGUAGE:CUDA>>:-Xcompiler=-MT -Gy /bigobj>")
+
+ endif(USE_SPLIT_ARCH_DLL)
+ else()
+ add_library(mxnet SHARED ${SOURCE})
+ endif()
+ else()
+ add_library(mxnet SHARED ${SOURCE})
endif()
+
endif()
+
if(USE_DIST_KVSTORE)
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/ps-lite/CMakeLists.txt)
add_subdirectory("3rdparty/ps-lite")
list(APPEND pslite_LINKER_LIBS pslite protobuf)
- target_link_libraries(mxnet PUBLIC debug ${pslite_LINKER_LIBS_DEBUG})
- target_link_libraries(mxnet PUBLIC optimized ${pslite_LINKER_LIBS_RELEASE})
- if(CMAKE_BUILD_TYPE STREQUAL "Debug")
- list(APPEND mxnet_LINKER_LIBS ${pslite_LINKER_LIBS_DEBUG})
- else()
- list(APPEND mxnet_LINKER_LIBS ${pslite_LINKER_LIBS_RELEASE})
- endif()
- target_link_libraries(mxnet PUBLIC debug ${pslite_LINKER_LIBS_DEBUG})
- target_link_libraries(mxnet PUBLIC optimized ${pslite_LINKER_LIBS_RELEASE})
-
else()
set(pslite_LINKER_LIBS protobuf zmq-static)
endif()
@@ -768,8 +771,8 @@ if(USE_TVM_OP)
endif()
set(TVM_OP_COMPILE_OPTIONS "-o${CMAKE_CURRENT_BINARY_DIR}/libtvmop.so" "--config" "${CMAKE_CURRENT_BINARY_DIR}/tvmop.conf")
- if(CUDA_ARCH_BIN)
- set(TVM_OP_COMPILE_OPTIONS "${TVM_OP_COMPILE_OPTIONS}" "--cuda-arch" "${CUDA_ARCH_BIN}")
+ if(USE_CUDA)
+ set(TVM_OP_COMPILE_OPTIONS "${TVM_OP_COMPILE_OPTIONS}" "--cuda-arch" "\"${CUDA_ARCH_FLAGS}\"")
endif()
add_custom_command(TARGET mxnet POST_BUILD
COMMAND ${CMAKE_COMMAND} -E env
@@ -779,13 +782,24 @@ if(USE_TVM_OP)
)
endif()
-target_link_libraries(mxnet PUBLIC ${mxnet_LINKER_LIBS})
-
if(USE_PLUGINS_WARPCTC)
- target_link_libraries(mxnet PUBLIC debug ${WARPCTC_LIB_DEBUG})
- target_link_libraries(mxnet PUBLIC optimized ${WARPCTC_LIB_RELEASE})
+ list(APPEND mxnet_LINKER_LIBS ${WARPCTC_LIB})
endif()
+if(MSVC)
+ if(USE_SPLIT_ARCH_DLL AND USE_CUDA)
+ foreach(arch ${arch_code_list})
+ target_link_libraries(mxnet_${arch} PUBLIC ${mxnet_LINKER_LIBS})
+ target_link_libraries(mxnet_${arch} PUBLIC dmlc)
+ endforeach()
+ else()
+ target_link_libraries(mxnet PUBLIC ${mxnet_LINKER_LIBS})
+ target_link_libraries(mxnet PUBLIC dmlc)
+ endif()
+else()
+ target_link_libraries(mxnet PUBLIC ${mxnet_LINKER_LIBS})
+ target_link_libraries(mxnet PUBLIC dmlc)
+endif()
if(USE_OPENCV AND OpenCV_VERSION_MAJOR GREATER 2)
add_executable(im2rec "tools/im2rec.cc")
@@ -805,7 +819,6 @@ else()
is required for im2rec, im2rec will not be available")
endif()
-target_link_libraries(mxnet PUBLIC dmlc)
if(MSVC AND USE_MXNET_LIB_NAMING)
set_target_properties(mxnet PROPERTIES OUTPUT_NAME "libmxnet")
diff --git a/ci/build_windows.py b/ci/build_windows.py
index 5839e8d..b334b68 100755
--- a/ci/build_windows.py
+++ b/ci/build_windows.py
@@ -114,9 +114,7 @@ CMAKE_FLAGS = {
'-DUSE_BLAS=open '
'-DUSE_LAPACK=ON '
'-DUSE_DIST_KVSTORE=OFF '
- '-DCUDA_ARCH_NAME=Manual '
- '-DCUDA_ARCH_BIN=52 '
- '-DCUDA_ARCH_PTX=52 '
+ '-DMXNET_CUDA_ARCH="5.2" '
'-DCMAKE_CXX_FLAGS="/FS /MD /O2 /Ob2" '
'-DUSE_MKL_IF_AVAILABLE=OFF '
'-DCMAKE_BUILD_TYPE=Release')
@@ -130,9 +128,7 @@ CMAKE_FLAGS = {
'-DUSE_BLAS=open '
'-DUSE_LAPACK=ON '
'-DUSE_DIST_KVSTORE=OFF '
- '-DCUDA_ARCH_NAME=Manual '
- '-DCUDA_ARCH_BIN=52 '
- '-DCUDA_ARCH_PTX=52 '
+ '-DMXNET_CUDA_ARCH="5.2" '
'-DUSE_MKLDNN=ON '
'-DCMAKE_CXX_FLAGS="/FS /MD /O2 /Ob2" '
'-DCMAKE_BUILD_TYPE=Release')
diff --git a/ci/docker/install/ubuntu_core.sh b/ci/docker/install/ubuntu_core.sh
index 87e9dec..3f22992 100755
--- a/ci/docker/install/ubuntu_core.sh
+++ b/ci/docker/install/ubuntu_core.sh
@@ -49,7 +49,7 @@ apt-get install -y \
wget
# Use libturbojpeg package as it is correctly compiled with -fPIC flag
-# https://github.com/HaxeFoundation/hashlink/issues/147
+# https://github.com/HaxeFoundation/hashlink/issues/147
ln -s /usr/lib/x86_64-linux-gnu/libturbojpeg.so.0.1.0 /usr/lib/x86_64-linux-gnu/libturbojpeg.so
diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh
index 581bb2f..745214a 100755
--- a/ci/docker/runtime_functions.sh
+++ b/ci/docker/runtime_functions.sh
@@ -25,7 +25,7 @@ set -ex
NOSE_COVERAGE_ARGUMENTS="--with-coverage --cover-inclusive --cover-xml --cover-branches --cover-package=mxnet"
NOSE_TIMER_ARGUMENTS="--with-timer --timer-ok 1 --timer-warning 15 --timer-filter warning,error"
CI_CUDA_COMPUTE_CAPABILITIES="-gencode=arch=compute_52,code=sm_52 -gencode=arch=compute_70,code=sm_70"
-CI_CMAKE_CUDA_ARCH_BIN="52,70"
+CI_CMAKE_CUDA_ARCH="5.2 7.0"
clean_repo() {
set -ex
@@ -753,8 +753,7 @@ build_ubuntu_gpu_tensorrt() {
-DUSE_OPENMP=0 \
-DUSE_MKLDNN=0 \
-DUSE_MKL_IF_AVAILABLE=OFF \
- -DCUDA_ARCH_NAME=Manual \
- -DCUDA_ARCH_BIN=$CI_CMAKE_CUDA_ARCH_BIN \
+ -DMXNET_CUDA_ARCH="$CI_CMAKE_CUDA_ARCH" \
-G Ninja \
/work/mxnet
@@ -872,8 +871,7 @@ build_ubuntu_gpu_cmake_mkldnn() {
-DPython3_EXECUTABLE=/usr/bin/python3 \
-DUSE_MKLML_MKL=1 \
-DCMAKE_BUILD_TYPE=Release \
- -DCUDA_ARCH_NAME=Manual \
- -DCUDA_ARCH_BIN=$CI_CMAKE_CUDA_ARCH_BIN \
+ -DMXNET_CUDA_ARCH="$CI_CMAKE_CUDA_ARCH" \
-G Ninja \
/work/mxnet
@@ -901,8 +899,7 @@ build_ubuntu_gpu_cmake() {
-DUSE_MKLDNN=OFF \
-DUSE_DIST_KVSTORE=ON \
-DCMAKE_BUILD_TYPE=Release \
- -DCUDA_ARCH_NAME=Manual \
- -DCUDA_ARCH_BIN=$CI_CMAKE_CUDA_ARCH_BIN \
+ -DMXNET_CUDA_ARCH="$CI_CMAKE_CUDA_ARCH" \
-DBUILD_CYTHON_MODULES=1 \
-G Ninja \
/work/mxnet
@@ -928,8 +925,7 @@ build_ubuntu_gpu_cmake_no_tvm_op() {
-DUSE_MKLDNN=OFF \
-DUSE_DIST_KVSTORE=ON \
-DCMAKE_BUILD_TYPE=Release \
- -DCUDA_ARCH_NAME=Manual \
- -DCUDA_ARCH_BIN=$CI_CMAKE_CUDA_ARCH_BIN \
+ -DMXNET_CUDA_ARCH="$CI_CMAKE_CUDA_ARCH" \
-DBUILD_CYTHON_MODULES=1 \
-G Ninja \
/work/mxnet
@@ -975,8 +971,7 @@ build_ubuntu_gpu_large_tensor() {
-DUSE_MKLDNN=OFF \
-DUSE_DIST_KVSTORE=ON \
-DCMAKE_BUILD_TYPE=Release \
- -DCUDA_ARCH_NAME=Manual \
- -DCUDA_ARCH_BIN=$CI_CMAKE_CUDA_ARCH_BIN \
+ -DMXNET_CUDA_ARCH="$CI_CMAKE_CUDA_ARCH" \
-DUSE_INT64_TENSOR_SIZE=ON \
-G Ninja \
/work/mxnet
diff --git a/cmake/BuildTVM.cmake b/cmake/BuildTVM.cmake
index db8b33b..5e4ccd4 100644
--- a/cmake/BuildTVM.cmake
+++ b/cmake/BuildTVM.cmake
@@ -98,16 +98,19 @@ set(USE_RANDOM OFF)
# Whether use NNPack
set(USE_NNPACK OFF)
-# Whether use CuDNN
-if(USE_CUDNN AND USE_CUDA)
- detect_cuDNN()
- if(HAVE_CUDNN)
- set(USE_CUDNN ON)
- else()
- set(USE_CUDNN OFF)
- endif()
-else()
- set(USE_CUDNN OFF)
+# First-class Cuda in modern CMake provides us with CMAKE_CUDA_COMPILER But TVM
+# uses the deprecated findCUDA functionality which requires
+# CUDA_TOOLKIT_ROOT_DIR We follow the FindCUDAToolkit.cmake logic to compute
+# CUDA_TOOLKIT_ROOT_DIR for TVM https://gitlab.kitware.com/cmake/cmake/merge_requests/4093/
+if(USE_CUDA)
+ get_filename_component(cuda_dir "${CMAKE_CUDA_COMPILER}" DIRECTORY)
+ set(CUDA_BIN_DIR "${cuda_dir}" CACHE PATH "" FORCE)
+ unset(cuda_dir)
+ get_filename_component(CUDA_TOOLKIT_ROOT_DIR ${CUDA_BIN_DIR} DIRECTORY ABSOLUTE)
+
+ message("CMAKE_CUDA_COMPILER: ${CMAKE_CUDA_COMPILER}")
+ message("Inferred CUDA_TOOLKIT_ROOT_DIR for TVM as: ${CUDA_TOOLKIT_ROOT_DIR}")
+ set(USE_CUDA ${CUDA_TOOLKIT_ROOT_DIR})
endif()
# Whether use cuBLAS
diff --git a/cmake/FirstClassLangCuda.cmake b/cmake/FirstClassLangCuda.cmake
deleted file mode 100644
index 8d79c2b..0000000
--- a/cmake/FirstClassLangCuda.cmake
+++ /dev/null
@@ -1,277 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-#this file is CUDA help function with CMAKE first class CUDA
-
-include(CheckCXXCompilerFlag)
-check_cxx_compiler_flag("-std=c++11" SUPPORT_CXX11)
-if(USE_CXX14_IF_AVAILABLE)
- check_cxx_compiler_flag("-std=c++14" SUPPORT_CXX14)
-endif()
-
-################################################################################################
-# Short command for cuDNN detection. Believe it soon will be a part of CUDA toolkit distribution.
-# That's why not FindcuDNN.cmake file, but just the macro
-# Usage:
-# detect_cuDNN()
-function(detect_cuDNN)
- set(CUDNN_ROOT "" CACHE PATH "CUDNN root folder")
-
- find_path(CUDNN_INCLUDE cudnn.h
- PATHS ${CUDNN_ROOT} $ENV{CUDNN_ROOT}
- DOC "Path to cuDNN include directory." )
-
-
- find_library(CUDNN_LIBRARY NAMES libcudnn.so cudnn.lib # libcudnn_static.a
- PATHS ${CUDNN_ROOT} $ENV{CUDNN_ROOT} ${CUDNN_INCLUDE}
- PATH_SUFFIXES lib lib/x64
- DOC "Path to cuDNN library.")
-
- if(CUDNN_INCLUDE AND CUDNN_LIBRARY)
- set(HAVE_CUDNN TRUE PARENT_SCOPE)
- set(CUDNN_FOUND TRUE PARENT_SCOPE)
-
- mark_as_advanced(CUDNN_INCLUDE CUDNN_LIBRARY CUDNN_ROOT)
- message(STATUS "Found cuDNN (include: ${CUDNN_INCLUDE}, library: ${CUDNN_LIBRARY})")
- endif()
-endfunction()
-
-
-
-################################################################################################
-# A function for automatic detection of GPUs installed (if autodetection is enabled)
-# Usage:
-# mshadow_detect_installed_gpus(out_variable)
-function(mshadow_detect_installed_gpus out_variable)
- if(NOT CUDA_gpu_detect_output)
- set(__cufile ${PROJECT_BINARY_DIR}/detect_cuda_archs.cu)
-
- file(WRITE ${__cufile} ""
- "#include <cstdio>\n"
- "int main()\n"
- "{\n"
- " int count = 0;\n"
- " if (cudaSuccess != cudaGetDeviceCount(&count)) return -1;\n"
- " if (count == 0) return -1;\n"
- " for (int device = 0; device < count; ++device)\n"
- " {\n"
- " cudaDeviceProp prop;\n"
- " if (cudaSuccess == cudaGetDeviceProperties(&prop, device))\n"
- " std::printf(\"%d.%d \", prop.major, prop.minor);\n"
- " }\n"
- " return 0;\n"
- "}\n")
- enable_language(CUDA)
-
- try_run(__nvcc_res __compile_result ${PROJECT_BINARY_DIR} ${__cufile}
- COMPILE_OUTPUT_VARIABLE __compile_out
- RUN_OUTPUT_VARIABLE __nvcc_out)
-
- if(__nvcc_res EQUAL 0 AND __compile_result)
- # nvcc outputs text containing line breaks when building with MSVC.
- # The line below prevents CMake from inserting a variable with line
- # breaks in the cache
- string(REGEX MATCH "([1-9].[0-9])" __nvcc_out "${__nvcc_out}")
- string(REPLACE "2.1" "2.1(2.0)" __nvcc_out "${__nvcc_out}")
- set(CUDA_gpu_detect_output ${__nvcc_out})
- else()
- message(WARNING "Running GPU detection script with nvcc failed: ${__nvcc_out} ${__compile_out}")
- endif()
- endif()
-
- if(NOT CUDA_gpu_detect_output)
- message(WARNING "Automatic GPU detection failed. Building for all known architectures (${mxnet_known_gpu_archs}).")
- set(${out_variable} ${mxnet_known_gpu_archs} PARENT_SCOPE)
- else()
- set(${out_variable} ${CUDA_gpu_detect_output} PARENT_SCOPE)
- endif()
-endfunction()
-
-
-# This list will be used for CUDA_ARCH_NAME = All option
-set(CUDA_KNOWN_GPU_ARCHITECTURES "Kepler" "Maxwell")
-
-# This list will be used for CUDA_ARCH_NAME = Common option (enabled by default)
-set(CUDA_COMMON_GPU_ARCHITECTURES "3.0" "3.5" "5.0")
-
-if (CUDA_TOOLSET VERSION_GREATER "6.5")
- list(APPEND CUDA_KNOWN_GPU_ARCHITECTURES "Kepler+Tegra" "Kepler+Tesla" "Maxwell+Tegra")
- list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "5.2" "3.7")
-endif ()
-
-if (CUDA_TOOLSET VERSION_GREATER "7.5")
- list(APPEND CUDA_KNOWN_GPU_ARCHITECTURES "Pascal")
- list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "6.0" "6.1" "6.1+PTX")
-else()
- list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "5.2+PTX")
-endif ()
-
-if (CUDA_TOOLSET VERSION_GREATER "9.0")
- list(APPEND CUDA_KNOWN_GPU_ARCHITECTURES "Volta")
- list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "7.0")
-endif()
-
-if (CUDA_TOOLSET VERSION_GREATER "10.0")
- list(APPEND CUDA_KNOWN_GPU_ARCHITECTURES "Turing")
- list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "7.5")
-endif()
-
-################################################################################################
-# Function for selecting GPU arch flags for nvcc based on CUDA_ARCH_NAME
-# Usage:
-# mshadow_select_nvcc_arch_flags(out_variable)
-function(mshadow_select_nvcc_arch_flags out_variable)
-
- set(CUDA_ARCH_LIST "Auto" CACHE STRING "Select target NVIDIA GPU achitecture.")
- set_property( CACHE CUDA_ARCH_LIST PROPERTY STRINGS "" "Auto" "All" "Common" ${CUDA_KNOWN_GPU_ARCHITECTURES} )
- mark_as_advanced(CUDA_ARCH_NAME)
-
-
- if("X${CUDA_ARCH_LIST}" STREQUAL "X" )
- set(CUDA_ARCH_LIST "All")
- endif()
-
- set(cuda_arch_bin)
- set(cuda_arch_ptx)
-
- message(STATUS " CUDA_ARCH_LIST: ${CUDA_ARCH_LIST}")
- if("${CUDA_ARCH_LIST}" STREQUAL "All")
- set(CUDA_ARCH_LIST ${CUDA_KNOWN_GPU_ARCHITECTURES})
- elseif("${CUDA_ARCH_LIST}" STREQUAL "Common")
- set(CUDA_ARCH_LIST ${CUDA_COMMON_GPU_ARCHITECTURES})
- elseif("${CUDA_ARCH_LIST}" STREQUAL "Auto" OR "${CUDA_ARCH_LIST}" STREQUAL "")
- set(mxnet_known_gpu_archs ${CUDA_COMMON_GPU_ARCHITECTURES})
- mshadow_detect_installed_gpus(CUDA_ARCH_LIST)
- message(STATUS "Autodetected CUDA architecture(s): ${CUDA_ARCH_LIST}")
- endif()
-
- # Now process the list and look for names
- string(REGEX REPLACE "[ \t]+" ";" CUDA_ARCH_LIST "${CUDA_ARCH_LIST}")
- list(REMOVE_DUPLICATES CUDA_ARCH_LIST)
- foreach(arch_name ${CUDA_ARCH_LIST})
- set(arch_bin)
- set(arch_ptx)
- set(add_ptx FALSE)
- # Check to see if we are compiling PTX
- if(arch_name MATCHES "(.*)\\+PTX$")
- set(add_ptx TRUE)
- set(arch_name ${CMAKE_MATCH_1})
- endif()
- if(arch_name MATCHES "^([0-9]\\.[0-9](\\([0-9]\\.[0-9]\\))?)$")
- set(arch_bin ${CMAKE_MATCH_1})
- set(arch_ptx ${arch_bin})
- else()
- # Look for it in our list of known architectures
- if(${arch_name} STREQUAL "Fermi")
- if (CUDA_TOOLSET VERSION_LESS "8.0")
- set(arch_bin 2.0 "2.1(2.0)")
- endif()
- elseif(${arch_name} STREQUAL "Kepler+Tegra")
- set(arch_bin 3.2)
- elseif(${arch_name} STREQUAL "Kepler+Tesla")
- set(arch_bin 3.7)
- elseif(${arch_name} STREQUAL "Kepler")
- set(arch_bin 3.0 3.5)
- set(arch_ptx 3.5)
- elseif(${arch_name} STREQUAL "Maxwell+Tegra")
- set(arch_bin 5.3)
- elseif(${arch_name} STREQUAL "Maxwell")
- set(arch_bin 5.0 5.2)
- set(arch_ptx 5.2)
- elseif(${arch_name} STREQUAL "Pascal")
- set(arch_bin 6.0 6.1)
- set(arch_ptx 6.1)
- elseif(${arch_name} STREQUAL "Volta")
- set(arch_bin 7.0)
- set(arch_ptx 7.0)
- elseif(${arch_name} STREQUAL "Turing")
- set(arch_bin 7.5)
- set(arch_ptx 7.5)
- else()
- message(SEND_ERROR "Unknown CUDA Architecture Name ${arch_name} in CUDA_SELECT_NVCC_ARCH_FLAGS")
- endif()
- endif()
- list(APPEND cuda_arch_bin ${arch_bin})
- if(add_ptx)
- if (NOT arch_ptx)
- set(arch_ptx ${arch_bin})
- endif()
- list(APPEND cuda_arch_ptx ${arch_ptx})
- endif()
- endforeach()
-
- # remove dots and convert to lists
- string(REGEX REPLACE "\\." "" cuda_arch_bin "${cuda_arch_bin}")
- string(REGEX REPLACE "\\." "" cuda_arch_ptx "${cuda_arch_ptx}")
- string(REGEX MATCHALL "[0-9()]+" cuda_arch_bin "${cuda_arch_bin}")
- string(REGEX MATCHALL "[0-9]+" cuda_arch_ptx "${cuda_arch_ptx}")
-
- if(cuda_arch_bin)
- list(REMOVE_DUPLICATES cuda_arch_bin)
- endif()
- if(cuda_arch_ptx)
- list(REMOVE_DUPLICATES cuda_arch_ptx)
- endif()
-
- message(STATUS "cuda arch bin: ${cuda_arch_bin}")
- message(STATUS "cuda arch ptx: ${cuda_arch_ptx}")
- set(nvcc_flags "")
- set(nvcc_archs_readable "")
-
- # Tell NVCC to add binaries for the specified GPUs
- foreach(arch ${cuda_arch_bin})
- if(arch MATCHES "([0-9]+)\\(([0-9]+)\\)")
- # User explicitly specified ARCH for the concrete CODE
- list(APPEND nvcc_flags -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1})
- list(APPEND nvcc_archs_readable sm_${CMAKE_MATCH_1})
- else()
- # User didn't explicitly specify ARCH for the concrete CODE, we assume ARCH=CODE
- list(APPEND nvcc_flags -gencode arch=compute_${arch},code=sm_${arch})
- list(APPEND nvcc_archs_readable sm_${arch})
- endif()
- endforeach()
-
- # Tell NVCC to add PTX intermediate code for the specified architectures
- foreach(arch ${cuda_arch_ptx})
- list(APPEND nvcc_flags -gencode arch=compute_${arch},code=compute_${arch})
- list(APPEND nvcc_archs_readable compute_${arch})
- endforeach()
-
- if(NOT MSVC)
- if(SUPPORT_CXX14)
- list(APPEND nvcc_flags "-std=c++14")
- elseif(SUPPORT_CXX11)
- list(APPEND nvcc_flags "-std=c++11")
- endif()
- endif()
-
- string (REPLACE " " ";" CMAKE_CXX_FLAGS_STR "${CMAKE_CXX_FLAGS}")
- foreach(_flag ${CMAKE_CXX_FLAGS_STR})
- # Remove -std=c++XX flags
- if(NOT "${_flag}" MATCHES "-std=.+")
- # Remove link flags
- if(NOT "${_flag}" MATCHES "-Wl,.+")
- list(APPEND nvcc_flags "-Xcompiler ${_flag}")
- endif()
- endif()
- endforeach()
-
- string(REPLACE ";" " " nvcc_archs_readable "${nvcc_archs_readable}")
- set(${out_variable} ${nvcc_flags} PARENT_SCOPE)
- set(${out_variable}_readable ${nvcc_archs_readable} PARENT_SCOPE)
-endfunction()
-
diff --git a/cmake/Modules/FindCUDAToolkit.cmake b/cmake/Modules/FindCUDAToolkit.cmake
new file mode 100644
index 0000000..1d9af2f
--- /dev/null
+++ b/cmake/Modules/FindCUDAToolkit.cmake
@@ -0,0 +1,833 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Original license notice, prior to modification by MXNet Contributors:
+#
+# Copyright 2000-2019 Kitware, Inc. and Contributors
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# * Neither the name of Kitware, Inc. nor the names of Contributors
+# may be used to endorse or promote products derived from this
+# software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#[=======================================================================[.rst:
+FindCUDAToolkit
+---------------
+
+This script locates the NVIDIA CUDA toolkit and the associated libraries, but
+does not require the ``CUDA`` language be enabled for a given project. This
+module does not search for the NVIDIA CUDA Samples.
+
+Search Behavior
+^^^^^^^^^^^^^^^
+
+Finding the CUDA Toolkit requires finding the ``nvcc`` executable, which is
+searched for in the following order:
+
+1. If the ``CUDA`` language has been enabled we will use the directory
+ containing the compiler as the first search location for ``nvcc``.
+
+2. If the ``CUDAToolkit_ROOT`` cmake configuration variable (e.g.,
+ ``-DCUDAToolkit_ROOT=/some/path``) *or* environment variable is defined, it
+ will be searched. If both an environment variable **and** a
+ configuration variable are specified, the *configuration* variable takes
+ precedence.
+
+ The directory specified here must be such that the executable ``nvcc`` can be
+ found underneath the directory specified by ``CUDAToolkit_ROOT``. If
+ ``CUDAToolkit_ROOT`` is specified, but no ``nvcc`` is found underneath, this
+ package is marked as **not** found. No subsequent search attempts are
+ performed.
+
+3. If the CUDA_PATH environment variable is defined, it will be searched.
+
+4. The user's path is searched for ``nvcc`` using :command:`find_program`. If
+ this is found, no subsequent search attempts are performed. Users are
+ responsible for ensuring that the first ``nvcc`` to show up in the path is
+ the desired path in the event that multiple CUDA Toolkits are installed.
+
+5. On Unix systems, if the symbolic link ``/usr/local/cuda`` exists, this is
+ used. No subsequent search attempts are performed. No default symbolic link
+ location exists for the Windows platform.
+
+6. The platform specific default install locations are searched. If exactly one
+ candidate is found, this is used. The default CUDA Toolkit install locations
+ searched are:
+
+ +-------------+-------------------------------------------------------------+
+ | Platform | Search Pattern |
+ +=============+=============================================================+
+ | macOS | ``/Developer/NVIDIA/CUDA-X.Y`` |
+ +-------------+-------------------------------------------------------------+
+ | Other Unix | ``/usr/local/cuda-X.Y`` |
+ +-------------+-------------------------------------------------------------+
+ | Windows | ``C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\vX.Y`` |
+ +-------------+-------------------------------------------------------------+
+
+ Where ``X.Y`` would be a specific version of the CUDA Toolkit, such as
+ ``/usr/local/cuda-9.0`` or
+ ``C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.0``
+
+ .. note::
+
+ When multiple CUDA Toolkits are installed in the default location of a
+ system (e.g., both ``/usr/local/cuda-9.0`` and ``/usr/local/cuda-10.0``
+ exist but the ``/usr/local/cuda`` symbolic link does **not** exist), this
+ package is marked as **not** found.
+
+ There are too many factors involved in making an automatic decision in
+ the presence of multiple CUDA Toolkits being installed. In this
+ situation, users are encouraged to either (1) set ``CUDAToolkit_ROOT`` or
+ (2) ensure that the correct ``nvcc`` executable shows up in ``$PATH`` for
+ :command:`find_program` to find.
+
+Options
+^^^^^^^
+
+``VERSION``
+ If specified, describes the version of the CUDA Toolkit to search for.
+
+``REQUIRED``
+ If specified, configuration will error if a suitable CUDA Toolkit is not
+ found.
+
+``QUIET``
+ If specified, the search for a suitable CUDA Toolkit will not produce any
+ messages.
+
+``EXACT``
+ If specified, the CUDA Toolkit is considered found only if the exact
+ ``VERSION`` specified is recovered.
+
+Imported targets
+^^^^^^^^^^^^^^^^
+
+An :ref:`imported target <Imported targets>` named ``CUDA::toolkit`` is provided.
+
+This module defines :prop_tgt:`IMPORTED` targets for each
+of the following libraries that are part of the CUDAToolkit:
+
+- :ref:`CUDA Runtime Library<cuda_toolkit_rt_lib>`
+- :ref:`CUDA Driver Library<cuda_toolkit_driver_lib>`
+- :ref:`cuBLAS<cuda_toolkit_cuBLAS>`
+- :ref:`cuFFT<cuda_toolkit_cuFFT>`
+- :ref:`cuRAND<cuda_toolkit_cuRAND>`
+- :ref:`cuSOLVER<cuda_toolkit_cuSOLVER>`
+- :ref:`cuSPARSE<cuda_toolkit_cuSPARSE>`
+- :ref:`NPP<cuda_toolkit_NPP>`
+- :ref:`nvBLAS<cuda_toolkit_nvBLAS>`
+- :ref:`nvGRAPH<cuda_toolkit_nvGRAPH>`
+- :ref:`nvJPEG<cuda_toolkit_nvJPEG>`
+- :ref:`nvidia-ML<cuda_toolkit_nvML>`
+- :ref:`nvRTC<cuda_toolkit_nvRTC>`
+- :ref:`nvToolsExt<cuda_toolkit_nvToolsExt>`
+- :ref:`OpenCL<cuda_toolkit_opencl>`
+- :ref:`cuLIBOS<cuda_toolkit_cuLIBOS>`
+
+.. _`cuda_toolkit_rt_lib`:
+
+CUDA Runtime Library
+""""""""""""""""""""
+
+The CUDA Runtime library (cudart) is what most applications will typically
+need to link against to make any calls such as `cudaMalloc`, and `cudaFree`.
+They are an explicit dependency of almost every library.
+
+Targets Created:
+
+- ``CUDA::cudart``
+- ``CUDA::cudart_static``
+
+.. _`cuda_toolkit_driver_lib`:
+
+CUDA Driver Library
+""""""""""""""""""""
+
+The CUDA Driver library (cuda) is used by applications that use calls
+such as `cuMemAlloc`, and `cuMemFree`. This is generally used by advanced
+users only.
+
+Targets Created:
+
+- ``CUDA::cuda_driver``
+
+
+.. _`cuda_toolkit_cuBLAS`:
+
+cuBLAS
+""""""
+
+The `cuBLAS <https://docs.nvidia.com/cuda/cublas/index.html>`_ library.
+
+Targets Created:
+
+- ``CUDA::cublas``
+- ``CUDA::cublas_static``
+
+.. _`cuda_toolkit_cuFFT`:
+
+cuFFT
+"""""
+
+The `cuFFT <https://docs.nvidia.com/cuda/cufft/index.html>`_ library.
+
+Targets Created:
+
+- ``CUDA::cufft``
+- ``CUDA::cufftw``
+- ``CUDA::cufft_static``
+- ``CUDA::cufftw_static``
+
+cuRAND
+""""""
+
+The `cuRAND <https://docs.nvidia.com/cuda/curand/index.html>`_ library.
+
+Targets Created:
+
+- ``CUDA::curand``
+- ``CUDA::curand_static``
+
+.. _`cuda_toolkit_cuSOLVER`:
+
+cuSOLVER
+""""""""
+
+The `cuSOLVER <https://docs.nvidia.com/cuda/cusolver/index.html>`_ library.
+
+Targets Created:
+
+- ``CUDA::cusolver``
+- ``CUDA::cusolver_static``
+
+.. _`cuda_toolkit_cuSPARSE`:
+
+cuSPARSE
+""""""""
+
+The `cuSPARSE <https://docs.nvidia.com/cuda/cusparse/index.html>`_ library.
+
+Targets Created:
+
+- ``CUDA::cusparse``
+- ``CUDA::cusparse_static``
+
+.. _`cuda_toolkit_NPP`:
+
+NPP
+"""
+
+The `NPP <https://docs.nvidia.com/cuda/npp/index.html>`_ libraries.
+
+Targets Created:
+
+- `nppc`:
+
+ - ``CUDA::nppc``
+ - ``CUDA::nppc_static``
+
+- `nppial`: Arithmetic and logical operation functions in `nppi_arithmetic_and_logical_operations.h`
+
+ - ``CUDA::nppial``
+ - ``CUDA::nppial_static``
+
+- `nppicc`: Color conversion and sampling functions in `nppi_color_conversion.h`
+
+ - ``CUDA::nppicc``
+ - ``CUDA::nppicc_static``
+
+- `nppicom`: JPEG compression and decompression functions in `nppi_compression_functions.h`
+
+ - ``CUDA::nppicom``
+ - ``CUDA::nppicom_static``
+
+- `nppidei`: Data exchange and initialization functions in `nppi_data_exchange_and_initialization.h`
+
+ - ``CUDA::nppidei``
+ - ``CUDA::nppidei_static``
+
+- `nppif`: Filtering and computer vision functions in `nppi_filter_functions.h`
+
+ - ``CUDA::nppif``
+ - ``CUDA::nppif_static``
+
+- `nppig`: Geometry transformation functions found in `nppi_geometry_transforms.h`
+
+ - ``CUDA::nppig``
+ - ``CUDA::nppig_static``
+
+- `nppim`: Morphological operation functions found in `nppi_morphological_operations.h`
+
+ - ``CUDA::nppim``
+ - ``CUDA::nppim_static``
+
+- `nppist`: Statistics and linear transform in `nppi_statistics_functions.h` and `nppi_linear_transforms.h`
+
+ - ``CUDA::nppist``
+ - ``CUDA::nppist_static``
+
+- `nppisu`: Memory support functions in `nppi_support_functions.h`
+
+ - ``CUDA::nppisu``
+ - ``CUDA::nppisu_static``
+
+- `nppitc`: Threshold and compare operation functions in `nppi_threshold_and_compare_operations.h`
+
+ - ``CUDA::nppitc``
+ - ``CUDA::nppitc_static``
+
+- `npps`:
+
+ - ``CUDA::npps``
+ - ``CUDA::npps_static``
+
+.. _`cuda_toolkit_nvBLAS`:
+
+nvBLAS
+""""""
+
+The `nvBLAS <https://docs.nvidia.com/cuda/nvblas/index.html>`_ library.
+This is a shared library only.
+
+Targets Created:
+
+- ``CUDA::nvblas``
+
+.. _`cuda_toolkit_nvGRAPH`:
+
+nvGRAPH
+"""""""
+
+The `nvGRAPH <https://docs.nvidia.com/cuda/nvgraph/index.html>`_ library.
+
+Targets Created:
+
+- ``CUDA::nvgraph``
+- ``CUDA::nvgraph_static``
+
+
+.. _`cuda_toolkit_nvJPEG`:
+
+nvJPEG
+""""""
+
+The `nvJPEG <https://docs.nvidia.com/cuda/nvjpeg/index.html>`_ library.
+Introduced in CUDA 10.
+
+Targets Created:
+
+- ``CUDA::nvjpeg``
+- ``CUDA::nvjpeg_static``
+
+.. _`cuda_toolkit_nvRTC`:
+
+nvRTC
+"""""
+
+The `nvRTC <https://docs.nvidia.com/cuda/nvrtc/index.html>`_ (Runtime Compilation) library.
+This is a shared library only.
+
+Targets Created:
+
+- ``CUDA::nvrtc``
+
+.. _`cuda_toolkit_nvml`:
+
+nvidia-ML
+"""""""""
+
+The `NVIDIA Management Library <https://developer.nvidia.com/nvidia-management-library-nvml>`_.
+This is a shared library only.
+
+Targets Created:
+
+- ``CUDA::nvml``
+
+.. _`cuda_toolkit_nvToolsExt`:
+
+nvToolsExt
+""""""""""
+
+The `NVIDIA Tools Extension <https://docs.nvidia.com/gameworks/content/gameworkslibrary/nvtx/nvidia_tools_extension_library_nvtx.htm>`_.
+This is a shared library only.
+
+Targets Created:
+
+- ``CUDA::nvToolsExt``
+
+.. _`cuda_toolkit_opencl`:
+
+OpenCL
+""""""
+
+The `NVIDIA OpenCL Library <https://developer.nvidia.com/opencl>`_.
+This is a shared library only.
+
+Targets Created:
+
+- ``CUDA::OpenCL``
+
+.. _`cuda_toolkit_cuLIBOS`:
+
+cuLIBOS
+"""""""
+
+The cuLIBOS library is a backend thread abstraction layer library which is
+static only. The ``CUDA::cublas_static``, ``CUDA::cusparse_static``,
+``CUDA::cufft_static``, ``CUDA::curand_static``, and (when implemented) NPP
+libraries all automatically have this dependency linked.
+
+Target Created:
+
+- ``CUDA::culibos``
+
+**Note**: direct usage of this target by consumers should not be necessary.
+
+.. _`cuda_toolkit_cuRAND`:
+
+
+
+Result variables
+^^^^^^^^^^^^^^^^
+
+``CUDAToolkit_FOUND``
+ A boolean specifying whether or not the CUDA Toolkit was found.
+
+``CUDAToolkit_VERSION``
+ The exact version of the CUDA Toolkit found (as reported by
+ ``nvcc --version``).
+
+``CUDAToolkit_VERSION_MAJOR``
+ The major version of the CUDA Toolkit.
+
+``CUDAToolkit_VERSION_MINOR``
+ The minor version of the CUDA Toolkit.
+
+``CUDAToolkit_VERSION_PATCH``
+ The patch version of the CUDA Toolkit.
+
+``CUDAToolkit_BIN_DIR``
+ The path to the CUDA Toolkit library directory that contains the CUDA
+ executable ``nvcc``.
+
+``CUDAToolkit_INCLUDE_DIRS``
+ The path to the CUDA Toolkit ``include`` folder containing the header files
+ required to compile a project linking against CUDA.
+
+``CUDAToolkit_LIBRARY_DIR``
+ The path to the CUDA Toolkit library directory that contains the CUDA
+ Runtime library ``cudart``.
+
+``CUDAToolkit_NVCC_EXECUTABLE``
+ The path to the NVIDIA CUDA compiler ``nvcc``. Note that this path may
+ **not** be the same as
+ :variable:`CMAKE_CUDA_COMPILER <CMAKE_<LANG>_COMPILER>`. ``nvcc`` must be
+ found to determine the CUDA Toolkit version as well as determining other
+ features of the Toolkit. This variable is set for the convenience of
+ modules that depend on this one.
+
+
+#]=======================================================================]
+
+# NOTE: much of this was simply extracted from FindCUDA.cmake.
+
+# James Bigler, NVIDIA Corp (nvidia.com - jbigler)
+# Abe Stephens, SCI Institute -- http://www.sci.utah.edu/~abe/FindCuda.html
+#
+# Copyright (c) 2008 - 2009 NVIDIA Corporation. All rights reserved.
+#
+# Copyright (c) 2007-2009
+# Scientific Computing and Imaging Institute, University of Utah
+#
+# This code is licensed under the MIT License. See the FindCUDA.cmake script
+# for the text of the license.
+
+# The MIT License
+#
+# License for the specific language governing rights and limitations under
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+#
+###############################################################################
+
+if(CMAKE_CUDA_COMPILER_LOADED AND NOT CUDAToolkit_BIN_DIR)
+ get_filename_component(cuda_dir "${CMAKE_CUDA_COMPILER}" DIRECTORY)
+ # use the already detected cuda compiler
+ set(CUDAToolkit_BIN_DIR "${cuda_dir}" CACHE PATH "")
+ unset(cuda_dir)
+endif()
+
+# Try language- or user-provided path first.
+if(CUDAToolkit_BIN_DIR)
+ find_program(CUDAToolkit_NVCC_EXECUTABLE
+ NAMES nvcc nvcc.exe
+ PATHS ${CUDAToolkit_BIN_DIR}
+ NO_DEFAULT_PATH
+ )
+endif()
+
+# Search using CUDAToolkit_ROOT
+find_program(CUDAToolkit_NVCC_EXECUTABLE
+ NAMES nvcc nvcc.exe
+ PATHS ENV CUDA_PATH
+ PATH_SUFFIXES bin
+)
+
+# If the user specified CUDAToolkit_ROOT but nvcc could not be found, this is an error.
+if (NOT CUDAToolkit_NVCC_EXECUTABLE AND (DEFINED CUDAToolkit_ROOT OR DEFINED ENV{CUDAToolkit_ROOT}))
+ # Declare error messages now, print later depending on find_package args.
+ set(fail_base "Could not find nvcc executable in path specified by")
+ set(cuda_root_fail "${fail_base} CUDAToolkit_ROOT=${CUDAToolkit_ROOT}")
+ set(env_cuda_root_fail "${fail_base} environment variable CUDAToolkit_ROOT=$ENV{CUDAToolkit_ROOT}")
+
+ if (CUDAToolkit_FIND_REQUIRED)
+ if (DEFINED CUDAToolkit_ROOT)
+ message(FATAL_ERROR ${cuda_root_fail})
+ elseif (DEFINED ENV{CUDAToolkit_ROOT})
+ message(FATAL_ERROR ${env_cuda_root_fail})
+ endif()
+ else()
+ if (NOT CUDAToolkit_FIND_QUIETLY)
+ if (DEFINED CUDAToolkit_ROOT)
+ message(STATUS ${cuda_root_fail})
+ elseif (DEFINED ENV{CUDAToolkit_ROOT})
+ message(STATUS ${env_cuda_root_fail})
+ endif()
+ endif()
+ set(CUDAToolkit_FOUND FALSE)
+ unset(fail_base)
+ unset(cuda_root_fail)
+ unset(env_cuda_root_fail)
+ return()
+ endif()
+endif()
+
+# CUDAToolkit_ROOT cmake / env variable not specified, try platform defaults.
+#
+# - Linux: /usr/local/cuda-X.Y
+# - macOS: /Developer/NVIDIA/CUDA-X.Y
+# - Windows: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\vX.Y
+#
+# We will also search the default symlink location /usr/local/cuda first since
+# if CUDAToolkit_ROOT is not specified, it is assumed that the symlinked
+# directory is the desired location.
+if (NOT CUDAToolkit_NVCC_EXECUTABLE)
+ if (UNIX)
+ if (NOT APPLE)
+ set(platform_base "/usr/local/cuda-")
+ else()
+ set(platform_base "/Developer/NVIDIA/CUDA-")
+ endif()
+ else()
+ set(platform_base "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v")
+ endif()
+
+ # Build out a descending list of possible cuda installations, e.g.
+ file(GLOB possible_paths "${platform_base}*")
+ # Iterate the glob results and create a descending list.
+ set(possible_versions)
+ foreach (p ${possible_paths})
+ # Extract version number from end of string
+ string(REGEX MATCH "[0-9][0-9]?\\.[0-9]$" p_version ${p})
+ if (IS_DIRECTORY ${p} AND p_version)
+ list(APPEND possible_versions ${p_version})
+ endif()
+ endforeach()
+
+ # Cannot use list(SORT) because that is alphabetical, we need numerical.
+ # NOTE: this is not an efficient sorting strategy. But even if a user had
+ # every possible version of CUDA installed, this wouldn't create any
+ # significant overhead.
+ set(versions)
+ foreach (v ${possible_versions})
+ list(LENGTH versions num_versions)
+ # First version, nothing to compare with so just append.
+ if (num_versions EQUAL 0)
+ list(APPEND versions ${v})
+ else()
+ # Loop through list. Insert at an index when comparison is
+ # VERSION_GREATER since we want a descending list. Duplicates will not
+ # happen since this came from a glob list of directories.
+ set(i 0)
+ set(early_terminate FALSE)
+ while (i LESS num_versions)
+ list(GET versions ${i} curr)
+ if (v VERSION_GREATER curr)
+ list(INSERT versions ${i} ${v})
+ set(early_terminate TRUE)
+ break()
+ endif()
+ math(EXPR i "${i} + 1")
+ endwhile()
+ # If it did not get inserted, place it at the end.
+ if (NOT early_terminate)
+ list(APPEND versions ${v})
+ endif()
+ endif()
+ endforeach()
+
+ # With a descending list of versions, populate possible paths to search.
+ set(search_paths)
+ foreach (v ${versions})
+ list(APPEND search_paths "${platform_base}${v}")
+ endforeach()
+
+ # Force the global default /usr/local/cuda to the front on Unix.
+ if (UNIX)
+ list(INSERT search_paths 0 "/usr/local/cuda")
+ endif()
+
+ # Now search for nvcc again using the platform default search paths.
+ find_program(CUDAToolkit_NVCC_EXECUTABLE
+ NAMES nvcc nvcc.exe
+ PATHS ${search_paths}
+ PATH_SUFFIXES bin
+ )
+
+ # We are done with these variables now, cleanup for caller.
+ unset(platform_base)
+ unset(possible_paths)
+ unset(possible_versions)
+ unset(versions)
+ unset(i)
+ unset(early_terminate)
+ unset(search_paths)
+
+ if (NOT CUDAToolkit_NVCC_EXECUTABLE)
+ if (CUDAToolkit_FIND_REQUIRED)
+ message(FATAL_ERROR "Could not find nvcc, please set CUDAToolkit_ROOT.")
+ elseif(NOT CUDAToolkit_FIND_QUIETLY)
+ message(STATUS "Could not find nvcc, please set CUDAToolkit_ROOT.")
+ endif()
+
+ set(CUDAToolkit_FOUND FALSE)
+ return()
+ endif()
+endif()
+
+if(NOT CUDAToolkit_BIN_DIR AND CUDAToolkit_NVCC_EXECUTABLE)
+ get_filename_component(cuda_dir "${CUDAToolkit_NVCC_EXECUTABLE}" DIRECTORY)
+ set(CUDAToolkit_BIN_DIR "${cuda_dir}" CACHE PATH "" FORCE)
+ unset(cuda_dir)
+endif()
+
+if(CUDAToolkit_NVCC_EXECUTABLE AND
+ CUDAToolkit_NVCC_EXECUTABLE STREQUAL CMAKE_CUDA_COMPILER)
+ # Need to set these based off the already computed CMAKE_CUDA_COMPILER_VERSION value
+ # This if statement will always match, but is used to provide variables for MATCH 1,2,3...
+ if(CMAKE_CUDA_COMPILER_VERSION MATCHES [=[([0-9]+)\.([0-9]+)\.([0-9]+)]=])
+ set(CUDAToolkit_VERSION_MAJOR "${CMAKE_MATCH_1}")
+ set(CUDAToolkit_VERSION_MINOR "${CMAKE_MATCH_2}")
+ set(CUDAToolkit_VERSION_PATCH "${CMAKE_MATCH_3}")
+ set(CUDAToolkit_VERSION "${CMAKE_CUDA_COMPILER_VERSION}")
+ endif()
+else()
+ # Compute the version by invoking nvcc
+ execute_process (COMMAND ${CUDAToolkit_NVCC_EXECUTABLE} "--version" OUTPUT_VARIABLE NVCC_OUT)
+ if(NVCC_OUT MATCHES [=[ V([0-9]+)\.([0-9]+)\.([0-9]+)]=])
+ set(CUDAToolkit_VERSION_MAJOR "${CMAKE_MATCH_1}")
+ set(CUDAToolkit_VERSION_MINOR "${CMAKE_MATCH_2}")
+ set(CUDAToolkit_VERSION_PATCH "${CMAKE_MATCH_3}")
+ set(CUDAToolkit_VERSION "${CMAKE_MATCH_1}.${CMAKE_MATCH_2}.${CMAKE_MATCH_3}")
+ endif()
+ unset(NVCC_OUT)
+endif()
+
+
+get_filename_component(CUDAToolkit_ROOT_DIR ${CUDAToolkit_BIN_DIR} DIRECTORY ABSOLUTE)
+
+# Now that we have the real ROOT_DIR, find components inside it.
+list(APPEND CMAKE_PREFIX_PATH ${CUDAToolkit_ROOT_DIR})
+
+# Find the include/ directory
+find_path(CUDAToolkit_INCLUDE_DIR
+ NAMES cuda_runtime.h
+)
+
+# And find the CUDA Runtime Library libcudart
+find_library(CUDA_CUDART
+ NAMES cudart
+ PATH_SUFFIXES lib64 lib/x64
+)
+if (NOT CUDA_CUDART AND NOT CUDAToolkit_FIND_QUIETLY)
+ message(STATUS "Unable to find cudart library.")
+endif()
+
+unset(CUDAToolkit_ROOT_DIR)
+list(REMOVE_AT CMAKE_PREFIX_PATH -1)
+
+#-----------------------------------------------------------------------------
+# Perform version comparison and validate all required variables are set.
+# MXNET NOTE: This differs from CMake source by ${CMAKE_CURRENT_LIST_DIR}
+# replaced with ${CMAKE_ROOT}/Modules
+include(${CMAKE_ROOT}/Modules/FindPackageHandleStandardArgs.cmake)
+find_package_handle_standard_args(CUDAToolkit
+ REQUIRED_VARS
+ CUDAToolkit_INCLUDE_DIR
+ CUDA_CUDART
+ CUDAToolkit_NVCC_EXECUTABLE
+ VERSION_VAR
+ CUDAToolkit_VERSION
+)
+
+#-----------------------------------------------------------------------------
+# Construct result variables
+if(CUDAToolkit_FOUND)
+ set(CUDAToolkit_INCLUDE_DIRS ${CUDAToolkit_INCLUDE_DIR})
+ get_filename_component(CUDAToolkit_LIBRARY_DIR ${CUDA_CUDART} DIRECTORY ABSOLUTE)
+endif()
+
+#-----------------------------------------------------------------------------
+# Construct import targets
+if(CUDAToolkit_FOUND)
+
+ function(find_and_add_cuda_import_lib lib_name)
+
+ if(ARGC GREATER 1)
+ set(search_names ${ARGN})
+ else()
+ set(search_names ${lib_name})
+ endif()
+
+ find_library(CUDA_${lib_name}_LIBRARY
+ NAMES ${search_names}
+ PATHS ${CUDAToolkit_LIBRARY_DIR}
+ ENV CUDA_PATH
+ PATH_SUFFIXES nvidia/current lib64 lib/x64 lib
+ )
+
+ if (NOT CUDA::${lib_name} AND CUDA_${lib_name}_LIBRARY)
+ add_library(CUDA::${lib_name} IMPORTED INTERFACE)
+ target_include_directories(CUDA::${lib_name} SYSTEM INTERFACE "${CUDAToolkit_INCLUDE_DIRS}")
+ target_link_libraries(CUDA::${lib_name} INTERFACE "${CUDA_${lib_name}_LIBRARY}")
+ endif()
+ endfunction()
+
+ function(add_cuda_link_dependency lib_name)
+ foreach(dependency IN LISTS ${ARGN})
+ target_link_libraries(CUDA::${lib_name} INTERFACE CUDA::${dependency})
+ endforeach()
+ endfunction()
+
+ add_library(CUDA::toolkit IMPORTED INTERFACE)
+ target_include_directories(CUDA::toolkit SYSTEM INTERFACE "${CUDAToolkit_INCLUDE_DIRS}")
+ target_link_directories(CUDA::toolkit INTERFACE "${CUDAToolkit_LIBRARY_DIR}")
+
+
+ find_and_add_cuda_import_lib(cuda_driver cuda)
+
+ find_and_add_cuda_import_lib(cudart)
+ find_and_add_cuda_import_lib(cudart_static)
+
+ foreach (cuda_lib cublas cufft cufftw curand cusolver cusparse nvgraph nvjpeg)
+ find_and_add_cuda_import_lib(${cuda_lib})
+ add_cuda_link_dependency(${cuda_lib} cudart)
+
+ find_and_add_cuda_import_lib(${cuda_lib}_static)
+ add_cuda_link_dependency(${cuda_lib}_static cudart_static)
+ endforeach()
+
+ # cuSOLVER depends on cuBLAS, and cuSPARSE
+ add_cuda_link_dependency(cusolver cublas cusparse)
+ add_cuda_link_dependency(cusolver_static cublas_static cusparse)
+
+ # nvGRAPH depends on cuRAND, and cuSOLVER.
+ add_cuda_link_dependency(nvgraph curand cusolver)
+ add_cuda_link_dependency(nvgraph_static curand_static cusolver_static)
+
+ find_and_add_cuda_import_lib(nppc)
+ find_and_add_cuda_import_lib(nppc_static)
+
+ add_cuda_link_dependency(nppc cudart)
+ add_cuda_link_dependency(nppc_static cudart_static culibos)
+
+ # Process the majority of the NPP libraries.
+ foreach (cuda_lib nppial nppicc nppidei nppif nppig nppim nppist nppitc npps nppicom nppisu)
+ find_and_add_cuda_import_lib(${cuda_lib})
+ find_and_add_cuda_import_lib(${cuda_lib}_static)
+ add_cuda_link_dependency(${cuda_lib} nppc)
+ add_cuda_link_dependency(${cuda_lib}_static nppc_static)
+ endforeach()
+
+ find_and_add_cuda_import_lib(nvrtc)
+ add_cuda_link_dependency(nvrtc cuda_driver)
+
+ find_and_add_cuda_import_lib(nvml nvidia-ml nvml)
+
+ if(WIN32)
+ # nvtools can be installed outside the CUDA toolkit directory
+ # so prefer the NVTOOLSEXT_PATH windows only environment variable
+ # In addition on windows the most common name is nvToolsExt64_1
+ find_library(CUDA_nvToolsExt_LIBRARY
+ NAMES nvToolsExt64_1 nvToolsExt64 nvToolsExt
+ PATHS ENV NVTOOLSEXT_PATH
+ ENV CUDA_PATH
+ PATH_SUFFIXES lib/x64 lib
+ )
+ endif()
+ find_and_add_cuda_import_lib(nvToolsExt nvToolsExt nvToolsExt64)
+
+ add_cuda_link_dependency(nvToolsExt cudart)
+
+ find_and_add_cuda_import_lib(OpenCL)
+
+ find_and_add_cuda_import_lib(culibos)
+ if(TARGET CUDA::culibos)
+ foreach (cuda_lib cublas cufft cusparse curand nvjpeg)
+ add_cuda_link_dependency(${cuda_lib}_static culibos)
+ endforeach()
+ endif()
+
+endif()
diff --git a/cmake/Modules/FindCUDNN.cmake b/cmake/Modules/FindCUDNN.cmake
new file mode 100644
index 0000000..a8fda5c
--- /dev/null
+++ b/cmake/Modules/FindCUDNN.cmake
@@ -0,0 +1,33 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+include(FindPackageHandleStandardArgs)
+
+set(CUDNN_ROOT "/usr/local/cuda/include" CACHE PATH "cuDNN root folder")
+
+find_path(CUDNN_INCLUDE cudnn.h
+ PATHS ${CUDNN_ROOT} $ENV{CUDNN_ROOT}
+ DOC "Path to cuDNN include directory." )
+
+find_library(CUDNN_LIBRARY NAMES libcudnn.so cudnn.lib # libcudnn_static.a
+ PATHS ${CUDNN_ROOT} $ENV{CUDNN_ROOT} ${CUDNN_INCLUDE}
+ PATH_SUFFIXES lib lib/x64 cuda/lib cuda/lib64 lib/x64
+ DOC "Path to cuDNN library.")
+
+find_package_handle_standard_args(CUDNN DEFAULT_MSG CUDNN_LIBRARY CUDNN_INCLUDE)
+
+mark_as_advanced(CUDNN_ROOT CUDNN_INCLUDE CUDNN_LIBRARY)
diff --git a/cmake/Modules/FindMKL.cmake b/cmake/Modules/FindMKL.cmake
index 51fca23..51eff8f 100644
--- a/cmake/Modules/FindMKL.cmake
+++ b/cmake/Modules/FindMKL.cmake
@@ -45,11 +45,11 @@ set(INTEL_ROOT "/opt/intel" CACHE PATH "Folder contains intel libs")
# ---[ Options
- mxnet_option(MKL_USE_SINGLE_DYNAMIC_LIBRARY "Use single dynamic library interface" ON)
- mxnet_option(MKL_USE_STATIC_LIBS "Use static libraries" OFF IF NOT MKL_USE_SINGLE_DYNAMIC_LIBRARY)
- mxnet_option(MKL_MULTI_THREADED "Use multi-threading" ON IF NOT MKL_USE_SINGLE_DYNAMIC_LIBRARY)
- mxnet_option(MKL_USE_ILP64 "Use ilp64 data model" OFF)
- mxnet_option(MKL_USE_CLUSTER "Use cluster functions" OFF IF CMAKE_SIZEOF_VOID_P EQUAL 4)
+ option(MKL_USE_SINGLE_DYNAMIC_LIBRARY "Use single dynamic library interface" ON)
+ cmake_dependent_option(MKL_USE_STATIC_LIBS "Use static libraries" OFF "NOT MKL_USE_SINGLE_DYNAMIC_LIBRARY" OFF)
+ cmake_dependent_option(MKL_MULTI_THREADED "Use multi-threading" ON "NOT MKL_USE_SINGLE_DYNAMIC_LIBRARY" OFF)
+ option(MKL_USE_ILP64 "Use ilp64 data model" OFF)
+ cmake_dependent_option(MKL_USE_CLUSTER "Use cluster functions" OFF "CMAKE_SIZEOF_VOID_P EQUAL 4" OFF)
find_path(MKL_ROOT include/mkl.h PATHS $ENV{MKL_ROOT} ${INTEL_ROOT}/mkl
DOC "Folder contains MKL")
diff --git a/cmake/Utils.cmake b/cmake/Utils.cmake
index 6b427db..294e7cf 100644
--- a/cmake/Utils.cmake
+++ b/cmake/Utils.cmake
@@ -179,47 +179,6 @@ function(mxnet_parse_header_single_define LIBNAME HDR_PATH VARNAME)
endif()
endfunction()
-########################################################################################################
-# An option that the user can select. Can accept condition to control when option is available for user.
-# Usage:
-# mxnet_option(<option_variable> "doc string" <initial value or boolean expression> [IF <condition>])
-function(mxnet_option variable description value)
- set(__value ${value})
- set(__condition "")
- set(__varname "__value")
- foreach(arg ${ARGN})
- if(arg STREQUAL "IF" OR arg STREQUAL "if")
- set(__varname "__condition")
- else()
- list(APPEND ${__varname} ${arg})
- endif()
- endforeach()
- unset(__varname)
- if("${__condition}" STREQUAL "")
- set(__condition 2 GREATER 1)
- endif()
-
- if(${__condition})
- if("${__value}" MATCHES ";")
- if(${__value})
- option(${variable} "${description}" ON)
- else()
- option(${variable} "${description}" OFF)
- endif()
- elseif(DEFINED ${__value})
- if(${__value})
- option(${variable} "${description}" ON)
- else()
- option(${variable} "${description}" OFF)
- endif()
- else()
- option(${variable} "${description}" ${__value})
- endif()
- else()
- option(${variable} "${description}" OFF)
- endif()
-endfunction()
-
################################################################################################
# Utility macro for comparing two lists. Used for CMake debugging purposes
# Usage:
diff --git a/contrib/tvmop/compile.py b/contrib/tvmop/compile.py
index b025421..43657f2 100644
--- a/contrib/tvmop/compile.py
+++ b/contrib/tvmop/compile.py
@@ -50,6 +50,11 @@ def get_cuda_arch(arch):
if len(arch) == 0:
return None
+ # the arch string is of format '-gencode;arch=compute_XX,code=sm_XX'
+ # this format is computed by CMake CUDA_SELECT_NVCC_ARCH_FLAGS
+ if arch.startswith('-gencode;'):
+ return arch.split(';')
+
# the arch string contains '-arch=sm_xx'
flags = arch.split()
for flag in flags:
diff --git a/tools/windowsbuild/README.md b/tools/windowsbuild/README.md
new file mode 100644
index 0000000..7d8e7cf
--- /dev/null
+++ b/tools/windowsbuild/README.md
@@ -0,0 +1,19 @@
+<!--- Licensed to the Apache Software Foundation (ASF) under one -->
+<!--- or more contributor license agreements. See the NOTICE file -->
+<!--- distributed with this work for additional information -->
+<!--- regarding copyright ownership. The ASF licenses this file -->
+<!--- to you under the Apache License, Version 2.0 (the -->
+<!--- "License"); you may not use this file except in compliance -->
+<!--- with the License. You may obtain a copy of the License at -->
+
+<!--- http://www.apache.org/licenses/LICENSE-2.0 -->
+
+<!--- Unless required by applicable law or agreed to in writing, -->
+<!--- software distributed under the License is distributed on an -->
+<!--- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -->
+<!--- KIND, either express or implied. See the License for the -->
+<!--- specific language governing permissions and limitations -->
+<!--- under the License. -->
+
+Due to the DLL size limitation on Windows, the MXNet DLL is split into multiple DLLs, one per CUDA architecture.
+Reference https://github.com/apache/incubator-mxnet/pull/16980
\ No newline at end of file
diff --git a/tools/windowsbuild/gen_warp.cpp b/tools/windowsbuild/gen_warp.cpp
new file mode 100644
index 0000000..2d90eaf
--- /dev/null
+++ b/tools/windowsbuild/gen_warp.cpp
@@ -0,0 +1,209 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <iostream>
+#include <io.h>
+#include <Windows.h>
+#include <cstdint>
+#include <memory>
+#include <vector>
+#include <string>
+#include <iostream>
+#include <fstream>
+
+#define IMAGE_SIZEOF_SIGNATURE 4
+
+
+DWORD rva_to_foa(IN DWORD RVA, IN PIMAGE_SECTION_HEADER section_header)
+{
+
+ size_t count = 0;
+ for (count = 1; RVA > (section_header->VirtualAddress + section_header->Misc.VirtualSize); count++, section_header++);
+
+ DWORD FOA = RVA - section_header->VirtualAddress + section_header->PointerToRawData;
+
+ return FOA;
+}
+
// printf-style formatting into a std::string.
// Fixes vs. the original:
//  * the measuring pass called std::snprintf with a va_list (undefined
//    behavior) — it must be std::vsnprintf;
//  * a va_list is consumed by a v*printf call, so the second pass needs a
//    va_copy'd list;
//  * va_end was placed after the return statements and never executed.
std::string format(const char* format, ...)
{
    va_list args;
    va_start(args, format);
#ifndef _MSC_VER
    // First pass measures, second pass writes; each needs its own va_list.
    va_list args_copy;
    va_copy(args_copy, args);
    const int size = std::vsnprintf(nullptr, 0, format, args_copy) + 1;  // +1 for '\0'
    va_end(args_copy);
    std::unique_ptr<char[]> buf(new char[size]);
    std::vsnprintf(buf.get(), size, format, args);
    va_end(args);
    return std::string(buf.get(), buf.get() + size - 1);  // drop the '\0'
#else
    const int size = _vscprintf(format, args) + 1;
    std::unique_ptr<char[]> buf(new char[size]);
    vsnprintf_s(buf.get(), size, _TRUNCATE, format, args);
    va_end(args);
    return std::string(buf.get());
#endif
}
+
+int main(int argc, char* argv[])
+{
+
+ if (argc != 2)
+ {
+ return 0;
+ }
+
+ //open file
+ const HANDLE h_file = CreateFile(
+ argv[1],
+ GENERIC_READ ,
+ FILE_SHARE_READ ,
+ nullptr,
+ OPEN_EXISTING,
+ FILE_ATTRIBUTE_NORMAL,
+ nullptr);
+
+
+ DWORD size_high;
+ const DWORD size_low = GetFileSize(h_file, &size_high);
+
+ uint64_t dll_size = ((uint64_t(size_high)) << 32) + (uint64_t)size_low;
+
+ // Create File Mapping
+ const HANDLE h_map_file = CreateFileMapping(
+ h_file,
+ nullptr,
+ PAGE_READONLY,
+ size_high,
+ size_low,
+ nullptr);
+ if (h_map_file == INVALID_HANDLE_VALUE || h_map_file == nullptr)
+ {
+ std::cout << "error";
+ CloseHandle(h_file);
+ return 0;
+ }
+
+ //Map File to memory
+ void* pv_file = MapViewOfFile(
+ h_map_file,
+ FILE_MAP_READ,
+ 0,
+ 0,
+ 0);
+
+ if (pv_file == nullptr)
+ {
+ std::cout << "error";
+ CloseHandle(h_file);
+ return 0;
+ }
+
+ uint8_t* p = static_cast<uint8_t*>(pv_file);
+
+
+ PIMAGE_DOS_HEADER dos_header = reinterpret_cast<PIMAGE_DOS_HEADER>(p);
+
+ const PIMAGE_NT_HEADERS nt_headers = reinterpret_cast<const PIMAGE_NT_HEADERS>(p + dos_header->e_lfanew);
+
+ const PIMAGE_FILE_HEADER file_header = &nt_headers->FileHeader;
+
+ PIMAGE_OPTIONAL_HEADER optional_header = (PIMAGE_OPTIONAL_HEADER)(&nt_headers->OptionalHeader);
+
+ const DWORD file_alignment = optional_header->FileAlignment;
+
+
+ PIMAGE_SECTION_HEADER section_table =
+ reinterpret_cast<PIMAGE_SECTION_HEADER>(p + dos_header->e_lfanew +
+ IMAGE_SIZEOF_SIGNATURE +
+ IMAGE_SIZEOF_FILE_HEADER +
+ file_header->SizeOfOptionalHeader);
+
+ DWORD export_foa = rva_to_foa(optional_header->DataDirectory[0].VirtualAddress, section_table);
+
+ PIMAGE_EXPORT_DIRECTORY export_directory = (PIMAGE_EXPORT_DIRECTORY)(p + export_foa);
+
+
+ DWORD name_list_foa = rva_to_foa(export_directory->AddressOfNames, section_table);
+
+ PDWORD name_list = (PDWORD)(p + name_list_foa);
+
+
+
+
+ std::vector<std::string> func_list;
+
+ for (size_t i = 0; i < export_directory->NumberOfNames; i++, name_list++)
+ {
+
+ DWORD name_foa = rva_to_foa(* name_list, section_table);
+ char* name = (char*)(p + name_foa);
+ func_list.emplace_back(name);
+
+ }
+
+
+ UnmapViewOfFile(pv_file);
+ CloseHandle(h_map_file);
+ CloseHandle(h_file);
+
+
+ std::ofstream gen_cpp_obj;
+ gen_cpp_obj.open("warp_gen_cpp.cpp", std::ios::out | std::ios::trunc);
+ gen_cpp_obj << "#include <Windows.h>\n";
+ gen_cpp_obj << "extern \"C\" \n{\n";
+
+
+ for (size_t i = 0; i < func_list.size(); ++i)
+ {
+ auto fun = func_list[i];
+ gen_cpp_obj << format("void * warp_point_%d;\n", i);
+ gen_cpp_obj << format("#pragma comment(linker, \"/export:%s=warp_func_%d\")\n", fun.c_str(), i);
+ gen_cpp_obj << format("void warp_func_%d();\n", i);
+ gen_cpp_obj << ("\n");
+ }
+ gen_cpp_obj << ("}\n");
+
+
+ gen_cpp_obj << ("void load_function(HMODULE hm)\n{\n");
+ for (size_t i = 0; i < func_list.size(); ++i)
+ {
+ auto fun = func_list[i];
+ gen_cpp_obj << format("warp_point_%d = (void*)GetProcAddress(hm, \"%s\");\n", i, fun.c_str());
+ }
+ gen_cpp_obj << ("}\n");
+
+ gen_cpp_obj.close();
+
+
+
+ std::ofstream gen_asm_obj;
+ gen_asm_obj.open("warp_gen.asm", std::ios::out | std::ios::trunc);
+ for (size_t i = 0; i < func_list.size(); ++i)
+ {
+ auto fun = func_list[i];
+ gen_asm_obj << format("EXTERN warp_point_%d:QWORD;\n", i);
+ }
+ gen_asm_obj << ".CODE\n";
+ for (size_t i = 0; i < func_list.size(); ++i)
+ {
+ auto fun = func_list[i];
+ gen_asm_obj << format("warp_func_%d PROC\njmp warp_point_%d;\nwarp_func_%d ENDP\n", i,i,i);
+ }
+ gen_asm_obj << "END\n";
+ gen_asm_obj.close();
+}
diff --git a/tools/windowsbuild/warp_dll.cpp b/tools/windowsbuild/warp_dll.cpp
new file mode 100644
index 0000000..6a89a4e
--- /dev/null
+++ b/tools/windowsbuild/warp_dll.cpp
@@ -0,0 +1,151 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <cstdio>
+#include <iostream>
+#include <cuda_runtime.h>
+#include <algorithm>
+#include <Windows.h>
+#include <io.h>
+#include <vector>
+#include <regex>
+#include <shlwapi.h>
+
+
+extern "C" IMAGE_DOS_HEADER __ImageBase;
+
+
+std::vector<int> find_mxnet_dll()
+{
+ std::vector<int> version;
+ intptr_t handle;
+
+ _wfinddata_t findData{};
+ std::wregex reg(L".*?mxnet_([0-9]+)\\.dll");
+
+ HMODULE hModule = reinterpret_cast<HMODULE>(&__ImageBase);
+ WCHAR szPathBuffer[MAX_PATH] = { 0 };
+ GetModuleFileNameW(hModule, szPathBuffer, MAX_PATH);
+
+ PathRemoveFileSpecW(szPathBuffer);
+ wcscat_s(szPathBuffer, L"\\mxnet_*.dll");
+
+ handle = _wfindfirst(szPathBuffer, &findData);
+ if (handle == -1)
+ {
+ return version;
+ }
+
+ do
+ {
+ if (!(findData.attrib & _A_SUBDIR) || wcscmp(findData.name, L".") != 0 || wcscmp(findData.name, L"..") != 0)
+ {
+ std::wstring str(findData.name);
+ std::wsmatch base_match;
+ if(std::regex_match(str, base_match, reg))
+ {
+ if (base_match.size() == 2) {
+ std::wssub_match base_sub_match = base_match[1];
+ std::wstring base = base_sub_match.str();
+ version.push_back(std::stoi(base)) ;
+ }
+ }
+ }
+ } while (_wfindnext(handle, &findData) == 0);
+
+ _findclose(handle);
+ std::sort(version.begin(), version.end());
+ return version;
+}
+
+int find_version()
+{
+ std::vector<int> known_sm = find_mxnet_dll();
+ int count = 0;
+ int version = 75;
+ if (cudaSuccess != cudaGetDeviceCount(&count))
+ {
+ return 30;
+ }
+ if (count == 0)
+ {
+ return 30;
+ }
+
+
+ for (int device = 0; device < count; ++device)
+ {
+ cudaDeviceProp prop{};
+ if (cudaSuccess == cudaGetDeviceProperties(&prop, device))
+ {
+ version = std::min(version, prop.major * 10 + prop.minor);
+ }
+ }
+
+ for (int i = known_sm.size() -1 ; i >=0; --i)
+ {
+ if(known_sm[i]<= version)
+ {
+ return known_sm[i];
+ }
+ }
+
+ return version;
+}
+
+void load_function(HMODULE hm);
+
+void mxnet_init()
+{
+ int version = find_version();
+ WCHAR dll_name[MAX_PATH];
+ wsprintfW(dll_name, L"mxnet_%d.dll", version);
+ HMODULE hm = LoadLibraryW(dll_name);
+ load_function(hm);
+}
+
+
+extern "C" BOOL WINAPI DllMain(
+ HINSTANCE const instance, // handle to DLL module
+ DWORD const reason, // reason for calling function
+ LPVOID const reserved) // reserved
+{
+ // Perform actions based on the reason for calling.
+ switch (reason)
+ {
+ case DLL_PROCESS_ATTACH:
+ mxnet_init();
+ // Initialize once for each new process.
+ // Return FALSE to fail DLL load.
+ break;
+
+ case DLL_THREAD_ATTACH:
+ // Do thread-specific initialization.
+ break;
+
+ case DLL_THREAD_DETACH:
+ // Do thread-specific cleanup.
+ break;
+
+ case DLL_PROCESS_DETACH:
+ // Perform any necessary cleanup.
+ break;
+ }
+ return TRUE; // Successful DLL_PROCESS_ATTACH.
+}
\ No newline at end of file