You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by GitBox <gi...@apache.org> on 2017/11/11 02:55:48 UTC

[GitHub] cjolivier01 closed pull request #8572: use first class cuda with cmake 3.9 and cuda9.0 support

cjolivier01 closed pull request #8572: use first class cuda with cmake 3.9 and cuda9.0 support
URL: https://github.com/apache/incubator-mxnet/pull/8572
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 539515b3a2..af681d00aa 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,17 +1,35 @@
 cmake_minimum_required(VERSION 3.0.2)
 
-project(mxnet C CXX)
+if((${CMAKE_VERSION} VERSION_GREATER "3.9.0") OR (${CMAKE_VERSION} VERSION_EQUAL "3.9.0"))
+  set(FIRST_CUDA TRUE)
+else()
+  set(FIRST_CUDA FALSE)
+endif()
+include(cmake/Utils.cmake)
 
-if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/build/private/local_config.cmake)
-  include(${CMAKE_CURRENT_SOURCE_DIR}/build/private/local_config.cmake)
+# Order matters: USE_CUDA has to be known before project() is called, so these options come first.
+mxnet_option(USE_CUDA             "Build with CUDA support"   ON)
+mxnet_option(USE_OLDCMAKECUDA           "Build with old cmake cuda" OFF)
+if(USE_CUDA)
+  add_definitions(-DMSHADOW_USE_CUDA=1)
+  IF(FIRST_CUDA AND (NOT USE_OLDCMAKECUDA))
+    set(__cuda_toolset "7.5" "8.0" "9.0")
+    set(CUDA_TOOLSET "8.0" CACHE STRING "Select CUDA Version.")
+    set_property( CACHE CUDA_TOOLSET PROPERTY STRINGS "" ${__cuda_toolset} )
+    set(CMAKE_GENERATOR_TOOLSET "cuda=${CUDA_TOOLSET},host=x64")
+    project(mxnet C CXX CUDA)
+  else()
+    project(mxnet C CXX)
+    set(FIRST_CUDA FALSE)
+  endif()
+else()
+  project(mxnet C CXX)
+  add_definitions(-DMSHADOW_USE_CUDA=0)
 endif()
 
-set(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake/Modules;${CMAKE_MODULE_PATH}")
 
-include(cmake/Utils.cmake)
 mxnet_option(USE_OPENCV           "Build with OpenCV support" ON)
 mxnet_option(USE_OPENMP           "Build with Openmp support" ON)
-mxnet_option(USE_CUDA             "Build with CUDA support"   ON)
 mxnet_option(USE_CUDNN            "Build with cudnn support"  ON) # one could set CUDNN_ROOT for search path
 mxnet_option(USE_LAPACK           "Build with lapack support" ON IF NOT MSVC)
 mxnet_option(USE_MKL_IF_AVAILABLE "Use MKL if found" ON)
@@ -29,6 +47,17 @@ mxnet_option(USE_GPROF            "Compile with gprof (profiling) flag" OFF)
 mxnet_option(USE_VTUNE            "Enable use of Intel Amplifier XE (VTune)" OFF) # one could set VTUNE_ROOT for search path
 mxnet_option(INSTALL_EXAMPLES     "Install the example source files." OFF)
 
+
+
+if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/build/private/local_config.cmake)
+  include(${CMAKE_CURRENT_SOURCE_DIR}/build/private/local_config.cmake)
+endif()
+
+set(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake/Modules;${CMAKE_MODULE_PATH}")
+
+
+
+
 SET(EXTRA_OPERATORS "" CACHE PATH "EXTRA OPERATORS PATH")
 
 if("$ENV{VERBOSE}" STREQUAL "1")
@@ -128,14 +157,20 @@ endif()
 include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include)
 include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src)
 
-if(EXISTS ${PROJECT_SOURCE_DIR}/mshadow/cmake)
-  include(mshadow/cmake/mshadow.cmake)
+if(FIRST_CUDA)
+  include(cmake/ChooseBlas.cmake)
   include(mshadow/cmake/Utils.cmake)
-  include(mshadow/cmake/Cuda.cmake)
+  include(cmake/FirstClassLangCuda.cmake)
 else()
-  include(mshadowUtils)
-  include(Cuda)
-  include(mshadow)
+  if(EXISTS ${PROJECT_SOURCE_DIR}/mshadow/cmake)
+    include(mshadow/cmake/mshadow.cmake)
+    include(mshadow/cmake/Utils.cmake)
+    include(mshadow/cmake/Cuda.cmake)
+  else()
+    include(mshadowUtils)
+    include(Cuda)
+    include(mshadow)
+  endif()
 endif()
 
 list(APPEND mxnet_LINKER_LIBS ${mshadow_LINKER_LIBS})
@@ -241,7 +276,7 @@ if(USE_LAPACK)
   list(APPEND mxnet_LINKER_LIBS lapack)
 else(USE_LAPACK)
   # Workaround for Windows until using new Jenkinsfile.
-  if(USE_BLAS STREQUAL "open")
+  if(BLAS STREQUAL "Open" OR BLAS STREQUAL "open")
     add_definitions(-DMXNET_USE_LAPACK=1)
   endif()
 endif()
@@ -372,36 +407,46 @@ if(MSVC)
 endif()
 
 if(USE_CUDA)
-  list(APPEND CUDA_INCLUDE_DIRS ${INCLUDE_DIRECTORIES})
-  # define preprocessor macro so that we will not include the generated forcelink header
-  mshadow_cuda_compile(cuda_objs ${CUDA})
-  if(MSVC)
-    FIND_LIBRARY(CUDA_nvrtc_LIBRARY nvrtc "${CUDA_TOOLKIT_ROOT_DIR}/lib/x64"  "${CUDA_TOOLKIT_ROOT_DIR}/lib/win32")
-    list(APPEND mxnet_LINKER_LIBS ${CUDA_nvrtc_LIBRARY})
-    set(CUDA_cuda_LIBRARY "${CUDA_nvrtc_LIBRARY}/../cuda.lib")
-    list(APPEND mxnet_LINKER_LIBS ${CUDA_cuda_LIBRARY})
-    FIND_LIBRARY(CUDA_cufft_LIBRARY nvrtc "${CUDA_TOOLKIT_ROOT_DIR}/lib/x64"  "${CUDA_TOOLKIT_ROOT_DIR}/lib/win32")
-    list(APPEND mxnet_LINKER_LIBS "${CUDA_cufft_LIBRARY}/../cufft.lib") # For fft operator
-    FIND_LIBRARY(CUDA_cusolver_LIBRARY nvrtc "${CUDA_TOOLKIT_ROOT_DIR}/lib/x64"  "${CUDA_TOOLKIT_ROOT_DIR}/lib/win32")
-    list(APPEND mxnet_LINKER_LIBS "${CUDA_cusolver_LIBRARY}/../cusolver.lib") # For cusolver
-  else(MSVC)
-    list(APPEND mxnet_LINKER_LIBS nvrtc cuda cufft cusolver)
-    link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib64")
-  endif()
-  list(APPEND SOURCE ${cuda_objs} ${CUDA})
-  add_definitions(-DMXNET_USE_CUDA=1)
-  if(CUDA_LIBRARY_PATH)
-    if(IS_CONTAINER_BUILD)
-      # In case of building on a production-like build container which may not have Cuda installed
-      if(NOT CMAKE_SYSTEM_HAS_CUDA)
-        # Assuming building in a container that doesn't have CUDA installed (ie CPU-only build machine)
-        # so use the stub cuda driver shared library
-        if(EXISTS ${CUDA_LIBRARY_PATH}/stubs/libcuda.so)
-          link_directories(${CUDA_LIBRARY_PATH}/stubs)
+  if(FIRST_CUDA)
+    mshadow_select_nvcc_arch_flags(NVCC_FLAGS_ARCH)
+    string(REPLACE ";" " " NVCC_FLAGS_ARCH "${NVCC_FLAGS_ARCH}")
+    set(CMAKE_CUDA_FLAGS "${NVCC_FLAGS_ARCH}")
+    set(CMAKE_CUDA_FLAGS_RELEASE "${NVCC_FLAGS_ARCH} -use_fast_math")
+    list(APPEND mxnet_LINKER_LIBS nvrtc cuda cublas cufft cusolver curand)
+    list(APPEND SOURCE ${CUDA})
+    add_definitions(-DMXNET_USE_CUDA=1)
+  else()
+    list(APPEND CUDA_INCLUDE_DIRS ${INCLUDE_DIRECTORIES})
+    # define preprocessor macro so that we will not include the generated forcelink header
+    mshadow_cuda_compile(cuda_objs ${CUDA})
+    if(MSVC)
+        FIND_LIBRARY(CUDA_nvrtc_LIBRARY nvrtc "${CUDA_TOOLKIT_ROOT_DIR}/lib/x64"  "${CUDA_TOOLKIT_ROOT_DIR}/lib/win32")
+        list(APPEND mxnet_LINKER_LIBS ${CUDA_nvrtc_LIBRARY})
+        set(CUDA_cuda_LIBRARY "${CUDA_nvrtc_LIBRARY}/../cuda.lib")
+        list(APPEND mxnet_LINKER_LIBS ${CUDA_cuda_LIBRARY})
+        FIND_LIBRARY(CUDA_cufft_LIBRARY nvrtc "${CUDA_TOOLKIT_ROOT_DIR}/lib/x64"  "${CUDA_TOOLKIT_ROOT_DIR}/lib/win32")
+        list(APPEND mxnet_LINKER_LIBS "${CUDA_cufft_LIBRARY}/../cufft.lib") # For fft operator
+        FIND_LIBRARY(CUDA_cusolver_LIBRARY nvrtc "${CUDA_TOOLKIT_ROOT_DIR}/lib/x64"  "${CUDA_TOOLKIT_ROOT_DIR}/lib/win32")
+        list(APPEND mxnet_LINKER_LIBS "${CUDA_cusolver_LIBRARY}/../cusolver.lib") # For cusolver
+    else(MSVC)
+        list(APPEND mxnet_LINKER_LIBS nvrtc cuda cufft cusolver)
+        link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib64")
+    endif()
+    list(APPEND SOURCE ${cuda_objs} ${CUDA})
+    add_definitions(-DMXNET_USE_CUDA=1)
+    if(CUDA_LIBRARY_PATH)
+        if(IS_CONTAINER_BUILD)
+        # In case of building on a production-like build container which may not have Cuda installed
+        if(NOT CMAKE_SYSTEM_HAS_CUDA)
+            # Assuming building in a container that doesn't have CUDA installed (ie CPU-only build machine)
+            # so use the stub cuda driver shared library
+            if(EXISTS ${CUDA_LIBRARY_PATH}/stubs/libcuda.so)
+            link_directories(${CUDA_LIBRARY_PATH}/stubs)
+            endif()
+        endif()
         endif()
-      endif()
     endif()
-  endif()
+ endif()
 endif()
 
 # unsupported: if caffe is a subdirectory of mxnet, load its CMakeLists.txt as well
@@ -444,6 +489,12 @@ else()
   endif()
 endif()
 
+if(USE_CUDA)
+  if(FIRST_CUDA)
+    target_compile_options(mxnet PUBLIC "$<$<CONFIG:DEBUG>:-Xcompiler=-MTd>")
+    target_compile_options(mxnet PUBLIC "$<$<CONFIG:RELEASE>:-Xcompiler=-MT>")
+  endif()
+endif()
 if(USE_DIST_KVSTORE)
   if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/ps-lite/CMakeLists.txt)
     add_subdirectory("ps-lite")
diff --git a/cmake/ChooseBlas.cmake b/cmake/ChooseBlas.cmake
new file mode 100644
index 0000000000..3a8723a5dd
--- /dev/null
+++ b/cmake/ChooseBlas.cmake
@@ -0,0 +1,58 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set(BLAS "Open" CACHE STRING "Selected BLAS library")
+set_property(CACHE BLAS PROPERTY STRINGS "Atlas;Open;MKL")
+# With USE_MKL_IF_AVAILABLE and MKL found, BLAS becomes "MKL" ("open" when USE_MKLML_MKL is set).
+if(USE_MKL_IF_AVAILABLE)
+  if(NOT MKL_FOUND)
+    find_package(MKL)
+  endif()
+  if(MKL_FOUND)
+    if(USE_MKLML_MKL)
+      set(BLAS "open")
+    else()
+      set(BLAS "MKL")
+    endif()
+  endif()
+endif()
+# Apply the selection: include path, mshadow_LINKER_LIBS entries and MSHADOW_USE_* definitions.
+if(BLAS STREQUAL "Atlas" OR BLAS STREQUAL "atlas")
+  find_package(Atlas REQUIRED)
+  include_directories(SYSTEM ${Atlas_INCLUDE_DIR})
+  list(APPEND mshadow_LINKER_LIBS ${Atlas_LIBRARIES})
+  add_definitions(-DMSHADOW_USE_CBLAS=1)
+  add_definitions(-DMSHADOW_USE_MKL=0)
+elseif(BLAS STREQUAL "Open" OR BLAS STREQUAL "open")
+  find_package(OpenBLAS REQUIRED)
+  include_directories(SYSTEM ${OpenBLAS_INCLUDE_DIR})
+  list(APPEND mshadow_LINKER_LIBS ${OpenBLAS_LIB})
+  add_definitions(-DMSHADOW_USE_CBLAS=1)
+  add_definitions(-DMSHADOW_USE_MKL=0)
+elseif(BLAS STREQUAL "MKL" OR BLAS STREQUAL "mkl")
+  find_package(MKL REQUIRED)
+  include_directories(SYSTEM ${MKL_INCLUDE_DIR})
+  list(APPEND mshadow_LINKER_LIBS ${MKL_LIBRARIES})
+  add_definitions(-DMSHADOW_USE_CBLAS=0)
+  add_definitions(-DMSHADOW_USE_MKL=1)
+elseif(BLAS STREQUAL "apple")
+  find_package(Accelerate REQUIRED)
+  include_directories(SYSTEM ${Accelerate_INCLUDE_DIR})
+  list(APPEND mshadow_LINKER_LIBS ${Accelerate_LIBRARIES})
+  add_definitions(-DMSHADOW_USE_MKL=0)
+  add_definitions(-DMSHADOW_USE_CBLAS=1)
+endif()
\ No newline at end of file
diff --git a/cmake/FirstClassLangCuda.cmake b/cmake/FirstClassLangCuda.cmake
new file mode 100644
index 0000000000..73f0758062
--- /dev/null
+++ b/cmake/FirstClassLangCuda.cmake
@@ -0,0 +1,236 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Helper functions for building CUDA code with CMake's first-class CUDA language support.
+
+include(CheckCXXCompilerFlag)
+check_cxx_compiler_flag("-std=c++11"   SUPPORT_CXX11)
+
+################################################################################################
+# Locates cuDNN (cudnn.h and the cudnn library), using CUDNN_ROOT and $ENV{CUDNN_ROOT} as hints.
+# Kept as a small function instead of a FindcuDNN.cmake module (cuDNN may join the CUDA toolkit).
+# Usage:
+#   detect_cuDNN()
+function(detect_cuDNN)
+  set(CUDNN_ROOT "" CACHE PATH "CUDNN root folder")
+  # Header lookup: CUDNN_INCLUDE receives the directory that contains cudnn.h.
+  find_path(CUDNN_INCLUDE cudnn.h
+            PATHS ${CUDNN_ROOT} $ENV{CUDNN_ROOT}
+            DOC "Path to cuDNN include directory." )
+
+  # Library lookup: the header directory found above is passed as an extra search location.
+  find_library(CUDNN_LIBRARY NAMES libcudnn.so cudnn.lib # libcudnn_static.a
+                             PATHS ${CUDNN_ROOT} $ENV{CUDNN_ROOT} ${CUDNN_INCLUDE}
+                             DOC "Path to cuDNN library.")
+  # HAVE_CUDNN / CUDNN_FOUND are set in the caller's scope only when both lookups succeeded.
+  if(CUDNN_INCLUDE AND CUDNN_LIBRARY)
+    set(HAVE_CUDNN  TRUE PARENT_SCOPE)
+    set(CUDNN_FOUND TRUE PARENT_SCOPE)
+
+    mark_as_advanced(CUDNN_INCLUDE CUDNN_LIBRARY CUDNN_ROOT)
+    message(STATUS "Found cuDNN (include: ${CUDNN_INCLUDE}, library: ${CUDNN_LIBRARY})")
+  endif()
+endfunction()
+
+
+
+################################################################################################
+# Detects the compute capabilities of the GPUs on the build machine by compiling and running a
+# small CUDA probe, then stores the result in <out_variable> in the caller's scope.
+# Usage: mshadow_detect_installed_gpus(out_variable)
+function(mshadow_detect_installed_gpus out_variable)
+  if(NOT CUDA_gpu_detect_output)
+    set(__cufile ${PROJECT_BINARY_DIR}/detect_cuda_archs.cu)
+    # The probe prints "major.minor " per device and returns -1 when no device can be queried.
+    file(WRITE ${__cufile} ""
+      "#include <cstdio>\n"
+      "int main()\n"
+      "{\n"
+      "  int count = 0;\n"
+      "  if (cudaSuccess != cudaGetDeviceCount(&count)) return -1;\n"
+      "  if (count == 0) return -1;\n"
+      "  for (int device = 0; device < count; ++device)\n"
+      "  {\n"
+      "    cudaDeviceProp prop;\n"
+      "    if (cudaSuccess == cudaGetDeviceProperties(&prop, device))\n"
+      "      std::printf(\"%d.%d \", prop.major, prop.minor);\n"
+      "  }\n"
+      "  return 0;\n"
+      "}\n")
+    enable_language(CUDA)
+    # Compile and run the probe written above (the source file is ${__cufile}).
+    try_run(__nvcc_res __compile_result ${PROJECT_BINARY_DIR} ${__cufile}
+            COMPILE_OUTPUT_VARIABLE __compile_out
+            RUN_OUTPUT_VARIABLE __nvcc_out)
+
+    if(__nvcc_res EQUAL 0 AND __compile_result)
+      # Keep only the "major.minor" tokens: this collects every reported device, not just the
+      # first, and drops any stray text or line breaks from the captured output.
+      # 2.1 is rewritten as "2.1(2.0)", the code(arch) spelling understood by the flag selector.
+      string(REGEX MATCHALL "[1-9]\\.[0-9]" __nvcc_out "${__nvcc_out}")
+      string(REPLACE "2.1" "2.1(2.0)" __nvcc_out "${__nvcc_out}")
+      set(CUDA_gpu_detect_output ${__nvcc_out})
+    else()
+      message(WARNING "Running GPU detection script with nvcc failed: ${__nvcc_out} ${__compile_out}")
+    endif()
+  endif()
+  # Without a detection result, fall back to the architecture names declared in this file.
+  if(NOT CUDA_gpu_detect_output)
+    message(WARNING "Automatic GPU detection failed. Building for all known architectures (${CUDA_KNOWN_GPU_ARCHITECTURES}).")
+    set(${out_variable} ${CUDA_KNOWN_GPU_ARCHITECTURES} PARENT_SCOPE)
+  else()
+    set(${out_variable} ${CUDA_gpu_detect_output} PARENT_SCOPE)
+  endif()
+endfunction()
+
+
+# Architecture family names substituted when CUDA_ARCH_LIST is "All"
+set(CUDA_KNOWN_GPU_ARCHITECTURES "Fermi" "Kepler" "Maxwell")
+
+# Compute capabilities substituted when CUDA_ARCH_LIST is "Common"
+set(CUDA_COMMON_GPU_ARCHITECTURES "3.0" "3.5" "5.0")
+# Both lists are extended below according to the selected CUDA_TOOLSET version.
+if (CUDA_TOOLSET VERSION_GREATER "6.5")
+  list(APPEND CUDA_KNOWN_GPU_ARCHITECTURES "Kepler+Tegra" "Kepler+Tesla" "Maxwell+Tegra")
+  list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "5.2")
+endif ()
+
+if (CUDA_TOOLSET VERSION_GREATER "7.5")
+  list(APPEND CUDA_KNOWN_GPU_ARCHITECTURES "Pascal")
+  list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "6.0" "6.1" "6.1+PTX")
+else()
+  list(APPEND CUDA_COMMON_GPU_ARCHITECTURES "5.2+PTX")
+endif ()
+
+################################################################################################
+# Function for selecting GPU arch flags for nvcc based on CUDA_ARCH_LIST
+# Usage:
+#   mshadow_select_nvcc_arch_flags(out_variable)
+function(mshadow_select_nvcc_arch_flags out_variable)
+  # CUDA_ARCH_LIST accepts "All", "Common", "Auto", family names, or versions such as "6.1+PTX".
+  set(CUDA_ARCH_LIST "All" CACHE STRING "Select target NVIDIA GPU achitecture.")
+  set_property( CACHE CUDA_ARCH_LIST PROPERTY STRINGS "" "All" "Common" "Auto" ${CUDA_KNOWN_GPU_ARCHITECTURES} )
+  mark_as_advanced(CUDA_ARCH_LIST)
+  # Results: <out_variable> holds the -gencode flags, <out_variable>_readable a summary string;
+  # both are set in the caller's scope. An empty selection is treated as "All".
+  if("X${CUDA_ARCH_LIST}" STREQUAL "X" )
+    set(CUDA_ARCH_LIST "All")
+  endif()
+
+  set(cuda_arch_bin)
+  set(cuda_arch_ptx)
+
+  if("${CUDA_ARCH_LIST}" STREQUAL "All")
+    set(CUDA_ARCH_LIST ${CUDA_KNOWN_GPU_ARCHITECTURES})
+  elseif("${CUDA_ARCH_LIST}" STREQUAL "Common")
+    set(CUDA_ARCH_LIST ${CUDA_COMMON_GPU_ARCHITECTURES})
+  elseif("${CUDA_ARCH_LIST}" STREQUAL "Auto")
+    mshadow_detect_installed_gpus(CUDA_ARCH_LIST)
+    message(STATUS "Autodetected CUDA architecture(s): ${CUDA_ARCH_LIST}")
+  endif()
+
+  # Now process the list and look for names
+  string(REGEX REPLACE "[ \t]+" ";" CUDA_ARCH_LIST "${CUDA_ARCH_LIST}")
+  list(REMOVE_DUPLICATES CUDA_ARCH_LIST)
+  foreach(arch_name ${CUDA_ARCH_LIST})
+    set(arch_bin)
+    set(arch_ptx)
+    set(add_ptx FALSE)
+    # Check to see if we are compiling PTX
+    if(arch_name MATCHES "(.*)\\+PTX$")
+      set(add_ptx TRUE)
+      set(arch_name ${CMAKE_MATCH_1})
+    endif()
+    if(arch_name MATCHES "^([0-9]\\.[0-9](\\([0-9]\\.[0-9]\\))?)$")
+      set(arch_bin ${CMAKE_MATCH_1})
+      set(arch_ptx ${arch_bin})
+    else()
+      # Look for it in our list of known architectures (quoted so an empty name cannot break if())
+      if("${arch_name}" STREQUAL "Fermi")
+        if (CUDA_TOOLSET VERSION_LESS "8.0")
+          set(arch_bin 2.0 "2.1(2.0)")
+        endif()
+      elseif("${arch_name}" STREQUAL "Kepler+Tegra")
+        set(arch_bin 3.2)
+      elseif("${arch_name}" STREQUAL "Kepler+Tesla")
+        set(arch_bin 3.7)
+      elseif("${arch_name}" STREQUAL "Kepler")
+        set(arch_bin 3.0 3.5)
+        set(arch_ptx 3.5)
+      elseif("${arch_name}" STREQUAL "Maxwell+Tegra")
+        set(arch_bin 5.3)
+      elseif("${arch_name}" STREQUAL "Maxwell")
+        set(arch_bin 5.0 5.2)
+        set(arch_ptx 5.2)
+      elseif("${arch_name}" STREQUAL "Pascal")
+        set(arch_bin 6.0 6.1)
+        set(arch_ptx 6.1)
+      else()
+        message(SEND_ERROR "Unknown CUDA Architecture Name ${arch_name} in mshadow_select_nvcc_arch_flags")
+      endif()
+    endif()
+    list(APPEND cuda_arch_bin ${arch_bin})
+    if(add_ptx)
+      if (NOT arch_ptx)
+        set(arch_ptx ${arch_bin})
+      endif()
+      list(APPEND cuda_arch_ptx ${arch_ptx})
+    endif()
+  endforeach()
+
+  # remove dots and convert to lists
+  string(REGEX REPLACE "\\." "" cuda_arch_bin "${cuda_arch_bin}")
+  string(REGEX REPLACE "\\." "" cuda_arch_ptx "${cuda_arch_ptx}")
+  string(REGEX MATCHALL "[0-9()]+" cuda_arch_bin "${cuda_arch_bin}")
+  string(REGEX MATCHALL "[0-9]+"   cuda_arch_ptx "${cuda_arch_ptx}")
+
+  if(cuda_arch_bin)
+    list(REMOVE_DUPLICATES cuda_arch_bin)
+  endif()
+  if(cuda_arch_ptx)
+    list(REMOVE_DUPLICATES cuda_arch_ptx)
+  endif()
+    
+  message(STATUS "cuda arch bin: ${cuda_arch_bin}")
+  message(STATUS "cuda arch ptx: ${cuda_arch_ptx}")
+  set(nvcc_flags "")
+  set(nvcc_archs_readable "")
+
+  # Tell NVCC to add binaries for the specified GPUs
+  foreach(arch ${cuda_arch_bin})
+    if(arch MATCHES "([0-9]+)\\(([0-9]+)\\)")
+      # User explicitly specified ARCH for the concrete CODE
+      list(APPEND nvcc_flags -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1})
+      list(APPEND nvcc_archs_readable sm_${CMAKE_MATCH_1})
+    else()
+      # User didn't explicitly specify ARCH for the concrete CODE, we assume ARCH=CODE
+      list(APPEND nvcc_flags -gencode arch=compute_${arch},code=sm_${arch})
+      list(APPEND nvcc_archs_readable sm_${arch})
+    endif()
+  endforeach()
+
+  # Tell NVCC to add PTX intermediate code for the specified architectures
+  foreach(arch ${cuda_arch_ptx})
+    list(APPEND nvcc_flags -gencode arch=compute_${arch},code=compute_${arch})
+    list(APPEND nvcc_archs_readable compute_${arch})
+  endforeach()
+
+  string(REPLACE ";" " " nvcc_archs_readable "${nvcc_archs_readable}")
+  set(${out_variable}          ${nvcc_flags}          PARENT_SCOPE)
+  set(${out_variable}_readable ${nvcc_archs_readable} PARENT_SCOPE)
+endfunction()
+


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services