Posted to commits@mxnet.apache.org by GitBox <gi...@apache.org> on 2018/01/15 20:21:27 UTC

[GitHub] cjolivier01 closed pull request #9428: Introduce the ENABLE_CUDA_RTC build option

cjolivier01 closed pull request #9428: Introduce the ENABLE_CUDA_RTC build option
URL: https://github.com/apache/incubator-mxnet/pull/9428
 
 
   

This is a pull request merged from a forked repository. Because GitHub hides
the original diff of a foreign (forked) pull request once it is merged, the
diff is reproduced below for the sake of provenance:

diff --git a/CMakeLists.txt b/CMakeLists.txt
index bbbb51dfb2..e9aa826357 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -46,6 +46,7 @@ mxnet_option(USE_CPP_PACKAGE      "Build C++ Package" OFF)
 mxnet_option(USE_MXNET_LIB_NAMING "Use MXNet library naming conventions." ON)
 mxnet_option(USE_GPROF            "Compile with gprof (profiling) flag" OFF)
 mxnet_option(USE_VTUNE            "Enable use of Intel Amplifier XE (VTune)" OFF) # one could set VTUNE_ROOT for search path
+mxnet_option(ENABLE_CUDA_RTC      "Build with CUDA runtime compilation support" ON)
 mxnet_option(INSTALL_EXAMPLES     "Install the example source files." OFF)
 mxnet_option(USE_SIGNAL_HANDLER   "Print stack traces on segfaults." OFF)
 
@@ -452,7 +453,11 @@ if(USE_CUDA)
     string(REPLACE ";" " " NVCC_FLAGS_ARCH "${NVCC_FLAGS_ARCH}")
     set(CMAKE_CUDA_FLAGS "${NVCC_FLAGS_ARCH}")
     set(CMAKE_CUDA_FLAGS_RELEASE "${NVCC_FLAGS_ARCH} -use_fast_math")
-    list(APPEND mxnet_LINKER_LIBS nvrtc cuda cublas cufft cusolver curand)
+    list(APPEND mxnet_LINKER_LIBS cublas cufft cusolver curand)
+    if(ENABLE_CUDA_RTC)
+        list(APPEND mxnet_LINKER_LIBS nvrtc cuda)
+        add_definitions(-DMXNET_ENABLE_CUDA_RTC=1)
+    endif()
     list(APPEND SOURCE ${CUDA})
     add_definitions(-DMXNET_USE_CUDA=1)
   else()
@@ -460,16 +465,23 @@ if(USE_CUDA)
     # define preprocessor macro so that we will not include the generated forcelink header
     mshadow_cuda_compile(cuda_objs ${CUDA})
     if(MSVC)
-        FIND_LIBRARY(CUDA_nvrtc_LIBRARY nvrtc "${CUDA_TOOLKIT_ROOT_DIR}/lib/x64"  "${CUDA_TOOLKIT_ROOT_DIR}/lib/win32")
-        list(APPEND mxnet_LINKER_LIBS ${CUDA_nvrtc_LIBRARY})
-        set(CUDA_cuda_LIBRARY "${CUDA_nvrtc_LIBRARY}/../cuda.lib")
-        list(APPEND mxnet_LINKER_LIBS ${CUDA_cuda_LIBRARY})
+        if(ENABLE_CUDA_RTC)
+            FIND_LIBRARY(CUDA_nvrtc_LIBRARY nvrtc "${CUDA_TOOLKIT_ROOT_DIR}/lib/x64"  "${CUDA_TOOLKIT_ROOT_DIR}/lib/win32")
+            list(APPEND mxnet_LINKER_LIBS ${CUDA_nvrtc_LIBRARY})
+            set(CUDA_cuda_LIBRARY "${CUDA_nvrtc_LIBRARY}/../cuda.lib")
+            list(APPEND mxnet_LINKER_LIBS ${CUDA_cuda_LIBRARY})
+            add_definitions(-DMXNET_ENABLE_CUDA_RTC=1)
+        endif()
         FIND_LIBRARY(CUDA_cufft_LIBRARY nvrtc "${CUDA_TOOLKIT_ROOT_DIR}/lib/x64"  "${CUDA_TOOLKIT_ROOT_DIR}/lib/win32")
         list(APPEND mxnet_LINKER_LIBS "${CUDA_cufft_LIBRARY}/../cufft.lib") # For fft operator
         FIND_LIBRARY(CUDA_cusolver_LIBRARY nvrtc "${CUDA_TOOLKIT_ROOT_DIR}/lib/x64"  "${CUDA_TOOLKIT_ROOT_DIR}/lib/win32")
         list(APPEND mxnet_LINKER_LIBS "${CUDA_cusolver_LIBRARY}/../cusolver.lib") # For cusolver
     else(MSVC)
-        list(APPEND mxnet_LINKER_LIBS nvrtc cuda cufft cusolver)
+        list(APPEND mxnet_LINKER_LIBS cufft cusolver)
+        if(ENABLE_CUDA_RTC)
+            list(APPEND mxnet_LINKER_LIBS nvrtc cuda)
+            add_definitions(-DMXNET_ENABLE_CUDA_RTC=1)
+        endif()
         link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib64")
     endif()
     list(APPEND SOURCE ${cuda_objs} ${CUDA})
diff --git a/Makefile b/Makefile
index aae0ba91a7..b0cff74e69 100644
--- a/Makefile
+++ b/Makefile
@@ -334,7 +334,11 @@ ALL_DEP = $(OBJ) $(EXTRA_OBJ) $(PLUGIN_OBJ) $(LIB_DEP)
 ifeq ($(USE_CUDA), 1)
 	CFLAGS += -I$(ROOTDIR)/3rdparty/cub
 	ALL_DEP += $(CUOBJ) $(EXTRA_CUOBJ) $(PLUGIN_CUOBJ)
-	LDFLAGS += -lcuda -lcufft -lnvrtc
+	LDFLAGS += -lcufft
+	ifeq ($(ENABLE_CUDA_RTC), 1)
+		LDFLAGS += -lcuda -lnvrtc
+		CFLAGS += -DMXNET_ENABLE_CUDA_RTC=1
+	endif
 	# Make sure to add stubs as fallback in order to be able to build 
 	# without full CUDA install (especially if run without nvidia-docker)
 	LDFLAGS += -L/usr/local/cuda/lib64/stubs
diff --git a/include/mxnet/rtc.h b/include/mxnet/rtc.h
index 747c0b5c94..76c3064db7 100644
--- a/include/mxnet/rtc.h
+++ b/include/mxnet/rtc.h
@@ -20,7 +20,7 @@
 #ifndef MXNET_RTC_H_
 #define MXNET_RTC_H_
 #include "./base.h"
-#if MXNET_USE_CUDA
+#if MXNET_USE_CUDA && MXNET_ENABLE_CUDA_RTC
 #include <nvrtc.h>
 #include <cuda.h>
 
@@ -132,5 +132,5 @@ class CudaModule {
 }  // namespace rtc
 }  // namespace mxnet
 
-#endif  // MXNET_USE_CUDA
+#endif  // MXNET_USE_CUDA && MXNET_ENABLE_CUDA_RTC
 #endif  // MXNET_RTC_H_
diff --git a/make/config.mk b/make/config.mk
index 9f7564b88f..6a3f0357a7 100644
--- a/make/config.mk
+++ b/make/config.mk
@@ -54,6 +54,9 @@ USE_CUDA = 0
 # USE_CUDA_PATH = /usr/local/cuda
 USE_CUDA_PATH = NONE
 
+# whether to enable CUDA runtime compilation
+ENABLE_CUDA_RTC = 1
+
 # whether use CuDNN R3 library
 USE_CUDNN = 0
 
diff --git a/make/osx.mk b/make/osx.mk
index b17b04cfdb..5bc3e4887b 100644
--- a/make/osx.mk
+++ b/make/osx.mk
@@ -48,6 +48,9 @@ USE_CUDA = 0
 # USE_CUDA_PATH = /usr/local/cuda
 USE_CUDA_PATH = NONE
 
+# whether to enable CUDA runtime compilation
+ENABLE_CUDA_RTC = 1
+
 # whether use CUDNN R3 library
 USE_CUDNN = 0
 
diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc
index c55f6c5781..9be8c6eb0a 100644
--- a/src/c_api/c_api.cc
+++ b/src/c_api/c_api.cc
@@ -1179,24 +1179,24 @@ int MXRtcCudaModuleCreate(const char* source, int num_options,
                           const char** options, int num_exports,
                           const char** exports, CudaModuleHandle *out) {
   API_BEGIN();
-#if MXNET_USE_CUDA
+#if MXNET_USE_CUDA && MXNET_ENABLE_CUDA_RTC
   std::vector<std::string> str_opts;
   for (int i = 0; i < num_options; ++i) str_opts.emplace_back(options[i]);
   std::vector<std::string> str_exports;
   for (int i = 0; i < num_exports; ++i) str_exports.emplace_back(exports[i]);
   *out = new rtc::CudaModule(source, str_opts, str_exports);
 #else
-  LOG(FATAL) << "Compile with USE_CUDA=1 to use GPU.";
+  LOG(FATAL) << "Compile with USE_CUDA=1 and ENABLE_CUDA_RTC=1 to have CUDA runtime compilation.";
 #endif
   API_END();
 }
 
 int MXRtcCudaModuleFree(CudaModuleHandle handle) {
   API_BEGIN();
-#if MXNET_USE_CUDA
+#if MXNET_USE_CUDA && MXNET_ENABLE_CUDA_RTC
   delete reinterpret_cast<rtc::CudaModule*>(handle);
 #else
-  LOG(FATAL) << "Compile with USE_CUDA=1 to use GPU.";
+  LOG(FATAL) << "Compile with USE_CUDA=1 and ENABLE_CUDA_RTC=1 to have CUDA runtime compilation.";
 #endif
   API_END();
 }
@@ -1205,7 +1205,7 @@ int MXRtcCudaKernelCreate(CudaModuleHandle handle, const char* name, int num_arg
                           int* is_ndarray, int* is_const, int* arg_types,
                           CudaKernelHandle *out) {
   API_BEGIN();
-#if MXNET_USE_CUDA
+#if MXNET_USE_CUDA && MXNET_ENABLE_CUDA_RTC
   auto module = reinterpret_cast<rtc::CudaModule*>(handle);
   std::vector<rtc::CudaModule::ArgType> signature;
   for (int i = 0; i < num_args; ++i) {
@@ -1216,17 +1216,17 @@ int MXRtcCudaKernelCreate(CudaModuleHandle handle, const char* name, int num_arg
   auto kernel = module->GetKernel(name, signature);
   *out = new std::shared_ptr<rtc::CudaModule::Kernel>(kernel);
 #else
-  LOG(FATAL) << "Compile with USE_CUDA=1 to use GPU.";
+  LOG(FATAL) << "Compile with USE_CUDA=1 and ENABLE_CUDA_RTC=1 to have CUDA runtime compilation.";
 #endif
   API_END();
 }
 
 int MXRtcCudaKernelFree(CudaKernelHandle handle) {
   API_BEGIN();
-#if MXNET_USE_CUDA
+#if MXNET_USE_CUDA && MXNET_ENABLE_CUDA_RTC
   delete reinterpret_cast<std::shared_ptr<rtc::CudaModule::Kernel>*>(handle);
 #else
-  LOG(FATAL) << "Compile with USE_CUDA=1 to use GPU.";
+  LOG(FATAL) << "Compile with USE_CUDA=1 and ENABLE_CUDA_RTC=1 to have CUDA runtime compilation.";
 #endif
   API_END();
 }
@@ -1237,7 +1237,7 @@ int MXRtcCudaKernelCall(CudaKernelHandle handle, int dev_id, void** args,
                         mx_uint block_dim_y, mx_uint block_dim_z,
                         mx_uint shared_mem) {
   API_BEGIN();
-#if MXNET_USE_CUDA
+#if MXNET_USE_CUDA && MXNET_ENABLE_CUDA_RTC
   auto kernel = reinterpret_cast<std::shared_ptr<rtc::CudaModule::Kernel>*>(handle);
   const auto& signature = (*kernel)->signature();
   std::vector<dmlc::any> any_args;
@@ -1253,7 +1253,7 @@ int MXRtcCudaKernelCall(CudaKernelHandle handle, int dev_id, void** args,
   (*kernel)->Launch(Context::GPU(dev_id), any_args, grid_dim_x, grid_dim_y,
                     grid_dim_z, block_dim_x, block_dim_y, block_dim_z, shared_mem);
 #else
-  LOG(FATAL) << "Compile with USE_CUDA=1 to use GPU.";
+  LOG(FATAL) << "Compile with USE_CUDA=1 and ENABLE_CUDA_RTC=1 to have CUDA runtime compilation.";
 #endif
   API_END();
 }
diff --git a/src/common/rtc.cc b/src/common/rtc.cc
index c48afc6895..444553b128 100644
--- a/src/common/rtc.cc
+++ b/src/common/rtc.cc
@@ -23,7 +23,7 @@
 #include "../common/cuda_utils.h"
 #include "../operator/operator_common.h"
 
-#if MXNET_USE_CUDA
+#if MXNET_USE_CUDA && MXNET_ENABLE_CUDA_RTC
 
 namespace mxnet {
 namespace rtc {
@@ -185,4 +185,4 @@ void CudaModule::Kernel::Launch(
 }  // namespace rtc
 }  // namespace mxnet
 
-#endif  // ((MXNET_USE_CUDA) && (MXNET_USE_NVRTC))
+#endif  // MXNET_USE_CUDA && MXNET_ENABLE_CUDA_RTC
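
For readers building from source: the new option can be switched off at configure
time (for example -DENABLE_CUDA_RTC=OFF for the CMake build, or ENABLE_CUDA_RTC = 0
in make/config.mk). In that case MXNET_ENABLE_CUDA_RTC is never defined, and the RTC
entry points in c_api.cc fall through to the LOG(FATAL) branches shown above. Below
is a minimal, self-contained sketch (not MXNet code) of the guard pattern the diff
relies on; the fallback #define lines exist only so the snippet compiles on its own,
since in MXNet these macros are supplied by the build system.

    // guard_sketch.cc -- hypothetical file, illustration only.
    // In MXNet the macros come from the build system:
    //   -DMXNET_USE_CUDA=1 when USE_CUDA is on, and
    //   -DMXNET_ENABLE_CUDA_RTC=1 when ENABLE_CUDA_RTC is also on.
    #include <iostream>

    #ifndef MXNET_USE_CUDA
    #define MXNET_USE_CUDA 0         // fallback so the sketch compiles standalone
    #endif
    #ifndef MXNET_ENABLE_CUDA_RTC
    #define MXNET_ENABLE_CUDA_RTC 0  // fallback so the sketch compiles standalone
    #endif

    int main() {
    #if MXNET_USE_CUDA && MXNET_ENABLE_CUDA_RTC
      // Only in this branch would nvrtc/libcuda be linked and rtc::CudaModule be usable.
      std::cout << "CUDA runtime compilation support is built in.\n";
    #else
      std::cout << "Compile with USE_CUDA=1 and ENABLE_CUDA_RTC=1 "
                   "to have CUDA runtime compilation.\n";
    #endif
      return 0;
    }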


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services