You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@singa.apache.org by wa...@apache.org on 2016/06/03 07:48:48 UTC
[43/60] incubator-singa git commit: SINGA-181 Add NVCC support for
.cu files
SINGA-181 Add NVCC support for .cu files
Use nvcc to compile math_kernel.cu. The output file is cuda_compile_generated_math_kernel.cu.o, which is linked to libsinga_core.so later.
Also fix some bugs/typos in source code.
Fix bugs in kernel functions by using std::sqrt to differentiate it from cuda::sqrt. Fix an error when linking libsinga_core.so, where the kernel functions were not included in the linker argument list.
Fix compilation bugs.
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/668ae167
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/668ae167
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/668ae167
Branch: refs/heads/dev
Commit: 668ae1679be1975fd153c30b522797988863dff7
Parents: d680079
Author: xiezl <xi...@comp.nus.edu.sg>
Authored: Wed May 25 23:10:43 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Thu May 26 14:11:18 2016 +0800
----------------------------------------------------------------------
CMakeLists.txt | 2 +-
src/CMakeLists.txt | 12 ++++++++++--
src/core/tensor/math_kernel.cu | 19 +++++++++++--------
src/core/tensor/math_kernel.h | 7 ++++++-
src/model/metric/accuracy.h | 1 +
test/singa/test_cudnn_dropout.cc | 4 ++--
6 files changed, 31 insertions(+), 14 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/668ae167/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8cb42fb..e08fb98 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -2,7 +2,6 @@ CMAKE_MINIMUM_REQUIRED(VERSION 2.6)
PROJECT(singa)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
-#message(STATUS "${CMAKE_CXX_FLAGS}")
LIST(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Thirdparty)
#message(STATUS "module path: ${CMAKE_MODULE_PATH}")
@@ -12,6 +11,7 @@ IF(UNIX OR APPLE)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC -Wall")
ENDIF()
+#message(STATUS "${CMAKE_CXX_FLAGS}")
SET(SINGA_INCLUDE_DIR "${CMAKE_SOURCE_DIR}/include;${PROJECT_BINARY_DIR}")
#message(STATUS "include path: ${SINGA_INCLUDE_DIR}")
INCLUDE_DIRECTORIES(${SINGA_INCLUDE_DIR})
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/668ae167/src/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 92e7fe5..df8b22b 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -18,8 +18,16 @@ AUX_SOURCE_DIRECTORY(core/device core_source)
AUX_SOURCE_DIRECTORY(core/memory core_source)
AUX_SOURCE_DIRECTORY(core/scheduler core_source)
AUX_SOURCE_DIRECTORY(core/tensor core_source)
-#message(STATUS "CORE ${core_source}")
-ADD_LIBRARY(singa_core SHARED ${core_source})
+FILE(GLOB_RECURSE cuda_source core "*.cu")
+set(FLAGS_BACKUP ${CMAKE_CXX_FLAGS})
+set(CMAKE_CXX_FLAGS "")
+CUDA_COMPILE(cuda_objs SHARED ${cuda_source} OPTIONS "-Xcompiler -fPIC")
+#message(STATUS "FLAGS ${CMAKE_CXX_FLAGS}")
+#message(STATUS "CORE ${cuda_source}")
+#message(STATUS "OBJ ${cuda_objs}")
+include_directories("${CMAKE_CURRENT_SOURCE_DIR}/core/tensor")
+set(CMAKE_CXX_FLAGS ${FLAGS_BACKUP})
+ADD_LIBRARY(singa_core SHARED ${core_source} ${cuda_objs})
TARGET_LINK_LIBRARIES(singa_core ${SINGA_LINKER_LIBS})
LIST(APPEND SINGA_LINKER_LIBS singa_core)
#MESSAGE(STATUS "link libs " ${SINGA_LINKER_LIBS})
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/668ae167/src/core/tensor/math_kernel.cu
----------------------------------------------------------------------
diff --git a/src/core/tensor/math_kernel.cu b/src/core/tensor/math_kernel.cu
index 585d65d..30863a1 100644
--- a/src/core/tensor/math_kernel.cu
+++ b/src/core/tensor/math_kernel.cu
@@ -19,9 +19,11 @@
*
*************************************************************/
+#include "singa_config.h"
#ifdef USE_CUDA
#include <cmath>
#include <algorithm>
+#include <cfloat>
#include "./math_kernel.h"
#define CU2DBLOCK_X 32
@@ -30,6 +32,7 @@
#define CU1DBLOCK 1024
#define CU1DBLOCKF 1024.0
+namespace singa{
// Cuda Kernel Functions
namespace cuda {
__global__ void kernel_softmax_loss(const float *prob, const int *label,
@@ -38,7 +41,7 @@ __global__ void kernel_softmax_loss(const float *prob, const int *label,
int num_threads = blockDim.x * gridDim.x;
for (; index < n; index += num_threads) {
float prob_of_truth = prob[index * dim + label[index]];
- loss[index] -= log(max(prob_of_truth, FLT_MIN));
+ loss[index] -= std::log(max(prob_of_truth, FLT_MIN));
}
}
@@ -52,7 +55,7 @@ __global__ void kernel_softmax_gradient(float *grad, const int *label, int n,
}
}
-__global__ void kernel_sum_vec(float *data, float *sum, int n) {
+__global__ void kernel_sum_vec(const float *data, float *sum, int n) {
int THREADS = blockDim.x;
__shared__ float aux[CU1DBLOCK];
@@ -149,7 +152,7 @@ __global__ void kernel_exp(const float *src_data, float *des_data, int n) {
int index = blockIdx.x * blockDim.x + threadIdx.x;
int num_threads = blockDim.x * gridDim.x;
for (; index < n; index += num_threads) {
- des_data[index] = exp(src_data[index]);
+ des_data[index] = std::exp(src_data[index]);
}
}
@@ -157,7 +160,7 @@ __global__ void kernel_log(const float *src_data, float *des_data, int n) {
int index = blockIdx.x * blockDim.x + threadIdx.x;
int num_threads = blockDim.x * gridDim.x;
for (; index < n; index += num_threads) {
- des_data[index] = log(src_data[index]);
+ des_data[index] = std::log(src_data[index]);
}
}
@@ -242,7 +245,7 @@ __global__ void kernel_square_grad(const float *src_data, float *des_data,
int index = blockIdx.x * blockDim.x + threadIdx.x;
int num_threads = blockDim.x * gridDim.x;
for (; index < n; index += num_threads) {
- des_data[index] = 2 * sqrt(src_data[index]);
+ des_data[index] = 2 * src_data[index];
}
}
@@ -250,7 +253,7 @@ __global__ void kernel_sqrt(const float *src_data, float *des_data, int n) {
int index = blockIdx.x * blockDim.x + threadIdx.x;
int num_threads = blockDim.x * gridDim.x;
for (; index < n; index += num_threads) {
- des_data[index] = sqrt(src_data[index]);
+ des_data[index] = std::sqrt(src_data[index]);
}
}
@@ -259,7 +262,7 @@ __global__ void kernel_pow(const float *src_data_a, const float *src_data_b,
int index = blockIdx.x * blockDim.x + threadIdx.x;
int num_threads = blockDim.x * gridDim.x;
for (; index < n; index += num_threads) {
- des_data[index] = pow(src_data_a[index], src_data_b[index]);
+ des_data[index] = std::pow(src_data_a[index], src_data_b[index]);
}
}
@@ -331,7 +334,7 @@ void sum_col(int rows, int cols, int stride, const float *in, float *out) {
int threads_per_block = cols > CU1DBLOCK ? CU1DBLOCK : cols;
int num_blocks = rows;
- kernel_sum_col<<<num_blocks, threads_per_block>>>(src_mat_data, dst_vec_data,
+ kernel_sum_col<<<num_blocks, threads_per_block>>>(in, out,
rows, cols, stride);
}
void add_row(int rows, int cols, int stride, const float *in_row,
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/668ae167/src/core/tensor/math_kernel.h
----------------------------------------------------------------------
diff --git a/src/core/tensor/math_kernel.h b/src/core/tensor/math_kernel.h
index 7629ac8..f5da772 100644
--- a/src/core/tensor/math_kernel.h
+++ b/src/core/tensor/math_kernel.h
@@ -21,8 +21,11 @@
#ifndef SRC_CORE_TENSOR__MATH_KERNEL_H_
#define SRC_CORE_TENSOR__MATH_KERNEL_H_
-namespace singa {
+#include "singa_config.h"
+#ifdef USE_CUDA
+
+namespace singa {
/*
void softmaxloss_forward(int n, int dim, const float *prob,
const int *label, float *loss);
@@ -77,6 +80,8 @@ void set_value(int n, float v, float *out);
void threshold(int n, float alpha, const float *in, float *out);
} // cuda
+
} // namespace singa
+#endif
#endif // SRC_CORE_TENSOR__MATH_KERNEL_H_
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/668ae167/src/model/metric/accuracy.h
----------------------------------------------------------------------
diff --git a/src/model/metric/accuracy.h b/src/model/metric/accuracy.h
index 05c1643..fb23634 100644
--- a/src/model/metric/accuracy.h
+++ b/src/model/metric/accuracy.h
@@ -19,6 +19,7 @@
#ifndef SINGA_MODEL_METRIC_ACCURACY_H_
#define SINGA_MODEL_METRIC_ACCURACY_H_
#include "singa/model/metric.h"
+#include <algorithm>
namespace singa {
/// Compute the accuray of the prediction, which is matched against the
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/668ae167/test/singa/test_cudnn_dropout.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_cudnn_dropout.cc b/test/singa/test_cudnn_dropout.cc
index 393d555..e1a6333 100644
--- a/test/singa/test_cudnn_dropout.cc
+++ b/test/singa/test_cudnn_dropout.cc
@@ -21,7 +21,7 @@
#include "../src/model/layer/cudnn_dropout.h"
#ifdef USE_CUDNN
// cudnn dropout is added in cudnn 5
-//#if CUDNN_MAJOR_VERSION >= 5
+#if CUDNN_MAJOR_VERSION >= 5
#include "gtest/gtest.h"
@@ -123,5 +123,5 @@ TEST(CudnnDropout, Backward) {
EXPECT_FLOAT_EQ(dx[1], dy[1] * GetBitValue(mptr, 1) * scale);
EXPECT_FLOAT_EQ(dx[7], dy[7] * GetBitValue(mptr, 7) * scale);
}
-//#endif // CUDNN_VERSION_MAJOR>=5
+#endif // CUDNN_VERSION_MAJOR>=5
#endif // USE_CUDNN