You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@singa.apache.org by wa...@apache.org on 2015/11/26 12:31:58 UTC
[1/6] incubator-singa git commit: SINGA-104 Add Context Class
Repository: incubator-singa
Updated Branches:
refs/heads/master 364c88562 -> b2cfa17b8
SINGA-104 Add Context Class
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/771ff328
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/771ff328
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/771ff328
Branch: refs/heads/master
Commit: 771ff328098fed89177a0f8e0d41a44c4c1c7e0c
Parents: 364c885
Author: seaok <se...@gmail.com>
Authored: Wed Nov 25 13:19:42 2015 +0800
Committer: seaok <se...@gmail.com>
Committed: Wed Nov 25 13:19:42 2015 +0800
----------------------------------------------------------------------
include/singa/utils/context.h | 87 +++++++++++++++++++++++++++++++++++++
src/test/test_context.cc | 66 ++++++++++++++++++++++++++++
src/utils/context.cc | 89 ++++++++++++++++++++++++++++++++++++++
3 files changed, 242 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/771ff328/include/singa/utils/context.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/context.h b/include/singa/utils/context.h
new file mode 100644
index 0000000..762ae75
--- /dev/null
+++ b/include/singa/utils/context.h
@@ -0,0 +1,87 @@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+#ifndef SINGA_UTILS_CONTEXT_H_
+#define SINGA_UTILS_CONTEXT_H_
+
+#include <vector>
+
+#ifdef USE_GPU
+#include <cublas_v2.h>
+#include <cuda_runtime.h>
+#include <curand.h>
+#endif
+
+
+namespace singa {
+
+const int kDefaultDevice = 20;
+
+class Context {
+ public:
+
+ ~Context();
+
+ void Setup();
+
+#ifdef USE_GPU
+ int DeviceID(const int index) {
+ return device_ids_[index];
+ }
+
+ void SetDeviceID(const int index, const int id) {
+ device_ids_[index] = id;
+ }
+
+ void SetDevice(const int index) {
+ cudaSetDevice(device_ids_[index]);
+ }
+
+ cublasHandle_t Handle(const int index) {
+ return handles_[index];
+ }
+
+ void CreateHandle(const int index);
+
+ void DestoryHandle(const int index);
+
+ curandGenerator_t GpuRandGenerator(const int index) {
+ return gpu_rand_generators_[index];
+ }
+
+ void CreateGpuRandGenerator(const int index);
+
+ void DestoryGpuRandGenerator(const int index);
+
+#endif
+
+ protected:
+ std::vector<int> device_ids_;
+#ifdef USE_GPU
+ std::vector<cublasHandle_t> handles_;
+ std::vector<curandGenerator_t> gpu_rand_generators_;
+#endif
+
+};
+
+} // namespace singa
+
+#endif // SINGA_UTILS_MATH_ADDR_H_
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/771ff328/src/test/test_context.cc
----------------------------------------------------------------------
diff --git a/src/test/test_context.cc b/src/test/test_context.cc
new file mode 100644
index 0000000..3a23b23
--- /dev/null
+++ b/src/test/test_context.cc
@@ -0,0 +1,66 @@
+#include "gtest/gtest.h"
+#include "singa/utils/singleton.h"
+#include "singa/utils/context.h"
+
+//#include <cuda_runtime.h>
+//#include "cublas_v2.h"
+
+using namespace singa;
+using namespace std;
+
+TEST(ContextTest, TestDevice) {
+ auto context = Singleton<Context>::Instance();
+ context->Setup();
+
+ int index = 4;
+ int device_id = context->DeviceID(index);
+ ASSERT_EQ(4,device_id);
+
+ context->SetDeviceID(index,6);
+ device_id = context->DeviceID(index);
+ ASSERT_EQ(6,device_id);
+}
+
+TEST(ContextTest, TestHandle) {
+ auto context = Singleton<Context>::Instance();
+ context->Setup();
+
+ int index = 2;
+ context->CreateHandle(index);
+
+ float cpu_ret = 0.0f;
+ float gpu_ret = 0.0f;
+
+ float A[12];
+ float B[12];
+
+ for(int i = 0; i < 12; i++) {
+ A[i]=i-1;
+ B[i]=i+1;
+ }
+
+ float* A_gpu = NULL;
+ float* B_gpu = NULL;
+
+ cudaMalloc((void**)&A_gpu, 12*sizeof(float));
+ cudaMalloc((void**)&B_gpu, 12*sizeof(float));
+
+ cudaMemcpy(A_gpu,A,12*sizeof(float),cudaMemcpyHostToDevice);
+ cudaMemcpy(B_gpu,B,12*sizeof(float),cudaMemcpyHostToDevice);
+
+ cublasHandle_t handle = context->Handle(index);
+ /*cublasHandle_t handle;
+ cudaSetDevice(0);
+ cublasCreate(&handle);*/
+
+ cublasSdot(handle, 12, A_gpu, 1, B_gpu, 1, &gpu_ret);
+
+ for(int i = 0; i < 12;++i) {
+ cpu_ret += A[i] * B[i];
+ }
+
+ ASSERT_EQ(gpu_ret,cpu_ret);
+
+ cudaFree(A_gpu);
+ cudaFree(B_gpu);
+}
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/771ff328/src/utils/context.cc
----------------------------------------------------------------------
diff --git a/src/utils/context.cc b/src/utils/context.cc
new file mode 100644
index 0000000..671bec0
--- /dev/null
+++ b/src/utils/context.cc
@@ -0,0 +1,89 @@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+#include "singa/utils/context.h"
+#include "singa/utils/factory.h"
+#include "singa/utils/singleton.h"
+
+namespace singa {
+
+Context::~Context() {
+#ifdef USE_GPU
+ for(int i = 0; i < kDefaultDevice; ++i) {
+ SetDevice(i);
+
+ if(handles_[i] != NULL) {
+ cublasDestroy(handles_[i]);
+ }
+
+ if(gpu_rand_generators_[i] != NULL) {
+ curandDestroyGenerator(gpu_rand_generators_[i]);
+ }
+ }
+#endif
+}
+
+void Context::Setup() {
+
+ for(int i = 0; i < kDefaultDevice; ++i) {
+ //init device index
+ device_ids_.push_back(i);
+ }
+
+#ifdef USE_GPU
+ for(int i = 0; i < kDefaultDevice; ++i) {
+ //init handle
+ cublasHandle_t handle = NULL;
+ handles_.push_back(handle);
+
+ curandGenerator_t gpu_rand_generator = NULL;
+ gpu_rand_generators_.push_back(gpu_rand_generator);
+ }
+#endif
+}
+
+#ifdef USE_GPU
+void Context::CreateHandle(const int index) {
+ SetDevice(device_ids_[index]);
+ cublasCreate(&handles_[index]);
+}
+
+void Context::DestoryHandle(const int index) {
+ SetDevice(device_ids_[index]);
+ cublasDestroy(handles_[index]);
+ handles_[index] = NULL;
+}
+
+void Context::CreateGpuRandGenerator(const int index) {
+ SetDevice(device_ids_[index]);
+ curandCreateGenerator(&gpu_rand_generators_[index], CURAND_RNG_PSEUDO_DEFAULT);
+}
+
+void Context::DestoryGpuRandGenerator(const int index) {
+ SetDevice(device_ids_[index]);
+ curandDestroyGenerator(gpu_rand_generators_[index]);
+ gpu_rand_generators_[index] = NULL;
+}
+
+#endif
+
+
+} // namespace singa
+
[3/6] incubator-singa git commit: SINGA-104 Add Context Class
Posted by wa...@apache.org.
SINGA-104 Add Context Class
Update the Context class:
1. Rename functions and variables.
2. Add random generators for CPU threads.
TODO: run the tests in test_context.cu; add implicit/automatic initialization (using device 0).
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/9aff30aa
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/9aff30aa
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/9aff30aa
Branch: refs/heads/master
Commit: 9aff30aab69f81e45d3986c0337346e0e9170936
Parents: 35de4f9
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Thu Nov 26 11:53:10 2015 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Thu Nov 26 11:56:37 2015 +0800
----------------------------------------------------------------------
Makefile.gpu | 10 +--
include/singa/utils/context.h | 140 +++++++++++++++++++++++++++----------
src/test/test_context.cc | 66 -----------------
src/test/test_context.cu | 55 +++++++++++++++
src/utils/context.cc | 82 +++++++++++-----------
5 files changed, 203 insertions(+), 150 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9aff30aa/Makefile.gpu
----------------------------------------------------------------------
diff --git a/Makefile.gpu b/Makefile.gpu
index 2fea3b2..35b81b9 100644
--- a/Makefile.gpu
+++ b/Makefile.gpu
@@ -20,16 +20,16 @@
###################User Config Varaibles #############################
# third-party library installation folder
-HOME_DIR := /usr
+HOME_DIR := /home/wangwei/local
CUDA_DIR := /usr/local/cuda
#CUDA_DIR :=
# Lib folder for system and external libs. You may need to change it.
-LIBRARY_DIRS := $(HOME_DIR)/lib64 $(HOME_DIR)/lib $(HOME_DIR)/local/lib $(CUDA_DIR)/lib64 $(CUDA_DIR)/lib
+LIBRARY_DIRS := $(CUDA_DIR)/lib64 $(CUDA_DIR)/lib $(HOME_DIR)/lib64 $(HOME_DIR)/lib
# Header folder for system and external libs. You may need to change it.
-INCLUDE_DIRS := $(HOME_DIR)/include ./include $(HOME_DIR)/local/include/zookeeper $(CUDA_DIR)/include
+INCLUDE_DIRS := $(CUDA_DIR)/include $(HOME_DIR)/include ./include
# g++ location, should support c++11, tested with 4.8.1
CXX := g++
CUCXX := nvcc
@@ -50,7 +50,7 @@ ZK_FLAGS :=-DTHREADED -fpermissive
CXXFLAGS := -O2 -msse3 -Wall -pthread -fPIC -std=c++11 -Wno-unknown-pragmas \
$(MSHADOW_FLAGS) -DCPU_ONLY=1 $(ZK_FLAGS)\
-funroll-loops $(foreach includedir, $(INCLUDE_DIRS), -I$(includedir))
-CUCXXFLAGS := $(MSHADOW_FLAGS) -std=c++11 -G $(CUDA_ARCH) \
+CUCXXFLAGS := $(MSHADOW_FLAGS) -DUSE_GPU -std=c++11 -G $(CUDA_ARCH) \
$(foreach includedir, $(INCLUDE_DIRS), -I$(includedir))
#Add device compile option
@@ -84,7 +84,7 @@ TEST_CUDA_SRCS :=$(shell find src/test/ -maxdepth 1 -name "*.cu")
TEST_CUDA_OBJS := $(sort $(addprefix $(BUILD_DIR)/, $(TEST_CUDA_SRCS:.cu=.o)))
-include $(TEST_CUDA_OBJS:%.o=%.P)
-SINGA_CUDA_SRCS :=$(shell find src/ -maxdepth 2 -name "*.cu")
+SINGA_CUDA_SRCS := $(shell find src/ \( -path "src/test" \) -prune -o \( -name "*.cu" -type f \) -print )
SINGA_CUDA_OBJS := $(sort $(addprefix $(BUILD_DIR)/, $(SINGA_CUDA_SRCS:.cu=.o)))
-include $(SINGA_CUDA_OBJS:%.o=%.P)
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9aff30aa/include/singa/utils/context.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/context.h b/include/singa/utils/context.h
index 762ae75..7a41dac 100644
--- a/include/singa/utils/context.h
+++ b/include/singa/utils/context.h
@@ -23,63 +23,129 @@
#define SINGA_UTILS_CONTEXT_H_
#include <vector>
+#include <random>
+#include <chrono>
+#include <thread>
+#include <unordered_map>
+#include <glog/logging.h>
+
#ifdef USE_GPU
-#include <cublas_v2.h>
-#include <cuda_runtime.h>
-#include <curand.h>
+#include "singa/utils/cuda_utils.h"
#endif
namespace singa {
-const int kDefaultDevice = 20;
+// max num of threads per process
+const int kNumMaxThreads = 1024;
+/**
+ * Context is used as a global singleton, which stores the mapping from CPU
+ * thread id to GPU device id. It manages the handlers for GPU
+ * devices. It also manages the GPU and CPU random generators, which are created
+ * when accessed. One CPU thread has a CPU random generator. A CPU device
+ * has a GPU random generator.
+ */
class Context {
- public:
-
+ public:
+ /**
+ * Destructor, release random generators and handlers.
+ */
~Context();
-
- void Setup();
-
-#ifdef USE_GPU
- int DeviceID(const int index) {
- return device_ids_[index];
- }
-
- void SetDeviceID(const int index, const int id) {
- device_ids_[index] = id;
- }
-
- void SetDevice(const int index) {
- cudaSetDevice(device_ids_[index]);
+ /**
+ * Constructor, init arrays for random generators and handlers.
+ */
+ Context();
+
+ /**
+ * @return the ID of the device attached to a given CPU thread:
+ * if the device is a GPU card, then returns the GPU device ID;
+ * Else return -1.
+ */
+ int device_id(const std::thread::id tid) {
+ CHECK(device_id_.find(tid) != device_id_.end());
+ return device_id_[tid];
}
- cublasHandle_t Handle(const int index) {
- return handles_[index];
+ /**
+ * Setup the CPU thread, which may be assigned a GPU device.
+ * Set the random seed to -1.
+ * A GPU handler will be created for the GPU device.
+ * @param[in] thread::id CPU thread ID
+ * @param[in] device_id GPU device ID
+ */
+ void SetupDevice(const std::thread::id tid, const int did);
+
+ /**
+ * @copy SetupDevice(const int, const int);
+ * @param[in] seed random seed
+ */
+ void SetupDevice(const std::thread::id tid, const int did, long long seed);
+
+ /**
+ * Get the CPU random generator.
+ * If the generator does not exist, then create it now.
+ * If the seed is not set, i.e., seed=-1, then get a seed from system time.
+ * @param[in] thread::id CPU thread ID
+ * @return the CPU random generator
+ */
+ std::mt19937* rand_generator(const std::thread::id tid) {
+ if (rand_generator_.find(tid) == rand_generator_.end()) {
+ CHECK(seed_.find(tid) != seed_.end());
+ auto seed = static_cast<unsigned>(seed_[tid]);
+ if (seed_[tid] == -1)
+ seed = std::chrono::system_clock::now().time_since_epoch().count();
+ rand_generator_[tid] = new std::mt19937(seed);
+ }
+ return rand_generator_[tid];
+ }
+#ifdef USE_GPU
+ /**
+ * Get the handler of the GPU device attached to a CPU thread.
+ * @param[in] thread::id
+ * @return the GPU handler, or nullptr if this thread does not have any GPU.
+ */
+ cublasHandle_t cublas_handle(const std::thread::id tid) {
+ CHECK(cublas_handle_.find(tid) != cublas_handle_.end());
+ return cublas_handle_[tid];
}
-
- void CreateHandle(const int index);
-
- void DestoryHandle(const int index);
-
- curandGenerator_t GpuRandGenerator(const int index) {
- return gpu_rand_generators_[index];
+ /**
+ * Get the random generator of the GPU device assigned to the given thread.
+ * @param[in] thread::id
+ * @return random generator. If it does not exist, then create one.
+ * The random seed will be set to CURAND_RNG_PSEUDO_DEFAULT if it is not set.
+ */
+ curandGenerator_t curand_generator(const std::thread::id tid) {
+ if (curand_generator_.find(tid) == curand_generator_.end()) {
+ CHECK(seed_.find(tid) != seed_.end());
+ auto seed = seed_[tid];
+ // TODO handle user set seed
+ cudaSetDevice(device_id_[tid]);
+ curandCreateGenerator(&curand_generator_[tid], CURAND_RNG_PSEUDO_DEFAULT);
+ }
+ return curand_generator_[tid];
}
- void CreateGpuRandGenerator(const int index);
-
- void DestoryGpuRandGenerator(const int index);
+ /*
+ protected:
+ void CreateHandle(const int thread::id);
+ void DestoryHandle(const int thread::id);
+ void CreateGpuRandGenerator(const int thread::id);
+ void DestoryGpuRandGenerator(const int thread::id);
+ */
#endif
- protected:
- std::vector<int> device_ids_;
+ protected:
+
+ std::unordered_map<std::thread::id, int> device_id_;
+ std::unordered_map<std::thread::id, std::mt19937 *> rand_generator_;
+ std::unordered_map<std::thread::id, int> seed_;
#ifdef USE_GPU
- std::vector<cublasHandle_t> handles_;
- std::vector<curandGenerator_t> gpu_rand_generators_;
+ std::unordered_map<std::thread::id, cublasHandle_t> cublas_handle_;
+ std::unordered_map<std::thread::id, curandGenerator_t> curand_generator_;
#endif
-
};
} // namespace singa
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9aff30aa/src/test/test_context.cc
----------------------------------------------------------------------
diff --git a/src/test/test_context.cc b/src/test/test_context.cc
deleted file mode 100644
index 3a23b23..0000000
--- a/src/test/test_context.cc
+++ /dev/null
@@ -1,66 +0,0 @@
-#include "gtest/gtest.h"
-#include "singa/utils/singleton.h"
-#include "singa/utils/context.h"
-
-//#include <cuda_runtime.h>
-//#include "cublas_v2.h"
-
-using namespace singa;
-using namespace std;
-
-TEST(ContextTest, TestDevice) {
- auto context = Singleton<Context>::Instance();
- context->Setup();
-
- int index = 4;
- int device_id = context->DeviceID(index);
- ASSERT_EQ(4,device_id);
-
- context->SetDeviceID(index,6);
- device_id = context->DeviceID(index);
- ASSERT_EQ(6,device_id);
-}
-
-TEST(ContextTest, TestHandle) {
- auto context = Singleton<Context>::Instance();
- context->Setup();
-
- int index = 2;
- context->CreateHandle(index);
-
- float cpu_ret = 0.0f;
- float gpu_ret = 0.0f;
-
- float A[12];
- float B[12];
-
- for(int i = 0; i < 12; i++) {
- A[i]=i-1;
- B[i]=i+1;
- }
-
- float* A_gpu = NULL;
- float* B_gpu = NULL;
-
- cudaMalloc((void**)&A_gpu, 12*sizeof(float));
- cudaMalloc((void**)&B_gpu, 12*sizeof(float));
-
- cudaMemcpy(A_gpu,A,12*sizeof(float),cudaMemcpyHostToDevice);
- cudaMemcpy(B_gpu,B,12*sizeof(float),cudaMemcpyHostToDevice);
-
- cublasHandle_t handle = context->Handle(index);
- /*cublasHandle_t handle;
- cudaSetDevice(0);
- cublasCreate(&handle);*/
-
- cublasSdot(handle, 12, A_gpu, 1, B_gpu, 1, &gpu_ret);
-
- for(int i = 0; i < 12;++i) {
- cpu_ret += A[i] * B[i];
- }
-
- ASSERT_EQ(gpu_ret,cpu_ret);
-
- cudaFree(A_gpu);
- cudaFree(B_gpu);
-}
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9aff30aa/src/test/test_context.cu
----------------------------------------------------------------------
diff --git a/src/test/test_context.cu b/src/test/test_context.cu
new file mode 100644
index 0000000..88ab06b
--- /dev/null
+++ b/src/test/test_context.cu
@@ -0,0 +1,55 @@
+#include <thread>
+#include "gtest/gtest.h"
+#include "singa/utils/singleton.h"
+#include "singa/utils/context.h"
+#include "singa/utils/cuda_utils.h"
+
+using namespace singa;
+using namespace std;
+
+TEST(ContextTest, TestDevice) {
+ auto context = Singleton<Context>::Instance();
+
+ auto id = std::this_thread::get_id();
+ context->SetupDevice(id, 0);
+ auto device_id = context->device_id(id);
+ ASSERT_EQ(1,device_id);
+}
+
+TEST(ContextTest, TestHandle) {
+ auto context = Singleton<Context>::Instance();
+
+ float cpu_ret = 0.0f;
+ float gpu_ret = 0.0f;
+
+ float A[12];
+ float B[12];
+
+ for(int i = 0; i < 12; i++) {
+ A[i]=i-1;
+ B[i]=i+1;
+ }
+
+ float* A_gpu = NULL;
+ float* B_gpu = NULL;
+ context->SetupDevice(std::this_thread::get_id(), 0);
+
+ cudaMalloc((void**)&A_gpu, 12 * sizeof(float));
+ cudaMalloc((void**)&B_gpu, 12 * sizeof(float));
+
+ cudaMemcpy(A_gpu, A, 12 * sizeof(float), cudaMemcpyHostToDevice);
+ cudaMemcpy(B_gpu, B, 12 * sizeof(float), cudaMemcpyHostToDevice);
+
+ cublasHandle_t handle = context->cublas_handle(std::this_thread::get_id());
+
+ cublasSdot(handle, 12, A_gpu, 1, B_gpu, 1, &gpu_ret);
+
+ for(int i = 0; i < 12;++i) {
+ cpu_ret += A[i] * B[i];
+ }
+
+ ASSERT_EQ(gpu_ret,cpu_ret);
+
+ cudaFree(A_gpu);
+ cudaFree(B_gpu);
+}
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9aff30aa/src/utils/context.cc
----------------------------------------------------------------------
diff --git a/src/utils/context.cc b/src/utils/context.cc
index 671bec0..37c8f39 100644
--- a/src/utils/context.cc
+++ b/src/utils/context.cc
@@ -23,66 +23,64 @@
#include "singa/utils/singleton.h"
namespace singa {
-
+
Context::~Context() {
#ifdef USE_GPU
- for(int i = 0; i < kDefaultDevice; ++i) {
- SetDevice(i);
-
- if(handles_[i] != NULL) {
- cublasDestroy(handles_[i]);
- }
-
- if(gpu_rand_generators_[i] != NULL) {
- curandDestroyGenerator(gpu_rand_generators_[i]);
- }
+ for (auto& entry : device_id_) {
+ if (entry.second != -1) {
+ cudaSetDevice(entry.second);
+ if (cublas_handle_[entry.first] != nullptr) {
+ cublasDestroy(cublas_handle_[entry.first]);
+ cublas_handle_[entry.first] = nullptr;
+ }
+ if(curand_generator_[entry.first] != nullptr) {
+ curandDestroyGenerator(curand_generator_[entry.first]);
+ curand_generator_[entry.first] = nullptr;
+ }
+ }
}
#endif
+ for (auto& entry : rand_generator_) {
+ if (entry.second != nullptr) {
+ delete entry.second;
+ entry.second = nullptr;
+ }
+ }
}
-void Context::Setup() {
+Context::Context() { }
- for(int i = 0; i < kDefaultDevice; ++i) {
- //init device index
- device_ids_.push_back(i);
- }
+void Context::SetupDevice(const std::thread::id thread, const int did) {
+ SetupDevice(thread, did, -1);
+}
+void Context::SetupDevice(const std::thread::id thread, const int did,
+ long long seed) {
+ device_id_[thread] = did;
#ifdef USE_GPU
- for(int i = 0; i < kDefaultDevice; ++i) {
- //init handle
- cublasHandle_t handle = NULL;
- handles_.push_back(handle);
-
- curandGenerator_t gpu_rand_generator = NULL;
- gpu_rand_generators_.push_back(gpu_rand_generator);
+ if (did > -1) {
+ cudaSetDevice(did);
+ cublasCreate(&handle_[thread]);
}
#endif
+ seed_[thread] = seed;
}
+/*
#ifdef USE_GPU
-void Context::CreateHandle(const int index) {
- SetDevice(device_ids_[index]);
- cublasCreate(&handles_[index]);
-}
-
-void Context::DestoryHandle(const int index) {
- SetDevice(device_ids_[index]);
- cublasDestroy(handles_[index]);
- handles_[index] = NULL;
+void Context::DestoryHandle(const int thread::id) {
+ cudaSetDevice(device_id_[thread::id]);
+ cublasDestroy(handle_[thread::id]);
+ handle_[thread::id] = nullptr;
}
-void Context::CreateGpuRandGenerator(const int index) {
- SetDevice(device_ids_[index]);
- curandCreateGenerator(&gpu_rand_generators_[index], CURAND_RNG_PSEUDO_DEFAULT);
+void Context::DestoryGpuRandGenerator(const int thread::id) {
+ cudaSetDevice(device_id_[thread::id]);
+ curandDestroyGenerator(curand_generator_[thread::id]);
+ curand_generator_[thread::id] = nullptr;
}
-
-void Context::DestoryGpuRandGenerator(const int index) {
- SetDevice(device_ids_[index]);
- curandDestroyGenerator(gpu_rand_generators_[index]);
- gpu_rand_generators_[index] = NULL;
-}
-
#endif
+*/
} // namespace singa
[6/6] incubator-singa git commit: SINGA-104 Add Context Class
Posted by wa...@apache.org.
SINGA-104 Add Context Class
Clean up the code according to cpplint checks.
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/b2cfa17b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/b2cfa17b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/b2cfa17b
Branch: refs/heads/master
Commit: b2cfa17b8564dff993d195b0dd89be0bad0813a6
Parents: e3bda08
Author: WANG Sheng <wa...@gmail.com>
Authored: Thu Nov 26 19:30:42 2015 +0800
Committer: WANG Sheng <wa...@gmail.com>
Committed: Thu Nov 26 19:30:42 2015 +0800
----------------------------------------------------------------------
include/singa/utils/context.h | 74 ++--
src/test/test_context.cc | 35 +-
src/test/test_math.cc | 747 ++++++++++++++++++-------------------
src/test/test_msg.cc | 2 +-
src/test/test_paramslicer.cc | 2 +-
5 files changed, 423 insertions(+), 437 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b2cfa17b/include/singa/utils/context.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/context.h b/include/singa/utils/context.h
index 5066633..905b810 100644
--- a/include/singa/utils/context.h
+++ b/include/singa/utils/context.h
@@ -22,22 +22,19 @@
#ifndef SINGA_UTILS_CONTEXT_H_
#define SINGA_UTILS_CONTEXT_H_
-#include <vector>
-#include <random>
+#include <glog/logging.h>
#include <chrono>
+#include <random>
#include <thread>
#include <unordered_map>
-#include <glog/logging.h>
-
+#include <vector>
#ifdef USE_GPU
#include "singa/utils/cuda_utils.h"
#endif
-
namespace singa {
-
/**
* Context is used as a global singleton, which stores the mapping from CPU
* thread id to GPU device id. If a thread has no GPU, then its associated
@@ -52,30 +49,29 @@ class Context {
/**
* Destructor, release random generators and handlers.
*/
- ~Context() {
+ ~Context() {
#ifdef USE_GPU
- for (auto& entry : device_id_) {
- if (entry.second != -1) {
- cudaSetDevice(entry.second);
- if (cublas_handle_[entry.second] != nullptr) {
- cublasDestroy(cublas_handle_[entry.second]);
- cublas_handle_[entry.second] = nullptr;
- }
- if(curand_generator_[entry.second] != nullptr) {
- curandDestroyGenerator(curand_generator_[entry.second]);
- curand_generator_[entry.second] = nullptr;
- }
- }
- }
+ for (auto& entry : device_id_) {
+ if (entry.second != -1) {
+ cudaSetDevice(entry.second);
+ if (cublas_handle_[entry.second] != nullptr) {
+ cublasDestroy(cublas_handle_[entry.second]);
+ cublas_handle_[entry.second] = nullptr;
+ }
+ if (curand_generator_[entry.second] != nullptr) {
+ curandDestroyGenerator(curand_generator_[entry.second]);
+ curand_generator_[entry.second] = nullptr;
+ }
+ }
+ }
#endif
- for (auto& entry : rand_generator_) {
- if (entry.second != nullptr) {
- delete entry.second;
- entry.second = nullptr;
- }
- }
-
- }
+ for (auto& entry : rand_generator_) {
+ if (entry.second != nullptr) {
+ delete entry.second;
+ entry.second = nullptr;
+ }
+ }
+ }
/**
* Constructor, init handlers and GPU rand generators to nullptr.
*/
@@ -90,12 +86,12 @@ class Context {
* @return the ID of the device attached to a given CPU thread, or -1 if this
* thread has not been attached GPU device.
*/
- int device_id(const std::thread::id& tid) {
+ int device_id(const std::thread::id& tid) {
if (device_id_.find(tid) != device_id_.end())
return device_id_[tid];
else
return -1;
- }
+ }
/**
* Setup the CPU thread, which may be assigned a GPU device.
* If there is no GPU device, then set did to -1.
@@ -168,7 +164,7 @@ class Context {
/**
* Get the rand generator of the GPU device assigned to the given thread.
*/
- curandGenerator_t curand_generator(const std::thread::id thread_id) {
+ curandGenerator_t curand_generator(const std::thread::id thread_id) {
return curand_generator(device_id(thread_id));
}
/**
@@ -177,10 +173,10 @@ class Context {
* @return random generator. If it does not exist, then create one.
* The random seed will be set to CURAND_RNG_PSEUDO_DEFAULT if it is not set.
*/
- curandGenerator_t curand_generator(const int device_id) {
+ curandGenerator_t curand_generator(const int device_id) {
CHECK_GE(device_id, 0);
if (curand_generator_.at(device_id) == nullptr) {
- // TODO handle user set seed
+ // TODO(wangwei) handle user set seed
/*
CHECK(seed_.find(tid) != seed_.end());
auto seed = seed_[tid];
@@ -189,8 +185,8 @@ class Context {
curandCreateGenerator(&curand_generator_[device_id],
CURAND_RNG_PSEUDO_DEFAULT);
}
- return curand_generator_[device_id];
- }
+ return curand_generator_[device_id];
+ }
#endif
@@ -198,19 +194,19 @@ class Context {
//!< max num of GPUs per process
const int kMaxNumGPU = 64;
//!< map from thread id to device id
- std::unordered_map<std::thread::id, int> device_id_;
+ std::unordered_map<std::thread::id, int> device_id_;
//!< map from thread id to cpu rand generator
std::unordered_map<std::thread::id, std::mt19937 *> rand_generator_;
//!< map from thread id to cpu rand generator seed
std::unordered_map<std::thread::id, int> seed_;
#ifdef USE_GPU
//!< cublas handler indexed by GPU device ID
- std::vector<cublasHandle_t> cublas_handle_;
+ std::vector<cublasHandle_t> cublas_handle_;
//!< cublas rand generator indexed by GPU device ID
- std::vector<curandGenerator_t> curand_generator_;
+ std::vector<curandGenerator_t> curand_generator_;
#endif
};
} // namespace singa
-#endif // SINGA_UTILS_MATH_ADDR_H_
+#endif // SINGA_UTILS_CONTEXT_H_
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b2cfa17b/src/test/test_context.cc
----------------------------------------------------------------------
diff --git a/src/test/test_context.cc b/src/test/test_context.cc
index 5e501b9..70f6d07 100644
--- a/src/test/test_context.cc
+++ b/src/test/test_context.cc
@@ -1,3 +1,24 @@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
#include <thread>
#include "gtest/gtest.h"
#include "singa/utils/singleton.h"
@@ -25,17 +46,17 @@ TEST(ContextTest, TestHandle) {
float A[12];
float B[12];
- for(int i = 0; i < 12; i++) {
- A[i]=i-1;
- B[i]=i+1;
+ for (int i = 0; i < 12; i++) {
+ A[i] = i - 1;
+ B[i] = i + 1;
}
float* A_gpu = NULL;
float* B_gpu = NULL;
context->SetupDevice(std::this_thread::get_id(), 0);
- cudaMalloc((void**)&A_gpu, 12 * sizeof(float));
- cudaMalloc((void**)&B_gpu, 12 * sizeof(float));
+ cudaMalloc(reinterpret_cast<void**>(&A_gpu), 12 * sizeof(float));
+ cudaMalloc(reinterpret_cast<void**>(&B_gpu), 12 * sizeof(float));
cudaMemcpy(A_gpu, A, 12 * sizeof(float), cudaMemcpyHostToDevice);
cudaMemcpy(B_gpu, B, 12 * sizeof(float), cudaMemcpyHostToDevice);
@@ -44,11 +65,11 @@ TEST(ContextTest, TestHandle) {
cublasSdot(handle, 12, A_gpu, 1, B_gpu, 1, &gpu_ret);
- for(int i = 0; i < 12;++i) {
+ for (int i = 0; i < 12; ++i) {
cpu_ret += A[i] * B[i];
}
- ASSERT_EQ(gpu_ret,cpu_ret);
+ ASSERT_EQ(gpu_ret, cpu_ret);
cudaFree(A_gpu);
cudaFree(B_gpu);
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b2cfa17b/src/test/test_math.cc
----------------------------------------------------------------------
diff --git a/src/test/test_math.cc b/src/test/test_math.cc
index 8f8c633..39ec2a0 100644
--- a/src/test/test_math.cc
+++ b/src/test/test_math.cc
@@ -1,3 +1,24 @@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
#include "gtest/gtest.h"
#include "singa/utils/math_addr.h"
#include "singa/utils/math_kernel.h"
@@ -12,459 +33,407 @@ using namespace singa;
using namespace std;
TEST(MathTest, TestGemmCPU) {
- float A[3][2] = {};
- float B[3][2] = {};
- float C[2][2] = {};
- for(int i = 0; i < 3; i++)
- for(int j = 0; j < 2; j++)
- {
- A[i][j] = i+j;
- B[i][j] = i+j - i*j;
- }
- cpu_gemm(A[0], B[0], 2, 2, 3 , 1.0f, 0.0f, true, false, C[0]);
- float D[2][2] = {};
- for(int i = 0; i < 2; i++)
- for(int j = 0; j < 2; j++)
- {
- D[i][j] = 0;
- for(int k = 0; k < 3; k++)
- D[i][j] += A[k][i]*B[k][j];
- }
- for(int i = 0; i < 2; i++)
- for(int j = 0; j < 2; j++)
- {
- ASSERT_EQ(C[i][j], D[i][j]);
- }
+ float A[3][2] = {};
+ float B[3][2] = {};
+ float C[2][2] = {};
+ for (int i = 0; i < 3; i++)
+ for (int j = 0; j < 2; j++) {
+ A[i][j] = i+j;
+ B[i][j] = i+j - i*j;
+ }
+ cpu_gemm(A[0], B[0], 2, 2, 3 , 1.0f, 0.0f, true, false, C[0]);
+ float D[2][2] = {};
+ for (int i = 0; i < 2; i++)
+ for (int j = 0; j < 2; j++) {
+ D[i][j] = 0;
+ for (int k = 0; k < 3; k++)
+ D[i][j] += A[k][i]*B[k][j];
+ }
+ for (int i = 0; i < 2; i++)
+ for (int j = 0; j < 2; j++) {
+ ASSERT_EQ(C[i][j], D[i][j]);
+ }
}
TEST(MathTest, TestGemvCPU) {
- float A[4][3] = {};
- float B[4]= {};
- float C[3] = {};
- float D[3] = {};
-
- for(int i = 0; i < 3; i++)
- {
- for(int j = 0; j < 4; j++)
- {
- A[j][i] = i-j + i*j;
- }
- }
-
- for(int i = 0; i < 4; i++)B[i] = i;
- for(int i = 0; i < 3; i++)C[i] = 10;
- cpu_gemv(A[0], B, 4, 3, 1.0f, 1.0f, true, C);
-
- for(int i = 0; i < 3; i++)
- {
- for(int j = 0; j < 4; j++)
- {
- D[i] += A[j][i]*B[j];
- }
- }
- for(int i = 0; i < 3; i++)
- {
- ASSERT_EQ(C[i], D[i]+10);
- }
+ float A[4][3] = {};
+ float B[4]= {};
+ float C[3] = {};
+ float D[3] = {};
+
+ for (int i = 0; i < 3; i++) {
+ for (int j = 0; j < 4; j++) {
+ A[j][i] = i-j + i*j;
+ }
+ }
+
+ for (int i = 0; i < 4; i++)B[i] = i;
+ for (int i = 0; i < 3; i++)C[i] = 10;
+ cpu_gemv(A[0], B, 4, 3, 1.0f, 1.0f, true, C);
+
+ for (int i = 0; i < 3; i++) {
+ for (int j = 0; j < 4; j++) {
+ D[i] += A[j][i]*B[j];
+ }
+ }
+ for (int i = 0; i < 3; i++) {
+ ASSERT_EQ(C[i], D[i]+10);
+ }
}
TEST(MathTest, TestAxpyCPU) {
- float A[4][3] = {};
- float C[4][3] = {};
- float B[3][4] = {};
- float D[3][4] = {};
-
- for(int i = 0; i < 4; i++)
- {
- for(int j = 0; j < 3; j++)
- {
- A[i][j] = i-j + i*j;
- B[j][i] = i-j + i*j;
- C[i][j] = A[i][j];
- D[j][i] = B[j][i];
- }
- }
-
- cpu_axpy(A[0], 12, 2.0f, B[0]);
- for(int i = 0; i < 12; i++)
- {
- D[i / 4][i % 4] += 2*C[i / 3][i % 3];
- }
-
- for(int i = 0; i < 3; i++)
- {
- for(int j = 0; j < 4; j++)
- {
- ASSERT_EQ(B[i][j],D[i][j]);
- }
- }
+ float A[4][3] = {};
+ float C[4][3] = {};
+ float B[3][4] = {};
+ float D[3][4] = {};
+
+ for (int i = 0; i < 4; i++) {
+ for (int j = 0; j < 3; j++) {
+ A[i][j] = i-j + i*j;
+ B[j][i] = i-j + i*j;
+ C[i][j] = A[i][j];
+ D[j][i] = B[j][i];
+ }
+ }
+
+ cpu_axpy(A[0], 12, 2.0f, B[0]);
+ for (int i = 0; i < 12; i++) {
+ D[i / 4][i % 4] += 2*C[i / 3][i % 3];
+ }
+
+ for (int i = 0; i < 3; i++) {
+ for (int j = 0; j < 4; j++) {
+ ASSERT_EQ(B[i][j], D[i][j]);
+ }
+ }
}
/*
TEST(MathTest, TestEopCPU) {
- float A[10] = {};
- float B[10] = {};
- float C[10] = {};
- float O[10] = {};
-
- for(int i = 0; i < 10; i++)
- {
- A[i] = i;
- B[i] = -i;
- C[i] = i;
-
- }
- cpu_e_f<singa::op::Set>(5, 15.0f, O, O);
- for(int i = 0; i < 5; i++)
- {
- ASSERT_EQ(O[i]-15,0);
- }
- for(int i = 5; i < 10; i++)
- {
- ASSERT_EQ(O[i],0);
- }
+ float A[10] = {};
+ float B[10] = {};
+ float C[10] = {};
+ float O[10] = {};
+
+ for (int i = 0; i < 10; i++) {
+ A[i] = i;
+ B[i] = -i;
+ C[i] = i;
+ }
+ cpu_e_f<singa::op::Set>(5, 15.0f, O, O);
+ for (int i = 0; i < 5; i++) {
+ ASSERT_EQ(O[i]-15,0);
+ }
+ for (int i = 5; i < 10; i++) {
+ ASSERT_EQ(O[i],0);
+ }
}
*/
#ifdef USE_GPU
TEST(MathTest, TestGemmGPU) {
- float A[3][2] = {};
- float B[3][2] = {};
- float C[2][2] = {};
- for(int i = 0; i < 3; i++)
- {
- for(int j = 0; j < 2; j++)
- {
- A[i][j] = i+j;
- B[i][j] = i+j - i*j;
- }
- }
-
- float* A_gpu=NULL;
- float* B_gpu=NULL;
- float* C_gpu=NULL;
-
- cudaMalloc((void**)&A_gpu, 3*2*sizeof(float));
- cudaMalloc((void**)&B_gpu, 3*2*sizeof(float));
- cudaMalloc((void**)&C_gpu, 2*2*sizeof(float));
-
- cudaMemcpy(A_gpu,A,3*2*sizeof(float),cudaMemcpyHostToDevice);
- cudaMemcpy(B_gpu,B,3*2*sizeof(float),cudaMemcpyHostToDevice);
-
- gpu_gemm<float>(A_gpu, B_gpu, 2, 2, 3 , 1, 0, true, false, C_gpu);
-
- cudaMemcpy(C,C_gpu,2*2*sizeof(float),cudaMemcpyDeviceToHost);
-
- float D[2][2] = {};
- for(int i = 0; i < 2; i++)
- {
- for(int j = 0; j < 2; j++)
- {
- D[i][j] = 0;
- for(int k = 0; k < 3; k++)
- {
- D[i][j] += A[k][i]*B[k][j];
- }
- }
- }
-
- for(int i = 0; i < 2; i++)
- {
- for(int j = 0; j < 2; j++)
- {
- ASSERT_EQ(C[i][j],D[i][j]);
- }
- }
-
- cudaFree(A_gpu);
- cudaFree(B_gpu);
- cudaFree(C_gpu);
+ float A[3][2] = {};
+ float B[3][2] = {};
+ float C[2][2] = {};
+ for (int i = 0; i < 3; i++) {
+ for (int j = 0; j < 2; j++) {
+ A[i][j] = i+j;
+ B[i][j] = i+j - i*j;
+ }
+ }
+
+ float* A_gpu = NULL;
+ float* B_gpu = NULL;
+ float* C_gpu = NULL;
+
+ cudaMalloc(reinterpret_cast<void**>(&A_gpu), 3*2*sizeof(float));
+ cudaMalloc(reinterpret_cast<void**>(&B_gpu), 3*2*sizeof(float));
+ cudaMalloc(reinterpret_cast<void**>(&C_gpu), 2*2*sizeof(float));
+
+ cudaMemcpy(A_gpu, A, 3*2*sizeof(float), cudaMemcpyHostToDevice);
+ cudaMemcpy(B_gpu, B, 3*2*sizeof(float), cudaMemcpyHostToDevice);
+
+ gpu_gemm<float>(A_gpu, B_gpu, 2, 2, 3 , 1, 0, true, false, C_gpu);
+
+ cudaMemcpy(C, C_gpu, 2*2*sizeof(float), cudaMemcpyDeviceToHost);
+
+ float D[2][2] = {};
+ for (int i = 0; i < 2; i++) {
+ for (int j = 0; j < 2; j++) {
+ D[i][j] = 0;
+ for (int k = 0; k < 3; k++) {
+ D[i][j] += A[k][i]*B[k][j];
+ }
+ }
+ }
+
+ for (int i = 0; i < 2; i++) {
+ for (int j = 0; j < 2; j++) {
+ ASSERT_EQ(C[i][j], D[i][j]);
+ }
+ }
+
+ cudaFree(A_gpu);
+ cudaFree(B_gpu);
+ cudaFree(C_gpu);
}
TEST(MathTest, TestGemvGPU) {
- float A[4][3] = {};
- float B[4]= {};
- float C[3] = {};
- float D[3] = {};
-
- for(int i = 0; i < 4; i++)
- {
- for(int j = 0; j < 3; j++)
- {
- A[i][j] = i-j + i*j;
- }
- }
-
- for(int i = 0; i < 4; i++)B[i] = i;
- for(int i = 0; i < 3; i++)C[i] = 10;
-
- float* A_gpu=NULL;
- float* B_gpu=NULL;
- float* C_gpu=NULL;
-
- cudaMalloc((void**)&A_gpu, 4*3*sizeof(float));
- cudaMalloc((void**)&B_gpu, 4*sizeof(float));
- cudaMalloc((void**)&C_gpu, 3*sizeof(float));
-
- cudaMemcpy(A_gpu,A,4*3*sizeof(float),cudaMemcpyHostToDevice);
- cudaMemcpy(B_gpu,B,4*sizeof(float),cudaMemcpyHostToDevice);
- cudaMemcpy(C_gpu,C,3*sizeof(float),cudaMemcpyHostToDevice);
-
- gpu_gemv<float>(A_gpu, B_gpu, 4, 3, 1, 1, true, C_gpu);
-
- cudaMemcpy(C,C_gpu,3*sizeof(float),cudaMemcpyDeviceToHost);
-
- for(int i = 0; i < 3; i++)
- {
- for(int j = 0; j < 4; j++)
- {
- D[i] += A[j][i]*B[j];
- }
- }
-
- for(int i = 0; i < 3; i++)
- {
- ASSERT_EQ(C[i],D[i]+10);
- }
-
- cudaFree(A_gpu);
- cudaFree(B_gpu);
- cudaFree(C_gpu);
+ float A[4][3] = {};
+ float B[4]= {};
+ float C[3] = {};
+ float D[3] = {};
+
+ for (int i = 0; i < 4; i++) {
+ for (int j = 0; j < 3; j++) {
+ A[i][j] = i-j + i*j;
+ }
+ }
+
+ for (int i = 0; i < 4; i++) B[i] = i;
+ for (int i = 0; i < 3; i++) C[i] = 10;
+
+ float* A_gpu = NULL;
+ float* B_gpu = NULL;
+ float* C_gpu = NULL;
+
+ cudaMalloc(reinterpret_cast<void**>(&A_gpu), 4*3*sizeof(float));
+ cudaMalloc(reinterpret_cast<void**>(&B_gpu), 4*sizeof(float));
+ cudaMalloc(reinterpret_cast<void**>(&C_gpu), 3*sizeof(float));
+
+ cudaMemcpy(A_gpu, A, 4*3*sizeof(float), cudaMemcpyHostToDevice);
+ cudaMemcpy(B_gpu, B, 4*sizeof(float), cudaMemcpyHostToDevice);
+ cudaMemcpy(C_gpu, C, 3*sizeof(float), cudaMemcpyHostToDevice);
+
+ gpu_gemv<float>(A_gpu, B_gpu, 4, 3, 1, 1, true, C_gpu);
+
+ cudaMemcpy(C, C_gpu, 3*sizeof(float), cudaMemcpyDeviceToHost);
+
+ for (int i = 0; i < 3; i++) {
+ for (int j = 0; j < 4; j++) {
+ D[i] += A[j][i]*B[j];
+ }
+ }
+
+ for (int i = 0; i < 3; i++) {
+ ASSERT_EQ(C[i], D[i]+10);
+ }
+
+ cudaFree(A_gpu);
+ cudaFree(B_gpu);
+ cudaFree(C_gpu);
}
/*
TEST(MathTest, TestAxpyGPU) {
- float A[4][3] = {};
- float C[4][3] = {};
- float B[3][4] = {};
- float D[3][4] = {};
-
- for(int i = 0; i < 4; i++)
- {
- for(int j = 0; j < 3; j++)
- {
- A[i][j] = i-j + i*j;
- B[j][i] = i-j + i*j;
- C[i][j] = A[i][j];
- D[j][i] = B[j][i];
- }
- }
-
- float* A_gpu=NULL;
- float* B_gpu=NULL;
-
- cudaMalloc((void**)&A_gpu, 4*3*sizeof(float));
- cudaMalloc((void**)&B_gpu, 3*4*sizeof(float));
-
- cudaMemcpy(A_gpu,A,4*3*sizeof(float),cudaMemcpyHostToDevice);
- cudaMemcpy(B_gpu,B,3*4*sizeof(float),cudaMemcpyHostToDevice);
-
- gpu_axpy<float>(A_gpu, 12, 2, B_gpu);
-
- cudaMemcpy(A,A_gpu,4*3*sizeof(float),cudaMemcpyDeviceToHost);
- cudaMemcpy(B,B_gpu,3*4*sizeof(float),cudaMemcpyDeviceToHost);
-
- //for(int i = 0; i < 12; i++)D[0][i] += 2*C[0][i];
-
- for(int i = 0; i < 4; i++)
- {
- for(int j = 0; j < 3; j++)
- {
- D[i][j] += C[i][j];
- ASSERT_EQ(B[i][j],D[i][j]);
- }
- }
-
- cudaFree(A_gpu);
- cudaFree(B_gpu);
+ float A[4][3] = {};
+ float C[4][3] = {};
+ float B[3][4] = {};
+ float D[3][4] = {};
+
+ for (int i = 0; i < 4; i++)
+ {
+ for (int j = 0; j < 3; j++)
+ {
+ A[i][j] = i-j + i*j;
+ B[j][i] = i-j + i*j;
+ C[i][j] = A[i][j];
+ D[j][i] = B[j][i];
+ }
+ }
+
+ float* A_gpu=NULL;
+ float* B_gpu=NULL;
+
+ cudaMalloc((void**)&A_gpu, 4*3*sizeof(float));
+ cudaMalloc((void**)&B_gpu, 3*4*sizeof(float));
+
+ cudaMemcpy(A_gpu,A,4*3*sizeof(float),cudaMemcpyHostToDevice);
+ cudaMemcpy(B_gpu,B,3*4*sizeof(float),cudaMemcpyHostToDevice);
+
+ gpu_axpy<float>(A_gpu, 12, 2, B_gpu);
+
+ cudaMemcpy(A,A_gpu,4*3*sizeof(float),cudaMemcpyDeviceToHost);
+ cudaMemcpy(B,B_gpu,3*4*sizeof(float),cudaMemcpyDeviceToHost);
+
+ //for (int i = 0; i < 12; i++)D[0][i] += 2*C[0][i];
+
+ for (int i = 0; i < 4; i++)
+ {
+ for (int j = 0; j < 3; j++)
+ {
+ D[i][j] += C[i][j];
+ ASSERT_EQ(B[i][j],D[i][j]);
+ }
+ }
+
+ cudaFree(A_gpu);
+ cudaFree(B_gpu);
}
*/
TEST(MathTest, TestDotGPU) {
- float A[12];
- float B[12];
-
- for(int i = 0; i < 12; i++)
- {
- A[i]=i-1;
- B[i]=i+1;
- }
+ float A[12];
+ float B[12];
- float* A_gpu=NULL;
- float* B_gpu=NULL;
+ for (int i = 0; i < 12; i++) {
+ A[i] = i - 1;
+ B[i] = i + 1;
+ }
- cudaMalloc((void**)&A_gpu, 12*sizeof(float));
- cudaMalloc((void**)&B_gpu, 12*sizeof(float));
+ float* A_gpu = NULL;
+ float* B_gpu = NULL;
- cudaMemcpy(A_gpu,A,12*sizeof(float),cudaMemcpyHostToDevice);
- cudaMemcpy(B_gpu,B,12*sizeof(float),cudaMemcpyHostToDevice);
- float gpu_ret=gpu_dot<float>(A_gpu,B_gpu,12);
+ cudaMalloc(reinterpret_cast<void**>(&A_gpu), 12*sizeof(float));
+ cudaMalloc(reinterpret_cast<void**>(&B_gpu), 12*sizeof(float));
- float cpu_ret=0.0f;
- for(int i = 0; i < 12; i++)
- {
- cpu_ret+=A[i]*B[i];
- }
+ cudaMemcpy(A_gpu, A, 12*sizeof(float), cudaMemcpyHostToDevice);
+ cudaMemcpy(B_gpu, B, 12*sizeof(float), cudaMemcpyHostToDevice);
+ float gpu_ret = gpu_dot<float>(A_gpu, B_gpu, 12);
- ASSERT_EQ(gpu_ret,cpu_ret);
+ float cpu_ret = 0.0f;
+ for (int i = 0; i < 12; i++) {
+ cpu_ret += A[i] * B[i];
+ }
- cudaFree(A_gpu);
- cudaFree(B_gpu);
+ ASSERT_EQ(gpu_ret, cpu_ret);
+ cudaFree(A_gpu);
+ cudaFree(B_gpu);
}
TEST(MathTest, TestSingaSumColGPU) {
+ float A[3][4];
+ float B[4];
+ float C[4];
- float A[3][4];
- float B[4];
- float C[4];
-
- for(int i = 0; i < 3; i++)
- {
- for(int j = 0; j < 4; j++)
- {
- A[i][j]=i+j;
- }
- }
-
- for(int i = 0; i < 4; i++)
- {
- B[i]=0.0f;
- C[i]=0.0f;
- }
-
- float* A_gpu=NULL;
- float* B_gpu=NULL;
-
- cudaMalloc((void**)&A_gpu, 12*sizeof(float));
- cudaMalloc((void**)&B_gpu, 4*sizeof(float));
- cudaMemcpy(A_gpu,A,12*sizeof(float),cudaMemcpyHostToDevice);
-
- singa_gpu_sum_col(A_gpu,B_gpu,3,4,4);
-
- cudaMemcpy(B,B_gpu,4*sizeof(float),cudaMemcpyDeviceToHost);
-
- for(int i = 0; i < 4; i++)
- {
- for(int j = 0; j < 3; j++)
- {
- C[i]+=A[j][i];
- }
- }
-
- for(int i = 0; i <4; i++)
- {
- ASSERT_EQ(B[i],C[i]);
- }
-
- cudaFree(A_gpu);
- cudaFree(B_gpu);
+ for (int i = 0; i < 3; i++) {
+ for (int j = 0; j < 4; j++) {
+ A[i][j] = i + j;
+ }
+ }
+
+ for (int i = 0; i < 4; i++) {
+ B[i] = 0.0f;
+ C[i] = 0.0f;
+ }
+
+ float* A_gpu = NULL;
+ float* B_gpu = NULL;
+
+ cudaMalloc(reinterpret_cast<void**>(&A_gpu), 12*sizeof(float));
+ cudaMalloc(reinterpret_cast<void**>(&B_gpu), 4*sizeof(float));
+ cudaMemcpy(A_gpu, A, 12*sizeof(float), cudaMemcpyHostToDevice);
+
+ singa_gpu_sum_col(A_gpu, B_gpu, 3, 4, 4);
+
+ cudaMemcpy(B, B_gpu, 4*sizeof(float), cudaMemcpyDeviceToHost);
+
+ for (int i = 0; i < 4; i++) {
+ for (int j = 0; j < 3; j++) {
+ C[i] += A[j][i];
+ }
+ }
+
+ for (int i = 0; i < 4; i++) {
+ ASSERT_EQ(B[i], C[i]);
+ }
+
+ cudaFree(A_gpu);
+ cudaFree(B_gpu);
}
TEST(MathTest, TestSingaAddVecRowGPU) {
-
- float A[3][4];
- float B[4];
- float C[3][4];
- float D[3][4];
-
- for(int i = 0; i < 4; i++)
- {
- B[i]=i;
- }
-
- for(int i = 0; i < 3; i++)
- {
- for(int j = 0; j < 4; j++)
- {
- A[i][j]=i+j;
- D[i][j]=A[i][j]+B[j];
- }
- }
-
-
- float* A_gpu=NULL;
- float* B_gpu=NULL;
- float* C_gpu=NULL;
-
- cudaMalloc((void**)&A_gpu, 3*4*sizeof(float));
- cudaMalloc((void**)&B_gpu, 4*sizeof(float));
- cudaMalloc((void**)&C_gpu, 3*4*sizeof(float));
- cudaMemcpy(A_gpu,A,3*4*sizeof(float),cudaMemcpyHostToDevice);
- cudaMemcpy(B_gpu,B,4*sizeof(float),cudaMemcpyHostToDevice);
-
- singa_gpu_add_vec_row(B_gpu,A_gpu,C_gpu,3,4,4);
-
- cudaMemcpy(C,C_gpu,3*4*sizeof(float),cudaMemcpyDeviceToHost);
-
- for(int i = 0; i < 3; i++)
- {
- for(int j = 0; j < 4; j++)
- {
- ASSERT_EQ(C[i][j],D[i][j]);
- }
- }
-
- cudaFree(A_gpu);
- cudaFree(B_gpu);
- cudaFree(C_gpu);
+ float A[3][4];
+ float B[4];
+ float C[3][4];
+ float D[3][4];
+
+ for (int i = 0; i < 4; i++) {
+ B[i] = i;
+ }
+
+ for (int i = 0; i < 3; i++) {
+ for (int j = 0; j < 4; j++) {
+ A[i][j] = i + j;
+ D[i][j] = A[i][j] + B[j];
+ }
+ }
+
+ float* A_gpu = NULL;
+ float* B_gpu = NULL;
+ float* C_gpu = NULL;
+
+ cudaMalloc(reinterpret_cast<void**>(&A_gpu), 3*4*sizeof(float));
+ cudaMalloc(reinterpret_cast<void**>(&B_gpu), 4*sizeof(float));
+ cudaMalloc(reinterpret_cast<void**>(&C_gpu), 3*4*sizeof(float));
+ cudaMemcpy(A_gpu, A, 3*4*sizeof(float), cudaMemcpyHostToDevice);
+ cudaMemcpy(B_gpu, B, 4*sizeof(float), cudaMemcpyHostToDevice);
+
+ singa_gpu_add_vec_row(B_gpu, A_gpu, C_gpu, 3, 4, 4);
+
+ cudaMemcpy(C, C_gpu, 3*4*sizeof(float), cudaMemcpyDeviceToHost);
+
+ for (int i = 0; i < 3; i++) {
+ for (int j = 0; j < 4; j++) {
+ ASSERT_EQ(C[i][j], D[i][j]);
+ }
+ }
+
+ cudaFree(A_gpu);
+ cudaFree(B_gpu);
+ cudaFree(C_gpu);
}
TEST(MathTest, TestSingaSetValueGPU) {
+ float A[3][4];
+ float* A_gpu = NULL;
- float A[3][4];
+ cudaMalloc(reinterpret_cast<void**>(&A_gpu), 3*4*sizeof(float));
- float* A_gpu=NULL;
+ cudaMemcpy(A_gpu, A, 3*4*sizeof(float), cudaMemcpyHostToDevice);
- cudaMalloc((void**)&A_gpu, 3*4*sizeof(float));
+ singa_gpu_set_value(A_gpu, 4.0, 3*4);
- cudaMemcpy(A_gpu,A,3*4*sizeof(float),cudaMemcpyHostToDevice);
+ cudaMemcpy(A, A_gpu, 3*4*sizeof(float), cudaMemcpyDeviceToHost);
- singa_gpu_set_value(A_gpu,4.0,3*4);
+ for (int i = 0; i < 3; i++) {
+ for (int j = 0; j < 4; j++) {
+ ASSERT_EQ(A[i][j], 4.0f);
+ }
+ }
- cudaMemcpy(A,A_gpu,3*4*sizeof(float),cudaMemcpyDeviceToHost);
-
- for(int i = 0; i < 3; i++)
- {
- for(int j = 0; j < 4; j++)
- {
- ASSERT_EQ(A[i][j],4.0f);
- }
- }
-
- cudaFree(A_gpu);
+ cudaFree(A_gpu);
}
TEST(MathTest, TestEopGPU) {
+ float A[10] = {};
+ float B[10] = {};
- float A[10] = {};
- float B[10] = {};
-
- for(int i = 0; i < 10; i++)
- {
- A[i] = i;
- B[i] = -i;
- }
+ for (int i = 0; i < 10; i++) {
+ A[i] = i;
+ B[i] = -i;
+ }
- float* A_gpu=NULL;
- float* B_gpu=NULL;
+ float* A_gpu = NULL;
+ float* B_gpu = NULL;
- cudaMalloc((void**)&A_gpu, 10*sizeof(float));
- cudaMalloc((void**)&B_gpu, 10*sizeof(float));
+ cudaMalloc(reinterpret_cast<void**>(&A_gpu), 10*sizeof(float));
+ cudaMalloc(reinterpret_cast<void**>(&B_gpu), 10*sizeof(float));
- cudaMemcpy(A_gpu,A,10*sizeof(float),cudaMemcpyHostToDevice);
- cudaMemcpy(B_gpu,B,10*sizeof(float),cudaMemcpyHostToDevice);
+ cudaMemcpy(A_gpu, A, 10*sizeof(float), cudaMemcpyHostToDevice);
+ cudaMemcpy(B_gpu, B, 10*sizeof(float), cudaMemcpyHostToDevice);
- gpu_e_f<singa::op::Sigmoid<float>, float>(10, A_gpu, B_gpu);
+ gpu_e_f<singa::op::Sigmoid<float>, float>(10, A_gpu, B_gpu);
- cudaFree(A_gpu);
- cudaFree(B_gpu);
+ cudaFree(A_gpu);
+ cudaFree(B_gpu);
}
#endif // USE_GPU
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b2cfa17b/src/test/test_msg.cc
----------------------------------------------------------------------
diff --git a/src/test/test_msg.cc b/src/test/test_msg.cc
index d5d9f20..db83b1c 100644
--- a/src/test/test_msg.cc
+++ b/src/test/test_msg.cc
@@ -42,7 +42,7 @@ TEST(MsgTest, AddrTest) {
}
TEST(MsgTest, AddFrameTest) {
- int buf[5]={1,2,3,4,5};
+ int buf[5] = {1, 2, 3, 4, 5};
Msg msg;
msg.AddFrame("abcdefg", 7);
msg.AddFrame(buf, sizeof(int) * 5);
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b2cfa17b/src/test/test_paramslicer.cc
----------------------------------------------------------------------
diff --git a/src/test/test_paramslicer.cc b/src/test/test_paramslicer.cc
index c693da1..bc7dedd 100644
--- a/src/test/test_paramslicer.cc
+++ b/src/test/test_paramslicer.cc
@@ -25,7 +25,7 @@
using namespace singa;
-const int param_size[]={2400,32,25600,32, 51200,64,57600,10};
+const int param_size[] = {2400, 32, 25600, 32, 51200, 64, 57600, 10};
/*
class ParamSlicerTest : public ::testing::Test {
[2/6] incubator-singa git commit: SINGA-104 Add Context Class
Posted by wa...@apache.org.
SINGA-104 Add Context Class
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/35de4f91
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/35de4f91
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/35de4f91
Branch: refs/heads/master
Commit: 35de4f913e97c64dbf27ff37c999aaa5a3ce40f6
Parents: 771ff32
Author: seaok <se...@gmail.com>
Authored: Thu Nov 26 10:51:55 2015 +0800
Committer: seaok <se...@gmail.com>
Committed: Thu Nov 26 10:51:55 2015 +0800
----------------------------------------------------------------------
Makefile.gpu | 2 ++
1 file changed, 2 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/35de4f91/Makefile.gpu
----------------------------------------------------------------------
diff --git a/Makefile.gpu b/Makefile.gpu
index 2dc2a71..2fea3b2 100644
--- a/Makefile.gpu
+++ b/Makefile.gpu
@@ -57,6 +57,8 @@ CUCXXFLAGS := $(MSHADOW_FLAGS) -std=c++11 -G $(CUDA_ARCH) \
ifeq ($(CUDA_DIR),)
MSHADOW_FLAGS := $(MSHADOW_FLAGS) -DCPU_ONLY
CXXFLAGS := $(CXXFLAGS) -DCPU_ONLY
+else
+ CXXFLAGS := $(CXXFLAGS) -DUSE_GPU
endif
# find user defined .proto file, and then compute the corresponding .h, .cc
[5/6] incubator-singa git commit: SINGA-104 Add Context Class
Posted by wa...@apache.org.
SINGA-104 Add Context Class
Add cuda_utils.h;
Add comments for context.h
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/e3bda08d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/e3bda08d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/e3bda08d
Branch: refs/heads/master
Commit: e3bda08d8428e3a6a23bf4de8c356406a8126cd8
Parents: 3841bc5
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Thu Nov 26 16:22:54 2015 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Thu Nov 26 16:34:22 2015 +0800
----------------------------------------------------------------------
include/singa/utils/context.h | 25 +++++++---
include/singa/utils/cuda_utils.h | 91 +++++++++++++++++++++++++++++++++++
2 files changed, 109 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/e3bda08d/include/singa/utils/context.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/context.h b/include/singa/utils/context.h
index 5223012..5066633 100644
--- a/include/singa/utils/context.h
+++ b/include/singa/utils/context.h
@@ -40,10 +40,12 @@ namespace singa {
/**
* Context is used as a global singleton, which stores the mapping from CPU
- * thread id to GPU device id. It manages the handlers for GPU
+ * thread id to GPU device id. If a thread has no GPU, then its associated
+ * device id is -1. It manages (e.g., creates) the handlers for GPU
* devices. It also manages the GPU and CPU random generators, which are created
* when accessed. One CPU thread has a CPU random generator. A GPU device
- * has a GPU random generator.
+ * has a GPU random generator, which is accessible after assigning the GPU
+ * device to a CPU thread via SetupDevice.
*/
class Context {
public:
@@ -75,7 +77,7 @@ class Context {
}
/**
- * Constructor.
+ * Constructor, init handlers and GPU rand generators to nullptr.
*/
Context() {
for (int i = 0; i < kMaxNumGPU; i++) {
@@ -86,7 +88,7 @@ class Context {
/**
* @return the ID of the device attached to a given CPU thread, or -1 if this
- * thread has not attached GPU device.
+ * thread has not been attached to a GPU device.
*/
int device_id(const std::thread::id& tid) {
if (device_id_.find(tid) != device_id_.end())
@@ -94,11 +96,10 @@ class Context {
else
return -1;
}
-
/**
* Setup the CPU thread, which may be assigned a GPU device.
+ * If there is no GPU device, then set did to -1.
* Set the random seed to -1.
- * A GPU handler will be created for the GPU device.
* @param[in] thread::id CPU thread ID
* @param[in] device_id GPU device ID
*/
@@ -109,11 +110,14 @@ class Context {
* @copy SetupDevice(const int, const int);
* @param[in] seed random seed
*/
- void SetupDevice(const std::thread::id& tid, const int did, long long seed) {
+ void SetupDevice(const std::thread::id& tid, const int did, const int seed) {
device_id_[tid] = did;
seed_[tid] = seed;
}
+ /**
+ * Activate the GPU device by calling cudaSetDevice.
+ */
void ActivateDevice(const int device_id) {
CHECK_GE(device_id, 0);
#ifdef USE_GPU
@@ -139,6 +143,10 @@ class Context {
return rand_generator_[tid];
}
#ifdef USE_GPU
+ /**
+ * Get the handler of the GPU which is assigned to the given thread.
+ * Calls cublas_handle(const int);
+ */
cublasHandle_t cublas_handle(const std::thread::id thread_id) {
return cublas_handle(device_id(thread_id));
}
@@ -157,6 +165,9 @@ class Context {
}
return cublas_handle_[device_id];
}
+ /**
+ * Get the rand generator of the GPU device assigned to the given thread.
+ */
curandGenerator_t curand_generator(const std::thread::id thread_id) {
return curand_generator(device_id(thread_id));
}
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/e3bda08d/include/singa/utils/cuda_utils.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/cuda_utils.h b/include/singa/utils/cuda_utils.h
new file mode 100644
index 0000000..b27a6bb
--- /dev/null
+++ b/include/singa/utils/cuda_utils.h
@@ -0,0 +1,91 @@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+/**
+ * The code is adapted from that of Caffe whose license is attached.
+ *
+ * COPYRIGHT
+ * All contributions by the University of California:
+ * Copyright (c) 2014, The Regents of the University of California (Regents)
+ * All rights reserved.
+ * All other contributions:
+ * Copyright (c) 2014, the respective contributors
+ * All rights reserved.
+ * Caffe uses a shared copyright model: each contributor holds copyright over
+ * their contributions to Caffe. The project versioning records all such
+ * contribution and copyright details. If a contributor wants to further mark
+ * their specific copyright on a particular contribution, they should indicate
+ * their copyright solely in the commit message of the change when it is
+ * committed.
+ * LICENSE
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * CONTRIBUTION AGREEMENT
+ * By contributing to the BVLC/caffe repository through pull-request, comment,
+ * or otherwise, the contributor releases their content to the
+ * license and copyright terms herein.
+ *
+ */
+#ifndef SINGA_UTILS_CUDA_UTILS_H_
+#define SINGA_UTILS_CUDA_UTILS_H_
+#include <cublas_v2.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <curand.h>
+
+// CUDA: various checks for different function calls.
+#define CUDA_CHECK(condition) \
+ /* Code block avoids redefinition of cudaError_t error */ \
+ do { \
+ cudaError_t error = condition; \
+ CHECK_EQ(error, cudaSuccess) << " " << cudaGetErrorString(error); \
+ } while (0)
+
+#define CUBLAS_CHECK(condition) \
+ /* Checks via CHECK_EQ; logs the numeric cuBLAS status on mismatch. */ \
+ do { \
+ cublasStatus_t status = condition; \
+ CHECK_EQ(status, CUBLAS_STATUS_SUCCESS) << " cuBLAS status " << status; \
+ } while (0)
+
+#define CURAND_CHECK(condition) \
+ /* Checks via CHECK_EQ; logs the numeric cuRAND status on mismatch. */ \
+ do { \
+ curandStatus_t status = condition; \
+ CHECK_EQ(status, CURAND_STATUS_SUCCESS) << " cuRAND status " << status; \
+ } while (0)
+
+#endif // SINGA_UTILS_CUDA_UTILS_H_
[4/6] incubator-singa git commit: SINGA-104 Add Context Class
Posted by wa...@apache.org.
SINGA-104 Add Context Class
Update Context class:
1. SetupDevice can setup the GPU device (device id>=0) random seed and
CPU thread random seed; random generator and handlers are created when
accessed. only GPU device has handler and curand_generator.
2. pass test_context.cc
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/3841bc54
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/3841bc54
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/3841bc54
Branch: refs/heads/master
Commit: 3841bc5484fc47eae3f5877d2264c5188675818c
Parents: 9aff30a
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Thu Nov 26 16:06:08 2015 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Thu Nov 26 16:06:08 2015 +0800
----------------------------------------------------------------------
include/singa/utils/context.h | 136 +++++++++++++++++++++++++------------
src/test/test_context.cc | 55 +++++++++++++++
src/test/test_context.cu | 55 ---------------
src/test/test_math.cc | 2 +
src/utils/context.cc | 87 ------------------------
5 files changed, 151 insertions(+), 184 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/3841bc54/include/singa/utils/context.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/context.h b/include/singa/utils/context.h
index 7a41dac..5223012 100644
--- a/include/singa/utils/context.h
+++ b/include/singa/utils/context.h
@@ -37,14 +37,12 @@
namespace singa {
-// max num of threads per process
-const int kNumMaxThreads = 1024;
/**
* Context is used as a global singleton, which stores the mapping from CPU
* thread id to GPU device id. It manages the handlers for GPU
* devices. It also manages the GPU and CPU random generators, which are created
- * when accessed. One CPU thread has a CPU random generator. A CPU device
+ * when accessed. One CPU thread has a CPU random generator. A GPU device
* has a GPU random generator.
*/
class Context {
@@ -52,20 +50,49 @@ class Context {
/**
* Destructor, release random generators and handlers.
*/
- ~Context();
+ ~Context() {
+#ifdef USE_GPU
+ for (auto& entry : device_id_) {
+ if (entry.second != -1) {
+ cudaSetDevice(entry.second);
+ if (cublas_handle_[entry.second] != nullptr) {
+ cublasDestroy(cublas_handle_[entry.second]);
+ cublas_handle_[entry.second] = nullptr;
+ }
+ if(curand_generator_[entry.second] != nullptr) {
+ curandDestroyGenerator(curand_generator_[entry.second]);
+ curand_generator_[entry.second] = nullptr;
+ }
+ }
+ }
+#endif
+ for (auto& entry : rand_generator_) {
+ if (entry.second != nullptr) {
+ delete entry.second;
+ entry.second = nullptr;
+ }
+ }
+
+ }
/**
- * Constructor, init arrays for random generators and handlers.
+ * Constructor.
*/
- Context();
+ Context() {
+ for (int i = 0; i < kMaxNumGPU; i++) {
+ cublas_handle_.push_back(nullptr);
+ curand_generator_.push_back(nullptr);
+ }
+ }
/**
- * @return the ID of the device attached to a given CPU thread:
- * if the device is a GPU card, then returns the GPU device ID;
- * Else return -1.
+ * @return the ID of the device attached to a given CPU thread, or -1 if this
+ * thread has not attached GPU device.
*/
- int device_id(const std::thread::id tid) {
- CHECK(device_id_.find(tid) != device_id_.end());
- return device_id_[tid];
+ int device_id(const std::thread::id& tid) {
+ if (device_id_.find(tid) != device_id_.end())
+ return device_id_[tid];
+ else
+ return -1;
}
/**
@@ -75,13 +102,24 @@ class Context {
* @param[in] thread::id CPU thread ID
* @param[in] device_id GPU device ID
*/
- void SetupDevice(const std::thread::id tid, const int did);
-
+ void SetupDevice(const std::thread::id& tid, const int did) {
+ SetupDevice(tid, did, -1);
+ }
/**
* @copy SetupDevice(const int, const int);
* @param[in] seed random seed
*/
- void SetupDevice(const std::thread::id tid, const int did, long long seed);
+ void SetupDevice(const std::thread::id& tid, const int did, long long seed) {
+ device_id_[tid] = did;
+ seed_[tid] = seed;
+ }
+
+ void ActivateDevice(const int device_id) {
+ CHECK_GE(device_id, 0);
+#ifdef USE_GPU
+ cudaSetDevice(device_id);
+#endif
+ }
/**
* Get the CPU random generator.
@@ -90,7 +128,7 @@ class Context {
* @param[in] thread::id CPU thread ID
* @return the CPU random generator
*/
- std::mt19937* rand_generator(const std::thread::id tid) {
+ std::mt19937* rand_generator(const std::thread::id& tid) {
if (rand_generator_.find(tid) == rand_generator_.end()) {
CHECK(seed_.find(tid) != seed_.end());
auto seed = static_cast<unsigned>(seed_[tid]);
@@ -101,50 +139,64 @@ class Context {
return rand_generator_[tid];
}
#ifdef USE_GPU
+ cublasHandle_t cublas_handle(const std::thread::id thread_id) {
+ return cublas_handle(device_id(thread_id));
+ }
/**
- * Get the handler of the GPU device attached to a CPU thread.
- * @param[in] thread::id
- * @return the GPU handler, or nullptr if this thread does not have any GPU.
+ * Get the cuBLAS handle of the GPU device given its device ID. The device
+ * must be set up via SetupDevice(const std::thread::id, const int) before
+ * calling this function.
+ * @param[in] device_id GPU device ID
+ * @return the cuBLAS handle
*/
- cublasHandle_t cublas_handle(const std::thread::id tid) {
- CHECK(cublas_handle_.find(tid) != cublas_handle_.end());
- return cublas_handle_[tid];
- }
+ cublasHandle_t cublas_handle(const int device_id) {
+ CHECK_GE(device_id, 0);
+ if (cublas_handle_.at(device_id) == nullptr) {
+ cudaSetDevice(device_id);
+ cublasCreate(&cublas_handle_[device_id]);
+ }
+ return cublas_handle_[device_id];
+ }
+ curandGenerator_t curand_generator(const std::thread::id thread_id) {
+ return curand_generator(device_id(thread_id));
+ }
/**
- * Get the random generator of the GPU device assigned to the given thread.
- * @param[in] thread::id
+ * Get the random generator of the GPU device given the device id.
+ * @param[in] device_id GPU device ID
* @return random generator. If it does not exist, then create one.
* The random seed will be set to CURAND_RNG_PSEUDO_DEFAULT if it is not set.
*/
- curandGenerator_t curand_generator(const std::thread::id tid) {
- if (curand_generator_.find(tid) == curand_generator_.end()) {
+ curandGenerator_t curand_generator(const int device_id) {
+ CHECK_GE(device_id, 0);
+ if (curand_generator_.at(device_id) == nullptr) {
+ // TODO: handle a user-set seed
+ /*
CHECK(seed_.find(tid) != seed_.end());
auto seed = seed_[tid];
- // TODO handle user set seed
- cudaSetDevice(device_id_[tid]);
- curandCreateGenerator(&curand_generator_[tid], CURAND_RNG_PSEUDO_DEFAULT);
+ */
+ ActivateDevice(device_id);
+ curandCreateGenerator(&curand_generator_[device_id],
+ CURAND_RNG_PSEUDO_DEFAULT);
}
- return curand_generator_[tid];
+ return curand_generator_[device_id];
}
- /*
- protected:
- void CreateHandle(const int thread::id);
- void DestoryHandle(const int thread::id);
- void CreateGpuRandGenerator(const int thread::id);
- void DestoryGpuRandGenerator(const int thread::id);
- */
-
#endif
protected:
-
+ //! max number of GPUs per process
+ const int kMaxNumGPU = 64;
+ //! map from thread id to device id
std::unordered_map<std::thread::id, int> device_id_;
+ //! map from thread id to cpu rand generator
std::unordered_map<std::thread::id, std::mt19937 *> rand_generator_;
+ //! map from thread id to cpu rand generator seed
std::unordered_map<std::thread::id, int> seed_;
#ifdef USE_GPU
- std::unordered_map<std::thread::id, cublasHandle_t> cublas_handle_;
- std::unordered_map<std::thread::id, curandGenerator_t> curand_generator_;
+ //! cublas handle indexed by GPU device ID
+ std::vector<cublasHandle_t> cublas_handle_;
+ //! curand generator indexed by GPU device ID
+ std::vector<curandGenerator_t> curand_generator_;
#endif
};
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/3841bc54/src/test/test_context.cc
----------------------------------------------------------------------
diff --git a/src/test/test_context.cc b/src/test/test_context.cc
new file mode 100644
index 0000000..5e501b9
--- /dev/null
+++ b/src/test/test_context.cc
@@ -0,0 +1,55 @@
+#include <thread>
+#include "gtest/gtest.h"
+#include "singa/utils/singleton.h"
+#include "singa/utils/context.h"
+#include "singa/utils/cuda_utils.h"
+
+using namespace singa;
+using namespace std;
+
+TEST(ContextTest, TestDevice) {
+ auto context = Singleton<Context>::Instance();
+
+ auto id = std::this_thread::get_id();
+ context->SetupDevice(id, 0);
+ auto device_id = context->device_id(id);
+ ASSERT_EQ(0, device_id);
+}
+
+TEST(ContextTest, TestHandle) {
+ auto context = Singleton<Context>::Instance();
+
+ float cpu_ret = 0.0f;
+ float gpu_ret = 0.0f;
+
+ float A[12];
+ float B[12];
+
+ for(int i = 0; i < 12; i++) {
+ A[i]=i-1;
+ B[i]=i+1;
+ }
+
+ float* A_gpu = NULL;
+ float* B_gpu = NULL;
+ context->SetupDevice(std::this_thread::get_id(), 0);
+
+ cudaMalloc((void**)&A_gpu, 12 * sizeof(float));
+ cudaMalloc((void**)&B_gpu, 12 * sizeof(float));
+
+ cudaMemcpy(A_gpu, A, 12 * sizeof(float), cudaMemcpyHostToDevice);
+ cudaMemcpy(B_gpu, B, 12 * sizeof(float), cudaMemcpyHostToDevice);
+
+ cublasHandle_t handle = context->cublas_handle(std::this_thread::get_id());
+
+ cublasSdot(handle, 12, A_gpu, 1, B_gpu, 1, &gpu_ret);
+
+ for(int i = 0; i < 12;++i) {
+ cpu_ret += A[i] * B[i];
+ }
+
+ ASSERT_EQ(gpu_ret,cpu_ret);
+
+ cudaFree(A_gpu);
+ cudaFree(B_gpu);
+}
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/3841bc54/src/test/test_context.cu
----------------------------------------------------------------------
diff --git a/src/test/test_context.cu b/src/test/test_context.cu
deleted file mode 100644
index 88ab06b..0000000
--- a/src/test/test_context.cu
+++ /dev/null
@@ -1,55 +0,0 @@
-#include <thread>
-#include "gtest/gtest.h"
-#include "singa/utils/singleton.h"
-#include "singa/utils/context.h"
-#include "singa/utils/cuda_utils.h"
-
-using namespace singa;
-using namespace std;
-
-TEST(ContextTest, TestDevice) {
- auto context = Singleton<Context>::Instance();
-
- auto id = std::this_thread::get_id();
- context->SetupDevice(id, 0);
- auto device_id = context->device_id(id);
- ASSERT_EQ(1,device_id);
-}
-
-TEST(ContextTest, TestHandle) {
- auto context = Singleton<Context>::Instance();
-
- float cpu_ret = 0.0f;
- float gpu_ret = 0.0f;
-
- float A[12];
- float B[12];
-
- for(int i = 0; i < 12; i++) {
- A[i]=i-1;
- B[i]=i+1;
- }
-
- float* A_gpu = NULL;
- float* B_gpu = NULL;
- context->SetupDevice(std::this_thread::get_id(), 0);
-
- cudaMalloc((void**)&A_gpu, 12 * sizeof(float));
- cudaMalloc((void**)&B_gpu, 12 * sizeof(float));
-
- cudaMemcpy(A_gpu, A, 12 * sizeof(float), cudaMemcpyHostToDevice);
- cudaMemcpy(B_gpu, B, 12 * sizeof(float), cudaMemcpyHostToDevice);
-
- cublasHandle_t handle = context->cublas_handle(std::this_thread::get_id());
-
- cublasSdot(handle, 12, A_gpu, 1, B_gpu, 1, &gpu_ret);
-
- for(int i = 0; i < 12;++i) {
- cpu_ret += A[i] * B[i];
- }
-
- ASSERT_EQ(gpu_ret,cpu_ret);
-
- cudaFree(A_gpu);
- cudaFree(B_gpu);
-}
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/3841bc54/src/test/test_math.cc
----------------------------------------------------------------------
diff --git a/src/test/test_math.cc b/src/test/test_math.cc
index a5bf180..8f8c633 100644
--- a/src/test/test_math.cc
+++ b/src/test/test_math.cc
@@ -236,6 +236,7 @@ TEST(MathTest, TestGemvGPU) {
}
+/*
TEST(MathTest, TestAxpyGPU) {
float A[4][3] = {};
float C[4][3] = {};
@@ -281,6 +282,7 @@ TEST(MathTest, TestAxpyGPU) {
cudaFree(A_gpu);
cudaFree(B_gpu);
}
+*/
TEST(MathTest, TestDotGPU) {
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/3841bc54/src/utils/context.cc
----------------------------------------------------------------------
diff --git a/src/utils/context.cc b/src/utils/context.cc
deleted file mode 100644
index 37c8f39..0000000
--- a/src/utils/context.cc
+++ /dev/null
@@ -1,87 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements. See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership. The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License. You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied. See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-#include "singa/utils/context.h"
-#include "singa/utils/factory.h"
-#include "singa/utils/singleton.h"
-
-namespace singa {
-
-Context::~Context() {
-#ifdef USE_GPU
- for (auto& entry : device_id_) {
- if (entry.second != -1) {
- cudaSetDevice(entry.second);
- if (cublas_handle_[entry.first] != nullptr) {
- cublasDestroy(cublas_handle_[entry.first]);
- cublas_handle_[entry.first] = nullptr;
- }
- if(curand_generator_[entry.first] != nullptr) {
- curandDestroyGenerator(curand_generator_[entry.first]);
- curand_generator_[entry.first] = nullptr;
- }
- }
- }
-#endif
- for (auto& entry : rand_generator_) {
- if (entry.second != nullptr) {
- delete entry.second;
- entry.second = nullptr;
- }
- }
-}
-
-Context::Context() { }
-
-void Context::SetupDevice(const std::thread::id thread, const int did) {
- SetupDevice(thread, did, -1);
-}
-
-void Context::SetupDevice(const std::thread::id thread, const int did,
- long long seed) {
- device_id_[thread] = did;
-#ifdef USE_GPU
- if (did > -1) {
- cudaSetDevice(did);
- cublasCreate(&handle_[thread]);
- }
-#endif
- seed_[thread] = seed;
-}
-
-/*
-#ifdef USE_GPU
-void Context::DestoryHandle(const int thread::id) {
- cudaSetDevice(device_id_[thread::id]);
- cublasDestroy(handle_[thread::id]);
- handle_[thread::id] = nullptr;
-}
-
-void Context::DestoryGpuRandGenerator(const int thread::id) {
- cudaSetDevice(device_id_[thread::id]);
- curandDestroyGenerator(curand_generator_[thread::id]);
- curand_generator_[thread::id] = nullptr;
-}
-#endif
-*/
-
-
-} // namespace singa
-