Posted to commits@singa.apache.org by wa...@apache.org on 2015/11/26 12:31:58 UTC

[1/6] incubator-singa git commit: SINGA-104 Add Context Class

Repository: incubator-singa
Updated Branches:
  refs/heads/master 364c88562 -> b2cfa17b8


SINGA-104 Add Context Class


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/771ff328
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/771ff328
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/771ff328

Branch: refs/heads/master
Commit: 771ff328098fed89177a0f8e0d41a44c4c1c7e0c
Parents: 364c885
Author: seaok <se...@gmail.com>
Authored: Wed Nov 25 13:19:42 2015 +0800
Committer: seaok <se...@gmail.com>
Committed: Wed Nov 25 13:19:42 2015 +0800

----------------------------------------------------------------------
 include/singa/utils/context.h | 87 +++++++++++++++++++++++++++++++++++++
 src/test/test_context.cc      | 66 ++++++++++++++++++++++++++++
 src/utils/context.cc          | 89 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 242 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/771ff328/include/singa/utils/context.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/context.h b/include/singa/utils/context.h
new file mode 100644
index 0000000..762ae75
--- /dev/null
+++ b/include/singa/utils/context.h
@@ -0,0 +1,87 @@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*   http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied.  See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+#ifndef SINGA_UTILS_CONTEXT_H_
+#define SINGA_UTILS_CONTEXT_H_
+
+#include <vector>
+
+#ifdef USE_GPU
+#include <cublas_v2.h>
+#include <cuda_runtime.h>
+#include <curand.h>
+#endif
+
+
+namespace singa {
+
+const int kDefaultDevice = 20;
+
+class Context {
+  public:
+
+	~Context();
+
+	void Setup();
+
+#ifdef USE_GPU
+	int DeviceID(const int index) {
+	  return device_ids_[index];
+	}
+
+	void SetDeviceID(const int index, const int id) {
+	  device_ids_[index] = id;
+	}
+
+	void SetDevice(const int index) {
+	  cudaSetDevice(device_ids_[index]);
+	}
+
+	cublasHandle_t Handle(const int index) {
+	  return handles_[index];
+	}
+
+	void CreateHandle(const int index);
+
+	void DestoryHandle(const int index);
+
+	curandGenerator_t GpuRandGenerator(const int index) {
+	  return gpu_rand_generators_[index];
+	}
+
+	void CreateGpuRandGenerator(const int index);
+
+	void DestoryGpuRandGenerator(const int index);
+
+#endif
+
+  protected:
+	std::vector<int> device_ids_;
+#ifdef USE_GPU
+	std::vector<cublasHandle_t> handles_;
+	std::vector<curandGenerator_t> gpu_rand_generators_;
+#endif
+
+};
+
+}  // namespace singa
+
+#endif  // SINGA_UTILS_MATH_ADDR_H_

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/771ff328/src/test/test_context.cc
----------------------------------------------------------------------
diff --git a/src/test/test_context.cc b/src/test/test_context.cc
new file mode 100644
index 0000000..3a23b23
--- /dev/null
+++ b/src/test/test_context.cc
@@ -0,0 +1,66 @@
+#include "gtest/gtest.h"
+#include "singa/utils/singleton.h"
+#include "singa/utils/context.h"
+
+//#include <cuda_runtime.h>
+//#include "cublas_v2.h"
+
+using namespace singa;
+using namespace std;
+
+TEST(ContextTest, TestDevice) {
+  auto context = Singleton<Context>::Instance();
+  context->Setup();
+
+  int index = 4;
+  int device_id = context->DeviceID(index);
+  ASSERT_EQ(4,device_id);
+
+  context->SetDeviceID(index,6);
+  device_id = context->DeviceID(index);
+  ASSERT_EQ(6,device_id);
+}
+
+TEST(ContextTest, TestHandle) {
+  auto context = Singleton<Context>::Instance();
+  context->Setup();
+
+  int index = 2;
+  context->CreateHandle(index);
+
+  float cpu_ret = 0.0f;
+  float gpu_ret = 0.0f;
+
+  float A[12];
+  float B[12];
+  
+  for(int i = 0; i < 12; i++) {
+	A[i]=i-1;
+	B[i]=i+1;
+  }
+
+  float* A_gpu = NULL;
+  float* B_gpu = NULL;
+  
+  cudaMalloc((void**)&A_gpu, 12*sizeof(float));
+  cudaMalloc((void**)&B_gpu, 12*sizeof(float));
+
+  cudaMemcpy(A_gpu,A,12*sizeof(float),cudaMemcpyHostToDevice);
+  cudaMemcpy(B_gpu,B,12*sizeof(float),cudaMemcpyHostToDevice);
+
+  cublasHandle_t handle = context->Handle(index);
+  /*cublasHandle_t handle;
+  cudaSetDevice(0);
+  cublasCreate(&handle);*/
+
+  cublasSdot(handle, 12, A_gpu, 1, B_gpu, 1, &gpu_ret);
+
+  for(int i = 0; i < 12;++i) {
+	cpu_ret += A[i] * B[i];
+  }
+  
+  ASSERT_EQ(gpu_ret,cpu_ret);
+  
+  cudaFree(A_gpu);
+  cudaFree(B_gpu);
+}

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/771ff328/src/utils/context.cc
----------------------------------------------------------------------
diff --git a/src/utils/context.cc b/src/utils/context.cc
new file mode 100644
index 0000000..671bec0
--- /dev/null
+++ b/src/utils/context.cc
@@ -0,0 +1,89 @@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*   http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied.  See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+#include "singa/utils/context.h"
+#include "singa/utils/factory.h"
+#include "singa/utils/singleton.h"
+
+namespace singa {
+  
+Context::~Context() {
+#ifdef USE_GPU
+  for(int i = 0; i < kDefaultDevice; ++i) {
+	SetDevice(i);
+
+	if(handles_[i] != NULL) {
+	  cublasDestroy(handles_[i]);
+	}
+
+	if(gpu_rand_generators_[i] != NULL) {
+      curandDestroyGenerator(gpu_rand_generators_[i]);
+	}
+  }
+#endif
+}
+
+void Context::Setup() {
+
+  for(int i = 0; i < kDefaultDevice; ++i) {
+	//init device index
+	device_ids_.push_back(i);
+  }
+
+#ifdef USE_GPU
+  for(int i = 0; i < kDefaultDevice; ++i) {
+	//init handle
+	cublasHandle_t handle = NULL;
+	handles_.push_back(handle);
+
+	curandGenerator_t gpu_rand_generator = NULL;
+	gpu_rand_generators_.push_back(gpu_rand_generator);
+  }
+#endif
+}
+
+#ifdef USE_GPU
+void Context::CreateHandle(const int index) {
+  SetDevice(device_ids_[index]);
+  cublasCreate(&handles_[index]);
+}
+
+void Context::DestoryHandle(const int index) {
+  SetDevice(device_ids_[index]);
+  cublasDestroy(handles_[index]);
+  handles_[index] = NULL;
+}
+
+void Context::CreateGpuRandGenerator(const int index) {
+  SetDevice(device_ids_[index]);
+  curandCreateGenerator(&gpu_rand_generators_[index], CURAND_RNG_PSEUDO_DEFAULT);
+}
+
+void Context::DestoryGpuRandGenerator(const int index) {
+  SetDevice(device_ids_[index]);
+  curandDestroyGenerator(gpu_rand_generators_[index]);
+  gpu_rand_generators_[index] = NULL;
+}
+
+#endif
+
+
+}  // namespace singa
+


[3/6] incubator-singa git commit: SINGA-104 Add Context Class

Posted by wa...@apache.org.
SINGA-104 Add Context Class

Update the Context class:
1. update function and variable names;
2. add random generators for CPU threads.

TODO: run the test in test_context.cu; add implicit/automatic init (using device 0).
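
The updated API keys every per-thread resource on std::thread::id rather than on a plain index. As a rough usage sketch (the Worker function, the GPU id and main() below are illustrative only, not part of the commit), a CPU worker thread registers itself once via SetupDevice and then draws numbers from its own std::mt19937:

#include <random>
#include <thread>
#include "singa/utils/context.h"
#include "singa/utils/singleton.h"

// Illustrative worker body: each training thread would run something like this.
void Worker(int gpu_id) {
  auto context = singa::Singleton<singa::Context>::Instance();
  auto tid = std::this_thread::get_id();
  // Seed defaults to -1, i.e., the generator is seeded from the system clock.
  context->SetupDevice(tid, gpu_id);
  std::mt19937* gen = context->rand_generator(tid);
  std::uniform_real_distribution<float> dist(0.0f, 1.0f);
  float sample = dist(*gen);  // per-thread generator, no locking needed
  (void) sample;
}

int main() {
  std::thread worker(Worker, 0);  // attach this worker to GPU device 0
  worker.join();
  return 0;
}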


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/9aff30aa
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/9aff30aa
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/9aff30aa

Branch: refs/heads/master
Commit: 9aff30aab69f81e45d3986c0337346e0e9170936
Parents: 35de4f9
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Thu Nov 26 11:53:10 2015 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Thu Nov 26 11:56:37 2015 +0800

----------------------------------------------------------------------
 Makefile.gpu                  |  10 +--
 include/singa/utils/context.h | 140 +++++++++++++++++++++++++++----------
 src/test/test_context.cc      |  66 -----------------
 src/test/test_context.cu      |  55 +++++++++++++++
 src/utils/context.cc          |  82 +++++++++++-----------
 5 files changed, 203 insertions(+), 150 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9aff30aa/Makefile.gpu
----------------------------------------------------------------------
diff --git a/Makefile.gpu b/Makefile.gpu
index 2fea3b2..35b81b9 100644
--- a/Makefile.gpu
+++ b/Makefile.gpu
@@ -20,16 +20,16 @@
 
 ###################User Config Varaibles #############################
 # third-party library installation folder
-HOME_DIR := /usr
+HOME_DIR := /home/wangwei/local
 
 CUDA_DIR := /usr/local/cuda
 
 #CUDA_DIR :=
 
 # Lib folder for system and external libs. You may need to change it.
-LIBRARY_DIRS := $(HOME_DIR)/lib64 $(HOME_DIR)/lib $(HOME_DIR)/local/lib $(CUDA_DIR)/lib64 $(CUDA_DIR)/lib
+LIBRARY_DIRS := $(CUDA_DIR)/lib64 $(CUDA_DIR)/lib $(HOME_DIR)/lib64 $(HOME_DIR)/lib
 # Header folder for system and external libs. You may need to change it.
-INCLUDE_DIRS := $(HOME_DIR)/include ./include $(HOME_DIR)/local/include/zookeeper $(CUDA_DIR)/include
+INCLUDE_DIRS := $(CUDA_DIR)/include $(HOME_DIR)/include ./include
 # g++ location, should support c++11, tested with 4.8.1
 CXX := g++
 CUCXX := nvcc
@@ -50,7 +50,7 @@ ZK_FLAGS :=-DTHREADED -fpermissive
 CXXFLAGS := -O2 -msse3 -Wall -pthread -fPIC -std=c++11 -Wno-unknown-pragmas \
 	$(MSHADOW_FLAGS) -DCPU_ONLY=1 $(ZK_FLAGS)\
 	-funroll-loops $(foreach includedir, $(INCLUDE_DIRS), -I$(includedir))
-CUCXXFLAGS := $(MSHADOW_FLAGS) -std=c++11 -G $(CUDA_ARCH) \
+CUCXXFLAGS := $(MSHADOW_FLAGS) -DUSE_GPU -std=c++11 -G $(CUDA_ARCH) \
 	$(foreach includedir, $(INCLUDE_DIRS), -I$(includedir))
 
 #Add device compile option
@@ -84,7 +84,7 @@ TEST_CUDA_SRCS :=$(shell find src/test/ -maxdepth 1 -name "*.cu")
 TEST_CUDA_OBJS := $(sort $(addprefix $(BUILD_DIR)/, $(TEST_CUDA_SRCS:.cu=.o)))
 -include $(TEST_CUDA_OBJS:%.o=%.P)
 
-SINGA_CUDA_SRCS :=$(shell find src/ -maxdepth 2 -name "*.cu")
+SINGA_CUDA_SRCS := $(shell find src/ \( -path "src/test" \) -prune -o \( -name "*.cu" -type f \) -print )
 SINGA_CUDA_OBJS := $(sort $(addprefix $(BUILD_DIR)/, $(SINGA_CUDA_SRCS:.cu=.o)))
 -include $(SINGA_CUDA_OBJS:%.o=%.P)
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9aff30aa/include/singa/utils/context.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/context.h b/include/singa/utils/context.h
index 762ae75..7a41dac 100644
--- a/include/singa/utils/context.h
+++ b/include/singa/utils/context.h
@@ -23,63 +23,129 @@
 #define SINGA_UTILS_CONTEXT_H_
 
 #include <vector>
+#include <random>
+#include <chrono>
+#include <thread>
+#include <unordered_map>
+#include <glog/logging.h>
+
 
 #ifdef USE_GPU
-#include <cublas_v2.h>
-#include <cuda_runtime.h>
-#include <curand.h>
+#include "singa/utils/cuda_utils.h"
 #endif
 
 
 namespace singa {
 
-const int kDefaultDevice = 20;
+// max num of threads per process
+const int kNumMaxThreads = 1024;
 
+/**
+ * Context is used as a global singleton, which stores the mapping from CPU
+ * thread id to GPU device id. It manages the handlers for GPU
+ * devices. It also manages the GPU and CPU random generators, which are created
+ * when accessed. One CPU thread has a CPU random generator. A CPU device
+ * has a GPU random generator.
+ */
 class Context {
-  public:
-
+ public:
+   /**
+    * Destructor, release random generators and handlers.
+    */
 	~Context();
-
-	void Setup();
-
-#ifdef USE_GPU
-	int DeviceID(const int index) {
-	  return device_ids_[index];
-	}
-
-	void SetDeviceID(const int index, const int id) {
-	  device_ids_[index] = id;
-	}
-
-	void SetDevice(const int index) {
-	  cudaSetDevice(device_ids_[index]);
+  /**
+   * Constructor, init arrays for random generators and handlers.
+   */
+  Context();
+
+  /**
+   * @return the ID of the device attached to a given CPU thread:
+   * if the device is a GPU card, then returns the GPU device ID;
+   * Else return -1.
+   */
+	int device_id(const std::thread::id tid) {
+    CHECK(device_id_.find(tid) != device_id_.end());
+	  return device_id_[tid];
 	}
 
-	cublasHandle_t Handle(const int index) {
-	  return handles_[index];
+  /**
+   * Setup the CPU thread, which may be assigned a GPU device.
+   * Set the random seed to -1.
+   * A GPU handler will be created for the GPU device.
+   * @param[in] thread::id CPU thread ID
+   * @param[in] device_id GPU device ID
+   */
+	void SetupDevice(const std::thread::id tid, const int did);
+
+  /**
+   * @copy SetupDevice(const int, const int);
+   * @param[in] seed random seed
+   */
+  void SetupDevice(const std::thread::id tid, const int did, long long seed);
+
+  /**
+   * Get the CPU random generator.
+   * If the generator does not exist, then create it now.
+   * If the seed is not set, i.e., seed=-1, then get a seed from system time.
+   * @param[in] thread::id CPU thread ID
+   * @return the CPU random generator
+   */
+  std::mt19937* rand_generator(const std::thread::id tid) {
+    if (rand_generator_.find(tid) == rand_generator_.end()) {
+      CHECK(seed_.find(tid) != seed_.end());
+      auto seed = static_cast<unsigned>(seed_[tid]);
+      if (seed_[tid] == -1)
+        seed = std::chrono::system_clock::now().time_since_epoch().count();
+      rand_generator_[tid] = new std::mt19937(seed);
+    }
+    return rand_generator_[tid];
+  }
+#ifdef USE_GPU
+  /**
+   * Get the handler of the GPU device attached to a CPU thread.
+   * @param[in] thread::id
+   * @return the GPU handler, or nullptr if this thread does not have any GPU.
+   */
+	cublasHandle_t cublas_handle(const std::thread::id tid) {
+    CHECK(cublas_handle_.find(tid) != cublas_handle_.end());
+	  return cublas_handle_[tid];
 	}
-
-	void CreateHandle(const int index);
-
-	void DestoryHandle(const int index);
-
-	curandGenerator_t GpuRandGenerator(const int index) {
-	  return gpu_rand_generators_[index];
+  /**
+   * Get the random generator of the GPU device assigned to the given thread.
+   * @param[in] thread::id
+   * @return random generator. If it does not exist, then create one.
+   * The random seed will be set to CURAND_RNG_PSEUDO_DEFAULT if it is not set.
+   */
+	curandGenerator_t curand_generator(const std::thread::id tid) {
+    if (curand_generator_.find(tid) == curand_generator_.end()) {
+      CHECK(seed_.find(tid) != seed_.end());
+      auto seed = seed_[tid];
+      // TODO handle user set seed
+      cudaSetDevice(device_id_[tid]);
+      curandCreateGenerator(&curand_generator_[tid], CURAND_RNG_PSEUDO_DEFAULT);
+    }
+	  return curand_generator_[tid];
 	}
 
-	void CreateGpuRandGenerator(const int index);
-
-	void DestoryGpuRandGenerator(const int index);
+  /*
+ protected:
+	void CreateHandle(const int thread::id);
+	void DestoryHandle(const int thread::id);
+	void CreateGpuRandGenerator(const int thread::id);
+	void DestoryGpuRandGenerator(const int thread::id);
+  */
 
 #endif
 
-  protected:
-	std::vector<int> device_ids_;
+ protected:
+
+	std::unordered_map<std::thread::id, int> device_id_;
+  std::unordered_map<std::thread::id, std::mt19937 *> rand_generator_;
+  std::unordered_map<std::thread::id, int> seed_;
 #ifdef USE_GPU
-	std::vector<cublasHandle_t> handles_;
-	std::vector<curandGenerator_t> gpu_rand_generators_;
+	std::unordered_map<std::thread::id, cublasHandle_t> cublas_handle_;
+	std::unordered_map<std::thread::id, curandGenerator_t> curand_generator_;
 #endif
-
 };
 
 }  // namespace singa

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9aff30aa/src/test/test_context.cc
----------------------------------------------------------------------
diff --git a/src/test/test_context.cc b/src/test/test_context.cc
deleted file mode 100644
index 3a23b23..0000000
--- a/src/test/test_context.cc
+++ /dev/null
@@ -1,66 +0,0 @@
-#include "gtest/gtest.h"
-#include "singa/utils/singleton.h"
-#include "singa/utils/context.h"
-
-//#include <cuda_runtime.h>
-//#include "cublas_v2.h"
-
-using namespace singa;
-using namespace std;
-
-TEST(ContextTest, TestDevice) {
-  auto context = Singleton<Context>::Instance();
-  context->Setup();
-
-  int index = 4;
-  int device_id = context->DeviceID(index);
-  ASSERT_EQ(4,device_id);
-
-  context->SetDeviceID(index,6);
-  device_id = context->DeviceID(index);
-  ASSERT_EQ(6,device_id);
-}
-
-TEST(ContextTest, TestHandle) {
-  auto context = Singleton<Context>::Instance();
-  context->Setup();
-
-  int index = 2;
-  context->CreateHandle(index);
-
-  float cpu_ret = 0.0f;
-  float gpu_ret = 0.0f;
-
-  float A[12];
-  float B[12];
-  
-  for(int i = 0; i < 12; i++) {
-	A[i]=i-1;
-	B[i]=i+1;
-  }
-
-  float* A_gpu = NULL;
-  float* B_gpu = NULL;
-  
-  cudaMalloc((void**)&A_gpu, 12*sizeof(float));
-  cudaMalloc((void**)&B_gpu, 12*sizeof(float));
-
-  cudaMemcpy(A_gpu,A,12*sizeof(float),cudaMemcpyHostToDevice);
-  cudaMemcpy(B_gpu,B,12*sizeof(float),cudaMemcpyHostToDevice);
-
-  cublasHandle_t handle = context->Handle(index);
-  /*cublasHandle_t handle;
-  cudaSetDevice(0);
-  cublasCreate(&handle);*/
-
-  cublasSdot(handle, 12, A_gpu, 1, B_gpu, 1, &gpu_ret);
-
-  for(int i = 0; i < 12;++i) {
-	cpu_ret += A[i] * B[i];
-  }
-  
-  ASSERT_EQ(gpu_ret,cpu_ret);
-  
-  cudaFree(A_gpu);
-  cudaFree(B_gpu);
-}

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9aff30aa/src/test/test_context.cu
----------------------------------------------------------------------
diff --git a/src/test/test_context.cu b/src/test/test_context.cu
new file mode 100644
index 0000000..88ab06b
--- /dev/null
+++ b/src/test/test_context.cu
@@ -0,0 +1,55 @@
+#include <thread>
+#include "gtest/gtest.h"
+#include "singa/utils/singleton.h"
+#include "singa/utils/context.h"
+#include "singa/utils/cuda_utils.h"
+
+using namespace singa;
+using namespace std;
+
+TEST(ContextTest, TestDevice) {
+  auto context = Singleton<Context>::Instance();
+
+  auto id = std::this_thread::get_id();
+  context->SetupDevice(id, 0);
+  auto device_id = context->device_id(id);
+  ASSERT_EQ(1,device_id);
+}
+
+TEST(ContextTest, TestHandle) {
+  auto context = Singleton<Context>::Instance();
+
+  float cpu_ret = 0.0f;
+  float gpu_ret = 0.0f;
+
+  float A[12];
+  float B[12];
+
+  for(int i = 0; i < 12; i++) {
+    A[i]=i-1;
+    B[i]=i+1;
+  }
+
+  float* A_gpu = NULL;
+  float* B_gpu = NULL;
+  context->SetupDevice(std::this_thread::get_id(), 0);
+
+  cudaMalloc((void**)&A_gpu, 12 * sizeof(float));
+  cudaMalloc((void**)&B_gpu, 12 * sizeof(float));
+
+  cudaMemcpy(A_gpu, A, 12 * sizeof(float), cudaMemcpyHostToDevice);
+  cudaMemcpy(B_gpu, B, 12 * sizeof(float), cudaMemcpyHostToDevice);
+
+  cublasHandle_t handle = context->cublas_handle(std::this_thread::get_id());
+
+  cublasSdot(handle, 12, A_gpu, 1, B_gpu, 1, &gpu_ret);
+
+  for(int i = 0; i < 12;++i) {
+    cpu_ret += A[i] * B[i];
+  }
+
+  ASSERT_EQ(gpu_ret,cpu_ret);
+
+  cudaFree(A_gpu);
+  cudaFree(B_gpu);
+}

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9aff30aa/src/utils/context.cc
----------------------------------------------------------------------
diff --git a/src/utils/context.cc b/src/utils/context.cc
index 671bec0..37c8f39 100644
--- a/src/utils/context.cc
+++ b/src/utils/context.cc
@@ -23,66 +23,64 @@
 #include "singa/utils/singleton.h"
 
 namespace singa {
-  
+
 Context::~Context() {
 #ifdef USE_GPU
-  for(int i = 0; i < kDefaultDevice; ++i) {
-	SetDevice(i);
-
-	if(handles_[i] != NULL) {
-	  cublasDestroy(handles_[i]);
-	}
-
-	if(gpu_rand_generators_[i] != NULL) {
-      curandDestroyGenerator(gpu_rand_generators_[i]);
-	}
+  for (auto& entry : device_id_) {
+    if (entry.second != -1) {
+      cudaSetDevice(entry.second);
+      if (cublas_handle_[entry.first] != nullptr) {
+        cublasDestroy(cublas_handle_[entry.first]);
+        cublas_handle_[entry.first] = nullptr;
+      }
+      if(curand_generator_[entry.first] != nullptr) {
+        curandDestroyGenerator(curand_generator_[entry.first]);
+        curand_generator_[entry.first] = nullptr;
+      }
+    }
   }
 #endif
+  for (auto& entry : rand_generator_) {
+    if (entry.second != nullptr) {
+      delete entry.second;
+      entry.second = nullptr;
+    }
+  }
 }
 
-void Context::Setup() {
+Context::Context() { }
 
-  for(int i = 0; i < kDefaultDevice; ++i) {
-	//init device index
-	device_ids_.push_back(i);
-  }
+void Context::SetupDevice(const std::thread::id thread, const int did) {
+  SetupDevice(thread, did, -1);
+}
 
+void Context::SetupDevice(const std::thread::id thread, const int did,
+    long long seed) {
+  device_id_[thread] = did;
 #ifdef USE_GPU
-  for(int i = 0; i < kDefaultDevice; ++i) {
-	//init handle
-	cublasHandle_t handle = NULL;
-	handles_.push_back(handle);
-
-	curandGenerator_t gpu_rand_generator = NULL;
-	gpu_rand_generators_.push_back(gpu_rand_generator);
+  if (did > -1) {
+    cudaSetDevice(did);
+    cublasCreate(&handle_[thread]);
   }
 #endif
+  seed_[thread] = seed;
 }
 
+/*
 #ifdef USE_GPU
-void Context::CreateHandle(const int index) {
-  SetDevice(device_ids_[index]);
-  cublasCreate(&handles_[index]);
-}
-
-void Context::DestoryHandle(const int index) {
-  SetDevice(device_ids_[index]);
-  cublasDestroy(handles_[index]);
-  handles_[index] = NULL;
+void Context::DestoryHandle(const int thread::id) {
+  cudaSetDevice(device_id_[thread::id]);
+  cublasDestroy(handle_[thread::id]);
+  handle_[thread::id] = nullptr;
 }
 
-void Context::CreateGpuRandGenerator(const int index) {
-  SetDevice(device_ids_[index]);
-  curandCreateGenerator(&gpu_rand_generators_[index], CURAND_RNG_PSEUDO_DEFAULT);
+void Context::DestoryGpuRandGenerator(const int thread::id) {
+  cudaSetDevice(device_id_[thread::id]);
+  curandDestroyGenerator(curand_generator_[thread::id]);
+  curand_generator_[thread::id] = nullptr;
 }
-
-void Context::DestoryGpuRandGenerator(const int index) {
-  SetDevice(device_ids_[index]);
-  curandDestroyGenerator(gpu_rand_generators_[index]);
-  gpu_rand_generators_[index] = NULL;
-}
-
 #endif
+*/
 
 
 }  // namespace singa


[6/6] incubator-singa git commit: SINGA-104 Add Context Class

Posted by wa...@apache.org.
SINGA-104 Add Context Class

check with cpplint


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/b2cfa17b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/b2cfa17b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/b2cfa17b

Branch: refs/heads/master
Commit: b2cfa17b8564dff993d195b0dd89be0bad0813a6
Parents: e3bda08
Author: WANG Sheng <wa...@gmail.com>
Authored: Thu Nov 26 19:30:42 2015 +0800
Committer: WANG Sheng <wa...@gmail.com>
Committed: Thu Nov 26 19:30:42 2015 +0800

----------------------------------------------------------------------
 include/singa/utils/context.h |  74 ++--
 src/test/test_context.cc      |  35 +-
 src/test/test_math.cc         | 747 ++++++++++++++++++-------------------
 src/test/test_msg.cc          |   2 +-
 src/test/test_paramslicer.cc  |   2 +-
 5 files changed, 423 insertions(+), 437 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b2cfa17b/include/singa/utils/context.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/context.h b/include/singa/utils/context.h
index 5066633..905b810 100644
--- a/include/singa/utils/context.h
+++ b/include/singa/utils/context.h
@@ -22,22 +22,19 @@
 #ifndef SINGA_UTILS_CONTEXT_H_
 #define SINGA_UTILS_CONTEXT_H_
 
-#include <vector>
-#include <random>
+#include <glog/logging.h>
 #include <chrono>
+#include <random>
 #include <thread>
 #include <unordered_map>
-#include <glog/logging.h>
-
+#include <vector>
 
 #ifdef USE_GPU
 #include "singa/utils/cuda_utils.h"
 #endif
 
-
 namespace singa {
 
-
 /**
  * Context is used as a global singleton, which stores the mapping from CPU
  * thread id to GPU device id. If a thread has no GPU, then its associated
@@ -52,30 +49,29 @@ class Context {
    /**
     * Destructor, release random generators and handlers.
     */
-   ~Context() {
+  ~Context() {
 #ifdef USE_GPU
-     for (auto& entry : device_id_) {
-       if (entry.second != -1) {
-         cudaSetDevice(entry.second);
-         if (cublas_handle_[entry.second] != nullptr) {
-           cublasDestroy(cublas_handle_[entry.second]);
-           cublas_handle_[entry.second] = nullptr;
-         }
-         if(curand_generator_[entry.second] != nullptr) {
-           curandDestroyGenerator(curand_generator_[entry.second]);
-           curand_generator_[entry.second] = nullptr;
-         }
-       }
-     }
+    for (auto& entry : device_id_) {
+      if (entry.second != -1) {
+        cudaSetDevice(entry.second);
+        if (cublas_handle_[entry.second] != nullptr) {
+          cublasDestroy(cublas_handle_[entry.second]);
+          cublas_handle_[entry.second] = nullptr;
+        }
+        if (curand_generator_[entry.second] != nullptr) {
+          curandDestroyGenerator(curand_generator_[entry.second]);
+          curand_generator_[entry.second] = nullptr;
+        }
+      }
+    }
 #endif
-     for (auto& entry : rand_generator_) {
-       if (entry.second != nullptr) {
-         delete entry.second;
-         entry.second = nullptr;
-       }
-     }
-
-   }
+    for (auto& entry : rand_generator_) {
+      if (entry.second != nullptr) {
+        delete entry.second;
+        entry.second = nullptr;
+      }
+    }
+  }
   /**
    * Constructor, init handlers and GPU rand generators to nullptr.
    */
@@ -90,12 +86,12 @@ class Context {
    * @return the ID of the device attached to a given CPU thread, or -1 if this
    * thread has not been attached GPU device.
    */
-	int device_id(const std::thread::id& tid) {
+  int device_id(const std::thread::id& tid) {
     if (device_id_.find(tid) != device_id_.end())
       return device_id_[tid];
     else
       return -1;
-	}
+  }
   /**
    * Setup the CPU thread, which may be assigned a GPU device.
    * If there is no GPU device, then set did to -1.
@@ -168,7 +164,7 @@ class Context {
   /**
    * Get the rand generator of the GPU device assigned to the given thread.
    */
-	curandGenerator_t curand_generator(const std::thread::id thread_id) {
+  curandGenerator_t curand_generator(const std::thread::id thread_id) {
     return curand_generator(device_id(thread_id));
   }
   /**
@@ -177,10 +173,10 @@ class Context {
    * @return random generator. If it does not exist, then create one.
    * The random seed will be set to CURAND_RNG_PSEUDO_DEFAULT if it is not set.
    */
-	curandGenerator_t curand_generator(const int device_id) {
+  curandGenerator_t curand_generator(const int device_id) {
     CHECK_GE(device_id, 0);
     if (curand_generator_.at(device_id) == nullptr) {
-      // TODO handle user set seed
+      // TODO(wangwei) handle user set seed
       /*
       CHECK(seed_.find(tid) != seed_.end());
       auto seed = seed_[tid];
@@ -189,8 +185,8 @@ class Context {
       curandCreateGenerator(&curand_generator_[device_id],
           CURAND_RNG_PSEUDO_DEFAULT);
     }
-	  return curand_generator_[device_id];
-	}
+    return curand_generator_[device_id];
+  }
 
 #endif
 
@@ -198,19 +194,19 @@ class Context {
   //!< max num of GPUs per process
   const int kMaxNumGPU = 64;
   //!< map from thread id to device id
-	std::unordered_map<std::thread::id, int> device_id_;
+  std::unordered_map<std::thread::id, int> device_id_;
   //!< map from thread id to cpu rand generator
   std::unordered_map<std::thread::id, std::mt19937 *> rand_generator_;
   //!< map from thread id to cpu rand generator seed
   std::unordered_map<std::thread::id, int> seed_;
 #ifdef USE_GPU
   //!< cublas handler indexed by GPU device ID
-	std::vector<cublasHandle_t> cublas_handle_;
+  std::vector<cublasHandle_t> cublas_handle_;
   //!< cublas rand generator indexed by GPU device ID
-	std::vector<curandGenerator_t> curand_generator_;
+  std::vector<curandGenerator_t> curand_generator_;
 #endif
 };
 
 }  // namespace singa
 
-#endif  // SINGA_UTILS_MATH_ADDR_H_
+#endif  // SINGA_UTILS_CONTEXT_H_
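
After this cleanup the cuBLAS handles and cuRAND generators live in vectors indexed by GPU device ID, while the thread-to-device mapping stays in an unordered_map, so two CPU threads pinned to the same card share one handle. A small sketch of that behaviour (the helper thread and the equality check are illustrative only; USE_GPU is assumed):

#include <thread>
#include "singa/utils/context.h"
#include "singa/utils/singleton.h"

int main() {
  auto context = singa::Singleton<singa::Context>::Instance();
  auto main_tid = std::this_thread::get_id();
  context->SetupDevice(main_tid, 0);  // pin the main thread to GPU 0
  cublasHandle_t main_handle = context->cublas_handle(main_tid);

  std::thread helper([&]() {
    auto tid = std::this_thread::get_id();
    context->SetupDevice(tid, 0);  // same card, different thread
    // Handles are indexed by device ID, so the same GPU yields the same handle.
    CHECK_EQ(context->cublas_handle(tid), main_handle);
  });
  helper.join();
  return 0;
}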

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b2cfa17b/src/test/test_context.cc
----------------------------------------------------------------------
diff --git a/src/test/test_context.cc b/src/test/test_context.cc
index 5e501b9..70f6d07 100644
--- a/src/test/test_context.cc
+++ b/src/test/test_context.cc
@@ -1,3 +1,24 @@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*   http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied.  See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
 #include <thread>
 #include "gtest/gtest.h"
 #include "singa/utils/singleton.h"
@@ -25,17 +46,17 @@ TEST(ContextTest, TestHandle) {
   float A[12];
   float B[12];
 
-  for(int i = 0; i < 12; i++) {
-    A[i]=i-1;
-    B[i]=i+1;
+  for (int i = 0; i < 12; i++) {
+    A[i] = i - 1;
+    B[i] = i + 1;
   }
 
   float* A_gpu = NULL;
   float* B_gpu = NULL;
   context->SetupDevice(std::this_thread::get_id(), 0);
 
-  cudaMalloc((void**)&A_gpu, 12 * sizeof(float));
-  cudaMalloc((void**)&B_gpu, 12 * sizeof(float));
+  cudaMalloc(reinterpret_cast<void**>(&A_gpu), 12 * sizeof(float));
+  cudaMalloc(reinterpret_cast<void**>(&B_gpu), 12 * sizeof(float));
 
   cudaMemcpy(A_gpu, A, 12 * sizeof(float), cudaMemcpyHostToDevice);
   cudaMemcpy(B_gpu, B, 12 * sizeof(float), cudaMemcpyHostToDevice);
@@ -44,11 +65,11 @@ TEST(ContextTest, TestHandle) {
 
   cublasSdot(handle, 12, A_gpu, 1, B_gpu, 1, &gpu_ret);
 
-  for(int i = 0; i < 12;++i) {
+  for (int i = 0; i < 12; ++i) {
     cpu_ret += A[i] * B[i];
   }
 
-  ASSERT_EQ(gpu_ret,cpu_ret);
+  ASSERT_EQ(gpu_ret, cpu_ret);
 
   cudaFree(A_gpu);
   cudaFree(B_gpu);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b2cfa17b/src/test/test_math.cc
----------------------------------------------------------------------
diff --git a/src/test/test_math.cc b/src/test/test_math.cc
index 8f8c633..39ec2a0 100644
--- a/src/test/test_math.cc
+++ b/src/test/test_math.cc
@@ -1,3 +1,24 @@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+* 
+*   http://www.apache.org/licenses/LICENSE-2.0
+* 
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied.  See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
 #include "gtest/gtest.h"
 #include "singa/utils/math_addr.h"
 #include "singa/utils/math_kernel.h"
@@ -12,459 +33,407 @@ using namespace singa;
 using namespace std;
 
 TEST(MathTest, TestGemmCPU) {
-	float A[3][2] = {};
-	float B[3][2] = {};
-	float C[2][2] = {};
-	for(int i = 0; i < 3; i++)
-		for(int j = 0; j < 2; j++)
-		{
-			A[i][j] = i+j;
-			B[i][j] = i+j - i*j;
-		}
-	cpu_gemm(A[0], B[0], 2, 2, 3 , 1.0f, 0.0f, true, false, C[0]);
-	float D[2][2] = {};
-	for(int i = 0; i < 2; i++)
-		for(int j = 0; j < 2; j++)
-		{
-			D[i][j] = 0;
-			for(int k = 0; k < 3; k++)
-				D[i][j] += A[k][i]*B[k][j];
-		}
-    for(int i = 0; i < 2; i++)
-        for(int j = 0; j < 2; j++)
-        {
-			ASSERT_EQ(C[i][j], D[i][j]);
-		}
+  float A[3][2] = {};
+  float B[3][2] = {};
+  float C[2][2] = {};
+  for (int i = 0; i < 3; i++)
+    for (int j = 0; j < 2; j++) {
+      A[i][j] = i+j;
+      B[i][j] = i+j - i*j;
+    }
+  cpu_gemm(A[0], B[0], 2, 2, 3 , 1.0f, 0.0f, true, false, C[0]);
+  float D[2][2] = {};
+  for (int i = 0; i < 2; i++)
+    for (int j = 0; j < 2; j++) {
+      D[i][j] = 0;
+      for (int k = 0; k < 3; k++)
+        D[i][j] += A[k][i]*B[k][j];
+    }
+    for (int i = 0; i < 2; i++)
+      for (int j = 0; j < 2; j++) {
+      ASSERT_EQ(C[i][j], D[i][j]);
+    }
 }
 
 TEST(MathTest, TestGemvCPU) {
-	float A[4][3] = {};
-	float B[4]= {};
-	float C[3] = {};
-	float D[3] = {};
-
-	for(int i = 0; i < 3; i++)
-	{
-		for(int j = 0; j < 4; j++)
-		{
-			A[j][i] = i-j + i*j;
-		}
-	}
-
-	for(int i = 0; i < 4; i++)B[i] = i;
-	for(int i = 0; i < 3; i++)C[i] = 10;
-	cpu_gemv(A[0], B, 4, 3, 1.0f, 1.0f, true, C);
-
-	for(int i = 0; i < 3; i++)
-	{
-		for(int j = 0; j < 4; j++)
-		{
-			D[i] += A[j][i]*B[j];
-		}
-	}
-	for(int i = 0; i < 3; i++)
-	{
-		ASSERT_EQ(C[i], D[i]+10);
-	}
+  float A[4][3] = {};
+  float B[4]= {};
+  float C[3] = {};
+  float D[3] = {};
+
+  for (int i = 0; i < 3; i++) {
+    for (int j = 0; j < 4; j++) {
+      A[j][i] = i-j + i*j;
+    }
+  }
+
+  for (int i = 0; i < 4; i++)B[i] = i;
+  for (int i = 0; i < 3; i++)C[i] = 10;
+  cpu_gemv(A[0], B, 4, 3, 1.0f, 1.0f, true, C);
+
+  for (int i = 0; i < 3; i++) {
+    for (int j = 0; j < 4; j++) {
+      D[i] += A[j][i]*B[j];
+    }
+  }
+  for (int i = 0; i < 3; i++) {
+    ASSERT_EQ(C[i], D[i]+10);
+  }
 }
 
 
 TEST(MathTest, TestAxpyCPU) {
-	float A[4][3] = {};
-	float C[4][3] = {};
-	float B[3][4] = {};
-	float D[3][4] = {};
-
-	for(int i = 0; i < 4; i++)
-	{
-		for(int j = 0; j < 3; j++)
-		{
-			A[i][j] = i-j + i*j;
-			B[j][i] = i-j + i*j;
-			C[i][j] = A[i][j];
-			D[j][i] = B[j][i];
-		}
-	}
-
-	cpu_axpy(A[0], 12, 2.0f, B[0]);
-	for(int i = 0; i < 12; i++)
-	{
-		D[i / 4][i % 4] += 2*C[i / 3][i % 3];
-	}
-
-	for(int i = 0; i < 3; i++)
-	{
-		for(int j = 0; j < 4; j++)
-		{
-			ASSERT_EQ(B[i][j],D[i][j]);
-		}
-	}
+  float A[4][3] = {};
+  float C[4][3] = {};
+  float B[3][4] = {};
+  float D[3][4] = {};
+
+  for (int i = 0; i < 4; i++) {
+    for (int j = 0; j < 3; j++) {
+      A[i][j] = i-j + i*j;
+      B[j][i] = i-j + i*j;
+      C[i][j] = A[i][j];
+      D[j][i] = B[j][i];
+    }
+  }
+
+  cpu_axpy(A[0], 12, 2.0f, B[0]);
+  for (int i = 0; i < 12; i++) {
+    D[i / 4][i % 4] += 2*C[i / 3][i % 3];
+  }
+
+  for (int i = 0; i < 3; i++) {
+    for (int j = 0; j < 4; j++) {
+      ASSERT_EQ(B[i][j], D[i][j]);
+    }
+  }
 }
 
 /*
 TEST(MathTest, TestEopCPU) {
 
-	float A[10] = {};
-	float B[10] = {};
-	float C[10] = {};
-	float O[10] = {};
-
-	for(int i = 0; i < 10; i++)
-	{
-		A[i] = i;
-		B[i] = -i;
-		C[i] = i;
-
-	}
-	cpu_e_f<singa::op::Set>(5, 15.0f, O, O);
-	for(int i = 0; i < 5; i++)
-	{
-		ASSERT_EQ(O[i]-15,0);
-	}
-	for(int i = 5; i < 10; i++)
-	{
-		ASSERT_EQ(O[i],0);
-	}
+  float A[10] = {};
+  float B[10] = {};
+  float C[10] = {};
+  float O[10] = {};
+
+  for (int i = 0; i < 10; i++) {
+    A[i] = i;
+    B[i] = -i;
+    C[i] = i;
+  }
+  cpu_e_f<singa::op::Set>(5, 15.0f, O, O);
+  for (int i = 0; i < 5; i++) {
+    ASSERT_EQ(O[i]-15,0);
+  }
+  for (int i = 5; i < 10; i++) {
+    ASSERT_EQ(O[i],0);
+  }
 }
 */
 
 #ifdef USE_GPU
 TEST(MathTest, TestGemmGPU) {
-	float A[3][2] = {};
-	float B[3][2] = {};
-	float C[2][2] = {};
-	for(int i = 0; i < 3; i++)
-	{
-		for(int j = 0; j < 2; j++)
-		{
-			A[i][j] = i+j;
-			B[i][j] = i+j - i*j;
-		}
-	}
-
-	float* A_gpu=NULL;
-	float* B_gpu=NULL;
-	float* C_gpu=NULL;
-
-	cudaMalloc((void**)&A_gpu, 3*2*sizeof(float));
-	cudaMalloc((void**)&B_gpu, 3*2*sizeof(float));
-	cudaMalloc((void**)&C_gpu, 2*2*sizeof(float));
-
-	cudaMemcpy(A_gpu,A,3*2*sizeof(float),cudaMemcpyHostToDevice);
-	cudaMemcpy(B_gpu,B,3*2*sizeof(float),cudaMemcpyHostToDevice);
-
-	gpu_gemm<float>(A_gpu, B_gpu, 2, 2, 3 , 1, 0, true, false, C_gpu);
-
-	cudaMemcpy(C,C_gpu,2*2*sizeof(float),cudaMemcpyDeviceToHost);
-
-	float D[2][2] = {};
-	for(int i = 0; i < 2; i++)
-	{
-		for(int j = 0; j < 2; j++)
-		{
-			D[i][j] = 0;
-			for(int k = 0; k < 3; k++)
-			{
-				D[i][j] += A[k][i]*B[k][j];
-			}
-		}
-	}
-
-	for(int i = 0; i < 2; i++)
-	{
-		for(int j = 0; j < 2; j++)
-		{
-			ASSERT_EQ(C[i][j],D[i][j]);
-		}
-	}
-
-	cudaFree(A_gpu);
-	cudaFree(B_gpu);
-	cudaFree(C_gpu);
+  float A[3][2] = {};
+  float B[3][2] = {};
+  float C[2][2] = {};
+  for (int i = 0; i < 3; i++) {
+    for (int j = 0; j < 2; j++) {
+      A[i][j] = i+j;
+      B[i][j] = i+j - i*j;
+    }
+  }
+
+  float* A_gpu = NULL;
+  float* B_gpu = NULL;
+  float* C_gpu = NULL;
+
+  cudaMalloc(reinterpret_cast<void**>(&A_gpu), 3*2*sizeof(float));
+  cudaMalloc(reinterpret_cast<void**>(&B_gpu), 3*2*sizeof(float));
+  cudaMalloc(reinterpret_cast<void**>(&C_gpu), 2*2*sizeof(float));
+
+  cudaMemcpy(A_gpu, A, 3*2*sizeof(float), cudaMemcpyHostToDevice);
+  cudaMemcpy(B_gpu, B, 3*2*sizeof(float), cudaMemcpyHostToDevice);
+
+  gpu_gemm<float>(A_gpu, B_gpu, 2, 2, 3 , 1, 0, true, false, C_gpu);
+
+  cudaMemcpy(C, C_gpu, 2*2*sizeof(float), cudaMemcpyDeviceToHost);
+
+  float D[2][2] = {};
+  for (int i = 0; i < 2; i++) {
+    for (int j = 0; j < 2; j++) {
+      D[i][j] = 0;
+      for (int k = 0; k < 3; k++) {
+        D[i][j] += A[k][i]*B[k][j];
+      }
+    }
+  }
+
+  for (int i = 0; i < 2; i++) {
+    for (int j = 0; j < 2; j++) {
+      ASSERT_EQ(C[i][j], D[i][j]);
+    }
+  }
+
+  cudaFree(A_gpu);
+  cudaFree(B_gpu);
+  cudaFree(C_gpu);
 }
 
 
 TEST(MathTest, TestGemvGPU) {
-	float A[4][3] = {};
-	float B[4]= {};
-	float C[3] = {};
-	float D[3] = {};
-
-	for(int i = 0; i < 4; i++)
-	{
-		for(int j = 0; j < 3; j++)
-		{
-			A[i][j] = i-j + i*j;
-		}
-	}
-
-	for(int i = 0; i < 4; i++)B[i] = i;
-	for(int i = 0; i < 3; i++)C[i] = 10;
-
-	float* A_gpu=NULL;
-	float* B_gpu=NULL;
-	float* C_gpu=NULL;
-
-	cudaMalloc((void**)&A_gpu, 4*3*sizeof(float));
-	cudaMalloc((void**)&B_gpu, 4*sizeof(float));
-	cudaMalloc((void**)&C_gpu, 3*sizeof(float));
-
-	cudaMemcpy(A_gpu,A,4*3*sizeof(float),cudaMemcpyHostToDevice);
-	cudaMemcpy(B_gpu,B,4*sizeof(float),cudaMemcpyHostToDevice);
-	cudaMemcpy(C_gpu,C,3*sizeof(float),cudaMemcpyHostToDevice);
-
-	gpu_gemv<float>(A_gpu, B_gpu, 4, 3, 1, 1, true, C_gpu);
-
-	cudaMemcpy(C,C_gpu,3*sizeof(float),cudaMemcpyDeviceToHost);
-
-	for(int i = 0; i < 3; i++)
-	{
-		for(int j = 0; j < 4; j++)
-		{
-			D[i] += A[j][i]*B[j];
-		}
-	}
-
-	for(int i = 0; i < 3; i++)
-	{
-		ASSERT_EQ(C[i],D[i]+10);
-	}
-
-	cudaFree(A_gpu);
-	cudaFree(B_gpu);
-	cudaFree(C_gpu);
+  float A[4][3] = {};
+  float B[4]= {};
+  float C[3] = {};
+  float D[3] = {};
+
+  for (int i = 0; i < 4; i++) {
+    for (int j = 0; j < 3; j++) {
+      A[i][j] = i-j + i*j;
+    }
+  }
+
+  for (int i = 0; i < 4; i++) B[i] = i;
+  for (int i = 0; i < 3; i++) C[i] = 10;
+
+  float* A_gpu = NULL;
+  float* B_gpu = NULL;
+  float* C_gpu = NULL;
+
+  cudaMalloc(reinterpret_cast<void**>(&A_gpu), 4*3*sizeof(float));
+  cudaMalloc(reinterpret_cast<void**>(&B_gpu), 4*sizeof(float));
+  cudaMalloc(reinterpret_cast<void**>(&C_gpu), 3*sizeof(float));
+
+  cudaMemcpy(A_gpu, A, 4*3*sizeof(float), cudaMemcpyHostToDevice);
+  cudaMemcpy(B_gpu, B, 4*sizeof(float), cudaMemcpyHostToDevice);
+  cudaMemcpy(C_gpu, C, 3*sizeof(float), cudaMemcpyHostToDevice);
+
+  gpu_gemv<float>(A_gpu, B_gpu, 4, 3, 1, 1, true, C_gpu);
+
+  cudaMemcpy(C, C_gpu, 3*sizeof(float), cudaMemcpyDeviceToHost);
+
+  for (int i = 0; i < 3; i++) {
+    for (int j = 0; j < 4; j++) {
+      D[i] += A[j][i]*B[j];
+    }
+  }
+
+  for (int i = 0; i < 3; i++) {
+    ASSERT_EQ(C[i], D[i]+10);
+  }
+
+  cudaFree(A_gpu);
+  cudaFree(B_gpu);
+  cudaFree(C_gpu);
 }
 
 
 /*
 TEST(MathTest, TestAxpyGPU) {
-	float A[4][3] = {};
-	float C[4][3] = {};
-	float B[3][4] = {};
-	float D[3][4] = {};
-
-	for(int i = 0; i < 4; i++)
-	{
-		for(int j = 0; j < 3; j++)
-		{
-			A[i][j] = i-j + i*j;
-			B[j][i] = i-j + i*j;
-			C[i][j] = A[i][j];
-			D[j][i] = B[j][i];
-		}
-	}
-
-	float* A_gpu=NULL;
-	float* B_gpu=NULL;
-
-	cudaMalloc((void**)&A_gpu, 4*3*sizeof(float));
-	cudaMalloc((void**)&B_gpu, 3*4*sizeof(float));
-
-	cudaMemcpy(A_gpu,A,4*3*sizeof(float),cudaMemcpyHostToDevice);
-	cudaMemcpy(B_gpu,B,3*4*sizeof(float),cudaMemcpyHostToDevice);
-
-	gpu_axpy<float>(A_gpu, 12, 2, B_gpu);
-
-	cudaMemcpy(A,A_gpu,4*3*sizeof(float),cudaMemcpyDeviceToHost);
-	cudaMemcpy(B,B_gpu,3*4*sizeof(float),cudaMemcpyDeviceToHost);
-
-	//for(int i = 0; i < 12; i++)D[0][i] += 2*C[0][i];
-
-	for(int i = 0; i < 4; i++)
-	{
-		for(int j = 0; j < 3; j++)
-		{
-			D[i][j] += C[i][j];
-			ASSERT_EQ(B[i][j],D[i][j]);
-		}
-	}
-
-	cudaFree(A_gpu);
-	cudaFree(B_gpu);
+  float A[4][3] = {};
+  float C[4][3] = {};
+  float B[3][4] = {};
+  float D[3][4] = {};
+
+  for (int i = 0; i < 4; i++)
+  {
+    for (int j = 0; j < 3; j++)
+    {
+      A[i][j] = i-j + i*j;
+      B[j][i] = i-j + i*j;
+      C[i][j] = A[i][j];
+      D[j][i] = B[j][i];
+    }
+  }
+
+  float* A_gpu=NULL;
+  float* B_gpu=NULL;
+
+  cudaMalloc((void**)&A_gpu, 4*3*sizeof(float));
+  cudaMalloc((void**)&B_gpu, 3*4*sizeof(float));
+
+  cudaMemcpy(A_gpu,A,4*3*sizeof(float),cudaMemcpyHostToDevice);
+  cudaMemcpy(B_gpu,B,3*4*sizeof(float),cudaMemcpyHostToDevice);
+
+  gpu_axpy<float>(A_gpu, 12, 2, B_gpu);
+
+  cudaMemcpy(A,A_gpu,4*3*sizeof(float),cudaMemcpyDeviceToHost);
+  cudaMemcpy(B,B_gpu,3*4*sizeof(float),cudaMemcpyDeviceToHost);
+
+  //for (int i = 0; i < 12; i++)D[0][i] += 2*C[0][i];
+
+  for (int i = 0; i < 4; i++)
+  {
+    for (int j = 0; j < 3; j++)
+    {
+      D[i][j] += C[i][j];
+      ASSERT_EQ(B[i][j],D[i][j]);
+    }
+  }
+
+  cudaFree(A_gpu);
+  cudaFree(B_gpu);
 }
 */
 
 
 TEST(MathTest, TestDotGPU) {
-	float A[12];
-	float B[12];
-
-	for(int i = 0; i < 12; i++)
-	{
-		A[i]=i-1;
-		B[i]=i+1;
-	}
+  float A[12];
+  float B[12];
 
-	float* A_gpu=NULL;
-	float* B_gpu=NULL;
+  for (int i = 0; i < 12; i++) {
+    A[i] = i - 1;
+    B[i] = i + 1;
+  }
 
-	cudaMalloc((void**)&A_gpu, 12*sizeof(float));
-	cudaMalloc((void**)&B_gpu, 12*sizeof(float));
+  float* A_gpu = NULL;
+  float* B_gpu = NULL;
 
-	cudaMemcpy(A_gpu,A,12*sizeof(float),cudaMemcpyHostToDevice);
-	cudaMemcpy(B_gpu,B,12*sizeof(float),cudaMemcpyHostToDevice);
-	float gpu_ret=gpu_dot<float>(A_gpu,B_gpu,12);
+  cudaMalloc(reinterpret_cast<void**>(&A_gpu), 12*sizeof(float));
+  cudaMalloc(reinterpret_cast<void**>(&B_gpu), 12*sizeof(float));
 
-	float cpu_ret=0.0f;
-	for(int i = 0; i < 12; i++)
-	{
-		cpu_ret+=A[i]*B[i];
-	}
+  cudaMemcpy(A_gpu, A, 12*sizeof(float), cudaMemcpyHostToDevice);
+  cudaMemcpy(B_gpu, B, 12*sizeof(float), cudaMemcpyHostToDevice);
+  float gpu_ret = gpu_dot<float>(A_gpu, B_gpu, 12);
 
-	ASSERT_EQ(gpu_ret,cpu_ret);
+  float cpu_ret = 0.0f;
+  for (int i = 0; i < 12; i++) {
+    cpu_ret += A[i] * B[i];
+  }
 
-	cudaFree(A_gpu);
-	cudaFree(B_gpu);
+  ASSERT_EQ(gpu_ret, cpu_ret);
 
+  cudaFree(A_gpu);
+  cudaFree(B_gpu);
 }
 
 TEST(MathTest, TestSingaSumColGPU) {
+  float A[3][4];
+  float B[4];
+  float C[4];
 
-	float A[3][4];
-	float B[4];
-	float C[4];
-
-	for(int i = 0; i < 3; i++)
-	{
-		for(int j = 0; j < 4; j++)
-		{
-			A[i][j]=i+j;
-		}
-	}
-
-	for(int i = 0; i < 4; i++)
-	{
-		B[i]=0.0f;
-		C[i]=0.0f;
-	}
-
-	float* A_gpu=NULL;
-	float* B_gpu=NULL;
-
-	cudaMalloc((void**)&A_gpu, 12*sizeof(float));
-	cudaMalloc((void**)&B_gpu, 4*sizeof(float));
-	cudaMemcpy(A_gpu,A,12*sizeof(float),cudaMemcpyHostToDevice);
-
-	singa_gpu_sum_col(A_gpu,B_gpu,3,4,4);
-
-	cudaMemcpy(B,B_gpu,4*sizeof(float),cudaMemcpyDeviceToHost);
-
-	for(int i = 0; i < 4; i++)
-	{
-		for(int j = 0; j < 3; j++)
-		{
-			C[i]+=A[j][i];
-		}
-	}
-
-	for(int i = 0; i <4; i++)
-	{
-		ASSERT_EQ(B[i],C[i]);
-	}
-
-	cudaFree(A_gpu);
-	cudaFree(B_gpu);
+  for (int i = 0; i < 3; i++) {
+    for (int j = 0; j < 4; j++) {
+      A[i][j] = i + j;
+    }
+  }
+
+  for (int i = 0; i < 4; i++) {
+    B[i] = 0.0f;
+    C[i] = 0.0f;
+  }
+
+  float* A_gpu = NULL;
+  float* B_gpu = NULL;
+
+  cudaMalloc(reinterpret_cast<void**>(&A_gpu), 12*sizeof(float));
+  cudaMalloc(reinterpret_cast<void**>(&B_gpu), 4*sizeof(float));
+  cudaMemcpy(A_gpu, A, 12*sizeof(float), cudaMemcpyHostToDevice);
+
+  singa_gpu_sum_col(A_gpu, B_gpu, 3, 4, 4);
+
+  cudaMemcpy(B, B_gpu, 4*sizeof(float), cudaMemcpyDeviceToHost);
+
+  for (int i = 0; i < 4; i++) {
+    for (int j = 0; j < 3; j++) {
+      C[i] += A[j][i];
+    }
+  }
+
+  for (int i = 0; i < 4; i++) {
+    ASSERT_EQ(B[i], C[i]);
+  }
+
+  cudaFree(A_gpu);
+  cudaFree(B_gpu);
 }
 
 TEST(MathTest, TestSingaAddVecRowGPU) {
-
-	float A[3][4];
-	float B[4];
-	float C[3][4];
-	float D[3][4];
-
-	for(int i = 0; i < 4; i++)
-	{
-		B[i]=i;
-	}
-
-	for(int i = 0; i < 3; i++)
-	{
-		for(int j = 0; j < 4; j++)
-		{
-			A[i][j]=i+j;
-			D[i][j]=A[i][j]+B[j];
-		}
-	}
-
-
-	float* A_gpu=NULL;
-	float* B_gpu=NULL;
-	float* C_gpu=NULL;
-
-	cudaMalloc((void**)&A_gpu, 3*4*sizeof(float));
-	cudaMalloc((void**)&B_gpu, 4*sizeof(float));
-	cudaMalloc((void**)&C_gpu, 3*4*sizeof(float));
-	cudaMemcpy(A_gpu,A,3*4*sizeof(float),cudaMemcpyHostToDevice);
-	cudaMemcpy(B_gpu,B,4*sizeof(float),cudaMemcpyHostToDevice);
-
-	singa_gpu_add_vec_row(B_gpu,A_gpu,C_gpu,3,4,4);
-
-	cudaMemcpy(C,C_gpu,3*4*sizeof(float),cudaMemcpyDeviceToHost);
-
-	for(int i = 0; i < 3; i++)
-	{
-		for(int j = 0; j < 4; j++)
-		{
-			ASSERT_EQ(C[i][j],D[i][j]);
-		}
-	}
-
-	cudaFree(A_gpu);
-	cudaFree(B_gpu);
-	cudaFree(C_gpu);
+  float A[3][4];
+  float B[4];
+  float C[3][4];
+  float D[3][4];
+
+  for (int i = 0; i < 4; i++) {
+    B[i] = i;
+  }
+
+  for (int i = 0; i < 3; i++) {
+    for (int j = 0; j < 4; j++) {
+      A[i][j] = i + j;
+      D[i][j] = A[i][j] + B[j];
+    }
+  }
+
+  float* A_gpu = NULL;
+  float* B_gpu = NULL;
+  float* C_gpu = NULL;
+
+  cudaMalloc(reinterpret_cast<void**>(&A_gpu), 3*4*sizeof(float));
+  cudaMalloc(reinterpret_cast<void**>(&B_gpu), 4*sizeof(float));
+  cudaMalloc(reinterpret_cast<void**>(&C_gpu), 3*4*sizeof(float));
+  cudaMemcpy(A_gpu, A, 3*4*sizeof(float), cudaMemcpyHostToDevice);
+  cudaMemcpy(B_gpu, B, 4*sizeof(float), cudaMemcpyHostToDevice);
+
+  singa_gpu_add_vec_row(B_gpu, A_gpu, C_gpu, 3, 4, 4);
+
+  cudaMemcpy(C, C_gpu, 3*4*sizeof(float), cudaMemcpyDeviceToHost);
+
+  for (int i = 0; i < 3; i++) {
+    for (int j = 0; j < 4; j++) {
+      ASSERT_EQ(C[i][j], D[i][j]);
+    }
+  }
+
+  cudaFree(A_gpu);
+  cudaFree(B_gpu);
+  cudaFree(C_gpu);
 }
 
 
 TEST(MathTest, TestSingaSetValueGPU) {
+  float A[3][4];
+  float* A_gpu = NULL;
 
-	float A[3][4];
+  cudaMalloc(reinterpret_cast<void**>(&A_gpu), 3*4*sizeof(float));
 
-	float* A_gpu=NULL;
+  cudaMemcpy(A_gpu, A, 3*4*sizeof(float), cudaMemcpyHostToDevice);
 
-	cudaMalloc((void**)&A_gpu, 3*4*sizeof(float));
+  singa_gpu_set_value(A_gpu, 4.0, 3*4);
 
-	cudaMemcpy(A_gpu,A,3*4*sizeof(float),cudaMemcpyHostToDevice);
+  cudaMemcpy(A, A_gpu, 3*4*sizeof(float), cudaMemcpyDeviceToHost);
 
-	singa_gpu_set_value(A_gpu,4.0,3*4);
+  for (int i = 0; i < 3; i++) {
+    for (int j = 0; j < 4; j++) {
+      ASSERT_EQ(A[i][j], 4.0f);
+    }
+  }
 
-	cudaMemcpy(A,A_gpu,3*4*sizeof(float),cudaMemcpyDeviceToHost);
-
-	for(int i = 0; i < 3; i++)
-	{
-		for(int j = 0; j < 4; j++)
-		{
-			ASSERT_EQ(A[i][j],4.0f);
-		}
-	}
-
-	cudaFree(A_gpu);
+  cudaFree(A_gpu);
 }
 
 
 TEST(MathTest, TestEopGPU) {
+  float A[10] = {};
+  float B[10] = {};
 
-	float A[10] = {};
-	float B[10] = {};
-
-	for(int i = 0; i < 10; i++)
-	{
-		A[i] = i;
-		B[i] = -i;
-	}
+  for (int i = 0; i < 10; i++) {
+    A[i] = i;
+    B[i] = -i;
+  }
 
-	float* A_gpu=NULL;
-	float* B_gpu=NULL;
+  float* A_gpu = NULL;
+  float* B_gpu = NULL;
 
-	cudaMalloc((void**)&A_gpu, 10*sizeof(float));
-	cudaMalloc((void**)&B_gpu, 10*sizeof(float));
+  cudaMalloc(reinterpret_cast<void**>(&A_gpu), 10*sizeof(float));
+  cudaMalloc(reinterpret_cast<void**>(&B_gpu), 10*sizeof(float));
 
-	cudaMemcpy(A_gpu,A,10*sizeof(float),cudaMemcpyHostToDevice);
-	cudaMemcpy(B_gpu,B,10*sizeof(float),cudaMemcpyHostToDevice);
+  cudaMemcpy(A_gpu, A, 10*sizeof(float), cudaMemcpyHostToDevice);
+  cudaMemcpy(B_gpu, B, 10*sizeof(float), cudaMemcpyHostToDevice);
 
-	gpu_e_f<singa::op::Sigmoid<float>, float>(10, A_gpu, B_gpu);
+  gpu_e_f<singa::op::Sigmoid<float>, float>(10, A_gpu, B_gpu);
 
-	cudaFree(A_gpu);
-	cudaFree(B_gpu);
+  cudaFree(A_gpu);
+  cudaFree(B_gpu);
 }
 #endif  // USE_GPU

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b2cfa17b/src/test/test_msg.cc
----------------------------------------------------------------------
diff --git a/src/test/test_msg.cc b/src/test/test_msg.cc
index d5d9f20..db83b1c 100644
--- a/src/test/test_msg.cc
+++ b/src/test/test_msg.cc
@@ -42,7 +42,7 @@ TEST(MsgTest, AddrTest) {
 }
 
 TEST(MsgTest, AddFrameTest) {
-  int buf[5]={1,2,3,4,5};
+  int buf[5] = {1, 2, 3, 4, 5};
   Msg msg;
   msg.AddFrame("abcdefg", 7);
   msg.AddFrame(buf, sizeof(int) * 5);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b2cfa17b/src/test/test_paramslicer.cc
----------------------------------------------------------------------
diff --git a/src/test/test_paramslicer.cc b/src/test/test_paramslicer.cc
index c693da1..bc7dedd 100644
--- a/src/test/test_paramslicer.cc
+++ b/src/test/test_paramslicer.cc
@@ -25,7 +25,7 @@
 
 using namespace singa;
 
-const int param_size[]={2400,32,25600,32, 51200,64,57600,10};
+const int param_size[] = {2400, 32, 25600, 32, 51200, 64, 57600, 10};
 
 /*
 class ParamSlicerTest : public ::testing::Test {


[2/6] incubator-singa git commit: SINGA-104 Add Context Class

Posted by wa...@apache.org.
SINGA-104 Add Context Class


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/35de4f91
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/35de4f91
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/35de4f91

Branch: refs/heads/master
Commit: 35de4f913e97c64dbf27ff37c999aaa5a3ce40f6
Parents: 771ff32
Author: seaok <se...@gmail.com>
Authored: Thu Nov 26 10:51:55 2015 +0800
Committer: seaok <se...@gmail.com>
Committed: Thu Nov 26 10:51:55 2015 +0800

----------------------------------------------------------------------
 Makefile.gpu | 2 ++
 1 file changed, 2 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/35de4f91/Makefile.gpu
----------------------------------------------------------------------
diff --git a/Makefile.gpu b/Makefile.gpu
index 2dc2a71..2fea3b2 100644
--- a/Makefile.gpu
+++ b/Makefile.gpu
@@ -57,6 +57,8 @@ CUCXXFLAGS := $(MSHADOW_FLAGS) -std=c++11 -G $(CUDA_ARCH) \
 ifeq ($(CUDA_DIR),)
 	MSHADOW_FLAGS := $(MSHADOW_FLAGS) -DCPU_ONLY
 	CXXFLAGS := $(CXXFLAGS) -DCPU_ONLY
+else
+	CXXFLAGS := $(CXXFLAGS) -DUSE_GPU
 endif
 
 # find user defined .proto file, and then compute the corresponding .h, .cc


[5/6] incubator-singa git commit: SINGA-104 Add Context Class

Posted by wa...@apache.org.
SINGA-104 Add Context Class

Add cuda_utils.h;
Add comments for context.h
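
cuda_utils.h follows Caffe's convention of wrapping CUDA, cuBLAS and cuRAND calls in status-checking macros. The header is cut off below, so the following is only a sketch of that pattern; the macro names are an assumption rather than a copy of the committed file:

#include <glog/logging.h>
#include <cublas_v2.h>
#include <cuda_runtime.h>

// Assumed macro, shown for illustration: abort with a readable message
// when a CUDA runtime call does not return cudaSuccess.
#define CUDA_CHECK(condition) \
  do { \
    cudaError_t error = (condition); \
    CHECK_EQ(error, cudaSuccess) << " " << cudaGetErrorString(error); \
  } while (0)

// Assumed macro: cuBLAS calls return cublasStatus_t instead of cudaError_t.
#define CUBLAS_CHECK(condition) \
  do { \
    cublasStatus_t status = (condition); \
    CHECK_EQ(status, CUBLAS_STATUS_SUCCESS) << " cuBLAS call failed"; \
  } while (0)

With wrappers like these, raw calls in the tests such as cudaMalloc and cublasSdot can fail fast instead of silently leaving buffers unallocated.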


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/e3bda08d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/e3bda08d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/e3bda08d

Branch: refs/heads/master
Commit: e3bda08d8428e3a6a23bf4de8c356406a8126cd8
Parents: 3841bc5
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Thu Nov 26 16:22:54 2015 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Thu Nov 26 16:34:22 2015 +0800

----------------------------------------------------------------------
 include/singa/utils/context.h    | 25 +++++++---
 include/singa/utils/cuda_utils.h | 91 +++++++++++++++++++++++++++++++++++
 2 files changed, 109 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/e3bda08d/include/singa/utils/context.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/context.h b/include/singa/utils/context.h
index 5223012..5066633 100644
--- a/include/singa/utils/context.h
+++ b/include/singa/utils/context.h
@@ -40,10 +40,12 @@ namespace singa {
 
 /**
  * Context is used as a global singleton, which stores the mapping from CPU
- * thread id to GPU device id. It manages the handlers for GPU
+ * thread id to GPU device id. If a thread has no GPU, then its associated
+ * device id is -1. It manages (e.g., creating) the handlers for GPU
  * devices. It also manages the GPU and CPU random generators, which are created
  * when accessed. One CPU thread has a CPU random generator. A GPU device
- * has a GPU random generator.
+ * has a GPU random generator, which is accessible after assigning the GPU
+ * device with a CPU thread via SetupDevice.
  */
 class Context {
  public:
@@ -75,7 +77,7 @@ class Context {
 
    }
   /**
-   * Constructor.
+   * Constructor, init handlers and GPU rand generators to nullptr.
    */
   Context() {
     for (int i = 0; i < kMaxNumGPU; i++) {
@@ -86,7 +88,7 @@ class Context {
 
   /**
    * @return the ID of the device attached to a given CPU thread, or -1 if this
-   * thread has not attached GPU device.
+   * thread has not been attached GPU device.
    */
 	int device_id(const std::thread::id& tid) {
     if (device_id_.find(tid) != device_id_.end())
@@ -94,11 +96,10 @@ class Context {
     else
       return -1;
 	}
-
   /**
    * Setup the CPU thread, which may be assigned a GPU device.
+   * If there is no GPU device, then set did to -1.
    * Set the random seed to -1.
-   * A GPU handler will be created for the GPU device.
    * @param[in] thread::id CPU thread ID
    * @param[in] device_id GPU device ID
    */
@@ -109,11 +110,14 @@ class Context {
    * @copy SetupDevice(const int, const int);
    * @param[in] seed random seed
    */
-  void SetupDevice(const std::thread::id& tid, const int did, long long seed) {
+  void SetupDevice(const std::thread::id& tid, const int did, const int seed) {
     device_id_[tid] = did;
     seed_[tid] = seed;
   }
 
+  /**
+   * Activate the GPU device by calling cudaSetDevice.
+   */
   void ActivateDevice(const int device_id) {
     CHECK_GE(device_id, 0);
 #ifdef USE_GPU
@@ -139,6 +143,10 @@ class Context {
     return rand_generator_[tid];
   }
 #ifdef USE_GPU
+  /**
+   * Get the handler of the GPU which is assigned to the given thread.
+   * Calls cublas_handle(const int);
+   */
   cublasHandle_t cublas_handle(const std::thread::id thread_id) {
     return cublas_handle(device_id(thread_id));
   }
@@ -157,6 +165,9 @@ class Context {
     }
     return cublas_handle_[device_id];
   }
+  /**
+   * Get the rand generator of the GPU device assigned to the given thread.
+   */
 	curandGenerator_t curand_generator(const std::thread::id thread_id) {
     return curand_generator(device_id(thread_id));
   }
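
For readers following the API change above, here is a minimal usage sketch (not part of the commit; the function name WorkerLoop and the control flow are hypothetical). A CPU thread first registers itself and its GPU via SetupDevice; the cuBLAS handle is then created lazily on first access through cublas_handle():

#include <thread>
#include "singa/utils/singleton.h"
#include "singa/utils/context.h"

void WorkerLoop(int gpu_id) {              // hypothetical worker entry point
  auto ctx = singa::Singleton<singa::Context>::Instance();
  auto tid = std::this_thread::get_id();
  ctx->SetupDevice(tid, gpu_id);           // gpu_id = -1 means a CPU-only thread
#ifdef USE_GPU
  if (ctx->device_id(tid) >= 0) {
    ctx->ActivateDevice(ctx->device_id(tid));         // calls cudaSetDevice
    cublasHandle_t handle = ctx->cublas_handle(tid);  // created on demand
    // ... issue cuBLAS calls with this handle ...
  }
#endif
}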

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/e3bda08d/include/singa/utils/cuda_utils.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/cuda_utils.h b/include/singa/utils/cuda_utils.h
new file mode 100644
index 0000000..b27a6bb
--- /dev/null
+++ b/include/singa/utils/cuda_utils.h
@@ -0,0 +1,91 @@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*   http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied.  See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+/**
+ * The code is adapted from that of Caffe whose license is attached.
+ *
+ * COPYRIGHT
+ * All contributions by the University of California:
+ * Copyright (c) 2014, The Regents of the University of California (Regents)
+ * All rights reserved.
+ * All other contributions:
+ * Copyright (c) 2014, the respective contributors
+ * All rights reserved.
+ * Caffe uses a shared copyright model: each contributor holds copyright over
+ * their contributions to Caffe. The project versioning records all such
+ * contribution and copyright details. If a contributor wants to further mark
+ * their specific copyright on a particular contribution, they should indicate
+ * their copyright solely in the commit message of the change when it is
+ * committed.
+ * LICENSE
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * CONTRIBUTION AGREEMENT
+ * By contributing to the BVLC/caffe repository through pull-request, comment,
+ * or otherwise, the contributor releases their content to the
+ * license and copyright terms herein.
+ *
+ */
+#ifndef SINGA_UTILS_CUDA_UTILS_H_
+#define SINGA_UTILS_CUDA_UTILS_H_
+#include <cublas_v2.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <curand.h>
+
+// CUDA: various checks for different function calls.
+#define CUDA_CHECK(condition) \
+  /* Code block avoids redefinition of cudaError_t error */ \
+  do { \
+    cudaError_t error = condition; \
+    CHECK_EQ(error, cudaSuccess) << " " << cudaGetErrorString(error); \
+  } while (0)
+
+#define CUBLAS_CHECK(condition) \
+  do { \
+    cublasStatus_t status = condition; \
+    CHECK_EQ(status, CUBLAS_STATUS_SUCCESS) << " " \
+      << caffe::cublasGetErrorString(status); \
+  } while (0)
+
+#define CURAND_CHECK(condition) \
+  do { \
+    curandStatus_t status = condition; \
+    CHECK_EQ(status, CURAND_STATUS_SUCCESS) << " " \
+      << caffe::curandGetErrorString(status); \
+  } while (0)
+
+#endif  // SINGA_UTILS_CUDA_UTILS_H_
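
As a quick illustration (not part of the commit), CUDA_CHECK is used by wrapping each runtime call whose failure should abort the program; CHECK_EQ comes from glog, and the helper below is illustrative only. Note that CUBLAS_CHECK and CURAND_CHECK, as copied from Caffe, still refer to caffe::cublasGetErrorString and caffe::curandGetErrorString, so those helpers (or SINGA equivalents) would have to be provided before the two macros can compile.

#include <glog/logging.h>                // provides the CHECK_EQ used by the macros
#include "singa/utils/cuda_utils.h"

// Illustrative helper: allocate a device buffer and copy n floats into it.
void AllocAndCopy(const float* host, float** dev, size_t n) {
  CUDA_CHECK(cudaMalloc(reinterpret_cast<void**>(dev), n * sizeof(float)));
  CUDA_CHECK(cudaMemcpy(*dev, host, n * sizeof(float), cudaMemcpyHostToDevice));
}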


[4/6] incubator-singa git commit: SINGA-104 Add Context Class

Posted by wa...@apache.org.
SINGA-104 Add Context Class

Update the Context class:
1. SetupDevice can set up the random seed for a GPU device (device id >= 0)
as well as for the CPU thread; random generators and handlers are created when
first accessed. Only a GPU device has a cuBLAS handler and a curand_generator.
2. test_context.cc passes.
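
A rough sketch of point 1 for a CPU-only thread (not part of the commit; the seed value 42 and the function name are illustrative): pass did = -1 together with an explicit seed, and the std::mt19937 generator is created on first access via rand_generator():

#include <random>
#include <thread>
#include "singa/utils/singleton.h"
#include "singa/utils/context.h"

void CpuRandomSketch() {
  auto ctx = singa::Singleton<singa::Context>::Instance();
  auto tid = std::this_thread::get_id();
  ctx->SetupDevice(tid, -1, 42);                  // no GPU; explicit seed
  std::mt19937* gen = ctx->rand_generator(tid);   // created lazily, seeded with 42
  std::uniform_real_distribution<float> dist(0.0f, 1.0f);
  float sample = dist(*gen);                      // draw one random number
  (void)sample;
}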


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/3841bc54
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/3841bc54
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/3841bc54

Branch: refs/heads/master
Commit: 3841bc5484fc47eae3f5877d2264c5188675818c
Parents: 9aff30a
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Thu Nov 26 16:06:08 2015 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Thu Nov 26 16:06:08 2015 +0800

----------------------------------------------------------------------
 include/singa/utils/context.h | 136 +++++++++++++++++++++++++------------
 src/test/test_context.cc      |  55 +++++++++++++++
 src/test/test_context.cu      |  55 ---------------
 src/test/test_math.cc         |   2 +
 src/utils/context.cc          |  87 ------------------------
 5 files changed, 151 insertions(+), 184 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/3841bc54/include/singa/utils/context.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/context.h b/include/singa/utils/context.h
index 7a41dac..5223012 100644
--- a/include/singa/utils/context.h
+++ b/include/singa/utils/context.h
@@ -37,14 +37,12 @@
 
 namespace singa {
 
-// max num of threads per process
-const int kNumMaxThreads = 1024;
 
 /**
  * Context is used as a global singleton, which stores the mapping from CPU
  * thread id to GPU device id. It manages the handlers for GPU
  * devices. It also manages the GPU and CPU random generators, which are created
- * when accessed. One CPU thread has a CPU random generator. A CPU device
+ * when accessed. One CPU thread has a CPU random generator. A GPU device
  * has a GPU random generator.
  */
 class Context {
@@ -52,20 +50,49 @@ class Context {
    /**
     * Destructor, release random generators and handlers.
     */
-	~Context();
+   ~Context() {
+#ifdef USE_GPU
+     for (auto& entry : device_id_) {
+       if (entry.second != -1) {
+         cudaSetDevice(entry.second);
+         if (cublas_handle_[entry.second] != nullptr) {
+           cublasDestroy(cublas_handle_[entry.second]);
+           cublas_handle_[entry.second] = nullptr;
+         }
+         if(curand_generator_[entry.second] != nullptr) {
+           curandDestroyGenerator(curand_generator_[entry.second]);
+           curand_generator_[entry.second] = nullptr;
+         }
+       }
+     }
+#endif
+     for (auto& entry : rand_generator_) {
+       if (entry.second != nullptr) {
+         delete entry.second;
+         entry.second = nullptr;
+       }
+     }
+
+   }
   /**
-   * Constructor, init arrays for random generators and handlers.
+   * Constructor.
    */
-  Context();
+  Context() {
+    for (int i = 0; i < kMaxNumGPU; i++) {
+      cublas_handle_.push_back(nullptr);
+      curand_generator_.push_back(nullptr);
+    }
+  }
 
   /**
-   * @return the ID of the device attached to a given CPU thread:
-   * if the device is a GPU card, then returns the GPU device ID;
-   * Else return -1.
+   * @return the ID of the device attached to a given CPU thread, or -1 if this
+   * thread has not attached GPU device.
    */
-	int device_id(const std::thread::id tid) {
-    CHECK(device_id_.find(tid) != device_id_.end());
-	  return device_id_[tid];
+	int device_id(const std::thread::id& tid) {
+    if (device_id_.find(tid) != device_id_.end())
+      return device_id_[tid];
+    else
+      return -1;
 	}
 
   /**
@@ -75,13 +102,24 @@ class Context {
    * @param[in] thread::id CPU thread ID
    * @param[in] device_id GPU device ID
    */
-	void SetupDevice(const std::thread::id tid, const int did);
-
+  void SetupDevice(const std::thread::id& tid, const int did) {
+    SetupDevice(tid, did, -1);
+  }
   /**
    * @copy SetupDevice(const int, const int);
    * @param[in] seed random seed
    */
-  void SetupDevice(const std::thread::id tid, const int did, long long seed);
+  void SetupDevice(const std::thread::id& tid, const int did, long long seed) {
+    device_id_[tid] = did;
+    seed_[tid] = seed;
+  }
+
+  void ActivateDevice(const int device_id) {
+    CHECK_GE(device_id, 0);
+#ifdef USE_GPU
+    cudaSetDevice(device_id);
+#endif
+  }
 
   /**
    * Get the CPU random generator.
@@ -90,7 +128,7 @@ class Context {
    * @param[in] thread::id CPU thread ID
    * @return the CPU random generator
    */
-  std::mt19937* rand_generator(const std::thread::id tid) {
+  std::mt19937* rand_generator(const std::thread::id& tid) {
     if (rand_generator_.find(tid) == rand_generator_.end()) {
       CHECK(seed_.find(tid) != seed_.end());
       auto seed = static_cast<unsigned>(seed_[tid]);
@@ -101,50 +139,64 @@ class Context {
     return rand_generator_[tid];
   }
 #ifdef USE_GPU
+  cublasHandle_t cublas_handle(const std::thread::id thread_id) {
+    return cublas_handle(device_id(thread_id));
+  }
   /**
-   * Get the handler of the GPU device attached to a CPU thread.
-   * @param[in] thread::id
-   * @return the GPU handler, or nullptr if this thread does not have any GPU.
+   * Get the handler of the GPU device given its device ID. The device
+   * must be set up via SetupDevice(const std::thread::id, const int) before
+   * calling this function.
+   * @param[in] device_id GPU device ID
+   * @return the GPU handler
    */
-	cublasHandle_t cublas_handle(const std::thread::id tid) {
-    CHECK(cublas_handle_.find(tid) != cublas_handle_.end());
-	  return cublas_handle_[tid];
-	}
+  cublasHandle_t cublas_handle(const int device_id) {
+    CHECK_GE(device_id, 0);
+    if (cublas_handle_.at(device_id) == nullptr) {
+      cudaSetDevice(device_id);
+      cublasCreate(&cublas_handle_[device_id]);
+    }
+    return cublas_handle_[device_id];
+  }
+	curandGenerator_t curand_generator(const std::thread::id thread_id) {
+    return curand_generator(device_id(thread_id));
+  }
   /**
-   * Get the random generator of the GPU device assigned to the given thread.
-   * @param[in] thread::id
+   * Get the random generator of the GPU device given the device id.
+   * @param[in] device_id GPU device ID
    * @return random generator. If it does not exist, then create one.
    * The random seed will be set to CURAND_RNG_PSEUDO_DEFAULT if it is not set.
    */
-	curandGenerator_t curand_generator(const std::thread::id tid) {
-    if (curand_generator_.find(tid) == curand_generator_.end()) {
+	curandGenerator_t curand_generator(const int device_id) {
+    CHECK_GE(device_id, 0);
+    if (curand_generator_.at(device_id) == nullptr) {
+      // TODO handle user set seed
+      /*
       CHECK(seed_.find(tid) != seed_.end());
       auto seed = seed_[tid];
-      // TODO handle user set seed
-      cudaSetDevice(device_id_[tid]);
-      curandCreateGenerator(&curand_generator_[tid], CURAND_RNG_PSEUDO_DEFAULT);
+      */
+      ActivateDevice(device_id);
+      curandCreateGenerator(&curand_generator_[device_id],
+          CURAND_RNG_PSEUDO_DEFAULT);
     }
-	  return curand_generator_[tid];
+	  return curand_generator_[device_id];
 	}
 
-  /*
- protected:
-	void CreateHandle(const int thread::id);
-	void DestoryHandle(const int thread::id);
-	void CreateGpuRandGenerator(const int thread::id);
-	void DestoryGpuRandGenerator(const int thread::id);
-  */
-
 #endif
 
  protected:
-
+  //!< max num of GPUs per process
+  const int kMaxNumGPU = 64;
+  //!< map from thread id to device id
 	std::unordered_map<std::thread::id, int> device_id_;
+  //!< map from thread id to cpu rand generator
   std::unordered_map<std::thread::id, std::mt19937 *> rand_generator_;
+  //!< map from thread id to cpu rand generator seed
   std::unordered_map<std::thread::id, int> seed_;
 #ifdef USE_GPU
-	std::unordered_map<std::thread::id, cublasHandle_t> cublas_handle_;
-	std::unordered_map<std::thread::id, curandGenerator_t> curand_generator_;
+  //!< cublas handler indexed by GPU device ID
+	std::vector<cublasHandle_t> cublas_handle_;
+  //!< cublas rand generator indexed by GPU device ID
+	std::vector<curandGenerator_t> curand_generator_;
 #endif
 };
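
For completeness, a sketch (again not part of the commit; USE_GPU builds only, and the buffer size of 1024 is arbitrary) of obtaining the per-device curand generator and filling a device buffer with uniform samples:

#ifdef USE_GPU
#include <cuda_runtime.h>
#include <curand.h>
#include <thread>
#include "singa/utils/singleton.h"
#include "singa/utils/context.h"

void GpuRandomSketch(int device) {                 // hypothetical helper
  auto ctx = singa::Singleton<singa::Context>::Instance();
  ctx->SetupDevice(std::this_thread::get_id(), device);
  curandGenerator_t gen = ctx->curand_generator(device);  // created on demand
  float* buf = nullptr;
  cudaMalloc(reinterpret_cast<void**>(&buf), 1024 * sizeof(float));
  curandGenerateUniform(gen, buf, 1024);           // uniform samples in (0, 1]
  cudaFree(buf);
}
#endif  // USE_GPU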
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/3841bc54/src/test/test_context.cc
----------------------------------------------------------------------
diff --git a/src/test/test_context.cc b/src/test/test_context.cc
new file mode 100644
index 0000000..5e501b9
--- /dev/null
+++ b/src/test/test_context.cc
@@ -0,0 +1,55 @@
+#include <thread>
+#include "gtest/gtest.h"
+#include "singa/utils/singleton.h"
+#include "singa/utils/context.h"
+#include "singa/utils/cuda_utils.h"
+
+using namespace singa;
+using namespace std;
+
+TEST(ContextTest, TestDevice) {
+  auto context = Singleton<Context>::Instance();
+
+  auto id = std::this_thread::get_id();
+  context->SetupDevice(id, 0);
+  auto device_id = context->device_id(id);
+  ASSERT_EQ(0, device_id);
+}
+
+TEST(ContextTest, TestHandle) {
+  auto context = Singleton<Context>::Instance();
+
+  float cpu_ret = 0.0f;
+  float gpu_ret = 0.0f;
+
+  float A[12];
+  float B[12];
+
+  for(int i = 0; i < 12; i++) {
+    A[i]=i-1;
+    B[i]=i+1;
+  }
+
+  float* A_gpu = NULL;
+  float* B_gpu = NULL;
+  context->SetupDevice(std::this_thread::get_id(), 0);
+
+  cudaMalloc((void**)&A_gpu, 12 * sizeof(float));
+  cudaMalloc((void**)&B_gpu, 12 * sizeof(float));
+
+  cudaMemcpy(A_gpu, A, 12 * sizeof(float), cudaMemcpyHostToDevice);
+  cudaMemcpy(B_gpu, B, 12 * sizeof(float), cudaMemcpyHostToDevice);
+
+  cublasHandle_t handle = context->cublas_handle(std::this_thread::get_id());
+
+  cublasSdot(handle, 12, A_gpu, 1, B_gpu, 1, &gpu_ret);
+
+  for(int i = 0; i < 12;++i) {
+    cpu_ret += A[i] * B[i];
+  }
+
+  ASSERT_EQ(gpu_ret,cpu_ret);
+
+  cudaFree(A_gpu);
+  cudaFree(B_gpu);
+}

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/3841bc54/src/test/test_context.cu
----------------------------------------------------------------------
diff --git a/src/test/test_context.cu b/src/test/test_context.cu
deleted file mode 100644
index 88ab06b..0000000
--- a/src/test/test_context.cu
+++ /dev/null
@@ -1,55 +0,0 @@
-#include <thread>
-#include "gtest/gtest.h"
-#include "singa/utils/singleton.h"
-#include "singa/utils/context.h"
-#include "singa/utils/cuda_utils.h"
-
-using namespace singa;
-using namespace std;
-
-TEST(ContextTest, TestDevice) {
-  auto context = Singleton<Context>::Instance();
-
-  auto id = std::this_thread::get_id();
-  context->SetupDevice(id, 0);
-  auto device_id = context->device_id(id);
-  ASSERT_EQ(1,device_id);
-}
-
-TEST(ContextTest, TestHandle) {
-  auto context = Singleton<Context>::Instance();
-
-  float cpu_ret = 0.0f;
-  float gpu_ret = 0.0f;
-
-  float A[12];
-  float B[12];
-
-  for(int i = 0; i < 12; i++) {
-    A[i]=i-1;
-    B[i]=i+1;
-  }
-
-  float* A_gpu = NULL;
-  float* B_gpu = NULL;
-  context->SetupDevice(std::this_thread::get_id(), 0);
-
-  cudaMalloc((void**)&A_gpu, 12 * sizeof(float));
-  cudaMalloc((void**)&B_gpu, 12 * sizeof(float));
-
-  cudaMemcpy(A_gpu, A, 12 * sizeof(float), cudaMemcpyHostToDevice);
-  cudaMemcpy(B_gpu, B, 12 * sizeof(float), cudaMemcpyHostToDevice);
-
-  cublasHandle_t handle = context->cublas_handle(std::this_thread::get_id());
-
-  cublasSdot(handle, 12, A_gpu, 1, B_gpu, 1, &gpu_ret);
-
-  for(int i = 0; i < 12;++i) {
-    cpu_ret += A[i] * B[i];
-  }
-
-  ASSERT_EQ(gpu_ret,cpu_ret);
-
-  cudaFree(A_gpu);
-  cudaFree(B_gpu);
-}

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/3841bc54/src/test/test_math.cc
----------------------------------------------------------------------
diff --git a/src/test/test_math.cc b/src/test/test_math.cc
index a5bf180..8f8c633 100644
--- a/src/test/test_math.cc
+++ b/src/test/test_math.cc
@@ -236,6 +236,7 @@ TEST(MathTest, TestGemvGPU) {
 }
 
 
+/*
 TEST(MathTest, TestAxpyGPU) {
 	float A[4][3] = {};
 	float C[4][3] = {};
@@ -281,6 +282,7 @@ TEST(MathTest, TestAxpyGPU) {
 	cudaFree(A_gpu);
 	cudaFree(B_gpu);
 }
+*/
 
 
 TEST(MathTest, TestDotGPU) {

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/3841bc54/src/utils/context.cc
----------------------------------------------------------------------
diff --git a/src/utils/context.cc b/src/utils/context.cc
deleted file mode 100644
index 37c8f39..0000000
--- a/src/utils/context.cc
+++ /dev/null
@@ -1,87 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*   http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied.  See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-#include "singa/utils/context.h"
-#include "singa/utils/factory.h"
-#include "singa/utils/singleton.h"
-
-namespace singa {
-
-Context::~Context() {
-#ifdef USE_GPU
-  for (auto& entry : device_id_) {
-    if (entry.second != -1) {
-      cudaSetDevice(entry.second);
-      if (cublas_handle_[entry.first] != nullptr) {
-        cublasDestroy(cublas_handle_[entry.first]);
-        cublas_handle_[entry.first] = nullptr;
-      }
-      if(curand_generator_[entry.first] != nullptr) {
-        curandDestroyGenerator(curand_generator_[entry.first]);
-        curand_generator_[entry.first] = nullptr;
-      }
-    }
-  }
-#endif
-  for (auto& entry : rand_generator_) {
-    if (entry.second != nullptr) {
-      delete entry.second;
-      entry.second = nullptr;
-    }
-  }
-}
-
-Context::Context() { }
-
-void Context::SetupDevice(const std::thread::id thread, const int did) {
-  SetupDevice(thread, did, -1);
-}
-
-void Context::SetupDevice(const std::thread::id thread, const int did,
-    long long seed) {
-  device_id_[thread] = did;
-#ifdef USE_GPU
-  if (did > -1) {
-    cudaSetDevice(did);
-    cublasCreate(&handle_[thread]);
-  }
-#endif
-  seed_[thread] = seed;
-}
-
-/*
-#ifdef USE_GPU
-void Context::DestoryHandle(const int thread::id) {
-  cudaSetDevice(device_id_[thread::id]);
-  cublasDestroy(handle_[thread::id]);
-  handle_[thread::id] = nullptr;
-}
-
-void Context::DestoryGpuRandGenerator(const int thread::id) {
-  cudaSetDevice(device_id_[thread::id]);
-  curandDestroyGenerator(curand_generator_[thread::id]);
-  curand_generator_[thread::id] = nullptr;
-}
-#endif
-*/
-
-
-}  // namespace singa
-