You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@singa.apache.org by wa...@apache.org on 2016/06/24 06:51:37 UTC
[4/6] incubator-singa git commit: changed all device pointer to shared pointer
changed all device pointer to shared pointer
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/5651383f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/5651383f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/5651383f
Branch: refs/heads/dev
Commit: 5651383f5dbe0ab17eeda70f491d837a24bcb4ab
Parents: 077d13e
Author: liyuchenmike@gmail.com <li...@gmail.com>
Authored: Wed Jun 22 21:06:38 2016 +0800
Committer: liyuchenmike@gmail.com <li...@gmail.com>
Committed: Wed Jun 22 21:06:38 2016 +0800
----------------------------------------------------------------------
include/singa/core/device.h | 7 ++--
include/singa/core/tensor.h | 10 ++---
include/singa/model/layer.h | 2 +-
src/core/device/cpp_cpu.cc | 2 +-
src/core/device/cuda_gpu.cc | 5 ---
src/core/device/device.cc | 2 +-
src/core/memory/memory.cc | 3 --
src/core/tensor/tensor.cc | 19 +++++-----
src/model/layer/batchnorm.cc | 2 +-
src/model/layer/batchnorm.h | 2 +-
src/model/layer/dense.cc | 2 +-
src/model/layer/dense.h | 2 +-
src/model/layer/dropout.cc | 2 +-
src/model/layer/dropout.h | 2 +-
test/singa/test_dense.cc | 33 +++++++----------
test/singa/test_memory.cc | 6 +--
test/singa/test_mse.cc | 17 ++++-----
test/singa/test_sgd.cc | 8 ++--
test/singa/test_tensor.cc | 6 +--
test/singa/test_tensor_math.cc | 74 ++++++++++++++++++-------------------
20 files changed, 94 insertions(+), 112 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5651383f/include/singa/core/device.h
----------------------------------------------------------------------
diff --git a/include/singa/core/device.h b/include/singa/core/device.h
index fc98a23..d2b5b12 100644
--- a/include/singa/core/device.h
+++ b/include/singa/core/device.h
@@ -23,6 +23,7 @@
#include <vector>
#include <string>
#include <functional>
+#include <memory>
#include "singa/singa_config.h"
#include "singa/core/common.h"
#include "singa/core/memory.h"
@@ -75,7 +76,7 @@ class Device {
return lang_;
}
- Device* host() const { return host_;}
+ std::shared_ptr<Device> host() const { return host_;}
Context* context(int k) {
return &ctx_;
@@ -107,7 +108,7 @@ class Device {
// SafeQueue<Operation> op_queue_;
// SafeQueue<Operation> op_log_;
/// The host device
- Device* host_;
+ std::shared_ptr<Device> host_;
// TODO(wangwei) define multiple contexts, one per executor
Context ctx_;
};
@@ -134,7 +135,7 @@ class CppCPU : public Device {
};
/// a singleton CppDevice as the host for all devices.
-extern CppCPU defaultDevice;
+extern std::shared_ptr<Device> defaultDevice;
// Implement Device using OpenCL libs.
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5651383f/include/singa/core/tensor.h
----------------------------------------------------------------------
diff --git a/include/singa/core/tensor.h b/include/singa/core/tensor.h
index bb8d7f8..8f73047 100644
--- a/include/singa/core/tensor.h
+++ b/include/singa/core/tensor.h
@@ -67,8 +67,8 @@ class Tensor {
Tensor();
explicit Tensor(Shape &&shape, DataType dtype = kFloat32);
explicit Tensor(const Shape &shape, DataType dtype = kFloat32);
- Tensor(Shape &&shape, Device *dev, DataType dtype = kFloat32);
- Tensor(const Shape &shape, Device *dev, DataType dtype = kFloat32);
+ Tensor(Shape &&shape, std::shared_ptr<Device> dev, DataType dtype = kFloat32);
+ Tensor(const Shape &shape, std::shared_ptr<Device> dev, DataType dtype = kFloat32);
/// Copy Tensor to share the internal data. No deep copy.
Tensor(const Tensor &from);
@@ -80,7 +80,7 @@ class Tensor {
/// blob_ is allocated in constructors.
Blob *blob() const { return blob_; }
- Device *device() const { return device_; }
+ std::shared_ptr<Device> device() const { return device_; }
/// Return immutable Tensor values with given type.
template <typename DType>
@@ -125,7 +125,7 @@ class Tensor {
/// Reset the device.
/// If the target device is a diff device, then do deep data copy.
- void ToDevice(Device *dev);
+ void ToDevice(std::shared_ptr<Device> dev);
/// Equivalent to ToDevice(host_dev).
void ToHost();
@@ -192,7 +192,7 @@ class Tensor {
protected:
bool transpose_ = false;
DataType data_type_ = kFloat32;
- Device *device_ = nullptr;
+ std::shared_ptr<Device> device_ = nullptr;
/// Note: blob_ is allocated in lazy manner to avoid frequent malloc/free.
/// If you want to get an allocated Blob, use blob() instead of blob_.
Blob *blob_ = nullptr;
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5651383f/include/singa/model/layer.h
----------------------------------------------------------------------
diff --git a/include/singa/model/layer.h b/include/singa/model/layer.h
index 82c8edc..ee2b42b 100644
--- a/include/singa/model/layer.h
+++ b/include/singa/model/layer.h
@@ -125,7 +125,7 @@ class Layer {
/// Move the layer (including its parameters and other internal Tensor) onto
/// the given device
- virtual void ToDevice(Device* device) {
+ virtual void ToDevice(std::shared_ptr<Device> device) {
//for (auto p : param_values_) p->ToDevice(device);
}
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5651383f/src/core/device/cpp_cpu.cc
----------------------------------------------------------------------
diff --git a/src/core/device/cpp_cpu.cc b/src/core/device/cpp_cpu.cc
index 44f614a..6884e35 100644
--- a/src/core/device/cpp_cpu.cc
+++ b/src/core/device/cpp_cpu.cc
@@ -17,7 +17,7 @@
*/
#include "singa/core/device.h"
namespace singa {
-CppCPU defaultDevice(-1, 1);
+std::shared_ptr<Device> defaultDevice=std::make_shared<CppCPU>(-1, 1);
CppCPU::CppCPU(int id, int num_executors, string scheduler,
string vm) : Device(id, num_executors, scheduler, vm) {
lang_ = kCpp;
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5651383f/src/core/device/cuda_gpu.cc
----------------------------------------------------------------------
diff --git a/src/core/device/cuda_gpu.cc b/src/core/device/cuda_gpu.cc
index d9a0985..4da292f 100644
--- a/src/core/device/cuda_gpu.cc
+++ b/src/core/device/cuda_gpu.cc
@@ -43,7 +43,6 @@ CudaGPU::~CudaGPU() {
}
#endif
delete pool;
- LOG(INFO) << "device has been deleted";
}
CudaGPU::CudaGPU(int id, int num_executors,
@@ -143,14 +142,10 @@ void* CudaGPU::Malloc(int size) {
/// Free cpu memory.
void CudaGPU::Free(void* ptr) {
- LOG(INFO) << "Cuda free is called";
- LOG(INFO) << "pool pointer" << pool << "\n";
- LOG(INFO) << "pool status:" << ((CnMemPool*)pool)->status;
if (ptr != nullptr) {
//CUDA_CHECK(cudaFree(ptr));
pool->Free(ptr);
}
- LOG(INFO) << "free memory is successed";
}
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5651383f/src/core/device/device.cc
----------------------------------------------------------------------
diff --git a/src/core/device/device.cc b/src/core/device/device.cc
index 1d3c446..1889339 100644
--- a/src/core/device/device.cc
+++ b/src/core/device/device.cc
@@ -22,7 +22,7 @@ namespace singa {
Device::Device(int id, int num_executors, string scheduler, string vm)
: id_(id), num_executors_(num_executors) {
// TODO(wangwei) create scheduler and vm.
- host_ = &defaultDevice;
+ host_ = defaultDevice;
}
void Device::Exec(function<void(Context*)>&& fn, const vector<Blob*> read_blobs,
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5651383f/src/core/memory/memory.cc
----------------------------------------------------------------------
diff --git a/src/core/memory/memory.cc b/src/core/memory/memory.cc
index c5878a6..304c101 100644
--- a/src/core/memory/memory.cc
+++ b/src/core/memory/memory.cc
@@ -60,7 +60,6 @@ CnMemPool::~CnMemPool() {
initialized = false;
}
mtx.unlock();
- LOG(INFO) << "cnmem has been freed";
}
@@ -70,10 +69,8 @@ void CnMemPool::Malloc(void** ptr, const size_t size) {
}
void CnMemPool::Free(void* ptr) {
- LOG(INFO) << "cnmem free is called !!!!!!!!!!!";
cnmemStatus_t status = cnmemFree(ptr,NULL);
CHECK_EQ(status, cnmemStatus_t::CNMEM_STATUS_SUCCESS) << " " << cnmemGetErrorString(status);
- LOG(INFO) << "cnmem free is terminated";
}
void CudaMemPool::Malloc(void** ptr, const size_t size) {
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5651383f/src/core/tensor/tensor.cc
----------------------------------------------------------------------
diff --git a/src/core/tensor/tensor.cc b/src/core/tensor/tensor.cc
index 5ae375c..a5b43d8 100644
--- a/src/core/tensor/tensor.cc
+++ b/src/core/tensor/tensor.cc
@@ -25,29 +25,28 @@
namespace singa {
Tensor::~Tensor() {
- // LOG(ERROR) << "~";
if (blob_ != nullptr && blob_->DecRefCount() == 0)
device_->FreeBlob(blob_);
blob_ = nullptr;
}
-Tensor::Tensor() { device_ = &defaultDevice; }
+Tensor::Tensor() { device_ = defaultDevice; }
Tensor::Tensor(const Shape &shape, DataType dtype)
- : data_type_(dtype), device_(&defaultDevice), shape_(shape) {
- device_ = &defaultDevice;
+ : data_type_(dtype), device_(defaultDevice), shape_(shape) {
+ device_ = defaultDevice;
blob_ = device_->NewBlob(Product(shape_) * SizeOf(data_type_));
}
Tensor::Tensor(Shape &&shape, DataType dtype)
- : data_type_(dtype), device_(&defaultDevice), shape_(shape) {
- device_ = &defaultDevice;
+ : data_type_(dtype), device_(defaultDevice), shape_(shape) {
+ device_ = defaultDevice;
blob_ = device_->NewBlob(Product(shape_) * SizeOf(data_type_));
}
-Tensor::Tensor(const Shape &shape, Device *device, DataType dtype)
+Tensor::Tensor(const Shape &shape, std::shared_ptr<Device> device, DataType dtype)
: data_type_(dtype), device_(device), shape_(shape) {
blob_ = device_->NewBlob(Product(shape_) * SizeOf(data_type_));
}
-Tensor::Tensor(Shape &&shape, Device *device, DataType dtype)
+Tensor::Tensor(Shape &&shape, std::shared_ptr<Device> device, DataType dtype)
: data_type_(dtype), device_(device), shape_(shape) {
blob_ = device_->NewBlob(Product(shape_) * SizeOf(data_type_));
}
@@ -104,7 +103,7 @@ void Tensor::AsType(DataType type) {
}
}
-void Tensor::ToDevice(Device *dst) {
+void Tensor::ToDevice(std::shared_ptr<Device> dst) {
// TODO(wangwei) the comparison is very strict. May compare against device ID?
if (device_ != dst) {
Tensor tmp(shape_, dst, data_type_);
@@ -234,7 +233,7 @@ void CopyDataToFrom(Tensor *dst, const Tensor &src, size_t num,
CHECK_GE(src.MemSize(), src_offset + nBytes);
CHECK_GE(dst->MemSize(), dst_offset + nBytes);
- Device *src_dev = src.device(), *dst_dev = dst->device();
+ std::shared_ptr<Device> src_dev = src.device(), dst_dev = dst->device();
Blob *from = src.blob(), *to = dst->blob();
if (dst_dev->lang() != src_dev->lang()) {
// let the none cpp device conduct copy op
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5651383f/src/model/layer/batchnorm.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/batchnorm.cc b/src/model/layer/batchnorm.cc
index bcd0870..1e6c39b 100644
--- a/src/model/layer/batchnorm.cc
+++ b/src/model/layer/batchnorm.cc
@@ -44,7 +44,7 @@ void BatchNorm::Setup(const LayerConf& conf) {
param_values_.push_back(&runningVariance_);
}
-void BatchNorm::ToDevice(Device* device) {
+void BatchNorm::ToDevice(std::shared_ptr<Device> device) {
bnScale_.ToDevice(device);
bnBias_.ToDevice(device);
dbnScale_.ToDevice(device);
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5651383f/src/model/layer/batchnorm.h
----------------------------------------------------------------------
diff --git a/src/model/layer/batchnorm.h b/src/model/layer/batchnorm.h
index 0255179..83f143d 100644
--- a/src/model/layer/batchnorm.h
+++ b/src/model/layer/batchnorm.h
@@ -67,7 +67,7 @@ class BatchNorm : public Layer {
runningVariance_.ResetLike(x);
runningVariance_.CopyData(x);
}
- virtual void ToDevice(Device* device) override;
+ virtual void ToDevice(std::shared_ptr<Device> device) override;
protected:
float factor_;
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5651383f/src/model/layer/dense.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/dense.cc b/src/model/layer/dense.cc
index b349787..d47c1db 100644
--- a/src/model/layer/dense.cc
+++ b/src/model/layer/dense.cc
@@ -79,7 +79,7 @@ const std::pair<Tensor, vector<Tensor>> Dense::Backward(int flag,
return std::make_pair(dx, param_grad);
}
-void Dense::ToDevice(Device *device) {
+void Dense::ToDevice(std::shared_ptr<Device> device) {
weight_.ToDevice(device);
bias_.ToDevice(device);
}
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5651383f/src/model/layer/dense.h
----------------------------------------------------------------------
diff --git a/src/model/layer/dense.h b/src/model/layer/dense.h
index a5a6f66..49cb986 100644
--- a/src/model/layer/dense.h
+++ b/src/model/layer/dense.h
@@ -40,7 +40,7 @@ class Dense : public Layer {
const std::pair<Tensor, vector<Tensor>> Backward(int flag,
const Tensor& grad) override;
- void ToDevice(Device* device) override;
+ void ToDevice(std::shared_ptr<Device> device) override;
size_t num_output() const { return hdim_; }
size_t num_input() const { return vdim_; }
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5651383f/src/model/layer/dropout.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/dropout.cc b/src/model/layer/dropout.cc
index c2c97be..695008e 100644
--- a/src/model/layer/dropout.cc
+++ b/src/model/layer/dropout.cc
@@ -52,7 +52,7 @@ const std::pair<Tensor, vector<Tensor>> Dropout::Backward(int flag,
return std::make_pair(input_grad, param_grad);
}
-void Dropout::ToDevice(Device* device) {
+void Dropout::ToDevice(std::shared_ptr<Device> device) {
Layer::ToDevice(device);
mask_.ToDevice(device);
}
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5651383f/src/model/layer/dropout.h
----------------------------------------------------------------------
diff --git a/src/model/layer/dropout.h b/src/model/layer/dropout.h
index 5efaf6a..d5da79c 100644
--- a/src/model/layer/dropout.h
+++ b/src/model/layer/dropout.h
@@ -43,7 +43,7 @@ class Dropout : public Layer {
const std::pair<Tensor, vector<Tensor>> Backward(int flag,
const Tensor& grad) override;
- void ToDevice(Device* device) override;
+ void ToDevice(std::shared_ptr<Device> device) override;
float dropout_ratio() const {
return dropout_ratio_;
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5651383f/test/singa/test_dense.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_dense.cc b/test/singa/test_dense.cc
index 052d0e8..7ed4d33 100644
--- a/test/singa/test_dense.cc
+++ b/test/singa/test_dense.cc
@@ -66,7 +66,6 @@ TEST(Dense, ForwardCpp) {
dense.set_bias(bias);
singa::Tensor out1 = dense.Forward(singa::kTrain, in);
- singa::CppCPU host(0, 1);
const float *outptr1 = out1.data<const float *>();
EXPECT_EQ(9u, out1.Size());
for (int i = 0; i < 3; i++)
@@ -76,7 +75,6 @@ TEST(Dense, ForwardCpp) {
outptr1[i * 3 + j]);
}
#endif // USE_CBLAS
-#ifdef USE_CUDA
TEST(Dense, BackwardCpp) {
Dense dense;
@@ -89,7 +87,6 @@ TEST(Dense, BackwardCpp) {
const size_t batchsize = 3, vdim = 2, hdim = 3;
const float x[batchsize * vdim] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
- singa::CudaGPU cuda(0, 1);
singa::Tensor in(singa::Shape{batchsize, vdim});
in.CopyDataFromHostPtr(x, batchsize * vdim);
@@ -114,7 +111,6 @@ TEST(Dense, BackwardCpp) {
grad.CopyDataFromHostPtr(dy, batchsize * hdim);
const auto ret = dense.Backward(singa::kTrain, grad);
- singa::CppCPU host(0, 1);
singa::Tensor in_grad = ret.first;
singa::Tensor dweight = ret.second.at(0);
singa::Tensor dbias = ret.second.at(1);
@@ -139,7 +135,6 @@ TEST(Dense, BackwardCpp) {
for (int i = 0; i < 3; i++)
EXPECT_FLOAT_EQ((dy[0 * 3 + i] + dy[1 * 3 + i] + dy[2 * 3 + i]), dbiasx[i]);
}
-#endif
#ifdef USE_CUDA
TEST(Dense, ForwardCuda) {
@@ -154,25 +149,24 @@ TEST(Dense, ForwardCuda) {
const size_t batchsize = 3, vdim = 2, hdim = 3;
const float x[batchsize * vdim] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
- singa::CudaGPU cuda(0, 1);
- singa::Tensor in(singa::Shape{batchsize, vdim}, &cuda);
+ auto cuda = std::make_shared<singa::CudaGPU>(0, 1);
+ singa::Tensor in(singa::Shape{batchsize, vdim}, cuda);
in.CopyDataFromHostPtr(x, batchsize * vdim);
// set weight
const float we[hdim * vdim] = {1.0f, 1.0f, 1.0f, 2.0f, 0.0f, 1.0f};
- singa::Tensor weight(singa::Shape{hdim, vdim}, &cuda);
+ singa::Tensor weight(singa::Shape{hdim, vdim}, cuda);
weight.CopyDataFromHostPtr(we, hdim * vdim);
const float bia[hdim] = {1.0f, 1.0f, 1.0f};
- singa::Tensor bias(singa::Shape{hdim}, &cuda);
+ singa::Tensor bias(singa::Shape{hdim}, cuda);
bias.CopyDataFromHostPtr(bia, hdim);
dense.set_weight(weight);
dense.set_bias(bias);
singa::Tensor out1 = dense.Forward(singa::kTrain, in);
- singa::CppCPU host(0, 1);
- out1.ToDevice(&host);
+ out1.ToHost();
const float *outptr1 = out1.data<const float *>();
EXPECT_EQ(9u, out1.Size());
for (int i = 0; i < 3; i++)
@@ -193,17 +187,17 @@ TEST(Dense, BackwardCuda) {
const size_t batchsize = 3, vdim = 2, hdim = 3;
const float x[batchsize * vdim] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
- singa::CudaGPU cuda(0, 1);
- singa::Tensor in(singa::Shape{batchsize, vdim}, &cuda);
+ auto cuda = std::make_shared<singa::CudaGPU>(0, 1);
+ singa::Tensor in(singa::Shape{batchsize, vdim}, cuda);
in.CopyDataFromHostPtr(x, batchsize * vdim);
// set weight
const float we[hdim * vdim] = {1.0f, 1.0f, 1.0f, 2.0f, 0.0f, 1.0f};
- singa::Tensor weight(singa::Shape{hdim, vdim}, &cuda);
+ singa::Tensor weight(singa::Shape{hdim, vdim}, cuda);
weight.CopyDataFromHostPtr(we, hdim * vdim);
const float bia[hdim] = {1.0f, 1.0f, 1.0f};
- singa::Tensor bias(singa::Shape{hdim}, &cuda);
+ singa::Tensor bias(singa::Shape{hdim}, cuda);
bias.CopyDataFromHostPtr(bia, hdim);
dense.set_weight(weight);
@@ -214,15 +208,14 @@ TEST(Dense, BackwardCuda) {
// grad
const float dy[batchsize * hdim] = {1.0f, 1.0f, 1.0f, 2.0f, 2.0f,
2.0f, 3.0f, 3.0f, 3.0f};
- singa::Tensor grad(singa::Shape{batchsize, hdim}, &cuda);
+ singa::Tensor grad(singa::Shape{batchsize, hdim}, cuda);
grad.CopyDataFromHostPtr(dy, batchsize * hdim);
const auto ret = dense.Backward(singa::kTrain, grad);
- singa::CppCPU host(0, 1);
singa::Tensor in_grad = ret.first;
singa::Tensor dweight = ret.second.at(0);
singa::Tensor dbias = ret.second.at(1);
- in_grad.ToDevice(&host);
+ in_grad.ToHost();
const float *dx = in_grad.data<const float *>();
EXPECT_EQ(6u, in_grad.Size());
for (int i = 0; i < 3; i++)
@@ -231,7 +224,7 @@ TEST(Dense, BackwardCuda) {
(dy[i * 3 + 0] * we[0 * 2 + j] + dy[i * 3 + 1] * we[1 * 2 + j] +
dy[i * 3 + 2] * we[2 * 2 + j]),
dx[i * 2 + j]);
- dweight.ToDevice(&host);
+ dweight.ToHost();
const float *dweightx = dweight.data<const float *>();
EXPECT_EQ(6u, dweight.Size());
for (int i = 0; i < 3; i++)
@@ -240,7 +233,7 @@ TEST(Dense, BackwardCuda) {
(dy[0 * 3 + i] * x[0 * 2 + j] + dy[1 * 3 + i] * x[1 * 2 + j] +
dy[2 * 3 + i] * x[2 * 2 + j]),
dweightx[i * 2 + j]);
- dbias.ToDevice(&host);
+ dbias.ToHost();
const float *dbiasx = dbias.data<const float *>();
EXPECT_EQ(3u, dbias.Size());
for (int i = 0; i < 3; i++)
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5651383f/test/singa/test_memory.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_memory.cc b/test/singa/test_memory.cc
index f5e464d..90fc99a 100644
--- a/test/singa/test_memory.cc
+++ b/test/singa/test_memory.cc
@@ -75,7 +75,7 @@ TEST(MemPool, CompareCudaCnmem) {
singa::CnMemPool cnPool;
cnPool.InitPool();
- int numOfTests = 10000;
+ int numOfTests = 5000;
int allocSize = 1000000U;
struct timeval start,end;
double t1,t2;
@@ -93,7 +93,7 @@ TEST(MemPool, CompareCudaCnmem) {
t1 = start.tv_sec * 1000 + start.tv_usec/1000;
t2 = end.tv_sec * 1000 + end.tv_usec/1000;
- LOG(INFO) << "cnmem time: " << t2-t1 << " ms" << std::endl;
+ LOG(INFO) << "cnmem memory time: " << t2-t1 << " ms" << std::endl;
pool = &cudaPool;
gettimeofday(&start,NULL);
@@ -106,6 +106,6 @@ TEST(MemPool, CompareCudaCnmem) {
t1 = start.tv_sec * 1000 + start.tv_usec/1000;
t2 = end.tv_sec * 1000 + end.tv_usec/1000;
- LOG(INFO) << "cuda time: " << t2-t1 << " ms" << std::endl;
+ LOG(INFO) << "cuda memory time: " << t2-t1 << " ms" << std::endl;
}
#endif // USE_CUDA
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5651383f/test/singa/test_mse.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_mse.cc b/test/singa/test_mse.cc
index 7c6066e..d2c5125 100644
--- a/test/singa/test_mse.cc
+++ b/test/singa/test_mse.cc
@@ -69,9 +69,9 @@ TEST_F(TestMSE, CppBackward) {
#ifdef USE_CUDA
TEST_F(TestMSE, CudaForward) {
singa::MSE* mse = new singa::MSE();
- singa::CudaGPU dev;
- p.ToDevice(&dev);
- t.ToDevice(&dev);
+ auto dev = std::make_shared<singa::CudaGPU>();
+ p.ToDevice(dev);
+ t.ToDevice(dev);
Tensor loss = mse->Forward(p, t);
loss.ToHost();
@@ -85,18 +85,15 @@ TEST_F(TestMSE, CudaForward) {
}
EXPECT_FLOAT_EQ(ldat[i], 0.5 * l);
}
- LOG(INFO) << "Before delete pxxxxxxxxxxxxxxxxxxxxxxxx";
p.ToHost();
- LOG(INFO) << "Before delete tyyyyyyyyyyyyyyyyyyyyyyy";
t.ToHost();
- LOG(INFO) << "terminate-xxxxxxxxxxxxxxxxxx-";
- delete mse;
}
+
TEST_F(TestMSE, CudaBackward) {
singa::MSE mse;
- singa::CudaGPU dev;
- p.ToDevice(&dev);
- t.ToDevice(&dev);
+ auto dev = std::make_shared<singa::CudaGPU>();
+ p.ToDevice(dev);
+ t.ToDevice(dev);
mse.Forward(p, t);
Tensor grad = mse.Backward();
grad.ToHost();
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5651383f/test/singa/test_sgd.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_sgd.cc b/test/singa/test_sgd.cc
index 71ab15e..3b04ab6 100644
--- a/test/singa/test_sgd.cc
+++ b/test/singa/test_sgd.cc
@@ -88,8 +88,8 @@ TEST(SGD, ApplyWithoutMomentumCuda) {
const float v[4] = {0.1, 0.2, 0.3, 0.4};
const float g[4] = {0.1, 0.1, 0.1, 0.1};
- singa::CudaGPU dev;
- singa::Tensor value(singa::Shape{4}, &dev), grad(singa::Shape{4}, &dev);
+ auto dev = std::make_shared<singa::CudaGPU>();
+ singa::Tensor value(singa::Shape{4}, dev), grad(singa::Shape{4}, dev);
value.CopyDataFromHostPtr(v, 4);
grad.CopyDataFromHostPtr(g, 4);
@@ -124,8 +124,8 @@ TEST(SGD, ApplyWithMomentumCuda) {
const float v[4] = {0.1, 0.2, 0.3, 0.4};
const float g[4] = {0.01, 0.02, 0.03, 0.04};
- singa::CudaGPU dev;
- singa::Tensor value(singa::Shape{4}, &dev), grad(singa::Shape{4}, &dev);
+ auto dev = std::make_shared<singa::CudaGPU>();
+ singa::Tensor value(singa::Shape{4}, dev), grad(singa::Shape{4}, dev);
value.CopyDataFromHostPtr(v, 4);
grad.CopyDataFromHostPtr(g, 4);
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5651383f/test/singa/test_tensor.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_tensor.cc b/test/singa/test_tensor.cc
index bd039ad..c351174 100644
--- a/test/singa/test_tensor.cc
+++ b/test/singa/test_tensor.cc
@@ -59,10 +59,10 @@ TEST(TensorClass, AsType) {
TEST(TensorClass, ToDevice) {
Tensor t(Shape{2,3});
- EXPECT_EQ(static_cast<Device*>(&singa::defaultDevice), t.device());
- singa::CppCPU *dev = new singa::CppCPU(0, 1);
+ EXPECT_EQ(singa::defaultDevice, t.device());
+ auto dev = std::make_shared<singa::CppCPU>(0, 1);
t.ToDevice(dev);
- EXPECT_NE(static_cast<Device*>(&singa::defaultDevice), t.device());
+ EXPECT_NE(singa::defaultDevice, t.device());
}
TEST(TensorClass, CopyDataFromHostPtr) {
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5651383f/test/singa/test_tensor_math.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_tensor_math.cc b/test/singa/test_tensor_math.cc
index b18e465..0f998c0 100644
--- a/test/singa/test_tensor_math.cc
+++ b/test/singa/test_tensor_math.cc
@@ -255,10 +255,10 @@ TEST_F(TestTensorMath, SumColumnsCpp) {
#ifdef USE_CUDA
TEST_F(TestTensorMath, MultCuda) {
const float x[4] = {1.0f, 2.0f, 3.0f, 4.0f};
- singa::CudaGPU dev;
- Tensor t(Shape{2, 2}, &dev);
+ auto dev = std::make_shared<singa::CudaGPU>();
+ Tensor t(Shape{2, 2}, dev);
t.CopyDataFromHostPtr(x, 4);
- d.ToDevice(&dev);
+ d.ToDevice(dev);
d.CopyDataFromHostPtr(dat1, 6);
Tensor C = Mult(d, t);
C.ToHost();
@@ -274,7 +274,7 @@ TEST_F(TestTensorMath, MultCuda) {
}
const float y[8] = {1.0f, 2.0f, 3.0f, 4.0f, 1.1f, 2.1f, 3.1f, 4.1f};
- Tensor s(Shape{4, 2}, &dev);
+ Tensor s(Shape{4, 2}, dev);
s.CopyDataFromHostPtr(y, 8);
Tensor D = Mult(d, s.T());
D.ToHost();
@@ -288,11 +288,11 @@ TEST_F(TestTensorMath, MultCuda) {
EXPECT_FLOAT_EQ(DPtr[i * 4 + j], tmp);
}
}
- Tensor p(Shape{4, 1}, &dev);
+ Tensor p(Shape{4, 1}, dev);
p.CopyDataFromHostPtr(x, 4);
- Tensor q(Shape{1, 4}, &dev);
+ Tensor q(Shape{1, 4}, dev);
q.SetValue(1.0f);
- Tensor o(Shape{4, 4}, &dev);
+ Tensor o(Shape{4, 4}, dev);
Mult(p, q, &o);
o.ToHost();
@@ -308,11 +308,11 @@ TEST_F(TestTensorMath, MultCuda) {
TEST_F(TestTensorMath, AddColumnCuda) {
const float x[3] = {1.0f, 2.0f, 3.0f};
- singa::CudaGPU dev;
- Tensor t(Shape{3}, &dev);
+ auto dev = std::make_shared<singa::CudaGPU>();
+ Tensor t(Shape{3}, dev);
t.CopyDataFromHostPtr(x, 3);
d.CopyDataFromHostPtr(dat1, 6);
- d.ToDevice(&dev);
+ d.ToDevice(dev);
AddColumn(t, &d);
d.ToHost();
const float *xptr = d.data<const float *>();
@@ -326,11 +326,11 @@ TEST_F(TestTensorMath, AddColumnCuda) {
TEST_F(TestTensorMath, SubColumnCuda) {
const float x[3] = {1.0f, 2.0f, 3.0f};
- singa::CudaGPU dev;
- Tensor t(Shape{3}, &dev);
+ auto dev = std::make_shared<singa::CudaGPU>();
+ Tensor t(Shape{3}, dev);
t.CopyDataFromHostPtr(x, 3);
d.CopyDataFromHostPtr(dat1, 6);
- d.ToDevice(&dev);
+ d.ToDevice(dev);
SubColumn(t, &d);
d.ToHost();
const float *xptr = d.data<const float *>();
@@ -357,11 +357,11 @@ TEST_F(TestTensorMath, MultColumnCpp) {
#ifdef USE_CUDA
TEST_F(TestTensorMath, MultColumnCuda) {
const float x[3] = {1.0f, 2.0f, 3.0f};
- singa::CudaGPU dev;
- Tensor t(Shape{3}, &dev);
+ auto dev = std::make_shared<singa::CudaGPU>();
+ Tensor t(Shape{3}, dev);
t.CopyDataFromHostPtr(x, 3);
d.CopyDataFromHostPtr(dat1, 6);
- d.ToDevice(&dev);
+ d.ToDevice(dev);
MultColumn(t, &d);
d.ToHost();
const float *xptr = d.data<const float *>();
@@ -373,11 +373,11 @@ TEST_F(TestTensorMath, MultColumnCuda) {
}
TEST_F(TestTensorMath, DivColumnCuda) {
const float x[3] = {1.0f, 2.0f, 3.0f};
- singa::CudaGPU dev;
- Tensor t(Shape{3}, &dev);
+ auto dev = std::make_shared<singa::CudaGPU>();
+ Tensor t(Shape{3}, dev);
t.CopyDataFromHostPtr(x, 3);
d.CopyDataFromHostPtr(dat1, 6);
- d.ToDevice(&dev);
+ d.ToDevice(dev);
DivColumn(t, &d);
d.ToHost();
const float *xptr = d.data<const float *>();
@@ -389,11 +389,11 @@ TEST_F(TestTensorMath, DivColumnCuda) {
}
TEST_F(TestTensorMath, AddRowCuda) {
const float x[2] = {1.1f, 2.1f};
- singa::CudaGPU dev;
- Tensor t(Shape{2}, &dev);
+ auto dev = std::make_shared<singa::CudaGPU>();
+ Tensor t(Shape{2}, dev);
t.CopyDataFromHostPtr(x, 2);
d.CopyDataFromHostPtr(dat1, 6);
- d.ToDevice(&dev);
+ d.ToDevice(dev);
AddRow(t, &d);
d.ToHost();
const float *xptr = d.data<const float *>();
@@ -405,11 +405,11 @@ TEST_F(TestTensorMath, AddRowCuda) {
}
TEST_F(TestTensorMath, SubRowCuda) {
const float x[2] = {1.1f, 2.1f};
- singa::CudaGPU dev;
- Tensor t(Shape{2}, &dev);
+ auto dev = std::make_shared<singa::CudaGPU>();
+ Tensor t(Shape{2}, dev);
t.CopyDataFromHostPtr(x, 2);
d.CopyDataFromHostPtr(dat1, 6);
- d.ToDevice(&dev);
+ d.ToDevice(dev);
SubRow(t, &d);
d.ToHost();
const float *xptr = d.data<const float *>();
@@ -421,11 +421,11 @@ TEST_F(TestTensorMath, SubRowCuda) {
}
TEST_F(TestTensorMath, MultRowCuda) {
const float x[2] = {1.1f, 2.1f};
- singa::CudaGPU dev;
- Tensor t(Shape{2}, &dev);
+ auto dev = std::make_shared<singa::CudaGPU>();
+ Tensor t(Shape{2}, dev);
t.CopyDataFromHostPtr(x, 2);
d.CopyDataFromHostPtr(dat1, 6);
- d.ToDevice(&dev);
+ d.ToDevice(dev);
MultRow(t, &d);
d.ToHost();
const float *xptr = d.data<const float *>();
@@ -452,11 +452,11 @@ TEST_F(TestTensorMath, DivRowCpp) {
#ifdef USE_CUDA
TEST_F(TestTensorMath, DivRowCuda) {
const float x[2] = {1.1f, 2.1f};
- singa::CudaGPU dev;
- Tensor t(Shape{2}, &dev);
+ auto dev = std::make_shared<singa::CudaGPU>();
+ Tensor t(Shape{2}, dev);
t.CopyDataFromHostPtr(x, 2);
d.CopyDataFromHostPtr(dat1, 6);
- d.ToDevice(&dev);
+ d.ToDevice(dev);
DivRow(t, &d);
d.ToHost();
const float *xptr = d.data<const float *>();
@@ -467,10 +467,10 @@ TEST_F(TestTensorMath, DivRowCuda) {
}
}
TEST_F(TestTensorMath, SumRowsCuda) {
- singa::CudaGPU dev;
- Tensor t(Shape{2}, &dev);
+ auto dev = std::make_shared<singa::CudaGPU>();
+ Tensor t(Shape{2}, dev);
d.CopyDataFromHostPtr(dat1, 6);
- d.ToDevice(&dev);
+ d.ToDevice(dev);
SumRows(d, &t);
t.ToHost();
const float *tptr = t.data<const float *>();
@@ -484,10 +484,10 @@ TEST_F(TestTensorMath, SumRowsCuda) {
d.ToHost();
}
TEST_F(TestTensorMath, SumColumnCuda) {
- singa::CudaGPU dev;
- Tensor t(Shape{3}, &dev);
+ auto dev = std::make_shared<singa::CudaGPU>();
+ Tensor t(Shape{3}, dev);
d.CopyDataFromHostPtr(dat1, 6);
- d.ToDevice(&dev);
+ d.ToDevice(dev);
SumColumns(d, &t);
t.ToHost();
const float *tptr = t.data<const float *>();