Posted to commits@singa.apache.org by zh...@apache.org on 2016/06/13 13:20:19 UTC
[26/50] [abbrv] incubator-singa git commit: SINGA-188 Add Dense layer
SINGA-188 Add Dense layer
Minor changes to format the code and to update the field IDs of DenseConf.
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/64ea2065
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/64ea2065
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/64ea2065
Branch: refs/heads/master
Commit: 64ea2065411ed29d6870d75c8577cbe086f4daa7
Parents: 73d4a34
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Thu Jun 2 12:02:16 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Thu Jun 2 12:02:16 2016 +0800
----------------------------------------------------------------------
src/model/layer/dense.cc | 20 ++---
src/model/layer/dense.h | 15 ++--
src/proto/model.proto | 8 +-
test/singa/test_dense.cc | 177 ++++++++++++++++++++++--------------------
4 files changed, 115 insertions(+), 105 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/64ea2065/src/model/layer/dense.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/dense.cc b/src/model/layer/dense.cc
index ebee62a..29ff8cb 100644
--- a/src/model/layer/dense.cc
+++ b/src/model/layer/dense.cc
@@ -29,7 +29,7 @@ Dense::~Dense() {
}
void Dense::Setup(const LayerConf &conf) {
Layer::Setup(conf);
- DenseConf dense_conf = conf.dense_conf();
+ auto dense_conf = conf.dense_conf();
hdim_ = dense_conf.num_output();
vdim_ = dense_conf.num_input();
transpose_ = dense_conf.transpose();
@@ -45,7 +45,8 @@ void Dense::Setup(const LayerConf &conf) {
/// \copydoc Layer::Forward(int flag, const Tensor&)
const Tensor Dense::Forward(int flag, const Tensor &input) {
Tensor output;
- if (transpose_)
+
+ if (transpose_) // use the transposed version of weight_ to compute the output
output = Mult(input, weight_);
else
output = Mult(input, weight_.T());
@@ -55,8 +56,8 @@ const Tensor Dense::Forward(int flag, const Tensor &input) {
}
/// \copydoc Layer::Backward(int, const Tensor&, const Tensor&);
-const std::pair<Tensor, vector<Tensor>>
-Dense::Backward(int flag, const Tensor &grad) {
+const std::pair<Tensor, vector<Tensor>> Dense::Backward(int flag,
+ const Tensor &grad) {
vector<Tensor> param_grad;
Tensor src_data = buf_.top();
buf_.pop();
@@ -65,11 +66,10 @@ Dense::Backward(int flag, const Tensor &grad) {
dw.ResetLike(weight_);
dx.ResetLike(src_data);
SumRows(grad, &db);
- if (transpose_){
- dx = Mult(grad, weight_.T());
+ if (transpose_) {
+ dx = Mult(grad, weight_.T());
dw = Mult(src_data.T(), grad);
- }
- else{
+ } else {
dx = Mult(grad, weight_);
dw = Mult(grad.T(), src_data);
}
@@ -78,8 +78,8 @@ Dense::Backward(int flag, const Tensor &grad) {
return std::make_pair(dx, param_grad);
}
-void Dense::ToDevice(Device *device) {
+void Dense::ToDevice(Device *device) {
weight_.ToDevice(device);
- bias_.ToDevice(device);
+ bias_.ToDevice(device);
}
} // namespace singa
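For reference, the Forward/Backward hunks above implement the usual affine layer. With transpose_ == false, weight_ has shape (hdim_, vdim_), so Forward computes y = x * W^T + b, and Backward computes dx = dy * W, dW = dy^T * x, and db as the column sums of dy; the transpose_ == true branch applies the same matrix in the opposite orientation. Below is a minimal, dependency-free C++ sketch of that arithmetic (an illustration only, not SINGA code), using the same values as the tests further down:

    #include <cstdio>

    int main() {
      // batchsize, input width (vdim), output width (hdim), as in the tests.
      const int n = 3, vdim = 2, hdim = 3;
      const float x[n][vdim] = {{1, 2}, {3, 4}, {5, 6}};
      const float W[hdim][vdim] = {{1, 1}, {1, 2}, {0, 1}};  // (hdim, vdim)
      const float b[hdim] = {1, 1, 1};
      const float dy[n][hdim] = {{1, 1, 1}, {2, 2, 2}, {3, 3, 3}};

      float y[n][hdim] = {};  // forward: y = x * W^T + b
      for (int i = 0; i < n; ++i)
        for (int j = 0; j < hdim; ++j) {
          y[i][j] = b[j];
          for (int k = 0; k < vdim; ++k) y[i][j] += x[i][k] * W[j][k];
        }

      float dx[n][vdim] = {}, dW[hdim][vdim] = {}, db[hdim] = {};
      for (int i = 0; i < n; ++i)
        for (int j = 0; j < hdim; ++j) {
          db[j] += dy[i][j];  // db: sum dy over the batch
          for (int k = 0; k < vdim; ++k) {
            dx[i][k] += dy[i][j] * W[j][k];  // dx = dy * W
            dW[j][k] += dy[i][j] * x[i][k];  // dW = dy^T * x
          }
        }
      printf("y[0] = %.0f %.0f %.0f\n", y[0][0], y[0][1], y[0][2]);  // 4 6 3
      return 0;
    }

These are exactly the expressions that the EXPECT_FLOAT_EQ checks in test_dense.cc below verify element by element.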
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/64ea2065/src/model/layer/dense.h
----------------------------------------------------------------------
diff --git a/src/model/layer/dense.h b/src/model/layer/dense.h
index d686a01..a5a6f66 100644
--- a/src/model/layer/dense.h
+++ b/src/model/layer/dense.h
@@ -33,7 +33,6 @@ class Dense : public Layer {
/// \copydoc Layer::Setup(const LayerConf&);
void Setup(const LayerConf& conf) override;
- void SetupParam(const Tensor& input);
/// \copydoc Layer::Forward(int flag, const Tensor&)
const Tensor Forward(int flag, const Tensor& input) override;
@@ -42,12 +41,12 @@ class Dense : public Layer {
const Tensor& grad) override;
void ToDevice(Device* device) override;
-
+
size_t num_output() const { return hdim_; }
size_t num_input() const { return vdim_; }
bool transpose() const { return transpose_; }
- const Tensor &weight() const { return weight_; }
- const Tensor &bias() const { return bias_; }
+ const Tensor& weight() const { return weight_; }
+ const Tensor& bias() const { return bias_; }
void set_weight(Tensor w) {
weight_.ResetLike(w);
@@ -58,9 +57,11 @@ class Dense : public Layer {
bias_.CopyData(b);
}
-protected:
- size_t batchsize_, vdim_, hdim_;
- bool transpose_;
+ protected:
+ /// Used in auto-encoders, where the decoder reuses the encoder's weight
+ /// matrix in transposed form.
+ bool transpose_ = false;
+ size_t vdim_, hdim_;
Tensor weight_, bias_;
// Tensor data_, grad_;
std::stack<Tensor> buf_;
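The transpose_ flag documented above targets tied-weight auto-encoders. A hedged sketch of that use, assuming only the APIs visible in this header and hypothetical layer sizes; note that set_weight() copies the tensor (ResetLike + CopyData), so this ties the initial values rather than sharing storage:

    #include "../src/model/layer/dense.h"

    void TieDecoderToEncoder() {
      // Encoder: 784 -> 128; with transpose = false its weight is (128, 784).
      singa::LayerConf econf;
      econf.mutable_dense_conf()->set_num_input(784);
      econf.mutable_dense_conf()->set_num_output(128);

      // Decoder: 128 -> 784 with transpose = true, so Forward multiplies the
      // (n, 128) input by its weight directly; a (128, 784) matrix fits.
      singa::LayerConf dconf;
      dconf.mutable_dense_conf()->set_num_input(128);
      dconf.mutable_dense_conf()->set_num_output(784);
      dconf.mutable_dense_conf()->set_transpose(true);

      singa::Dense encoder, decoder;
      encoder.Setup(econf);
      decoder.Setup(dconf);
      // Hand the encoder's (128, 784) weight to the decoder unchanged; the
      // transpose_ branch in Forward/Backward applies it in the transposed
      // orientation, giving the usual W / W^T weight tying.
      decoder.set_weight(encoder.weight());
    }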
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/64ea2065/src/proto/model.proto
----------------------------------------------------------------------
diff --git a/src/proto/model.proto b/src/proto/model.proto
index 75e2be7..16ba62f 100644
--- a/src/proto/model.proto
+++ b/src/proto/model.proto
@@ -208,11 +208,8 @@ message LayerConf {
// optional ImageDataConf image_data_conf = 115;
optional InfogainLossConf infogain_loss_conf = 116;
optional InnerProductConf inner_product_conf = 117;
- optional DenseConf dense_conf = 150;
optional LogConf log_conf = 134;
optional LRNConf lrn_conf = 118;
- // Used in SINGA
- optional MetricConf metric_conf = 200;
// optional MemoryDataConf memory_data_conf = 119;
optional MVNConf mvn_conf = 120;
optional PoolingConf pooling_conf = 121;
@@ -230,6 +227,10 @@ message LayerConf {
optional ThresholdConf threshold_conf = 128;
optional TileConf tile_conf = 138;
//optional WindowDataConf window_data_conf = 129;
+
+ // Used in SINGA
+ optional DenseConf dense_conf = 201;
+ optional MetricConf metric_conf = 200;
}
// Message that stores hyper-parameters used to apply transformation
@@ -584,6 +585,7 @@ message DenseConf {
// all preceding axes are retained in the output.
// May be negative to index from the end (e.g., -1 for the last axis).
optional int32 axis = 5 [default = 1];
+
optional uint32 num_input = 20; // The number of inputs for the layer
optional bool transpose = 21 [default = false]; // whether transpose or not
}
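One note on the renumbering above: a protobuf field's wire tag is derived from its number, so moving dense_conf from 150 to 201 (next to the other SINGA-specific field at 200) is invisible to code that uses the generated accessors, but binary LayerConf blobs serialized under the old ID will no longer populate dense_conf; text-format configs address fields by name and are unaffected. A small sketch, assuming the generated C++ headers for model.proto:

    #include <string>
    // plus the generated header for src/proto/model.proto (path depends on the build)

    void SerializeConf() {
      singa::LayerConf conf;
      conf.mutable_dense_conf()->set_num_input(2);
      conf.mutable_dense_conf()->set_num_output(3);
      std::string wire;
      conf.SerializeToString(&wire);  // dense_conf is now written under tag 201
      // A blob produced before this commit carries the same bytes under tag 150;
      // parsed with the new schema they land in the unknown-field set and
      // conf.has_dense_conf() stays false.
    }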
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/64ea2065/test/singa/test_dense.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_dense.cc b/test/singa/test_dense.cc
index 6d136af..606f819 100644
--- a/test/singa/test_dense.cc
+++ b/test/singa/test_dense.cc
@@ -19,8 +19,8 @@
*
*************************************************************/
#include "../src/model/layer/dense.h"
-
#include "gtest/gtest.h"
+#include "singa_config.h"
using singa::Dense;
TEST(Dense, Setup) {
@@ -34,8 +34,8 @@ TEST(Dense, Setup) {
denseconf->set_transpose(false);
dense.Setup(conf);
- EXPECT_EQ(3, dense.num_output());
- EXPECT_EQ(2, dense.num_input());
+ EXPECT_EQ(3u, dense.num_output());
+ EXPECT_EQ(2u, dense.num_input());
}
TEST(Dense, ForwardCpp) {
@@ -47,82 +47,40 @@ TEST(Dense, ForwardCpp) {
denseconf->set_num_output(3);
denseconf->set_transpose(false);
dense.Setup(conf);
-
const size_t batchsize = 3, vdim = 2, hdim = 3;
- const float x[batchsize * vdim] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f,
- 6.0f};
+ const float x[batchsize * vdim] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
singa::Tensor in(singa::Shape{batchsize, vdim});
in.CopyDataFromHostPtr(x, batchsize * vdim);
-
+
// set weight
- const float we[hdim * vdim] = {
- 1.0f, 1.0f, 1.0f, 2.0f, 0.0f, 1.0f};
+ const float we[hdim * vdim] = {1.0f, 1.0f, 1.0f, 2.0f, 0.0f, 1.0f};
singa::Tensor weight(singa::Shape{hdim, vdim});
weight.CopyDataFromHostPtr(we, hdim * vdim);
- const float bia[hdim] = {
- 1.0f, 1.0f, 1.0f};
+ const float bia[hdim] = {1.0f, 1.0f, 1.0f};
singa::Tensor bias(singa::Shape{hdim});
bias.CopyDataFromHostPtr(bia, hdim);
-
+
dense.set_weight(weight);
dense.set_bias(bias);
-
+
singa::Tensor out1 = dense.Forward(singa::kTrain, in);
singa::CppCPU host(0, 1);
const float *outptr1 = out1.data<const float *>();
- EXPECT_EQ(9, out1.Size());
+ EXPECT_EQ(9u, out1.Size());
for (int i = 0; i < 3; i++)
for (int j = 0; j < 3; j++)
- EXPECT_FLOAT_EQ((x[i * 2 + 0] * we[j * 2 + 0] + x[i * 2 + 1] * we[j * 2 + 1] + bia[j]), outptr1[i * 3 + j]);
+ EXPECT_FLOAT_EQ((x[i * 2 + 0] * we[j * 2 + 0] +
+ x[i * 2 + 1] * we[j * 2 + 1] + bia[j]),
+ outptr1[i * 3 + j]);
}
-TEST(Dense, ForwardCuda) {
- Dense dense;
- singa::LayerConf conf;
- singa::DenseConf *denseconf = conf.mutable_dense_conf();
- denseconf->set_num_input(2);
- denseconf->set_num_output(3);
- denseconf->set_transpose(false);
- dense.Setup(conf);
-
-
- const size_t batchsize = 3, vdim = 2, hdim = 3;
- const float x[batchsize * vdim] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f,
- 6.0f};
- singa::CudaGPU cuda(0, 1);
- singa::Tensor in(singa::Shape{batchsize, vdim}, &cuda);
- in.CopyDataFromHostPtr(x, batchsize * vdim);
-
- // set weight
- const float we[hdim * vdim] = {
- 1.0f, 1.0f, 1.0f, 2.0f, 0.0f, 1.0f};
- singa::Tensor weight(singa::Shape{hdim, vdim}, &cuda);
- weight.CopyDataFromHostPtr(we, hdim * vdim);
-
- const float bia[hdim] = {
- 1.0f, 1.0f, 1.0f};
- singa::Tensor bias(singa::Shape{hdim}, &cuda);
- bias.CopyDataFromHostPtr(bia, hdim);
-
- dense.set_weight(weight);
- dense.set_bias(bias);
-
- singa::Tensor out1 = dense.Forward(singa::kTrain, in);
- singa::CppCPU host(0, 1);
- out1.ToDevice(&host);
- const float *outptr1 = out1.data<const float *>();
- EXPECT_EQ(9, out1.Size());
- for (int i = 0; i < 3; i++)
- for (int j = 0; j < 3; j++)
- EXPECT_FLOAT_EQ((x[i * 2 + 0] * we[j * 2 + 0] + x[i * 2 + 1] * we[j * 2 + 1] + bia[j]), outptr1[i * 3 + j]);
-}
TEST(Dense, BackwardCpp) {
Dense dense;
-
+
singa::LayerConf conf;
singa::DenseConf *denseconf = conf.mutable_dense_conf();
denseconf->set_num_input(2);
@@ -131,30 +89,28 @@ TEST(Dense, BackwardCpp) {
dense.Setup(conf);
const size_t batchsize = 3, vdim = 2, hdim = 3;
- const float x[batchsize * vdim] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f,
- 6.0f};
+ const float x[batchsize * vdim] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
singa::CudaGPU cuda(0, 1);
singa::Tensor in(singa::Shape{batchsize, vdim});
in.CopyDataFromHostPtr(x, batchsize * vdim);
// set weight
- const float we[hdim * vdim] = {
- 1.0f, 1.0f, 1.0f, 2.0f, 0.0f, 1.0f};
+ const float we[hdim * vdim] = {1.0f, 1.0f, 1.0f, 2.0f, 0.0f, 1.0f};
singa::Tensor weight(singa::Shape{hdim, vdim});
weight.CopyDataFromHostPtr(we, hdim * vdim);
-
- const float bia[hdim] = {
- 1.0f, 1.0f, 1.0f};
+
+ const float bia[hdim] = {1.0f, 1.0f, 1.0f};
singa::Tensor bias(singa::Shape{hdim});
bias.CopyDataFromHostPtr(bia, hdim);
-
+
dense.set_weight(weight);
dense.set_bias(bias);
singa::Tensor out1 = dense.Forward(singa::kTrain, in);
// grad
- const float dy[batchsize * hdim] = {1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 3.0f, 3.0f};
+ const float dy[batchsize * hdim] = {1.0f, 1.0f, 1.0f, 2.0f, 2.0f,
+ 2.0f, 3.0f, 3.0f, 3.0f};
singa::Tensor grad(singa::Shape{batchsize, hdim});
grad.CopyDataFromHostPtr(dy, batchsize * hdim);
@@ -164,24 +120,70 @@ TEST(Dense, BackwardCpp) {
singa::Tensor dweight = ret.second.at(0);
singa::Tensor dbias = ret.second.at(1);
const float *dx = in_grad.data<const float *>();
- EXPECT_EQ(6, in_grad.Size());
+ EXPECT_EQ(6u, in_grad.Size());
for (int i = 0; i < 3; i++)
for (int j = 0; j < 2; j++)
- EXPECT_FLOAT_EQ((dy[i * 3 + 0] * we[0 * 2 + j] + dy[i * 3 + 1] * we[1 * 2 + j] + dy[i * 3 + 2] * we[2 * 2 + j]), dx[i * 2 + j]);
+ EXPECT_FLOAT_EQ(
+ (dy[i * 3 + 0] * we[0 * 2 + j] + dy[i * 3 + 1] * we[1 * 2 + j] +
+ dy[i * 3 + 2] * we[2 * 2 + j]),
+ dx[i * 2 + j]);
const float *dweightx = dweight.data<const float *>();
- EXPECT_EQ(6, dweight.Size());
+ EXPECT_EQ(6u, dweight.Size());
for (int i = 0; i < 3; i++)
for (int j = 0; j < 2; j++)
- EXPECT_FLOAT_EQ((dy[0 * 3 + i] * x[0 *2 + j] + dy[1 * 3 + i] * x[1 * 2 + j] + dy[2 * 3 + i] * x[2 * 2 + j]), dweightx[i * 2 + j]);
+ EXPECT_FLOAT_EQ(
+ (dy[0 * 3 + i] * x[0 * 2 + j] + dy[1 * 3 + i] * x[1 * 2 + j] +
+ dy[2 * 3 + i] * x[2 * 2 + j]),
+ dweightx[i * 2 + j]);
const float *dbiasx = dbias.data<const float *>();
- EXPECT_EQ(3, dbias.Size());
+ EXPECT_EQ(3u, dbias.Size());
for (int i = 0; i < 3; i++)
EXPECT_FLOAT_EQ((dy[0 * 3 + i] + dy[1 * 3 + i] + dy[2 * 3 + i]), dbiasx[i]);
}
+#ifdef USE_CUDA
+TEST(Dense, ForwardCuda) {
+ Dense dense;
+
+ singa::LayerConf conf;
+ singa::DenseConf *denseconf = conf.mutable_dense_conf();
+ denseconf->set_num_input(2);
+ denseconf->set_num_output(3);
+ denseconf->set_transpose(false);
+ dense.Setup(conf);
+
+ const size_t batchsize = 3, vdim = 2, hdim = 3;
+ const float x[batchsize * vdim] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
+ singa::CudaGPU cuda(0, 1);
+ singa::Tensor in(singa::Shape{batchsize, vdim}, &cuda);
+ in.CopyDataFromHostPtr(x, batchsize * vdim);
+
+ // set weight
+ const float we[hdim * vdim] = {1.0f, 1.0f, 1.0f, 2.0f, 0.0f, 1.0f};
+ singa::Tensor weight(singa::Shape{hdim, vdim}, &cuda);
+ weight.CopyDataFromHostPtr(we, hdim * vdim);
+
+ const float bia[hdim] = {1.0f, 1.0f, 1.0f};
+ singa::Tensor bias(singa::Shape{hdim}, &cuda);
+ bias.CopyDataFromHostPtr(bia, hdim);
+
+ dense.set_weight(weight);
+ dense.set_bias(bias);
+
+ singa::Tensor out1 = dense.Forward(singa::kTrain, in);
+ singa::CppCPU host(0, 1);
+ out1.ToDevice(&host);
+ const float *outptr1 = out1.data<const float *>();
+ EXPECT_EQ(9u, out1.Size());
+ for (int i = 0; i < 3; i++)
+ for (int j = 0; j < 3; j++)
+ EXPECT_FLOAT_EQ((x[i * 2 + 0] * we[j * 2 + 0] +
+ x[i * 2 + 1] * we[j * 2 + 1] + bia[j]),
+ outptr1[i * 3 + j]);
+}
TEST(Dense, BackwardCuda) {
Dense dense;
-
+
singa::LayerConf conf;
singa::DenseConf *denseconf = conf.mutable_dense_conf();
denseconf->set_num_input(2);
@@ -190,30 +192,28 @@ TEST(Dense, BackwardCuda) {
dense.Setup(conf);
const size_t batchsize = 3, vdim = 2, hdim = 3;
- const float x[batchsize * vdim] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f,
- 6.0f};
+ const float x[batchsize * vdim] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
singa::CudaGPU cuda(0, 1);
singa::Tensor in(singa::Shape{batchsize, vdim}, &cuda);
in.CopyDataFromHostPtr(x, batchsize * vdim);
// set weight
- const float we[hdim * vdim] = {
- 1.0f, 1.0f, 1.0f, 2.0f, 0.0f, 1.0f};
+ const float we[hdim * vdim] = {1.0f, 1.0f, 1.0f, 2.0f, 0.0f, 1.0f};
singa::Tensor weight(singa::Shape{hdim, vdim}, &cuda);
weight.CopyDataFromHostPtr(we, hdim * vdim);
-
- const float bia[hdim] = {
- 1.0f, 1.0f, 1.0f};
+
+ const float bia[hdim] = {1.0f, 1.0f, 1.0f};
singa::Tensor bias(singa::Shape{hdim}, &cuda);
bias.CopyDataFromHostPtr(bia, hdim);
-
+
dense.set_weight(weight);
dense.set_bias(bias);
singa::Tensor out1 = dense.Forward(singa::kTrain, in);
// grad
- const float dy[batchsize * hdim] = {1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 3.0f, 3.0f};
+ const float dy[batchsize * hdim] = {1.0f, 1.0f, 1.0f, 2.0f, 2.0f,
+ 2.0f, 3.0f, 3.0f, 3.0f};
singa::Tensor grad(singa::Shape{batchsize, hdim}, &cuda);
grad.CopyDataFromHostPtr(dy, batchsize * hdim);
@@ -224,19 +224,26 @@ TEST(Dense, BackwardCuda) {
singa::Tensor dbias = ret.second.at(1);
in_grad.ToDevice(&host);
const float *dx = in_grad.data<const float *>();
- EXPECT_EQ(6, in_grad.Size());
+ EXPECT_EQ(6u, in_grad.Size());
for (int i = 0; i < 3; i++)
for (int j = 0; j < 2; j++)
- EXPECT_FLOAT_EQ((dy[i * 3 + 0] * we[0 * 2 + j] + dy[i * 3 + 1] * we[1 * 2 + j] + dy[i * 3 + 2] * we[2 * 2 + j]), dx[i * 2 + j]);
+ EXPECT_FLOAT_EQ(
+ (dy[i * 3 + 0] * we[0 * 2 + j] + dy[i * 3 + 1] * we[1 * 2 + j] +
+ dy[i * 3 + 2] * we[2 * 2 + j]),
+ dx[i * 2 + j]);
dweight.ToDevice(&host);
const float *dweightx = dweight.data<const float *>();
- EXPECT_EQ(6, dweight.Size());
+ EXPECT_EQ(6u, dweight.Size());
for (int i = 0; i < 3; i++)
for (int j = 0; j < 2; j++)
- EXPECT_FLOAT_EQ((dy[0 * 3 + i] * x[0 *2 + j] + dy[1 * 3 + i] * x[1 * 2 + j] + dy[2 * 3 + i] * x[2 * 2 + j]), dweightx[i * 2 + j]);
+ EXPECT_FLOAT_EQ(
+ (dy[0 * 3 + i] * x[0 * 2 + j] + dy[1 * 3 + i] * x[1 * 2 + j] +
+ dy[2 * 3 + i] * x[2 * 2 + j]),
+ dweightx[i * 2 + j]);
dbias.ToDevice(&host);
const float *dbiasx = dbias.data<const float *>();
- EXPECT_EQ(3, dbias.Size());
+ EXPECT_EQ(3u, dbias.Size());
for (int i = 0; i < 3; i++)
EXPECT_FLOAT_EQ((dy[0 * 3 + i] + dy[1 * 3 + i] + dy[2 * 3 + i]), dbiasx[i]);
}
+#endif