Posted to commits@singa.apache.org by zh...@apache.org on 2016/06/13 13:20:19 UTC

[26/50] [abbrv] incubator-singa git commit: SINGA-188 Add Dense layer

SINGA-188 Add Dense layer

Minor changes to format the code and update the IDs of the DenseConf fields.


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/64ea2065
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/64ea2065
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/64ea2065

Branch: refs/heads/master
Commit: 64ea2065411ed29d6870d75c8577cbe086f4daa7
Parents: 73d4a34
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Thu Jun 2 12:02:16 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Thu Jun 2 12:02:16 2016 +0800

----------------------------------------------------------------------
 src/model/layer/dense.cc |  20 ++---
 src/model/layer/dense.h  |  15 ++--
 src/proto/model.proto    |   8 +-
 test/singa/test_dense.cc | 177 ++++++++++++++++++++++--------------------
 4 files changed, 115 insertions(+), 105 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/64ea2065/src/model/layer/dense.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/dense.cc b/src/model/layer/dense.cc
index ebee62a..29ff8cb 100644
--- a/src/model/layer/dense.cc
+++ b/src/model/layer/dense.cc
@@ -29,7 +29,7 @@ Dense::~Dense() {
 }
 void Dense::Setup(const LayerConf &conf) {
   Layer::Setup(conf);
-  DenseConf dense_conf = conf.dense_conf();
+  auto dense_conf = conf.dense_conf();
   hdim_ = dense_conf.num_output();
   vdim_ = dense_conf.num_input();
   transpose_ = dense_conf.transpose();
@@ -45,7 +45,8 @@ void Dense::Setup(const LayerConf &conf) {
 /// \copydoc Layer::Forward(int flag, const Tensor&)
 const Tensor Dense::Forward(int flag, const Tensor &input) {
   Tensor output;
-  if (transpose_)
+
+  if (transpose_)  // use the transposed version of weight_ for computing
     output = Mult(input, weight_);
   else
     output = Mult(input, weight_.T());
@@ -55,8 +56,8 @@ const Tensor Dense::Forward(int flag, const Tensor &input) {
 }
 
 /// \copydoc Layer::Backward(int, const Tensor&, const Tensor&);
-const std::pair<Tensor, vector<Tensor>>
-Dense::Backward(int flag, const Tensor &grad) {
+const std::pair<Tensor, vector<Tensor>> Dense::Backward(int flag,
+                                                        const Tensor &grad) {
   vector<Tensor> param_grad;
   Tensor src_data = buf_.top();
   buf_.pop();
@@ -65,11 +66,10 @@ Dense::Backward(int flag, const Tensor &grad) {
   dw.ResetLike(weight_);
   dx.ResetLike(src_data);
   SumRows(grad, &db);
-  if (transpose_){
-    dx = Mult(grad, weight_.T()); 
+  if (transpose_) {
+    dx = Mult(grad, weight_.T());
     dw = Mult(src_data.T(), grad);
-  }
-  else{
+  } else {
     dx = Mult(grad, weight_);
     dw = Mult(grad.T(), src_data);
   }
@@ -78,8 +78,8 @@ Dense::Backward(int flag, const Tensor &grad) {
   return std::make_pair(dx, param_grad);
 }
 
-void Dense::ToDevice(Device *device) { 
+void Dense::ToDevice(Device *device) {
   weight_.ToDevice(device);
-  bias_.ToDevice(device); 
+  bias_.ToDevice(device);
 }
 } // namespace singa
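
For reference, the math the Forward/Backward code above computes, written as a
sketch (x is the input, dy = \partial L / \partial y for loss L, and W is
weight_, stored as hdim x vdim unless transpose_ is set):

  transpose_ == false:  y = x W^T + b,   dx = dy W,     dW = dy^T x
  transpose_ == true:   y = x W + b,     dx = dy W^T,   dW = x^T dy
  in both cases:        db = \sum_i dy_i   (SumRows(grad) above)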

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/64ea2065/src/model/layer/dense.h
----------------------------------------------------------------------
diff --git a/src/model/layer/dense.h b/src/model/layer/dense.h
index d686a01..a5a6f66 100644
--- a/src/model/layer/dense.h
+++ b/src/model/layer/dense.h
@@ -33,7 +33,6 @@ class Dense : public Layer {
   /// \copydoc Layer::Setup(const LayerConf&);
   void Setup(const LayerConf& conf) override;
 
-  void SetupParam(const Tensor& input);
   /// \copydoc Layer::Forward(int flag, const Tensor&)
   const Tensor Forward(int flag, const Tensor& input) override;
 
@@ -42,12 +41,12 @@ class Dense : public Layer {
                                                    const Tensor& grad) override;
 
   void ToDevice(Device* device) override;
-  
+
   size_t num_output() const { return hdim_; }
   size_t num_input() const { return vdim_; }
   bool transpose() const { return transpose_; }
-  const Tensor &weight() const { return weight_; }
-  const Tensor &bias() const { return bias_; }
+  const Tensor& weight() const { return weight_; }
+  const Tensor& bias() const { return bias_; }
 
   void set_weight(Tensor w) {
     weight_.ResetLike(w);
@@ -58,9 +57,11 @@ class Dense : public Layer {
     bias_.CopyData(b);
   }
 
-protected:
-  size_t batchsize_, vdim_, hdim_;
-  bool transpose_;
+ protected:
+  /// Used in auto-encoder, where the decoder would share its weight matrix from
+  /// the encoder's transposed weight matrix.
+  bool transpose_ = false;
+  size_t vdim_, hdim_;
   Tensor weight_, bias_;
   // Tensor data_, grad_;
   std::stack<Tensor> buf_;
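
The new comment on transpose_ describes weight tying in an auto-encoder. A
minimal sketch of that wiring (hypothetical setup; LayerConf construction is
elided, and note that set_weight() above copies the tensor rather than
aliasing it, so this shares a snapshot of W, not live storage):

  singa::Dense encoder, decoder;
  // encoder conf: num_input = vdim, num_output = hdim, transpose = false
  // decoder conf: num_input = hdim, num_output = vdim, transpose = true
  // ... encoder.Setup(enc_conf); decoder.Setup(dec_conf); ...
  decoder.set_weight(encoder.weight());
  // encoder computes y = x * W^T and decoder computes x' = y * W, so both
  // layers use the same hdim x vdim matrix W.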

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/64ea2065/src/proto/model.proto
----------------------------------------------------------------------
diff --git a/src/proto/model.proto b/src/proto/model.proto
index 75e2be7..16ba62f 100644
--- a/src/proto/model.proto
+++ b/src/proto/model.proto
@@ -208,11 +208,8 @@ message LayerConf {
   // optional ImageDataConf image_data_conf = 115;
   optional InfogainLossConf infogain_loss_conf = 116;
   optional InnerProductConf inner_product_conf = 117;
-  optional DenseConf dense_conf = 150;
   optional LogConf log_conf = 134;
   optional LRNConf lrn_conf = 118;
-  // Used in SINGA
-  optional MetricConf metric_conf = 200;
   // optional MemoryDataConf memory_data_conf = 119;
   optional MVNConf mvn_conf = 120;
   optional PoolingConf pooling_conf = 121;
@@ -230,6 +227,10 @@ message LayerConf {
   optional ThresholdConf threshold_conf = 128;
   optional TileConf tile_conf = 138;
   //optional WindowDataConf window_data_conf = 129;
+
+  // Used in SINGA
+  optional DenseConf dense_conf = 201;
+  optional MetricConf metric_conf = 200;
 }
 
 // Message that stores hyper-parameters used to apply transformation
@@ -584,6 +585,7 @@ message DenseConf {
   // all preceding axes are retained in the output.
   // May be negative to index from the end (e.g., -1 for the last axis).
   optional int32 axis = 5 [default = 1];
+
   optional uint32 num_input = 20; // The number of inputs for the layer
   optional bool transpose = 21 [default = false]; // whether transpose or not
 }
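
The renumbering above groups the SINGA-specific extensions together:
dense_conf moves from field ID 150 to 201, next to metric_conf (200) under
the existing "Used in SINGA" comment, away from the Caffe-derived fields
earlier in LayerConf. Through the generated C++ API the new fields are used
as in the tests below; a minimal sketch:

  singa::Dense dense;
  singa::LayerConf conf;
  singa::DenseConf *denseconf = conf.mutable_dense_conf();
  denseconf->set_num_input(2);     // vdim of the layer; field 20 above
  denseconf->set_num_output(3);    // hdim of the layer
  denseconf->set_transpose(false); // field 21; defaults to false
  dense.Setup(conf);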

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/64ea2065/test/singa/test_dense.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_dense.cc b/test/singa/test_dense.cc
index 6d136af..606f819 100644
--- a/test/singa/test_dense.cc
+++ b/test/singa/test_dense.cc
@@ -19,8 +19,8 @@
 *
 *************************************************************/
 #include "../src/model/layer/dense.h"
-
 #include "gtest/gtest.h"
+#include "singa_config.h"
 
 using singa::Dense;
 TEST(Dense, Setup) {
@@ -34,8 +34,8 @@ TEST(Dense, Setup) {
   denseconf->set_transpose(false);
   dense.Setup(conf);
 
-  EXPECT_EQ(3, dense.num_output());
-  EXPECT_EQ(2, dense.num_input());
+  EXPECT_EQ(3u, dense.num_output());
+  EXPECT_EQ(2u, dense.num_input());
 }
 
 TEST(Dense, ForwardCpp) {
@@ -47,82 +47,40 @@ TEST(Dense, ForwardCpp) {
   denseconf->set_num_output(3);
   denseconf->set_transpose(false);
   dense.Setup(conf);
-  
 
   const size_t batchsize = 3, vdim = 2, hdim = 3;
-  const float x[batchsize * vdim] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f,
-                                      6.0f};
+  const float x[batchsize * vdim] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
   singa::Tensor in(singa::Shape{batchsize, vdim});
   in.CopyDataFromHostPtr(x, batchsize * vdim);
-  
+
   // set weight
-  const float we[hdim * vdim] = {
-      1.0f, 1.0f, 1.0f, 2.0f, 0.0f, 1.0f};
+  const float we[hdim * vdim] = {1.0f, 1.0f, 1.0f, 2.0f, 0.0f, 1.0f};
   singa::Tensor weight(singa::Shape{hdim, vdim});
   weight.CopyDataFromHostPtr(we, hdim * vdim);
 
-  const float bia[hdim] = {
-      1.0f, 1.0f, 1.0f};
+  const float bia[hdim] = {1.0f, 1.0f, 1.0f};
   singa::Tensor bias(singa::Shape{hdim});
   bias.CopyDataFromHostPtr(bia, hdim);
- 
+
   dense.set_weight(weight);
   dense.set_bias(bias);
-  
+
   singa::Tensor out1 = dense.Forward(singa::kTrain, in);
   singa::CppCPU host(0, 1);
   const float *outptr1 = out1.data<const float *>();
-  EXPECT_EQ(9, out1.Size());
+  EXPECT_EQ(9u, out1.Size());
   for (int i = 0; i < 3; i++)
     for (int j = 0; j < 3; j++)
-      EXPECT_FLOAT_EQ((x[i * 2 +  0] * we[j * 2 + 0] + x[i * 2 + 1] * we[j * 2 + 1] + bia[j]), outptr1[i * 3 + j]);
+      EXPECT_FLOAT_EQ((x[i * 2 + 0] * we[j * 2 + 0] +
+                       x[i * 2 + 1] * we[j * 2 + 1] + bia[j]),
+                      outptr1[i * 3 + j]);
 }
 
-TEST(Dense, ForwardCuda) {
-  Dense dense;
 
-  singa::LayerConf conf;
-  singa::DenseConf *denseconf = conf.mutable_dense_conf();
-  denseconf->set_num_input(2);
-  denseconf->set_num_output(3);
-  denseconf->set_transpose(false);
-  dense.Setup(conf);
-  
-
-  const size_t batchsize = 3, vdim = 2, hdim = 3;
-  const float x[batchsize * vdim] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f,
-                                      6.0f};
-  singa::CudaGPU cuda(0, 1);
-  singa::Tensor in(singa::Shape{batchsize, vdim}, &cuda);
-  in.CopyDataFromHostPtr(x, batchsize * vdim);
-  
-  // set weight
-  const float we[hdim * vdim] = {
-      1.0f, 1.0f, 1.0f, 2.0f, 0.0f, 1.0f};
-  singa::Tensor weight(singa::Shape{hdim, vdim}, &cuda);
-  weight.CopyDataFromHostPtr(we, hdim * vdim);
-
-  const float bia[hdim] = {
-      1.0f, 1.0f, 1.0f};
-  singa::Tensor bias(singa::Shape{hdim}, &cuda);
-  bias.CopyDataFromHostPtr(bia, hdim);
- 
-  dense.set_weight(weight);
-  dense.set_bias(bias);
-  
-  singa::Tensor out1 = dense.Forward(singa::kTrain, in);
-  singa::CppCPU host(0, 1);
-  out1.ToDevice(&host);
-  const float *outptr1 = out1.data<const float *>();
-  EXPECT_EQ(9, out1.Size());
-  for (int i = 0; i < 3; i++)
-    for (int j = 0; j < 3; j++)
-      EXPECT_FLOAT_EQ((x[i * 2 +  0] * we[j * 2 + 0] + x[i * 2 + 1] * we[j * 2 + 1] + bia[j]), outptr1[i * 3 + j]);
-}
 
 TEST(Dense, BackwardCpp) {
   Dense dense;
-  
+
   singa::LayerConf conf;
   singa::DenseConf *denseconf = conf.mutable_dense_conf();
   denseconf->set_num_input(2);
@@ -131,30 +89,28 @@ TEST(Dense, BackwardCpp) {
   dense.Setup(conf);
 
   const size_t batchsize = 3, vdim = 2, hdim = 3;
-  const float x[batchsize * vdim] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f,
-                                      6.0f};
+  const float x[batchsize * vdim] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
   singa::CudaGPU cuda(0, 1);
   singa::Tensor in(singa::Shape{batchsize, vdim});
   in.CopyDataFromHostPtr(x, batchsize * vdim);
 
   // set weight
-  const float we[hdim * vdim] = {
-      1.0f, 1.0f, 1.0f, 2.0f, 0.0f, 1.0f};
+  const float we[hdim * vdim] = {1.0f, 1.0f, 1.0f, 2.0f, 0.0f, 1.0f};
   singa::Tensor weight(singa::Shape{hdim, vdim});
   weight.CopyDataFromHostPtr(we, hdim * vdim);
-  
-  const float bia[hdim] = {
-      1.0f, 1.0f, 1.0f};
+
+  const float bia[hdim] = {1.0f, 1.0f, 1.0f};
   singa::Tensor bias(singa::Shape{hdim});
   bias.CopyDataFromHostPtr(bia, hdim);
-  
+
   dense.set_weight(weight);
   dense.set_bias(bias);
 
   singa::Tensor out1 = dense.Forward(singa::kTrain, in);
 
   // grad
-  const float dy[batchsize * hdim] = {1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 3.0f, 3.0f};
+  const float dy[batchsize * hdim] = {1.0f, 1.0f, 1.0f, 2.0f, 2.0f,
+                                      2.0f, 3.0f, 3.0f, 3.0f};
   singa::Tensor grad(singa::Shape{batchsize, hdim});
   grad.CopyDataFromHostPtr(dy, batchsize * hdim);
 
@@ -164,24 +120,70 @@ TEST(Dense, BackwardCpp) {
   singa::Tensor dweight = ret.second.at(0);
   singa::Tensor dbias = ret.second.at(1);
   const float *dx = in_grad.data<const float *>();
-  EXPECT_EQ(6, in_grad.Size());
+  EXPECT_EQ(6u, in_grad.Size());
   for (int i = 0; i < 3; i++)
     for (int j = 0; j < 2; j++)
-      EXPECT_FLOAT_EQ((dy[i * 3 + 0] * we[0 * 2 + j] + dy[i * 3 + 1] * we[1 * 2 +  j] + dy[i * 3 +  2] * we[2 * 2 + j]), dx[i * 2 + j]);
+      EXPECT_FLOAT_EQ(
+          (dy[i * 3 + 0] * we[0 * 2 + j] + dy[i * 3 + 1] * we[1 * 2 + j] +
+           dy[i * 3 + 2] * we[2 * 2 + j]),
+          dx[i * 2 + j]);
   const float *dweightx = dweight.data<const float *>();
-  EXPECT_EQ(6, dweight.Size());
+  EXPECT_EQ(6u, dweight.Size());
   for (int i = 0; i < 3; i++)
     for (int j = 0; j < 2; j++)
-      EXPECT_FLOAT_EQ((dy[0 * 3 + i] * x[0 *2 + j] + dy[1 * 3 + i] * x[1 * 2 + j] + dy[2 * 3 + i] * x[2 * 2 + j]), dweightx[i * 2 + j]);
+      EXPECT_FLOAT_EQ(
+          (dy[0 * 3 + i] * x[0 * 2 + j] + dy[1 * 3 + i] * x[1 * 2 + j] +
+           dy[2 * 3 + i] * x[2 * 2 + j]),
+          dweightx[i * 2 + j]);
   const float *dbiasx = dbias.data<const float *>();
-  EXPECT_EQ(3, dbias.Size());
+  EXPECT_EQ(3u, dbias.Size());
   for (int i = 0; i < 3; i++)
     EXPECT_FLOAT_EQ((dy[0 * 3 + i] + dy[1 * 3 + i] + dy[2 * 3 + i]), dbiasx[i]);
 }
 
+#ifdef USE_CUDA
+TEST(Dense, ForwardCuda) {
+  Dense dense;
+
+  singa::LayerConf conf;
+  singa::DenseConf *denseconf = conf.mutable_dense_conf();
+  denseconf->set_num_input(2);
+  denseconf->set_num_output(3);
+  denseconf->set_transpose(false);
+  dense.Setup(conf);
+
+  const size_t batchsize = 3, vdim = 2, hdim = 3;
+  const float x[batchsize * vdim] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
+  singa::CudaGPU cuda(0, 1);
+  singa::Tensor in(singa::Shape{batchsize, vdim}, &cuda);
+  in.CopyDataFromHostPtr(x, batchsize * vdim);
+
+  // set weight
+  const float we[hdim * vdim] = {1.0f, 1.0f, 1.0f, 2.0f, 0.0f, 1.0f};
+  singa::Tensor weight(singa::Shape{hdim, vdim}, &cuda);
+  weight.CopyDataFromHostPtr(we, hdim * vdim);
+
+  const float bia[hdim] = {1.0f, 1.0f, 1.0f};
+  singa::Tensor bias(singa::Shape{hdim}, &cuda);
+  bias.CopyDataFromHostPtr(bia, hdim);
+
+  dense.set_weight(weight);
+  dense.set_bias(bias);
+
+  singa::Tensor out1 = dense.Forward(singa::kTrain, in);
+  singa::CppCPU host(0, 1);
+  out1.ToDevice(&host);
+  const float *outptr1 = out1.data<const float *>();
+  EXPECT_EQ(9u, out1.Size());
+  for (int i = 0; i < 3; i++)
+    for (int j = 0; j < 3; j++)
+      EXPECT_FLOAT_EQ((x[i * 2 + 0] * we[j * 2 + 0] +
+                       x[i * 2 + 1] * we[j * 2 + 1] + bia[j]),
+                      outptr1[i * 3 + j]);
+}
 TEST(Dense, BackwardCuda) {
   Dense dense;
-  
+
   singa::LayerConf conf;
   singa::DenseConf *denseconf = conf.mutable_dense_conf();
   denseconf->set_num_input(2);
@@ -190,30 +192,28 @@ TEST(Dense, BackwardCuda) {
   dense.Setup(conf);
 
   const size_t batchsize = 3, vdim = 2, hdim = 3;
-  const float x[batchsize * vdim] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f,
-                                      6.0f};
+  const float x[batchsize * vdim] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
   singa::CudaGPU cuda(0, 1);
   singa::Tensor in(singa::Shape{batchsize, vdim}, &cuda);
   in.CopyDataFromHostPtr(x, batchsize * vdim);
 
   // set weight
-  const float we[hdim * vdim] = {
-      1.0f, 1.0f, 1.0f, 2.0f, 0.0f, 1.0f};
+  const float we[hdim * vdim] = {1.0f, 1.0f, 1.0f, 2.0f, 0.0f, 1.0f};
   singa::Tensor weight(singa::Shape{hdim, vdim}, &cuda);
   weight.CopyDataFromHostPtr(we, hdim * vdim);
-  
-  const float bia[hdim] = {
-      1.0f, 1.0f, 1.0f};
+
+  const float bia[hdim] = {1.0f, 1.0f, 1.0f};
   singa::Tensor bias(singa::Shape{hdim}, &cuda);
   bias.CopyDataFromHostPtr(bia, hdim);
-  
+
   dense.set_weight(weight);
   dense.set_bias(bias);
 
   singa::Tensor out1 = dense.Forward(singa::kTrain, in);
 
   // grad
-  const float dy[batchsize * hdim] = {1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 3.0f, 3.0f};
+  const float dy[batchsize * hdim] = {1.0f, 1.0f, 1.0f, 2.0f, 2.0f,
+                                      2.0f, 3.0f, 3.0f, 3.0f};
   singa::Tensor grad(singa::Shape{batchsize, hdim}, &cuda);
   grad.CopyDataFromHostPtr(dy, batchsize * hdim);
 
@@ -224,19 +224,26 @@ TEST(Dense, BackwardCuda) {
   singa::Tensor dbias = ret.second.at(1);
   in_grad.ToDevice(&host);
   const float *dx = in_grad.data<const float *>();
-  EXPECT_EQ(6, in_grad.Size());
+  EXPECT_EQ(6u, in_grad.Size());
   for (int i = 0; i < 3; i++)
     for (int j = 0; j < 2; j++)
-      EXPECT_FLOAT_EQ((dy[i * 3 + 0] * we[0 * 2 + j] + dy[i * 3 + 1] * we[1 * 2 +  j] + dy[i * 3 +  2] * we[2 * 2 + j]), dx[i * 2 + j]);
+      EXPECT_FLOAT_EQ(
+          (dy[i * 3 + 0] * we[0 * 2 + j] + dy[i * 3 + 1] * we[1 * 2 + j] +
+           dy[i * 3 + 2] * we[2 * 2 + j]),
+          dx[i * 2 + j]);
   dweight.ToDevice(&host);
   const float *dweightx = dweight.data<const float *>();
-  EXPECT_EQ(6, dweight.Size());
+  EXPECT_EQ(6u, dweight.Size());
   for (int i = 0; i < 3; i++)
     for (int j = 0; j < 2; j++)
-      EXPECT_FLOAT_EQ((dy[0 * 3 + i] * x[0 *2 + j] + dy[1 * 3 + i] * x[1 * 2 + j] + dy[2 * 3 + i] * x[2 * 2 + j]), dweightx[i * 2 + j]);
+      EXPECT_FLOAT_EQ(
+          (dy[0 * 3 + i] * x[0 * 2 + j] + dy[1 * 3 + i] * x[1 * 2 + j] +
+           dy[2 * 3 + i] * x[2 * 2 + j]),
+          dweightx[i * 2 + j]);
   dbias.ToDevice(&host);
   const float *dbiasx = dbias.data<const float *>();
-  EXPECT_EQ(3, dbias.Size());
+  EXPECT_EQ(3u, dbias.Size());
   for (int i = 0; i < 3; i++)
     EXPECT_FLOAT_EQ((dy[0 * 3 + i] + dy[1 * 3 + i] + dy[2 * 3 + i]), dbiasx[i]);
 }
+#endif
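
With this change the CUDA tests compile only when USE_CUDA is defined; the
newly added #include "singa_config.h" at the top of this file is what is
expected to supply that macro on CUDA-enabled builds (an assumption: the
header is presumably produced by the build configuration). In outline:

  #include "singa_config.h"  // defines USE_CUDA when built with CUDA support
  #ifdef USE_CUDA
  TEST(Dense, ForwardCuda) { /* tensors placed on a singa::CudaGPU device */ }
  TEST(Dense, BackwardCuda) { /* compiled out entirely on CPU-only builds */ }
  #endif  // USE_CUDA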