You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@singa.apache.org by wa...@apache.org on 2016/06/03 07:48:59 UTC
[54/60] incubator-singa git commit: SINGA-188 Add Dense layer

SINGA-188 Add Dense layer

Add implementation for dense layer

Add test files for dense layer, both cpp version and cuda version

Pass all tests


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/73d4a34b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/73d4a34b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/73d4a34b

Branch: refs/heads/dev
Commit: 73d4a34b6aea80da77b42f234c26244aa98d1d93
Parents: 3e2507b
Author: zhaojing <zh...@comp.nus.edu.sg>
Authored: Tue May 31 22:23:17 2016 +0800
Committer: zhaojing <zh...@comp.nus.edu.sg>
Committed: Thu Jun 2 11:17:24 2016 +0800

----------------------------------------------------------------------
 src/model/layer/dense.cc |  85 +++++++++++++++
 src/model/layer/dense.h  |  69 ++++++++++++
 src/proto/model.proto    |  15 +++
 test/singa/test_dense.cc | 242 ++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 411 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/73d4a34b/src/model/layer/dense.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/dense.cc b/src/model/layer/dense.cc
new file mode 100644
index 0000000..ebee62a
--- /dev/null
+++ b/src/model/layer/dense.cc
@@ -0,0 +1,85 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "./dense.h"
+#include "singa/model/layer.h"
+#include <vector>
+
+namespace singa {
+using std::vector;
+
+/// Destructor; has no work to do.
+Dense::~Dense() {
+  // weight_ and bias_ are Tensor members held by value (see dense.h), so the
+  // manual deletes once sketched here are not needed.
+}
+/// Configures the layer from conf.dense_conf().
+///
+/// Reads num_output, num_input and transpose from the DenseConf, shapes
+/// weight_ as {num_input, num_output} when transpose is set and as
+/// {num_output, num_input} otherwise, shapes bias_ as {num_output}, and
+/// registers both tensors in param_values_ (weight first, then bias).
+/// No other DenseConf field is consulted here.
+void Dense::Setup(const LayerConf &conf) {
+  Layer::Setup(conf);
+  // Bind by reference; copying the whole protobuf message is unnecessary.
+  const DenseConf &dense_conf = conf.dense_conf();
+  hdim_ = dense_conf.num_output();
+  vdim_ = dense_conf.num_input();
+  transpose_ = dense_conf.transpose();
+  if (transpose_) {
+    weight_.Reshape(Shape{vdim_, hdim_});
+  } else {
+    weight_.Reshape(Shape{hdim_, vdim_});
+  }
+  bias_.Reshape(Shape{hdim_});
+  param_values_.push_back(&weight_);
+  param_values_.push_back(&bias_);
+}
+
+/// \copydoc Layer::Forward(int flag, const Tensor&)
+///
+/// Computes Mult(input, weight_) when transpose_ is set and
+/// Mult(input, weight_.T()) otherwise, then applies AddRow(bias_, &output).
+/// The input is pushed onto buf_ (for Backward) only when the kTrain bit of
+/// flag is set, so inference-only calls do not grow the buffer.
+const Tensor Dense::Forward(int flag, const Tensor &input) {
+  Tensor output;
+  if (transpose_) {
+    output = Mult(input, weight_);
+  } else {
+    output = Mult(input, weight_.T());
+  }
+  AddRow(bias_, &output);
+  // Only a training pass is followed by Backward(), which pops this entry.
+  if ((flag & kTrain) == kTrain) {
+    buf_.push(input);
+  }
+  return output;
+}
+
+/// \copydoc Layer::Backward(int, const Tensor&, const Tensor&);
+///
+/// Pops the most recent input saved by Forward from buf_ and returns
+/// {dx, {dw, db}} where
+///   db = SumRows(grad),
+///   dx = grad * weight_^T, dw = input^T * grad   (transpose_ set)
+///   dx = grad * weight_,   dw = grad^T * input   (otherwise).
+/// NOTE(review): buf_ must be non-empty on entry, i.e. a Forward call that
+/// buffered its input has to precede each Backward call.
+const std::pair<Tensor, vector<Tensor>>
+Dense::Backward(int flag, const Tensor &grad) {
+  Tensor src_data = buf_.top();
+  buf_.pop();
+
+  // db is passed to SumRows as an output argument, so it is shaped up front.
+  Tensor db;
+  db.ResetLike(bias_);
+  SumRows(grad, &db);
+
+  // dx and dw are assigned directly from the Mult() results, so shaping them
+  // with ResetLike beforehand would be wasted work.
+  Tensor dw, dx;
+  if (transpose_) {
+    dx = Mult(grad, weight_.T());
+    dw = Mult(src_data.T(), grad);
+  } else {
+    dx = Mult(grad, weight_);
+    dw = Mult(grad.T(), src_data);
+  }
+
+  // Same order in which Setup registers the parameters: weight, then bias.
+  vector<Tensor> param_grad;
+  param_grad.push_back(dw);
+  param_grad.push_back(db);
+  return std::make_pair(dx, param_grad);
+}
+
+/// Moves both parameter tensors (weight_, then bias_) to the given device.
+void Dense::ToDevice(Device *device) {
+  for (Tensor *param : {&weight_, &bias_}) {
+    param->ToDevice(device);
+  }
+}
+} // namespace singa

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/73d4a34b/src/model/layer/dense.h
----------------------------------------------------------------------
diff --git a/src/model/layer/dense.h b/src/model/layer/dense.h
new file mode 100644
index 0000000..d686a01
--- /dev/null
+++ b/src/model/layer/dense.h
@@ -0,0 +1,69 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef SRC_MODEL_LAYER_DENSE_H_
+#define SRC_MODEL_LAYER_DENSE_H_
+#include <string>
+#include <utility>
+#include <vector>
+#include <stack>
+#include "singa/model/layer.h"
+
+namespace singa {
+/// Fully connected layer holding a weight matrix and a bias vector.
+///
+/// The weight is shaped {num_input, num_output} when transpose() is true and
+/// {num_output, num_input} otherwise (see Dense::Setup in dense.cc).
+class Dense : public Layer {
+ public:
+  ~Dense();
+  /// \copydoc Layer::layer_type()
+  const std::string layer_type() const override { return "Dense"; }
+
+  /// \copydoc Layer::Setup(const LayerConf&);
+  void Setup(const LayerConf& conf) override;
+
+  /// NOTE(review): declared here, but dense.cc contains no definition for it;
+  /// calling it would fail at link time. Confirm whether it is still needed.
+  void SetupParam(const Tensor& input);
+
+  /// \copydoc Layer::Forward(int flag, const Tensor&)
+  const Tensor Forward(int flag, const Tensor& input) override;
+
+  /// \copydoc Layer::Backward(int, const Tensor&, const Tensor&);
+  const std::pair<Tensor, std::vector<Tensor>> Backward(
+      int flag, const Tensor& grad) override;
+
+  void ToDevice(Device* device) override;
+
+  /// Number of outputs (DenseConf::num_output); 0 until Setup() is called.
+  size_t num_output() const { return hdim_; }
+  /// Number of inputs (DenseConf::num_input); 0 until Setup() is called.
+  size_t num_input() const { return vdim_; }
+  /// Value of DenseConf::transpose; false until Setup() is called.
+  bool transpose() const { return transpose_; }
+  const Tensor &weight() const { return weight_; }
+  const Tensor &bias() const { return bias_; }
+
+  /// Resets weight_ to match w (ResetLike) and then copies w's data into it.
+  void set_weight(Tensor w) {
+    weight_.ResetLike(w);
+    weight_.CopyData(w);
+  }
+  /// Resets bias_ to match b (ResetLike) and then copies b's data into it.
+  void set_bias(Tensor b) {
+    bias_.ResetLike(b);
+    bias_.CopyData(b);
+  }
+
+ protected:
+  // Scalars carry default initializers so the accessors above never read an
+  // indeterminate value when called before Setup().
+  size_t batchsize_ = 0;  // not referenced by dense.cc
+  size_t vdim_ = 0;       // number of inputs
+  size_t hdim_ = 0;       // number of outputs
+  bool transpose_ = false;
+  Tensor weight_, bias_;
+  // Inputs pushed by Forward() and popped by Backward().
+  std::stack<Tensor> buf_;
+};
+}  // namespace singa
+#endif  // SRC_MODEL_LAYER_DENSE_H_

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/73d4a34b/src/proto/model.proto
----------------------------------------------------------------------
diff --git a/src/proto/model.proto b/src/proto/model.proto
index 1b18703..75e2be7 100644
--- a/src/proto/model.proto
+++ b/src/proto/model.proto
@@ -208,6 +208,7 @@ message LayerConf {
   // optional ImageDataConf image_data_conf = 115;
   optional InfogainLossConf infogain_loss_conf = 116;
   optional InnerProductConf inner_product_conf = 117;
+  optional DenseConf dense_conf = 150;
   optional LogConf log_conf = 134;
   optional LRNConf lrn_conf = 118;
   // Used in SINGA
@@ -573,6 +574,20 @@ message InnerProductConf {
   optional int32 axis = 5 [default = 1];
 }
 
+// Hyper-parameters of the Dense (fully connected) layer.
+// NOTE(review): Dense::Setup in dense.cc reads only num_output, num_input and
+// transpose; the remaining fields are declared but not consulted there.
+message DenseConf {
+  optional uint32 num_output = 1; // Number of outputs of the layer
+  optional bool bias_term = 2 [default = true]; // Whether the layer has a bias term
+  optional FillerConf weight_filler = 3; // Filler used for the weight
+  optional FillerConf bias_filler = 4; // Filler used for the bias
+
+  // The first axis to be lumped into a single inner product computation;
+  // all preceding axes are retained in the output.
+  // May be negative to index from the end (e.g., -1 for the last axis).
+  optional int32 axis = 5 [default = 1];
+  optional uint32 num_input = 20; // Number of inputs of the layer
+  // If true, Dense::Setup shapes the weight as {num_input, num_output} and
+  // Forward multiplies by it directly; if false, the weight is shaped
+  // {num_output, num_input} and Forward multiplies by its transpose.
+  optional bool transpose = 21 [default = false];
+}
+
 // Message that stores hyper-parameters used by LogLayer
 message LogConf {
   // LogLayer computes outputs y = log_base(shift + scale * x), for base > 0.

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/73d4a34b/test/singa/test_dense.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_dense.cc b/test/singa/test_dense.cc
new file mode 100644
index 0000000..6d136af
--- /dev/null
+++ b/test/singa/test_dense.cc
@@ -0,0 +1,242 @@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*   http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied.  See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+#include "../src/model/layer/dense.h"
+
+#include "gtest/gtest.h"
+
+using singa::Dense;
+// Setup() must report the layer type and pick up the sizes from DenseConf.
+TEST(Dense, Setup) {
+  Dense layer;
+  EXPECT_EQ("Dense", layer.layer_type());
+
+  singa::LayerConf layer_conf;
+  singa::DenseConf *dense_conf = layer_conf.mutable_dense_conf();
+  dense_conf->set_num_output(3);
+  dense_conf->set_num_input(2);
+  dense_conf->set_transpose(false);
+  layer.Setup(layer_conf);
+
+  EXPECT_EQ(3, layer.num_output());
+  EXPECT_EQ(2, layer.num_input());
+}
+
+// Forward pass with transpose=false on tensors created without an explicit
+// device. Expects out[i][j] = sum_k x[i][k] * we[j][k] + bia[j].
+TEST(Dense, ForwardCpp) {
+  Dense dense;
+
+  singa::LayerConf conf;
+  singa::DenseConf *denseconf = conf.mutable_dense_conf();
+  denseconf->set_num_input(2);
+  denseconf->set_num_output(3);
+  denseconf->set_transpose(false);
+  dense.Setup(conf);
+
+  const size_t batchsize = 3, vdim = 2, hdim = 3;
+  const float x[batchsize * vdim] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
+  singa::Tensor in(singa::Shape{batchsize, vdim});
+  in.CopyDataFromHostPtr(x, batchsize * vdim);
+
+  // Weight laid out row-major as {hdim, vdim}, matching transpose=false.
+  const float we[hdim * vdim] = {1.0f, 1.0f, 1.0f, 2.0f, 0.0f, 1.0f};
+  singa::Tensor weight(singa::Shape{hdim, vdim});
+  weight.CopyDataFromHostPtr(we, hdim * vdim);
+
+  const float bia[hdim] = {1.0f, 1.0f, 1.0f};
+  singa::Tensor bias(singa::Shape{hdim});
+  bias.CopyDataFromHostPtr(bia, hdim);
+
+  dense.set_weight(weight);
+  dense.set_bias(bias);
+
+  singa::Tensor out1 = dense.Forward(singa::kTrain, in);
+  const float *outptr1 = out1.data<const float *>();
+  EXPECT_EQ(batchsize * hdim, out1.Size());
+  for (size_t i = 0; i < batchsize; i++) {
+    for (size_t j = 0; j < hdim; j++) {
+      float expected = 0.0f;
+      for (size_t k = 0; k < vdim; k++)
+        expected += x[i * vdim + k] * we[j * vdim + k];
+      expected += bia[j];
+      EXPECT_FLOAT_EQ(expected, outptr1[i * hdim + j]);
+    }
+  }
+}
+
+// Same check as ForwardCpp, but input, weight and bias live on a CudaGPU
+// device; the output is moved to a CppCPU device before it is inspected.
+// NOTE(review): this test is not wrapped in any CUDA build guard; confirm
+// how CPU-only builds are expected to handle it.
+TEST(Dense, ForwardCuda) {
+  Dense layer;
+
+  singa::LayerConf layer_conf;
+  singa::DenseConf *dense_conf = layer_conf.mutable_dense_conf();
+  dense_conf->set_num_input(2);
+  dense_conf->set_num_output(3);
+  dense_conf->set_transpose(false);
+  layer.Setup(layer_conf);
+
+  const size_t batchsize = 3, vdim = 2, hdim = 3;
+  const float x[batchsize * vdim] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
+  singa::CudaGPU cuda(0, 1);
+  singa::Tensor in(singa::Shape{batchsize, vdim}, &cuda);
+  in.CopyDataFromHostPtr(x, batchsize * vdim);
+
+  // Weight laid out row-major as {hdim, vdim}, matching transpose=false.
+  const float we[hdim * vdim] = {1.0f, 1.0f, 1.0f, 2.0f, 0.0f, 1.0f};
+  singa::Tensor weight(singa::Shape{hdim, vdim}, &cuda);
+  weight.CopyDataFromHostPtr(we, hdim * vdim);
+
+  const float bia[hdim] = {1.0f, 1.0f, 1.0f};
+  singa::Tensor bias(singa::Shape{hdim}, &cuda);
+  bias.CopyDataFromHostPtr(bia, hdim);
+
+  layer.set_weight(weight);
+  layer.set_bias(bias);
+
+  singa::Tensor out1 = layer.Forward(singa::kTrain, in);
+  singa::CppCPU host(0, 1);
+  out1.ToDevice(&host);
+  const float *result = out1.data<const float *>();
+  EXPECT_EQ(9, out1.Size());
+  for (size_t row = 0; row < batchsize; ++row) {
+    for (size_t col = 0; col < hdim; ++col) {
+      float expected = 0.0f;
+      for (size_t k = 0; k < vdim; ++k)
+        expected += x[row * vdim + k] * we[col * vdim + k];
+      expected += bia[col];
+      EXPECT_FLOAT_EQ(expected, result[row * hdim + col]);
+    }
+  }
+}
+
+// Backward pass with transpose=false on tensors created without an explicit
+// device. Expects
+//   dx[i][j] = sum_k dy[i][k] * we[k][j]
+//   dw[i][j] = sum_b dy[b][i] * x[b][j]
+//   db[i]    = sum_b dy[b][i]
+// No CudaGPU is constructed here, so this test does not need a GPU.
+TEST(Dense, BackwardCpp) {
+  Dense dense;
+
+  singa::LayerConf conf;
+  singa::DenseConf *denseconf = conf.mutable_dense_conf();
+  denseconf->set_num_input(2);
+  denseconf->set_num_output(3);
+  denseconf->set_transpose(false);
+  dense.Setup(conf);
+
+  const size_t batchsize = 3, vdim = 2, hdim = 3;
+  const float x[batchsize * vdim] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
+  singa::Tensor in(singa::Shape{batchsize, vdim});
+  in.CopyDataFromHostPtr(x, batchsize * vdim);
+
+  // Weight laid out row-major as {hdim, vdim}, matching transpose=false.
+  const float we[hdim * vdim] = {1.0f, 1.0f, 1.0f, 2.0f, 0.0f, 1.0f};
+  singa::Tensor weight(singa::Shape{hdim, vdim});
+  weight.CopyDataFromHostPtr(we, hdim * vdim);
+
+  const float bia[hdim] = {1.0f, 1.0f, 1.0f};
+  singa::Tensor bias(singa::Shape{hdim});
+  bias.CopyDataFromHostPtr(bia, hdim);
+
+  dense.set_weight(weight);
+  dense.set_bias(bias);
+
+  // Forward must run first so that Backward has a buffered input to pop.
+  singa::Tensor out1 = dense.Forward(singa::kTrain, in);
+
+  // Gradient w.r.t. the layer output, shaped {batchsize, hdim}.
+  const float dy[batchsize * hdim] = {1.0f, 1.0f, 1.0f, 2.0f, 2.0f,
+                                      2.0f, 3.0f, 3.0f, 3.0f};
+  singa::Tensor grad(singa::Shape{batchsize, hdim});
+  grad.CopyDataFromHostPtr(dy, batchsize * hdim);
+
+  const auto ret = dense.Backward(singa::kTrain, grad);
+  singa::Tensor in_grad = ret.first;
+  singa::Tensor dweight = ret.second.at(0);
+  singa::Tensor dbias = ret.second.at(1);
+
+  const float *dx = in_grad.data<const float *>();
+  EXPECT_EQ(batchsize * vdim, in_grad.Size());
+  for (size_t i = 0; i < batchsize; i++) {
+    for (size_t j = 0; j < vdim; j++) {
+      float expected = 0.0f;
+      for (size_t k = 0; k < hdim; k++)
+        expected += dy[i * hdim + k] * we[k * vdim + j];
+      EXPECT_FLOAT_EQ(expected, dx[i * vdim + j]);
+    }
+  }
+
+  const float *dweightx = dweight.data<const float *>();
+  EXPECT_EQ(hdim * vdim, dweight.Size());
+  for (size_t i = 0; i < hdim; i++) {
+    for (size_t j = 0; j < vdim; j++) {
+      float expected = 0.0f;
+      for (size_t b = 0; b < batchsize; b++)
+        expected += dy[b * hdim + i] * x[b * vdim + j];
+      EXPECT_FLOAT_EQ(expected, dweightx[i * vdim + j]);
+    }
+  }
+
+  const float *dbiasx = dbias.data<const float *>();
+  EXPECT_EQ(hdim, dbias.Size());
+  for (size_t i = 0; i < hdim; i++) {
+    float expected = 0.0f;
+    for (size_t b = 0; b < batchsize; b++)
+      expected += dy[b * hdim + i];
+    EXPECT_FLOAT_EQ(expected, dbiasx[i]);
+  }
+}
+
+// Same check as BackwardCpp, but all tensors live on a CudaGPU device; each
+// gradient is moved to a CppCPU device before it is inspected.
+// NOTE(review): this test is not wrapped in any CUDA build guard; confirm
+// how CPU-only builds are expected to handle it.
+TEST(Dense, BackwardCuda) {
+  Dense layer;
+
+  singa::LayerConf layer_conf;
+  singa::DenseConf *dense_conf = layer_conf.mutable_dense_conf();
+  dense_conf->set_num_input(2);
+  dense_conf->set_num_output(3);
+  dense_conf->set_transpose(false);
+  layer.Setup(layer_conf);
+
+  const size_t batchsize = 3, vdim = 2, hdim = 3;
+  const float x[batchsize * vdim] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
+  singa::CudaGPU cuda(0, 1);
+  singa::Tensor in(singa::Shape{batchsize, vdim}, &cuda);
+  in.CopyDataFromHostPtr(x, batchsize * vdim);
+
+  // Weight laid out row-major as {hdim, vdim}, matching transpose=false.
+  const float we[hdim * vdim] = {1.0f, 1.0f, 1.0f, 2.0f, 0.0f, 1.0f};
+  singa::Tensor weight(singa::Shape{hdim, vdim}, &cuda);
+  weight.CopyDataFromHostPtr(we, hdim * vdim);
+
+  const float bia[hdim] = {1.0f, 1.0f, 1.0f};
+  singa::Tensor bias(singa::Shape{hdim}, &cuda);
+  bias.CopyDataFromHostPtr(bia, hdim);
+
+  layer.set_weight(weight);
+  layer.set_bias(bias);
+
+  // Forward must run first so that Backward has a buffered input to pop.
+  singa::Tensor out1 = layer.Forward(singa::kTrain, in);
+
+  // Gradient w.r.t. the layer output, shaped {batchsize, hdim}.
+  const float dy[batchsize * hdim] = {1.0f, 1.0f, 1.0f, 2.0f, 2.0f,
+                                      2.0f, 3.0f, 3.0f, 3.0f};
+  singa::Tensor grad(singa::Shape{batchsize, hdim}, &cuda);
+  grad.CopyDataFromHostPtr(dy, batchsize * hdim);
+
+  const auto ret = layer.Backward(singa::kTrain, grad);
+  singa::CppCPU host(0, 1);
+  singa::Tensor in_grad = ret.first;
+  singa::Tensor dweight = ret.second.at(0);
+  singa::Tensor dbias = ret.second.at(1);
+
+  // dx[r][c] = sum_k dy[r][k] * we[k][c]
+  in_grad.ToDevice(&host);
+  const float *dx = in_grad.data<const float *>();
+  EXPECT_EQ(6, in_grad.Size());
+  for (size_t r = 0; r < batchsize; ++r) {
+    for (size_t c = 0; c < vdim; ++c) {
+      float expected = 0.0f;
+      for (size_t k = 0; k < hdim; ++k)
+        expected += dy[r * hdim + k] * we[k * vdim + c];
+      EXPECT_FLOAT_EQ(expected, dx[r * vdim + c]);
+    }
+  }
+
+  // dw[r][c] = sum_b dy[b][r] * x[b][c]
+  dweight.ToDevice(&host);
+  const float *dweightx = dweight.data<const float *>();
+  EXPECT_EQ(6, dweight.Size());
+  for (size_t r = 0; r < hdim; ++r) {
+    for (size_t c = 0; c < vdim; ++c) {
+      float expected = 0.0f;
+      for (size_t b = 0; b < batchsize; ++b)
+        expected += dy[b * hdim + r] * x[b * vdim + c];
+      EXPECT_FLOAT_EQ(expected, dweightx[r * vdim + c]);
+    }
+  }
+
+  // db[r] = sum_b dy[b][r]
+  dbias.ToDevice(&host);
+  const float *dbiasx = dbias.data<const float *>();
+  EXPECT_EQ(3, dbias.Size());
+  for (size_t r = 0; r < hdim; ++r) {
+    float expected = 0.0f;
+    for (size_t b = 0; b < batchsize; ++b)
+      expected += dy[b * hdim + r];
+    EXPECT_FLOAT_EQ(expected, dbiasx[r]);
+  }
+}