You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@singa.apache.org by wa...@apache.org on 2016/06/03 07:48:59 UTC
[54/60] incubator-singa git commit: SINGA-188 Add Dense layer
SINGA-188 Add Dense layer
Add implementation for dense layer
Add test files for dense layer, both cpp version and cuda version
Pass all tests
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/73d4a34b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/73d4a34b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/73d4a34b
Branch: refs/heads/dev
Commit: 73d4a34b6aea80da77b42f234c26244aa98d1d93
Parents: 3e2507b
Author: zhaojing <zh...@comp.nus.edu.sg>
Authored: Tue May 31 22:23:17 2016 +0800
Committer: zhaojing <zh...@comp.nus.edu.sg>
Committed: Thu Jun 2 11:17:24 2016 +0800
----------------------------------------------------------------------
src/model/layer/dense.cc | 85 +++++++++++++++
src/model/layer/dense.h | 69 ++++++++++++
src/proto/model.proto | 15 +++
test/singa/test_dense.cc | 242 ++++++++++++++++++++++++++++++++++++++++++
4 files changed, 411 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/73d4a34b/src/model/layer/dense.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/dense.cc b/src/model/layer/dense.cc
new file mode 100644
index 0000000..ebee62a
--- /dev/null
+++ b/src/model/layer/dense.cc
@@ -0,0 +1,85 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "./dense.h"
+#include "singa/model/layer.h"
+#include <vector>
+
+namespace singa {
+using std::vector;
+
+Dense::~Dense() {
+ // weight_ and bias_ are Tensor members held by value (not pointers), so
+ // there is nothing to delete here; their own destructors clean them up.
+}
+/// Configures the layer from the dense_conf field of \p conf.
+///
+/// Reads num_output, num_input and transpose; shapes the weight as
+/// (num_input, num_output) when transpose is set and as
+/// (num_output, num_input) otherwise; shapes the bias as (num_output); and
+/// appends both tensors to param_values_ (weight first, then bias).
+void Dense::Setup(const LayerConf &conf) {
+  Layer::Setup(conf);
+  // Bind by const reference: a copy of the protobuf message is not needed.
+  const DenseConf &dense_conf = conf.dense_conf();
+  hdim_ = dense_conf.num_output();
+  vdim_ = dense_conf.num_input();
+  transpose_ = dense_conf.transpose();
+  if (transpose_)
+    weight_.Reshape(Shape{vdim_, hdim_});
+  else
+    weight_.Reshape(Shape{hdim_, vdim_});
+  bias_.Reshape(Shape{hdim_});
+  // NOTE(review): these are appended on every call, so a second Setup() on
+  // the same object would register the parameters twice unless
+  // Layer::Setup() clears param_values_ -- confirm.
+  param_values_.push_back(&weight_);
+  param_values_.push_back(&bias_);
+}
+
+/// \copydoc Layer::Forward(int flag, const Tensor&)
+///
+/// Computes Mult(input, weight_) when transpose_ is set and
+/// Mult(input, weight_.T()) otherwise, then applies AddRow(bias_, ...) to
+/// the product. When \p flag has kTrain set, the input is saved on buf_ for
+/// the matching Backward() call.
+const Tensor Dense::Forward(int flag, const Tensor &input) {
+  Tensor output;
+  if (transpose_)
+    output = Mult(input, weight_);
+  else
+    output = Mult(input, weight_.T());
+  AddRow(bias_, &output);
+  // Only Backward() pops buf_, so buffering inputs of non-training passes
+  // would make the stack grow without bound.
+  if ((flag & kTrain) == kTrain)
+    buf_.push(input);
+  return output;
+}
+
+/// \copydoc Layer::Backward(int, const Tensor&)
+///
+/// Pops the most recently buffered input from buf_ and returns
+/// {dx, {dw, db}}: the gradient with respect to that input, followed by the
+/// gradients of the weight and of the bias (in that order).
+const std::pair<Tensor, vector<Tensor>>
+Dense::Backward(int flag, const Tensor &grad) {
+  // Precondition: buf_ holds an input that has not been consumed yet;
+  // top()/pop() on an empty std::stack is undefined behaviour.
+  const Tensor src_data = buf_.top();
+  buf_.pop();
+
+  // Only db is pre-shaped, because SumRows writes through the pointer it is
+  // given. dx and dw are assigned from the Mult results below, so shaping
+  // them first would only create tensors that are immediately discarded.
+  Tensor db;
+  db.ResetLike(bias_);
+  SumRows(grad, &db);
+
+  Tensor dx, dw;
+  if (transpose_) {
+    dx = Mult(grad, weight_.T());
+    dw = Mult(src_data.T(), grad);
+  } else {
+    dx = Mult(grad, weight_);
+    dw = Mult(grad.T(), src_data);
+  }
+
+  vector<Tensor> param_grad;
+  param_grad.push_back(dw);
+  param_grad.push_back(db);
+  return std::make_pair(dx, param_grad);
+}
+
+/// Forwards ToDevice(device) to the weight and then to the bias tensor.
+void Dense::ToDevice(Device *device) {
+  Tensor *const params[] = {&weight_, &bias_};
+  for (Tensor *param : params)
+    param->ToDevice(device);
+}
+} // namespace singa
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/73d4a34b/src/model/layer/dense.h
----------------------------------------------------------------------
diff --git a/src/model/layer/dense.h b/src/model/layer/dense.h
new file mode 100644
index 0000000..d686a01
--- /dev/null
+++ b/src/model/layer/dense.h
@@ -0,0 +1,69 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef SRC_MODEL_LAYER_DENSE_H_
+#define SRC_MODEL_LAYER_DENSE_H_
+#include <string>
+#include <utility>
+#include <vector>
+#include <stack>
+#include "singa/model/layer.h"
+
+namespace singa {
+/// Dense layer holding a 2-D weight tensor and a 1-D bias tensor.
+///
+/// The layer is configured through the dense_conf field of LayerConf
+/// (num_input, num_output, transpose). Inputs saved by Forward() are kept on
+/// buf_ until Backward() pops them.
+class Dense : public Layer {
+ public:
+  ~Dense();
+
+  /// \copydoc Layer::layer_type()
+  const std::string layer_type() const override { return "Dense"; }
+
+  /// \copydoc Layer::Setup(const LayerConf&);
+  void Setup(const LayerConf& conf) override;
+
+  // NOTE(review): no definition of SetupParam is visible next to the other
+  // member definitions -- confirm it is implemented before calling it.
+  void SetupParam(const Tensor& input);
+
+  /// \copydoc Layer::Forward(int flag, const Tensor&)
+  const Tensor Forward(int flag, const Tensor& input) override;
+
+  /// \copydoc Layer::Backward(int, const Tensor&)
+  const std::pair<Tensor, std::vector<Tensor>> Backward(
+      int flag, const Tensor& grad) override;
+
+  void ToDevice(Device* device) override;
+
+  /// Number of outputs (hdim_); 0 until Setup() has run.
+  size_t num_output() const { return hdim_; }
+  /// Number of inputs (vdim_); 0 until Setup() has run.
+  size_t num_input() const { return vdim_; }
+  /// Whether the weight is stored as (num_input, num_output).
+  bool transpose() const { return transpose_; }
+  const Tensor& weight() const { return weight_; }
+  const Tensor& bias() const { return bias_; }
+
+  /// Re-shapes the weight like \p w and copies the data of \p w into it.
+  void set_weight(Tensor w) {
+    weight_.ResetLike(w);
+    weight_.CopyData(w);
+  }
+  /// Re-shapes the bias like \p b and copies the data of \p b into it.
+  void set_bias(Tensor b) {
+    bias_.ResetLike(b);
+    bias_.CopyData(b);
+  }
+
+ protected:
+  // Initialised in-class so the accessors never read indeterminate values
+  // when they are called before Setup().
+  size_t batchsize_ = 0;
+  size_t vdim_ = 0;
+  size_t hdim_ = 0;
+  bool transpose_ = false;
+  Tensor weight_, bias_;
+  /// Inputs pushed by Forward() and popped by Backward().
+  std::stack<Tensor> buf_;
+};
+} // namespace singa
+#endif // SRC_MODEL_LAYER_DENSE_H_
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/73d4a34b/src/proto/model.proto
----------------------------------------------------------------------
diff --git a/src/proto/model.proto b/src/proto/model.proto
index 1b18703..75e2be7 100644
--- a/src/proto/model.proto
+++ b/src/proto/model.proto
@@ -208,6 +208,7 @@ message LayerConf {
// optional ImageDataConf image_data_conf = 115;
optional InfogainLossConf infogain_loss_conf = 116;
optional InnerProductConf inner_product_conf = 117;
+ optional DenseConf dense_conf = 150;
optional LogConf log_conf = 134;
optional LRNConf lrn_conf = 118;
// Used in SINGA
@@ -573,6 +574,20 @@ message InnerProductConf {
optional int32 axis = 5 [default = 1];
}
+message DenseConf {
+ optional uint32 num_output = 1; // Number of outputs of the layer
+ optional bool bias_term = 2 [default = true]; // Whether to have a bias term
+ optional FillerConf weight_filler = 3; // The filler for the weight
+ optional FillerConf bias_filler = 4; // The filler for the bias
+
+ // The first axis to be lumped into a single inner product computation;
+ // all preceding axes are retained in the output.
+ // May be negative to index from the end (e.g., -1 for the last axis).
+ optional int32 axis = 5 [default = 1];
+ optional uint32 num_input = 20; // Number of inputs of the layer
+ optional bool transpose = 21 [default = false]; // If true, the weight is (num_input, num_output) instead of (num_output, num_input)
+}
+
// Message that stores hyper-parameters used by LogLayer
message LogConf {
// LogLayer computes outputs y = log_base(shift + scale * x), for base > 0.
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/73d4a34b/test/singa/test_dense.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_dense.cc b/test/singa/test_dense.cc
new file mode 100644
index 0000000..6d136af
--- /dev/null
+++ b/test/singa/test_dense.cc
@@ -0,0 +1,242 @@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+#include "../src/model/layer/dense.h"
+
+#include "gtest/gtest.h"
+
+using singa::Dense;
+// Setup() must expose the configured sizes and transpose flag through the
+// public accessors.
+TEST(Dense, Setup) {
+  Dense dense;
+  EXPECT_EQ("Dense", dense.layer_type());
+
+  singa::LayerConf conf;
+  singa::DenseConf *denseconf = conf.mutable_dense_conf();
+  denseconf->set_num_input(2);
+  denseconf->set_num_output(3);
+  denseconf->set_transpose(false);
+  dense.Setup(conf);
+
+  // Unsigned literals: the accessors return size_t, so comparing against a
+  // signed int would be a signed/unsigned comparison.
+  EXPECT_EQ(3u, dense.num_output());
+  EXPECT_EQ(2u, dense.num_input());
+  EXPECT_FALSE(dense.transpose());
+}
+
+// Forward pass with default-constructed (no explicit device) tensors and a
+// non-transposed weight: out[i][j] must equal dot(x[i], we[j]) + bia[j].
+TEST(Dense, ForwardCpp) {
+  Dense dense;
+
+  singa::LayerConf conf;
+  singa::DenseConf *denseconf = conf.mutable_dense_conf();
+  denseconf->set_num_input(2);
+  denseconf->set_num_output(3);
+  denseconf->set_transpose(false);
+  dense.Setup(conf);
+
+  const size_t batchsize = 3, vdim = 2, hdim = 3;
+  const float x[batchsize * vdim] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
+  singa::Tensor in(singa::Shape{batchsize, vdim});
+  in.CopyDataFromHostPtr(x, batchsize * vdim);
+
+  // set weight
+  const float we[hdim * vdim] = {1.0f, 1.0f, 1.0f, 2.0f, 0.0f, 1.0f};
+  singa::Tensor weight(singa::Shape{hdim, vdim});
+  weight.CopyDataFromHostPtr(we, hdim * vdim);
+
+  const float bia[hdim] = {1.0f, 1.0f, 1.0f};
+  singa::Tensor bias(singa::Shape{hdim});
+  bias.CopyDataFromHostPtr(bia, hdim);
+
+  dense.set_weight(weight);
+  dense.set_bias(bias);
+
+  singa::Tensor out1 = dense.Forward(singa::kTrain, in);
+  const float *outptr1 = out1.data<const float *>();
+  EXPECT_EQ(batchsize * hdim, out1.Size());
+  // Index with the named dimensions instead of repeating 2 and 3 literally.
+  for (size_t i = 0; i < batchsize; ++i) {
+    for (size_t j = 0; j < hdim; ++j) {
+      const float expected = x[i * vdim + 0] * we[j * vdim + 0] +
+                             x[i * vdim + 1] * we[j * vdim + 1] + bia[j];
+      EXPECT_FLOAT_EQ(expected, outptr1[i * hdim + j]);
+    }
+  }
+}
+
+// Forward pass with every tensor created on a CudaGPU device; the output is
+// moved to a CppCPU device before its values are compared on the host.
+TEST(Dense, ForwardCuda) {
+  Dense dense;
+
+  singa::LayerConf layer_conf;
+  singa::DenseConf *dense_conf = layer_conf.mutable_dense_conf();
+  dense_conf->set_num_input(2);
+  dense_conf->set_num_output(3);
+  dense_conf->set_transpose(false);
+  dense.Setup(layer_conf);
+
+  const size_t batchsize = 3, vdim = 2, hdim = 3;
+  const float x[batchsize * vdim] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
+  singa::CudaGPU cuda(0, 1);
+  singa::Tensor in(singa::Shape{batchsize, vdim}, &cuda);
+  in.CopyDataFromHostPtr(x, batchsize * vdim);
+
+  // Layer parameters, created on the same device as the input.
+  const float we[hdim * vdim] = {1.0f, 1.0f, 1.0f, 2.0f, 0.0f, 1.0f};
+  singa::Tensor weight(singa::Shape{hdim, vdim}, &cuda);
+  weight.CopyDataFromHostPtr(we, hdim * vdim);
+
+  const float bia[hdim] = {1.0f, 1.0f, 1.0f};
+  singa::Tensor bias(singa::Shape{hdim}, &cuda);
+  bias.CopyDataFromHostPtr(bia, hdim);
+
+  dense.set_weight(weight);
+  dense.set_bias(bias);
+
+  singa::Tensor out1 = dense.Forward(singa::kTrain, in);
+  singa::CppCPU host(0, 1);
+  out1.ToDevice(&host);
+  const float *outptr1 = out1.data<const float *>();
+  EXPECT_EQ(batchsize * hdim, out1.Size());
+  for (size_t row = 0; row < batchsize; ++row) {
+    for (size_t col = 0; col < hdim; ++col) {
+      const float expected = x[row * vdim + 0] * we[col * vdim + 0] +
+                             x[row * vdim + 1] * we[col * vdim + 1] +
+                             bia[col];
+      EXPECT_FLOAT_EQ(expected, outptr1[row * hdim + col]);
+    }
+  }
+}
+
+// Backward pass with default-constructed (no explicit device) tensors and a
+// non-transposed weight. Checks dx = dy * W, dW = dy^T * x and
+// db = column sums of dy against values computed on the host.
+TEST(Dense, BackwardCpp) {
+  Dense dense;
+
+  singa::LayerConf conf;
+  singa::DenseConf *denseconf = conf.mutable_dense_conf();
+  denseconf->set_num_input(2);
+  denseconf->set_num_output(3);
+  denseconf->set_transpose(false);
+  dense.Setup(conf);
+
+  // No CudaGPU is created here: nothing in this test uses one, and creating
+  // it would tie the CPU test to GPU availability.
+  const size_t batchsize = 3, vdim = 2, hdim = 3;
+  const float x[batchsize * vdim] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
+  singa::Tensor in(singa::Shape{batchsize, vdim});
+  in.CopyDataFromHostPtr(x, batchsize * vdim);
+
+  // set weight
+  const float we[hdim * vdim] = {1.0f, 1.0f, 1.0f, 2.0f, 0.0f, 1.0f};
+  singa::Tensor weight(singa::Shape{hdim, vdim});
+  weight.CopyDataFromHostPtr(we, hdim * vdim);
+
+  const float bia[hdim] = {1.0f, 1.0f, 1.0f};
+  singa::Tensor bias(singa::Shape{hdim});
+  bias.CopyDataFromHostPtr(bia, hdim);
+
+  dense.set_weight(weight);
+  dense.set_bias(bias);
+
+  // Forward must run first so that Backward has a buffered input to pop.
+  singa::Tensor out1 = dense.Forward(singa::kTrain, in);
+
+  // grad
+  const float dy[batchsize * hdim] = {1.0f, 1.0f, 1.0f, 2.0f, 2.0f,
+                                      2.0f, 3.0f, 3.0f, 3.0f};
+  singa::Tensor grad(singa::Shape{batchsize, hdim});
+  grad.CopyDataFromHostPtr(dy, batchsize * hdim);
+
+  const auto ret = dense.Backward(singa::kTrain, grad);
+  singa::Tensor in_grad = ret.first;
+  singa::Tensor dweight = ret.second.at(0);
+  singa::Tensor dbias = ret.second.at(1);
+
+  // dx[i][j] = sum_k dy[i][k] * we[k][j]
+  const float *dx = in_grad.data<const float *>();
+  EXPECT_EQ(batchsize * vdim, in_grad.Size());
+  for (size_t i = 0; i < batchsize; ++i) {
+    for (size_t j = 0; j < vdim; ++j) {
+      float expected = 0.0f;
+      for (size_t k = 0; k < hdim; ++k)
+        expected += dy[i * hdim + k] * we[k * vdim + j];
+      EXPECT_FLOAT_EQ(expected, dx[i * vdim + j]);
+    }
+  }
+
+  // dW[i][j] = sum_b dy[b][i] * x[b][j]
+  const float *dweightx = dweight.data<const float *>();
+  EXPECT_EQ(hdim * vdim, dweight.Size());
+  for (size_t i = 0; i < hdim; ++i) {
+    for (size_t j = 0; j < vdim; ++j) {
+      float expected = 0.0f;
+      for (size_t b = 0; b < batchsize; ++b)
+        expected += dy[b * hdim + i] * x[b * vdim + j];
+      EXPECT_FLOAT_EQ(expected, dweightx[i * vdim + j]);
+    }
+  }
+
+  // db[i] = sum_b dy[b][i]
+  const float *dbiasx = dbias.data<const float *>();
+  EXPECT_EQ(hdim, dbias.Size());
+  for (size_t i = 0; i < hdim; ++i) {
+    float expected = 0.0f;
+    for (size_t b = 0; b < batchsize; ++b)
+      expected += dy[b * hdim + i];
+    EXPECT_FLOAT_EQ(expected, dbiasx[i]);
+  }
+}
+
+// Backward pass with every tensor created on a CudaGPU device. Each returned
+// gradient is moved to a CppCPU device before it is compared with values
+// computed on the host.
+TEST(Dense, BackwardCuda) {
+  Dense dense;
+
+  singa::LayerConf layer_conf;
+  singa::DenseConf *dense_conf = layer_conf.mutable_dense_conf();
+  dense_conf->set_num_input(2);
+  dense_conf->set_num_output(3);
+  dense_conf->set_transpose(false);
+  dense.Setup(layer_conf);
+
+  const size_t batchsize = 3, vdim = 2, hdim = 3;
+  const float x[batchsize * vdim] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
+  singa::CudaGPU cuda(0, 1);
+  singa::Tensor in(singa::Shape{batchsize, vdim}, &cuda);
+  in.CopyDataFromHostPtr(x, batchsize * vdim);
+
+  // Layer parameters, created on the same device as the input.
+  const float we[hdim * vdim] = {1.0f, 1.0f, 1.0f, 2.0f, 0.0f, 1.0f};
+  singa::Tensor weight(singa::Shape{hdim, vdim}, &cuda);
+  weight.CopyDataFromHostPtr(we, hdim * vdim);
+
+  const float bia[hdim] = {1.0f, 1.0f, 1.0f};
+  singa::Tensor bias(singa::Shape{hdim}, &cuda);
+  bias.CopyDataFromHostPtr(bia, hdim);
+
+  dense.set_weight(weight);
+  dense.set_bias(bias);
+
+  // Forward must run first so that Backward has a buffered input to pop.
+  singa::Tensor out1 = dense.Forward(singa::kTrain, in);
+
+  // Upstream gradient.
+  const float dy[batchsize * hdim] = {1.0f, 1.0f, 1.0f, 2.0f, 2.0f,
+                                      2.0f, 3.0f, 3.0f, 3.0f};
+  singa::Tensor grad(singa::Shape{batchsize, hdim}, &cuda);
+  grad.CopyDataFromHostPtr(dy, batchsize * hdim);
+
+  const auto ret = dense.Backward(singa::kTrain, grad);
+  singa::CppCPU host(0, 1);
+  singa::Tensor in_grad = ret.first;
+  singa::Tensor dweight = ret.second.at(0);
+  singa::Tensor dbias = ret.second.at(1);
+
+  // dx[row][col] = sum_k dy[row][k] * we[k][col]
+  in_grad.ToDevice(&host);
+  const float *dx = in_grad.data<const float *>();
+  EXPECT_EQ(batchsize * vdim, in_grad.Size());
+  for (size_t row = 0; row < batchsize; ++row) {
+    for (size_t col = 0; col < vdim; ++col) {
+      float expected = 0.0f;
+      for (size_t k = 0; k < hdim; ++k)
+        expected += dy[row * hdim + k] * we[k * vdim + col];
+      EXPECT_FLOAT_EQ(expected, dx[row * vdim + col]);
+    }
+  }
+
+  // dW[row][col] = sum_b dy[b][row] * x[b][col]
+  dweight.ToDevice(&host);
+  const float *dweightx = dweight.data<const float *>();
+  EXPECT_EQ(hdim * vdim, dweight.Size());
+  for (size_t row = 0; row < hdim; ++row) {
+    for (size_t col = 0; col < vdim; ++col) {
+      float expected = 0.0f;
+      for (size_t b = 0; b < batchsize; ++b)
+        expected += dy[b * hdim + row] * x[b * vdim + col];
+      EXPECT_FLOAT_EQ(expected, dweightx[row * vdim + col]);
+    }
+  }
+
+  // db[col] = sum_b dy[b][col]
+  dbias.ToDevice(&host);
+  const float *dbiasx = dbias.data<const float *>();
+  EXPECT_EQ(hdim, dbias.Size());
+  for (size_t col = 0; col < hdim; ++col) {
+    float expected = 0.0f;
+    for (size_t b = 0; b < batchsize; ++b)
+      expected += dy[b * hdim + col];
+    EXPECT_FLOAT_EQ(expected, dbiasx[col]);
+  }
+}