Posted to commits@singa.apache.org by wa...@apache.org on 2016/01/05 19:10:30 UTC
[01/10] incubator-singa git commit: SINGA-120 - Implemented GRU and BPTT
1) Added the implementation of the GRU model;
2) Added a test for GRU functions
Repository: incubator-singa
Updated Branches:
refs/heads/master bb75a0be5 -> a2f4e4680
SINGA-120 - Implemented GRU and BPTT
1) Added the implementation of the GRU model;
2) Added a test for GRU functions
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/ddf4e79a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/ddf4e79a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/ddf4e79a
Branch: refs/heads/master
Commit: ddf4e79aff5d8616f6758df18056b9443761405d
Parents: bb75a0b
Author: Ju Fan <fa...@gmail.com>
Authored: Fri Jan 1 10:41:59 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Wed Jan 6 01:50:48 2016 +0800
----------------------------------------------------------------------
src/neuralnet/neuron_layer/gru.cc | 275 +++++++++++++++++++++++++++++++
src/test/test_gru_layer.cc | 286 +++++++++++++++++++++++++++++++++
2 files changed, 561 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ddf4e79a/src/neuralnet/neuron_layer/gru.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuron_layer/gru.cc b/src/neuralnet/neuron_layer/gru.cc
new file mode 100644
index 0000000..45d7873
--- /dev/null
+++ b/src/neuralnet/neuron_layer/gru.cc
@@ -0,0 +1,275 @@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+#include "singa/neuralnet/neuron_layer.h"
+
+#include <glog/logging.h>
+#include "singa/utils/singleton.h"
+#include "singa/utils/math_blob.h"
+#include "singa/utils/singa_op.h"
+
+#include <iostream>
+using namespace std;
+
+namespace singa {
+
+using std::vector;
+
+GRULayer::~GRULayer() {
+ delete weight_z_hx_;
+ delete weight_z_hh_;
+ delete bias_z_;
+
+ delete weight_r_hx_;
+ delete weight_r_hh_;
+ delete bias_r_;
+
+ delete weight_c_hx_;
+ delete weight_c_hh_;
+ delete bias_c_;
+
+ delete update_gate;
+ delete reset_gate;
+ delete new_memory;
+}
+
+void GRULayer::Setup(const LayerProto& conf,
+ const vector<Layer*>& srclayers) {
+ Layer::Setup(conf, srclayers);
+ CHECK_LE(srclayers.size(), 2);
+ const auto& src = srclayers[0]->data(this);
+
+ batchsize_ = src.shape()[0]; // size of batch
+ vdim_ = src.count() / (batchsize_); // dimension of input
+
+ hdim_ = layer_conf_.gru_conf().dim_hidden(); // dimension of hidden state
+
+ data_.Reshape(vector<int>{batchsize_, hdim_});
+ grad_.ReshapeLike(data_);
+
+ // Initialize the parameters
+ weight_z_hx_ = Param::Create(conf.param(0));
+ weight_r_hx_ = Param::Create(conf.param(1));
+ weight_c_hx_ = Param::Create(conf.param(2));
+
+ weight_z_hh_ = Param::Create(conf.param(3));
+ weight_r_hh_ = Param::Create(conf.param(4));
+ weight_c_hh_ = Param::Create(conf.param(5));
+
+ if (conf.gru_conf().bias_term()) {
+ bias_z_ = Param::Create(conf.param(6));
+ bias_r_ = Param::Create(conf.param(7));
+ bias_c_ = Param::Create(conf.param(8));
+ }
+
+ weight_z_hx_->Setup(vector<int>{hdim_, vdim_});
+ weight_r_hx_->Setup(vector<int>{hdim_, vdim_});
+ weight_c_hx_->Setup(vector<int>{hdim_, vdim_});
+
+ weight_z_hh_->Setup(vector<int>{hdim_, hdim_});
+ weight_r_hh_->Setup(vector<int>{hdim_, hdim_});
+ weight_c_hh_->Setup(vector<int>{hdim_, hdim_});
+
+ if (conf.gru_conf().bias_term()) {
+ bias_z_->Setup(vector<int>{hdim_});
+ bias_r_->Setup(vector<int>{hdim_});
+ bias_c_->Setup(vector<int>{hdim_});
+ }
+
+ update_gate = new Blob<float>(batchsize_, hdim_);
+ reset_gate = new Blob<float>(batchsize_, hdim_);
+ new_memory = new Blob<float>(batchsize_, hdim_);
+
+}
+
+void GRULayer::ComputeFeature(int flag,
+ const vector<Layer*>& srclayers) {
+ CHECK_LE(srclayers.size(), 2);
+
+ // Do transpose
+ Blob<float> *w_z_hx_t = Transpose (weight_z_hx_->data());
+ Blob<float> *w_z_hh_t = Transpose (weight_z_hh_->data());
+ Blob<float> *w_r_hx_t = Transpose (weight_r_hx_->data());
+ Blob<float> *w_r_hh_t = Transpose (weight_r_hh_->data());
+ Blob<float> *w_c_hx_t = Transpose (weight_c_hx_->data());
+ Blob<float> *w_c_hh_t = Transpose (weight_c_hh_->data());
+
+ // Prepare the data input and the context
+ const auto& src = srclayers[0]->data(this);
+ const Blob<float> *context;
+ if (srclayers.size() == 1) { // only have data input
+ context = new Blob<float>(batchsize_, hdim_);
+ } else { // have data input & context
+ context = &srclayers[1]->data(this);
+ }
+
+ // Compute the update gate
+ GEMM(1.0f, 0.0f, src,*w_z_hx_t,update_gate);
+ if (bias_z_ != nullptr)
+ MVAddRow(1.0f,1.0f,bias_z_->data(),update_gate);
+ Blob<float> zprev (batchsize_,hdim_);
+ GEMM(1.0f, 0.0f, *context,*w_z_hh_t, &zprev);
+ Add<float>(*update_gate, zprev, update_gate);
+ Map<op::Sigmoid<float>,float>(*update_gate, update_gate);
+
+ // Compute the reset gate
+ GEMM(1.0f, 0.0f, src,*w_r_hx_t,reset_gate);
+ if (bias_r_ != nullptr)
+ MVAddRow(1.0f,1.0f,bias_r_->data(),reset_gate);
+ Blob<float> rprev (batchsize_, hdim_);
+ GEMM(1.0f, 0.0f, *context, *w_r_hh_t, &rprev);
+ Add<float>(*reset_gate, rprev, reset_gate);
+ Map<op::Sigmoid<float>,float>(*reset_gate, reset_gate);
+
+ // Compute the new memory
+ GEMM(1.0f, 0.0f, src, *w_c_hx_t, new_memory);
+ if (bias_c_ != nullptr)
+ MVAddRow(1.0f,1.0f,bias_c_->data(), new_memory);
+ Blob<float> cprev (batchsize_, hdim_);
+ GEMM(1.0f, 0.0f, *context, *w_c_hh_t, &cprev);
+ //Blob<float> new_cprev (batchsize_, hdim_);
+ Mult<float>(*reset_gate, cprev, &cprev);
+ Add<float>(*new_memory, cprev, new_memory);
+ Map<op::Tanh<float>,float>(*new_memory, new_memory);
+
+ // Compute data - new memory part
+ Blob<float> z1 (batchsize_,hdim_);
+ for (int i = 0; i < z1.count(); i ++) {
+ z1.mutable_cpu_data()[i] = 1.0f; // generate a matrix with ones
+ }
+ AXPY<float>(-1.0f, *update_gate, &z1);
+ Mult<float>(z1, *new_memory, &data_);
+
+ // Compute data - context part
+ Blob<float> data_prev (batchsize_, hdim_);
+ Mult<float>(*update_gate,*context,&data_prev);
+ Add<float>(data_, data_prev, &data_);
+
+ // delete the pointers
+ if (srclayers.size() == 1) delete context;
+ else context = NULL;
+
+ delete w_z_hx_t;
+ delete w_z_hh_t;
+ delete w_r_hx_t;
+ delete w_r_hh_t;
+ delete w_c_hx_t;
+ delete w_c_hh_t;
+}
+
+void GRULayer::ComputeGradient(int flag,
+ const vector<Layer*>& srclayers) {
+ CHECK_LE(srclayers.size(), 2);
+
+ // Prepare the data input and the context
+ const Blob<float>& src = srclayers[0]->data(this);
+ const Blob<float> *context;
+ if (srclayers.size() == 1) { // only have data input
+ context = new Blob<float>(batchsize_, hdim_);
+ } else { // have data input & context
+ context = &srclayers[1]->data(this);
+ }
+
+ // Prepare gradient of output neurons
+ Blob<float> *grad_t = Transpose (grad_);
+
+ // Compute intermediate gradients which are used for other computations
+ Blob<float> dugatedz (batchsize_, hdim_);
+ Map<singa::op::SigmoidGrad<float>, float>(*update_gate, &dugatedz);
+ Blob<float> drgatedr (batchsize_, hdim_);
+ Map<singa::op::SigmoidGrad<float>, float>(*reset_gate, &drgatedr);
+ Blob<float> dnewmdc (batchsize_, hdim_);
+ Map<singa::op::TanhGrad<float>, float>(*new_memory,&dnewmdc);
+
+ Blob<float> dLdz (batchsize_, hdim_);
+ Sub<float>(*context, *new_memory, &dLdz);
+ Mult<float>(dLdz, grad_, &dLdz);
+ Mult<float>(dLdz, dugatedz, &dLdz);
+
+ Blob<float> dLdc (batchsize_,hdim_);
+ Blob<float> z1 (batchsize_,hdim_);
+ for (int i = 0; i < z1.count(); i ++) {
+ z1.mutable_cpu_data()[i] = 1.0f; // generate a matrix with ones
+ }
+ AXPY<float>(-1.0f, *update_gate, &z1);
+ Mult(grad_,z1,&dLdc);
+ Mult(dLdc,dnewmdc,&dLdc);
+
+ Blob<float> reset_dLdc (batchsize_,hdim_);
+ Mult(dLdc, *reset_gate, &reset_dLdc);
+
+ Blob<float> dLdr (batchsize_, hdim_);
+ Blob<float> cprev (batchsize_, hdim_);
+ Blob<float> *w_c_hh_t = Transpose(weight_c_hh_->data());
+ GEMM(1.0f,0.0f,*context,*w_c_hh_t, &cprev);
+ delete w_c_hh_t;
+ Mult(dLdc,cprev,&dLdr);
+ Mult(dLdr,drgatedr,&dLdr);
+
+
+ // Compute gradients for parameters of update gate
+ Blob<float> *dLdz_t = Transpose(dLdz);
+ GEMM(1.0f,0.0f,*dLdz_t,src,weight_z_hx_->mutable_grad());
+ GEMM(1.0f,0.0f,*dLdz_t,*context,weight_z_hh_->mutable_grad());
+ if (bias_z_ != nullptr)
+ MVSumRow<float>(1.0f,0.0f,dLdz,bias_z_->mutable_grad());
+ delete dLdz_t;
+
+ // Compute gradients for parameters of reset gate
+ Blob<float> *dLdr_t = Transpose(dLdr);
+ GEMM(1.0f,0.0f,*dLdr_t,src,weight_r_hx_->mutable_grad());
+ GEMM(1.0f,0.0f,*dLdr_t,*context,weight_r_hh_->mutable_grad());
+ if (bias_r_ != nullptr)
+ MVSumRow(1.0f,0.0f,dLdr,bias_r_->mutable_grad());
+ delete dLdr_t;
+
+ // Compute gradients for parameters of new memory
+ Blob<float> *dLdc_t = Transpose(dLdc);
+ GEMM(1.0f,0.0f,*dLdc_t,src,weight_c_hx_->mutable_grad());
+ if (bias_c_ != nullptr)
+ MVSumRow(1.0f,0.0f,dLdc,bias_c_->mutable_grad());
+ delete dLdc_t;
+
+ Blob<float> *reset_dLdc_t = Transpose(reset_dLdc);
+ GEMM(1.0f,0.0f,*reset_dLdc_t,*context,weight_c_hh_->mutable_grad());
+ delete reset_dLdc_t;
+
+ // Compute gradients for data input layer
+ if (srclayers[0]->mutable_grad(this) != nullptr) {
+ GEMM(1.0f,0.0f,dLdc,weight_c_hx_->data(),srclayers[0]->mutable_grad(this));
+ GEMM(1.0f,1.0f,dLdz,weight_z_hx_->data(),srclayers[0]->mutable_grad(this));
+ GEMM(1.0f,1.0f,dLdr,weight_r_hx_->data(), srclayers[0]->mutable_grad(this));
+ }
+
+ if (srclayers.size() > 1 && srclayers[1]->mutable_grad(this) != nullptr) {
+ // Compute gradients for context layer
+ GEMM(1.0f,0.0f,reset_dLdc,weight_c_hh_->data(), srclayers[1]->mutable_grad(this));
+ GEMM(1.0f,1.0f,dLdr, weight_r_hh_->data(), srclayers[1]->mutable_grad(this));
+ GEMM(1.0f,1.0f,dLdz,weight_z_hh_->data(), srclayers[1]->mutable_grad(this));
+ Add(srclayers[1]->grad(this), *update_gate, srclayers[1]->mutable_grad(this));
+ }
+
+ if (srclayers.size() == 1) delete context;
+ else context = NULL;
+ delete grad_t;
+}
+
+} // namespace singa
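For reference, ComputeFeature above realizes the standard GRU recurrence; the weight matrices are stored as {hdim, vdim} and {hdim, hdim}, hence the Transpose calls before each GEMM, and GEMM(alpha, beta, A, B, C) computes C <- alpha*A*B + beta*C (inferred from how beta = 1.0f is used to accumulate in ComputeGradient). A sketch of the equations, in notation of my choosing (x_t = src, h_{t-1} = context, \odot = element-wise product):

  z_t = \sigma(x_t W_z^\top + h_{t-1} U_z^\top + b_z)
  r_t = \sigma(x_t W_r^\top + h_{t-1} U_r^\top + b_r)
  \tilde{h}_t = \tanh(x_t W_c^\top + r_t \odot (h_{t-1} U_c^\top) + b_c)
  h_t = (1 - z_t) \odot \tilde{h}_t + z_t \odot h_{t-1}

ComputeGradient is the chain rule over these equations, with SigmoidGrad(s) = s(1 - s) and TanhGrad(u) = 1 - u^2:

  \partial L/\partial z_t = \partial L/\partial h_t \odot (h_{t-1} - \tilde{h}_t) \odot z_t(1 - z_t)
  \partial L/\partial \tilde{h}_t = \partial L/\partial h_t \odot (1 - z_t) \odot (1 - \tilde{h}_t^2)
  \partial L/\partial r_t = \partial L/\partial \tilde{h}_t \odot (h_{t-1} U_c^\top) \odot r_t(1 - r_t)

from which the parameter gradients follow as the GEMMs against src and context above.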
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ddf4e79a/src/test/test_gru_layer.cc
----------------------------------------------------------------------
diff --git a/src/test/test_gru_layer.cc b/src/test/test_gru_layer.cc
new file mode 100644
index 0000000..296b795
--- /dev/null
+++ b/src/test/test_gru_layer.cc
@@ -0,0 +1,286 @@
+/************************************************************
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ *************************************************************/
+#include <string>
+#include <vector>
+#include <fstream>
+#include <iostream>
+using namespace std;
+
+
+#include "gtest/gtest.h"
+#include "singa/neuralnet/neuron_layer.h"
+#include "singa/neuralnet/input_layer.h"
+#include "singa/driver.h"
+#include "singa/proto/job.pb.h"
+
+using namespace singa;
+
+class GRULayerTest: public ::testing::Test {
+protected:
+ virtual void SetUp() {
+ // Initialize the settings for the first input-layer
+ std::string path1 = "src/test/gru-in-1.csv"; // path of a csv file
+ std::ofstream ofs1(path1, std::ofstream::out);
+ ASSERT_TRUE(ofs1.is_open());
+ ofs1 << "0,0,0,1\n";
+ ofs1 << "0,0,1,0\n";
+ ofs1.close();
+ auto conf1 = in1_conf.mutable_store_conf();
+ conf1->set_path(path1);
+ conf1->set_batchsize(2);
+ conf1->add_shape(4);
+ conf1->set_backend("textfile");
+ conf1->set_has_label(false);
+
+
+ // Initialize the settings for the second input-layer
+ std::string path2 = "src/test/gru-in-2.csv"; // path of a csv file
+ std::ofstream ofs2(path2, std::ofstream::out);
+ ASSERT_TRUE(ofs2.is_open());
+ ofs2 << "0,1,0,0\n";
+ ofs2 << "1,0,0,0\n";
+ ofs2.close();
+ auto conf2 = in2_conf.mutable_store_conf();
+ conf2->set_path(path2);
+
+ conf2->set_batchsize(2);
+ conf2->add_shape(4);
+ conf2->set_backend("textfile");
+ conf2->set_has_label(false);
+
+
+ gru1_conf.mutable_gru_conf() -> set_dim_hidden(2);
+ gru1_conf.mutable_gru_conf() -> set_bias_term(true);
+ for (int i = 0; i < 9; i ++) {
+ gru1_conf.add_param();
+ }
+
+
+ gru1_conf.mutable_param(0)->set_name("wzhx1");
+ gru1_conf.mutable_param(0)->set_type(kParam);
+ gru1_conf.mutable_param(0)->mutable_init()->set_type(kConstant);
+ gru1_conf.mutable_param(0)->mutable_init()->set_value(0.5f);
+
+ gru1_conf.mutable_param(1)->set_name("wrhx1");
+ gru1_conf.mutable_param(1)->set_type(kParam);
+ gru1_conf.mutable_param(1)->mutable_init()->set_type(kConstant);
+ gru1_conf.mutable_param(1)->mutable_init()->set_value(0.5f);
+
+ gru1_conf.mutable_param(2)->set_name("wchx1");
+ gru1_conf.mutable_param(2)->set_type(kParam);
+ gru1_conf.mutable_param(2)->mutable_init()->set_type(kConstant);
+ gru1_conf.mutable_param(2)->mutable_init()->set_value(0.5f);
+
+ gru1_conf.mutable_param(3)->set_name("wzhh1");
+ gru1_conf.mutable_param(3)->set_type(kParam);
+ gru1_conf.mutable_param(3)->mutable_init()->set_type(kConstant);
+ gru1_conf.mutable_param(3)->mutable_init()->set_value(0.5f);
+
+ gru1_conf.mutable_param(4)->set_name("wrhh1");
+ gru1_conf.mutable_param(4)->set_type(kParam);
+ gru1_conf.mutable_param(4)->mutable_init()->set_type(kConstant);
+ gru1_conf.mutable_param(4)->mutable_init()->set_value(0.5f);
+
+ gru1_conf.mutable_param(5)->set_name("wchh1");
+ gru1_conf.mutable_param(5)->set_type(kParam);
+ gru1_conf.mutable_param(5)->mutable_init()->set_type(kConstant);
+ gru1_conf.mutable_param(5)->mutable_init()->set_value(0.5f);
+
+ gru1_conf.mutable_param(6)->set_name("bz1");
+ gru1_conf.mutable_param(6)->set_type(kParam);
+ gru1_conf.mutable_param(6)->mutable_init()->set_type(kConstant);
+ gru1_conf.mutable_param(6)->mutable_init()->set_value(0.5f);
+
+ gru1_conf.mutable_param(7)->set_name("br1");
+ gru1_conf.mutable_param(7)->set_type(kParam);
+ gru1_conf.mutable_param(7)->mutable_init()->set_type(kConstant);
+ gru1_conf.mutable_param(7)->mutable_init()->set_value(0.5f);
+
+ gru1_conf.mutable_param(8)->set_name("bc1");
+ gru1_conf.mutable_param(8)->set_type(kParam);
+ gru1_conf.mutable_param(8)->mutable_init()->set_type(kConstant);
+ gru1_conf.mutable_param(8)->mutable_init()->set_value(0.5f);
+
+ gru2_conf.mutable_gru_conf() -> set_dim_hidden(2);
+ gru2_conf.mutable_gru_conf() -> set_bias_term(true);
+ for (int i = 0; i < 9; i ++) {
+ gru2_conf.add_param();
+ }
+
+ gru2_conf.mutable_param(0)->set_name("wzhx2");
+ gru2_conf.mutable_param(0)->set_type(kParam);
+ gru2_conf.mutable_param(0)->mutable_init()->set_type(kConstant);
+ gru2_conf.mutable_param(0)->mutable_init()->set_value(0.5f);
+
+ gru2_conf.mutable_param(1)->set_name("wrhx2");
+ gru2_conf.mutable_param(1)->set_type(kParam);
+ gru2_conf.mutable_param(1)->mutable_init()->set_type(kConstant);
+ gru2_conf.mutable_param(1)->mutable_init()->set_value(0.5f);
+
+ gru2_conf.mutable_param(2)->set_name("wchx2");
+ gru2_conf.mutable_param(2)->set_type(kParam);
+ gru2_conf.mutable_param(2)->mutable_init()->set_type(kConstant);
+ gru2_conf.mutable_param(2)->mutable_init()->set_value(0.5f);
+
+ gru2_conf.mutable_param(3)->set_name("wzhh2");
+ gru2_conf.mutable_param(3)->set_type(kParam);
+ gru2_conf.mutable_param(3)->mutable_init()->set_type(kConstant);
+ gru2_conf.mutable_param(3)->mutable_init()->set_value(0.5f);
+
+ gru2_conf.mutable_param(4)->set_name("wrhh2");
+ gru2_conf.mutable_param(4)->set_type(kParam);
+ gru2_conf.mutable_param(4)->mutable_init()->set_type(kConstant);
+ gru2_conf.mutable_param(4)->mutable_init()->set_value(0.5f);
+
+ gru2_conf.mutable_param(5)->set_name("wchh2");
+ gru2_conf.mutable_param(5)->set_type(kParam);
+ gru2_conf.mutable_param(5)->mutable_init()->set_type(kConstant);
+ gru2_conf.mutable_param(5)->mutable_init()->set_value(0.5f);
+
+ gru2_conf.mutable_param(6)->set_name("bz2");
+ gru2_conf.mutable_param(6)->set_type(kParam);
+ gru2_conf.mutable_param(6)->mutable_init()->set_type(kConstant);
+ gru2_conf.mutable_param(6)->mutable_init()->set_value(0.5f);
+
+ gru2_conf.mutable_param(7)->set_name("br2");
+ gru2_conf.mutable_param(7)->set_type(kParam);
+ gru2_conf.mutable_param(7)->mutable_init()->set_type(kConstant);
+ gru2_conf.mutable_param(7)->mutable_init()->set_value(0.5f);
+
+ gru2_conf.mutable_param(8)->set_name("bc2");
+ gru2_conf.mutable_param(8)->set_type(kParam);
+ gru2_conf.mutable_param(8)->mutable_init()->set_type(kConstant);
+ gru2_conf.mutable_param(8)->mutable_init()->set_value(0.5f);
+
+ }
+ singa::LayerProto in1_conf;
+ singa::LayerProto in2_conf;
+ singa::LayerProto gru1_conf;
+ singa::LayerProto gru2_conf;
+};
+
+TEST_F(GRULayerTest, Setup) {
+ singa::Driver driver;
+ //driver.RegisterLayer<GRULayer, int> (kGRU);
+ driver.RegisterParam<Param>(0);
+ driver.RegisterParamGenerator<UniformGen>(kUniform);
+ driver.RegisterParamGenerator<ParamGenerator>(kConstant);
+
+ singa::CSVInputLayer in_layer_1;
+ singa::CSVInputLayer in_layer_2;
+
+ in_layer_1.Setup(in1_conf, std::vector<singa::Layer*> { });
+ EXPECT_EQ(2, static_cast<int>(in_layer_1.aux_data().size()));
+ EXPECT_EQ(8, in_layer_1.data(nullptr).count());
+
+ in_layer_2.Setup(in2_conf, std::vector<singa::Layer*>{ });
+ EXPECT_EQ(2, static_cast<int>(in_layer_2.aux_data().size()));
+ EXPECT_EQ(8, in_layer_2.data(nullptr).count());
+
+ singa::GRULayer gru_layer_1;
+ gru_layer_1.Setup(gru1_conf, std::vector<singa::Layer*>{&in_layer_1});
+ //EXPECT_EQ(2, gru_layer_1.hdim());
+ //EXPECT_EQ(4, gru_layer_1.vdim());
+
+ for (unsigned int i = 0; i < gru_layer_1.GetParams().size(); i ++) {
+ gru_layer_1.GetParams()[i]->InitValues();
+ }
+ EXPECT_EQ (0.5, gru_layer_1.GetParams()[0]->data().cpu_data()[0]);
+ //cout << "gru_layer_1: " << gru_layer_1.GetParams()[0]->data().cpu_data()[0] << endl;
+
+ singa::GRULayer gru_layer_2;
+ gru_layer_2.Setup(gru2_conf, std::vector<singa::Layer*>{&in_layer_2, &gru_layer_1});
+ //EXPECT_EQ(2, gru_layer_2.hdim());
+ //EXPECT_EQ(4, gru_layer_2.vdim());
+ for (unsigned int i = 0; i < gru_layer_2.GetParams().size(); i ++) {
+ gru_layer_2.GetParams()[i]->InitValues();
+ }
+ EXPECT_EQ (0.5, gru_layer_2.GetParams()[0]->data().cpu_data()[0]);
+}
+
+
+TEST_F(GRULayerTest, ComputeFeature) {
+ singa::CSVInputLayer in_layer_1;
+ singa::CSVInputLayer in_layer_2;
+
+ in_layer_1.Setup(in1_conf, std::vector<singa::Layer*> { });
+ in_layer_1.ComputeFeature(singa::kTrain, std::vector<singa::Layer*> { });
+ in_layer_2.Setup(in2_conf, std::vector<singa::Layer*>{ });
+ in_layer_2.ComputeFeature(singa::kTrain, std::vector<singa::Layer*> { });
+
+
+ singa::GRULayer gru_layer_1;
+ gru_layer_1.Setup(gru1_conf, std::vector<singa::Layer*>{&in_layer_1});
+ for (unsigned int i = 0; i < gru_layer_1.GetParams().size(); i ++) {
+ gru_layer_1.GetParams()[i]->InitValues();
+ }
+ gru_layer_1.ComputeFeature(singa::kTrain, std::vector<singa::Layer*>{&in_layer_1});
+ for (int i = 0; i < gru_layer_1.data(nullptr).count(); i ++) {
+ EXPECT_GT(0.000001,abs(0.204824-gru_layer_1.data(nullptr).cpu_data()[i]));
+ }
+
+ singa::GRULayer gru_layer_2;
+ gru_layer_2.Setup(gru2_conf, std::vector<singa::Layer*>{&in_layer_2, &gru_layer_1});
+
+ for (unsigned int i = 0; i < gru_layer_2.GetParams().size(); i ++) {
+ gru_layer_2.GetParams()[i]->InitValues();
+ }
+ gru_layer_2.ComputeFeature(singa::kTrain, std::vector<singa::Layer*>{&in_layer_2, &gru_layer_1});
+ for (int i = 0; i < gru_layer_2.data(nullptr).count(); i ++) {
+ EXPECT_GT(0.000001,abs(0.346753-gru_layer_2.data(nullptr).cpu_data()[i]));
+ }
+}
+
+
+TEST_F(GRULayerTest, ComputeGradient) {
+ singa::CSVInputLayer in_layer_1;
+ singa::CSVInputLayer in_layer_2;
+
+ in_layer_1.Setup(in1_conf, std::vector<singa::Layer*> { });
+ in_layer_1.ComputeFeature(singa::kTrain, std::vector<singa::Layer*> { });
+ in_layer_2.Setup(in2_conf, std::vector<singa::Layer*>{ });
+ in_layer_2.ComputeFeature(singa::kTrain, std::vector<singa::Layer*> { });
+
+
+ singa::GRULayer gru_layer_1;
+ gru_layer_1.Setup(gru1_conf, std::vector<singa::Layer*>{&in_layer_1});
+ for (unsigned int i = 0; i < gru_layer_1.GetParams().size(); i ++) {
+ gru_layer_1.GetParams()[i]->InitValues();
+ }
+ gru_layer_1.ComputeFeature(singa::kTrain, std::vector<singa::Layer*>{&in_layer_1});
+
+
+ singa::GRULayer gru_layer_2;
+ gru_layer_2.Setup(gru2_conf, std::vector<singa::Layer*>{&in_layer_2, &gru_layer_1});
+ for (unsigned int i = 0; i < gru_layer_2.GetParams().size(); i ++) {
+ gru_layer_2.GetParams()[i]->InitValues();
+ }
+ gru_layer_2.ComputeFeature(singa::kTrain, std::vector<singa::Layer*>{&in_layer_2, &gru_layer_1});
+
+ // For test purpose, we set dummy values for gru_layer_2.grad_
+ for (int i = 0; i < gru_layer_2.grad(nullptr).count(); i ++) {
+ gru_layer_2.mutable_grad(nullptr)->mutable_cpu_data()[i] = 1.0f;
+ }
+ gru_layer_2.ComputeGradient(singa::kTrain, std::vector<singa::Layer*>{&in_layer_2, &gru_layer_1});
+
+ gru_layer_1.ComputeGradient(singa::kTrain, std::vector<singa::Layer*>{&in_layer_1});
+
+}
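The constants 0.204824 and 0.346753 asserted in the ComputeFeature test can be reproduced by hand (all weights and biases are initialized to 0.5, the inputs are one-hot rows, and gru_layer_1 starts from a zero context; the arithmetic below is mine, rounded to six decimals):

  Layer 1 (h_{t-1} = 0):
    z = r = \sigma(0.5 \cdot 1 + 0.5) = \sigma(1) \approx 0.731059
    \tilde{h} = \tanh(1 + r \cdot 0) = \tanh(1) \approx 0.761594
    h = (1 - z) \cdot \tilde{h} \approx 0.268941 \times 0.761594 \approx 0.204824

  Layer 2 (h_{t-1} = layer-1 output, both hidden units equal):
    h_{t-1} U^\top per unit = 0.5 \times (0.204824 + 0.204824) = 0.204824
    z = r = \sigma(1 + 0.204824) \approx 0.769382
    \tilde{h} = \tanh(1 + 0.769382 \times 0.204824) = \tanh(1.157589) \approx 0.820252
    h = (1 - z) \cdot \tilde{h} + z \times 0.204824 \approx 0.189165 + 0.157589 \approx 0.346753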
[04/10] incubator-singa git commit: SINGA-120 - Implemented GRU and BPTT:
1) Updated Driver.cc to register GRU;
2) Updated job.proto to include configuration of GRU;
3) Updated configure.ac to fix some compilation errors
Posted by wa...@apache.org.
SINGA-120 - Implemented GRU and BPTT: 1) Updated Driver.cc to register GRU; 2) Updated job.proto to include configuration of GRU; 3) Updated configure.ac to fix some compilation errors
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/473c9858
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/473c9858
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/473c9858
Branch: refs/heads/master
Commit: 473c9858aafd4cd15e6f6316deaeca1f1c0dfffc
Parents: ddf4e79
Author: Ju Fan <fa...@gmail.com>
Authored: Fri Jan 1 10:46:51 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Wed Jan 6 01:53:05 2016 +0800
----------------------------------------------------------------------
Makefile.am | 38 ++++++++++++++++++++------------------
Makefile.gpu | 6 +++---
configure.ac | 4 ++--
src/driver.cc | 1 +
src/proto/job.proto | 27 +++++++++++++++++++++++++++
5 files changed, 53 insertions(+), 23 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/473c9858/Makefile.am
----------------------------------------------------------------------
diff --git a/Makefile.am b/Makefile.am
index 3c282e3..aa88348 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -19,7 +19,6 @@
#*
#*************************************************************/
-
ACLOCAL_AMFLAGS = -I config
AUTOMAKE_OPTIONS = foreign subdir-objects
@@ -33,7 +32,7 @@ CFLAGS = $(DEBUG)
CXXFLAGS = $(DEBUG)
#AC_CXXFLAGS = $(DEBUG)
-INCLUDES = -I$(top_srcdir)/include
+INCLUDES = -I$(top_srcdir)/include
PROTOS := $(top_srcdir)/src/proto/singa.proto \
$(top_srcdir)/src/proto/job.proto \
@@ -50,7 +49,7 @@ PROTO_PYS := tool/python/pb2/singa_pb2.py \
CUDA_SRCS := src/utils/math_kernel.cu
CUDA_OBJS := src/utils/math_kernel.o
-CUDA_HDRS := include/singa/utils/math_kernel.h
+CUDA_HDRS := include/singa/utils/math_kernel.h
CUDNN_SRCS := src/neuralnet/loss_layer/cudnn_softmaxloss.cc \
src/neuralnet/neuron_layer/cudnn_softmax.cc \
@@ -63,9 +62,9 @@ PY_SRCS := tool/python/singa/driver_wrap.cxx \
src/driver.cc
HDFS_SRCS := src/io/hdfsfile.cc \
- src/io/hdfsfile_store.cc
+ src/io/hdfsfile_store.cc
HDFS_HDRS := include/singa/io/hdfsfile.h \
- include/singa/io/hdfsfile_store.h
+ include/singa/io/hdfsfile_store.h
SINGA_SRCS := src/driver.cc \
src/server.cc \
@@ -96,6 +95,7 @@ SINGA_SRCS := src/driver.cc \
src/neuralnet/neuron_layer/lrn.cc \
src/neuralnet/neuron_layer/pooling.cc \
src/neuralnet/neuron_layer/rbm.cc \
+ src/neuralnet/neuron_layer/gru.cc \
src/neuralnet/neuron_layer/relu.cc \
src/neuralnet/neuron_layer/sigmoid.cc \
src/neuralnet/neuron_layer/softmax.cc \
@@ -170,7 +170,9 @@ TEST_SRCS := include/gtest/gtest_main.cc \
src/test/test_store.cc \
src/test/test_connection_layers.cc \
src/test/test_record_input_layer.cc \
- src/test/test_csv_input_layer.cc
+ src/test/test_csv_input_layer.cc \
+ src/test/test_gru_layer.cc \
+ src/test/test_unrolling.cc
#EXTRA_PROGRAMS = $(PROGS)
EXTRA_PROGRAMS = singatest test
@@ -186,7 +188,7 @@ py_LTLIBRARIES = $(PY_PROGS)
#lib_LTLIBRARIES = libsinga.la
libsinga_la_SOURCES = $(PROTO_SRCS) $(SINGA_SRCS)
libsinga_la_CXXFLAGS = $(DEFAULT_FLAGS) -msse3 -fpermissive -I$(top_srcdir)/include
-libsinga_la_LDFLAGS =
+libsinga_la_LDFLAGS =
if LMDB
libsinga_la_CXXFLAGS += -DUSE_LMDB
endif
@@ -194,7 +196,7 @@ if DCUDA
libsinga_la_SOURCES += $(CUDA_SRCS) $(CUDA_HDRS)
libsinga_la_CXXFLAGS += $(CUDA_CFLAGS)
libsinga_la_LDFLAGS += $(CUDA_LDFLAGS) $(CUDA_LIBS) -lsingagpu
-libsinga_la_LIBADD = libsingagpu.so
+libsinga_la_LIBADD = libsingagpu.so
endif
if DCUDNN
@@ -215,11 +217,11 @@ singa_CXXFLAGS = $(DEFAULT_FLAGS) -MMD -I$(top_srcdir)/include
singa_LDFLAGS = -lsinga \
-lglog \
-lprotobuf \
- -lrt \
+ #-lrt \
-lopenblas \
-lzmq \
-lczmq \
- -lzookeeper_mt
+ -lzookeeper_mt
if LMDB
singa_LDFLAGS += -llmdb
endif
@@ -248,10 +250,10 @@ singatool_CXXFLAGS = -Wall -pthread -fPIC -std=c++11 -MMD -Wno-unknown-pragmas \
singatool_LDFLAGS = -lsinga \
-lglog \
-lprotobuf \
- -lzookeeper_mt
+ -lzookeeper_mt
#if DCUDA
-#singatool_SOURCES += $(CUDA_SRCS) $(CUDA_HDRS)
+#singatool_SOURCES += $(CUDA_SRCS) $(CUDA_HDRS)
#singatool_CXXFLAGS += $(CUDA_CFLAGS)
#singatool_LDFLAGS += $(CUDA_LDFLAGS) $(CUDA_LIBS)
#endif
@@ -283,7 +285,7 @@ singatest_LDADD = ./libgtest.la
singatest_LDFLAGS = -lsinga \
-lglog \
-lprotobuf \
- -lrt \
+ #-lrt \
-lopenblas \
-lzmq \
-lczmq \
@@ -293,7 +295,7 @@ if LMDB
singatest_LDFLAGS += -llmdb
endif
-if DCUDA
+if DCUDA
singatest_SOURCES += $(CUDA_SRCS) $(CUDA_HDRS)
singatest_CXXFLAGS += $(CUDA_CFLAGS)
singatest_LDFLAGS += $(CUDA_LDFLAGS) $(CUDA_LIBS)
@@ -310,13 +312,13 @@ _driver_la_CXXFLAGS = $(DEFAULT_FLAGS) $(MSHADOW_FLAGS) -I$(top_srcdir)/include
_driver_la_LDFLAGS = -lsinga -module -shared $(PYLIBS) -avoid-version -rpath $(pydir)
if DCUDA
-_driver_la_CXXFLAGS += $(CUDA_CFLAGS)
-_driver_la_LDFLAGS += $(CUDA_LDFLAGS) $(CUDA_LIBS)
+_driver_la_CXXFLAGS += $(CUDA_CFLAGS)
+_driver_la_LDFLAGS += $(CUDA_LDFLAGS) $(CUDA_LIBS)
endif
if DCUDNN
_driver_la_CXXFLAGS += $(CUDNN_CFLAGS)
-_driver_la_LDFLAGS += $(CUDNN_LDFLAGS) $(CUDNN_LIBS)
+_driver_la_LDFLAGS += $(CUDNN_LDFLAGS) $(CUDNN_LIBS)
endif
clean-local:
@@ -348,7 +350,7 @@ rat:
$(NVCC) $(MSHADOW_FLAGS) --shared -Xcompiler -fPIC $(CUDA_CFLAGS) $(CUDA_LDFLAGS) $(CUDA_LIBS) -I$(top_srcdir)/include -std=c++11 -G -c -o $@ $<
libsingagpu.so: $(CUDA_OBJS)
- $(NVCC) -o libsingagpu.so -shared -Xcompiler -fPIC $(CUDA_OBJS) $(CUDA_CFLAGS) $(CUDA_LDFLAGS) $(CUDA_LIBS) -I$(top_srcdir)/include -std=c++11 -G
+ $(NVCC) -o libsingagpu.so -shared -Xcompiler -fPIC $(CUDA_OBJS) $(CUDA_CFLAGS) $(CUDA_LDFLAGS) $(CUDA_LIBS) -I$(top_srcdir)/include -std=c++11 -G
install-pyLTLIBRARIES: $(py_LTLIBRARIES)
touch tool/python/singa/__init__.py
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/473c9858/Makefile.gpu
----------------------------------------------------------------------
diff --git a/Makefile.gpu b/Makefile.gpu
index c35a445..11d6827 100644
--- a/Makefile.gpu
+++ b/Makefile.gpu
@@ -20,7 +20,7 @@
###################User Config Variables #############################
# third-party library installation folder
-HOME_DIR := /media/hd1/home/wangwei/local
+HOME_DIR := /home/wangwei/local
# must config the cudnn folder if using cudnn
CUDNN_DIR := $(HOME_DIR)/cudnn
@@ -30,7 +30,7 @@ CUDA_DIR := /usr/local/cuda
# Lib folder for system and external libs. You may need to change it.
LIBRARY_DIRS := $(HOME_DIR)/lib64 $(HOME_DIR)/lib $(CUDNN_DIR)/lib64 $(CUDA_DIR)/lib64 $(CUDA_DIR)/lib
# Header folder for system and external libs. You may need to change it.
-INCLUDE_DIRS := ./include $(HOME_DIR)/include $(CUDNN_DIR)/include $(CUDA_DIR)/include
+INCLUDE_DIRS := ./include $(HOME_DIR)/include $(CUDNN_DIR)/include $(CUDA_DIR)/include
# g++ location, should support c++11, tested with 4.8.1
CXX := g++
CUCXX := nvcc
@@ -85,7 +85,7 @@ TEST_CUDA_SRCS :=$(shell find src/test/ -maxdepth 1 -name "*.cu")
TEST_CUDA_OBJS := $(sort $(addprefix $(BUILD_DIR)/, $(TEST_CUDA_SRCS:.cu=.o)))
-include $(TEST_CUDA_OBJS:%.o=%.P)
-SINGA_CUDA_SRCS :=$(shell find src/ -name "*.cu")
+SINGA_CUDA_SRCS := $(shell find src/ \( -path "src/test" \) -prune -o \( -name "*.cu" -type f \) -print )
SINGA_CUDA_OBJS := $(sort $(addprefix $(BUILD_DIR)/, $(SINGA_CUDA_SRCS:.cu=.o)))
-include $(SINGA_CUDA_OBJS:%.o=%.P)
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/473c9858/configure.ac
----------------------------------------------------------------------
diff --git a/configure.ac b/configure.ac
index 82db55a..2377fa9 100644
--- a/configure.ac
+++ b/configure.ac
@@ -224,10 +224,10 @@ if test x"$enable_python" != x"no"; then
AC_CHECK_PROG(PY_CHECK,python,yes)
if test x"$PY_CHECK" != x"yes"; then
AC_MSG_ERROR([Cannot find command "python". Please install before make.])
- else
+ else
PYLIBS="-lpython`python -V 2>&1 | awk '{print substr($2,1,3)}'`"
fi
-fi
+fi
AC_SUBST(PYLIBS)
AC_ARG_WITH([python],
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/473c9858/src/driver.cc
----------------------------------------------------------------------
diff --git a/src/driver.cc b/src/driver.cc
index c17ea82..5e0772b 100644
--- a/src/driver.cc
+++ b/src/driver.cc
@@ -112,6 +112,7 @@ void Driver::Init(int argc, char **argv) {
RegisterLayer<SoftmaxLossLayer, int>(kSoftmaxLoss);
RegisterLayer<STanhLayer, int>(kSTanh);
RegisterLayer<SoftmaxLayer, int>(kSoftmax);
+ RegisterLayer<GRULayer, int>(kGRU);
#ifdef USE_LMDB
RegisterLayer<LMDBDataLayer, int>(kLMDBData);
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/473c9858/src/proto/job.proto
----------------------------------------------------------------------
diff --git a/src/proto/job.proto b/src/proto/job.proto
index db55987..7cdc287 100644
--- a/src/proto/job.proto
+++ b/src/proto/job.proto
@@ -195,6 +195,11 @@ message LayerProto {
// share data and grad blob with the single src layer, e.g., relu layer can
// share blobs from conv layer. It is useful for saving memory space.
optional bool share_src_blobs = 22 [default = false];
+
+ optional int32 unroll_len = 23 [default = 1];
+ repeated UnrollConnType unroll_conn_type = 24;
+ repeated int32 shift = 25;
+
// overrides the partition dimension for neural net
optional int32 partition_dim = 60 [default = -1];
// names of parameters shared from other layers
@@ -222,6 +227,7 @@ message LayerProto {
optional RBMProto rbm_conf = 209;
optional ReLUProto relu_conf = 211;
optional SoftmaxProto softmax_conf = 214;
+ optional GRUProto gru_conf = 215;
// configuration for loss layers, id range [300, 400)
optional SoftmaxLossProto softmaxloss_conf = 301;
@@ -436,6 +442,15 @@ message RBMProto {
optional bool gaussian = 3 [default = false]; // use gaussian sampling or not
}
+// Message that stores parameters used by GRULayer
+message GRUProto {
+ // dimension of hidden state for the layer
+ required int32 dim_hidden = 1;
+ // use bias vector or not
+ optional bool bias_term = 2 [default = true];
+}
+
+
// Message that stores parameters used by InnerProductLayer
message InnerProductProto {
// number of outputs for the layer
@@ -594,6 +609,7 @@ enum LayerType {
kSTanh = 212;
kSigmoid = 213;
kSoftmax = 214;
+ kGRU = 215;
// cudnn v3
kCudnnConv = 250;
kCudnnPool = 251;
@@ -706,3 +722,14 @@ enum InitMethod {
// For user defined init method
kUserInit = 101;
}
+
+enum UnrollConnType {
+ // i-th unrolled layer <- (i - shift)-th src unrolled layer
+ kUnrollOneToOne = 1;
+ // i-th unrolled layer <- all src unrolled layers
+ kUnrollOneToAll = 2;
+ // i-th unrolled layer <- last unrolled src layer
+ kUnrollFirstToLast = 3;
+ // customized connection type defined by src_conn
+ kUnrollCustomized = 4;
+}
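To make the enum concrete, here is a hypothetical 3-step unrolling (the i#name prefixes follow the naming scheme used by NeuralNet::Unrolling later in this thread):

  kUnrollOneToOne, shift = 1:  0#src -> 1#dst, 1#src -> 2#dst   (0#dst gets no edge from this src)
  kUnrollOneToAll:             {0#src, 1#src, 2#src} -> i#dst for every i
  kUnrollFirstToLast:          2#src (the last src copy) -> dst, per the comment above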
[10/10] incubator-singa git commit: SINGA-120 - Implemented GRU and BPTT
Posted by wa...@apache.org.
SINGA-120 - Implemented GRU and BPTT
Change back to r * (h x U) for the new-memory computation;
loss decreases to 2.8 per char/unit.
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/a2f4e468
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/a2f4e468
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/a2f4e468
Branch: refs/heads/master
Commit: a2f4e4680bb7b5dc5077a064a757698e30cc5e13
Parents: 6a4c996
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Wed Jan 6 00:35:41 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Wed Jan 6 02:03:50 2016 +0800
----------------------------------------------------------------------
examples/char-rnn/data.py | 8 +
examples/char-rnn/job.conf | 250 +++++++++++++++++++
examples/char-rnn/sample.conf | 212 ++++++++++++++++
include/singa/neuralnet/neuron_layer.h | 15 +-
include/singa/utils/math_blob.h | 2 +
src/neuralnet/connection_layer/rnn_dummy.cc | 67 +++++
src/neuralnet/input_layer/char_rnn.cc | 6 +-
src/neuralnet/input_layer/onehot.cc | 40 +++
src/neuralnet/neuralnet.cc | 73 +++---
src/neuralnet/neuron_layer/dummy.cc | 1 -
src/neuralnet/neuron_layer/embedding.cc | 4 +-
src/neuralnet/neuron_layer/gru.cc | 298 +++++++++++------------
src/neuralnet/output_layer/char_rnn.cc | 51 ++++
src/utils/updater.cc | 6 +
src/worker.cc | 3 +-
15 files changed, 832 insertions(+), 204 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a2f4e468/examples/char-rnn/data.py
----------------------------------------------------------------------
diff --git a/examples/char-rnn/data.py b/examples/char-rnn/data.py
new file mode 100644
index 0000000..4b1c28c
--- /dev/null
+++ b/examples/char-rnn/data.py
@@ -0,0 +1,8 @@
+# pls get linux_input.txt from http://cs.stanford.edu/people/karpathy/char-rnn/
+data = open('linux_input.txt', 'r').read() # should be simple plain text file
+chars = list(set(data))
+data_size, vocab_size = len(data), len(chars)
+print 'data has %d characters, %d unique.' % (data_size, vocab_size)
+with open('vocab.txt', 'w') as fd:
+ fd.write("".join(chars))
+ fd.flush()
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a2f4e468/examples/char-rnn/job.conf
----------------------------------------------------------------------
diff --git a/examples/char-rnn/job.conf b/examples/char-rnn/job.conf
new file mode 100644
index 0000000..2e1c761
--- /dev/null
+++ b/examples/char-rnn/job.conf
@@ -0,0 +1,250 @@
+name:"char-rnn"
+train_steps: 100000
+disp_freq: 100
+#debug: true
+gpu: 0
+train_one_batch {
+ alg: kBPTT
+}
+
+updater {
+ type: kRMSProp
+ rmsprop_conf {
+ rho: 0.95
+ }
+ learning_rate {
+ type: kStep
+ base_lr: 0.002
+ step_conf {
+ gamma: 0.97
+ change_freq: 2000
+ }
+ }
+ clip_low: -5
+ clip_high: 5
+}
+
+neuralnet {
+ unroll_len: 50
+ layer {
+ name: "data"
+ type: kCharRNN
+ unroll_len: 1
+ char_rnn_conf {
+ path: "examples/char-rnn/linux_input.txt"
+ vocab_path:"examples/char-rnn/vocab.txt"
+ batchsize: 50
+ unroll_len: 50
+ }
+ }
+ layer {
+ name: "onehot"
+ type: kOneHot
+ srclayers: "data"
+ unroll_conn_type: kUnrollOneToAll
+ }
+
+ layer {
+ name: "label"
+ type: kRNNLabel
+ srclayers: "data"
+ unroll_conn_type: kUnrollOneToAll
+ }
+
+ layer {
+ name: "gru1"
+ type: kGRU
+ srclayers: "onehot"
+ gru_conf {
+ dim_hidden: 512
+ }
+ param {
+ name: "z_hx"
+ init {
+ type: kUniform
+ low: -0.08
+ high: 0.08
+ }
+ }
+ param {
+ name: "r_hx"
+ init {
+ type: kUniform
+ low: -0.08
+ high: 0.08
+ }
+ }
+ param {
+ name: "c_hx"
+ init {
+ type: kUniform
+ low: -0.08
+ high: 0.08
+ }
+ }
+ param {
+ name: "z_hh"
+ init {
+ type: kUniform
+ low: -0.08
+ high: 0.08
+ }
+ }
+ param {
+ name: "r_hh"
+ init {
+ type: kUniform
+ low: -0.08
+ high: 0.08
+ }
+ }
+ param {
+ name: "c_hh"
+ init {
+ type: kUniform
+ low: -0.08
+ high: 0.08
+ }
+ }
+ param {
+ name: "z_b"
+ init {
+ type: kUniform
+ low: -0.08
+ high: 0.08
+ }
+ }
+ param {
+ name: "r_b"
+ init {
+ type: kUniform
+ low: -0.08
+ high: 0.08
+ }
+ }
+ param {
+ name: "c_b"
+ init {
+ type: kUniform
+ low: -0.08
+ high: 0.08
+ }
+ }
+
+ }
+# layer {
+# name: "gru2"
+# type: kGRU
+# srclayers: "gru1"
+# gru_conf {
+# dim_hidden: 512
+# }
+# param {
+# name: "z_hx2"
+# init {
+# type: kUniform
+# low: -0.08
+# high: 0.08
+# }
+# }
+# param {
+# name: "r_hx2"
+# init {
+# type: kUniform
+# low: -0.08
+# high: 0.08
+# }
+# }
+# param {
+# name: "c_hx2"
+# init {
+# type: kUniform
+# low: -0.08
+# high: 0.08
+# }
+# }
+# param {
+# name: "z_hh2"
+# init {
+# type: kUniform
+# low: -0.08
+# high: 0.08
+# }
+# }
+# param {
+# name: "r_hh2"
+# init {
+# type: kUniform
+# low: -0.08
+# high: 0.08
+# }
+# }
+# param {
+# name: "c_hh2"
+# init {
+# type: kUniform
+# low: -0.08
+# high: 0.08
+# }
+# }
+# param {
+# name: "z_b2"
+# init {
+# type: kUniform
+# low: -0.08
+# high: 0.08
+# }
+# }
+# param {
+# name: "r_b2"
+# init {
+# type: kUniform
+# low: -0.08
+# high: 0.08
+# }
+# }
+# param {
+# name: "c_b2"
+# init {
+# type: kUniform
+# low: -0.08
+# high: 0.08
+# }
+# }
+# }
+#
+ layer {
+ name: "ip1"
+ type: kInnerProduct
+ srclayers: "gru1"
+ innerproduct_conf {
+ num_output: 101
+ }
+ param {
+ name: "w"
+ init {
+ type: kUniform
+ low: -0.08
+ high: 0.08
+ }
+ }
+ param {
+ name: "b"
+ init {
+ type: kUniform
+ low: -0.08
+ high: 0.08
+ }
+ }
+ }
+ layer {
+ name: "loss"
+ type: kSoftmaxLoss
+ srclayers: "ip1"
+ srclayers: "label"
+ }
+}
+
+cluster {
+ workspace: "examples/char-rnn/"
+}
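A note on how this configuration unrolls (my reading of the conf above together with NeuralNet::Unrolling later in this mail): the data layer is unrolled once (unroll_len: 1) while the rest of the net inherits unroll_len: 50, so onehot and label declare unroll_conn_type: kUnrollOneToAll to read from the single data copy, and consecutive GRU copies are chained automatically by the kGRU branch of the unrolling code. Schematically, for i = 0..49:

  data -> i#onehot -> i#gru1 -> i#ip1 -> i#loss <- i#label   (data edges via kUnrollOneToAll)
  (i-1)#gru1 -> i#gru1                                       (recurrent edge added for kGRU)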
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a2f4e468/examples/char-rnn/sample.conf
----------------------------------------------------------------------
diff --git a/examples/char-rnn/sample.conf b/examples/char-rnn/sample.conf
new file mode 100644
index 0000000..b15ef9e
--- /dev/null
+++ b/examples/char-rnn/sample.conf
@@ -0,0 +1,212 @@
+name:"char-rnn"
+test_steps: 100
+#debug: true
+gpu: 0
+checkpoint_path: "examples/char-rnn/checkpoint/step2000-worker0"
+train_one_batch {
+ alg: kBPTT
+}
+
+neuralnet {
+ layer {
+ name: "data"
+ type: kRNNDummy
+ rnn_dummy_conf {
+ shape: 1
+ integer: true
+ low: 0
+ high: 101
+ dynamic_srclayer: "argsort"
+ }
+ }
+ layer {
+ name: "onehot"
+ type: kOneHot
+ srclayers: "data"
+ }
+
+ layer {
+ name: "gru1"
+ type: kGRU
+ srclayers: "onehot"
+ gru_conf {
+ dim_hidden: 512
+ }
+ param {
+ name: "z_hx"
+ init {
+ type: kUniform
+ low: -0.08
+ high: 0.08
+ }
+ }
+ param {
+ name: "r_hx"
+ init {
+ type: kUniform
+ low: -0.08
+ high: 0.08
+ }
+ }
+ param {
+ name: "c_hx"
+ init {
+ type: kUniform
+ low: -0.08
+ high: 0.08
+ }
+ }
+ param {
+ name: "z_hh"
+ init {
+ type: kUniform
+ low: -0.08
+ high: 0.08
+ }
+ }
+ param {
+ name: "r_hh"
+ init {
+ type: kUniform
+ low: -0.08
+ high: 0.08
+ }
+ }
+ param {
+ name: "c_hh"
+ init {
+ type: kUniform
+ low: -0.08
+ high: 0.08
+ }
+ }
+ param {
+ name: "z_b"
+ init {
+ type: kUniform
+ low: -0.08
+ high: 0.08
+ }
+ }
+ param {
+ name: "r_b"
+ init {
+ type: kUniform
+ low: -0.08
+ high: 0.08
+ }
+ }
+ param {
+ name: "c_b"
+ init {
+ type: kUniform
+ low: -0.08
+ high: 0.08
+ }
+ }
+ }
+ layer {
+ name: "gru2"
+ type: kGRU
+ srclayers: "gru1"
+ gru_conf {
+ dim_hidden: 512
+ }
+ param {
+ name: "z_hx2"
+ init {
+ type: kUniform
+ low: -0.08
+ high: 0.08
+ }
+ }
+ param {
+ name: "r_hx2"
+ init {
+ type: kUniform
+ low: -0.08
+ high: 0.08
+ }
+ }
+ param {
+ name: "c_hx2"
+ init {
+ type: kUniform
+ low: -0.08
+ high: 0.08
+ }
+ }
+ param {
+ name: "z_hh2"
+ init {
+ type: kUniform
+ low: -0.08
+ high: 0.08
+ }
+ }
+ param {
+ name: "r_hh2"
+ init {
+ type: kUniform
+ low: -0.08
+ high: 0.08
+ }
+ }
+ param {
+ name: "c_hh2"
+ init {
+ type: kUniform
+ low: -0.08
+ high: 0.08
+ }
+ }
+ }
+
+
+ layer {
+ name: "ip1"
+ type: kInnerProduct
+ srclayers: "gru2"
+ innerproduct_conf {
+ num_output: 101
+ }
+ param {
+ name: "w"
+ init {
+ type: kUniform
+ low: -0.08
+ high: 0.08
+ }
+ }
+ param {
+ name: "b"
+ init {
+ type: kUniform
+ low: -0.08
+ high: 0.08
+ }
+ }
+ }
+ layer {
+ name: "softmax"
+ type: kSoftmax
+ srclayers: "ip1"
+ }
+ layer {
+ name: "argsort"
+ type: kArgSort
+ srclayers: "softmax"
+ }
+ layer {
+ name: "sampling"
+ type: kCharRNNOutput
+ srclayers: "argsort"
+ char_rnn_conf {
+ vocab_path: "examples/char-rnn/vocab.txt"
+ }
+ }
+}
+
+cluster {
+ workspace: "examples/char-rnn/"
+}
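For sampling, this configuration forms a feedback loop (my reading of the conf above together with rnn_dummy.cc later in this mail): the kRNNDummy data layer names "argsort" as its dynamic_srclayer, so at each step the character index chosen by argsort is fed back as the next input:

  data (kRNNDummy) -> onehot -> gru1 -> gru2 -> ip1 -> softmax -> argsort -> sampling
        ^_______________________ dynamic_srclayer ________________|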
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a2f4e468/include/singa/neuralnet/neuron_layer.h
----------------------------------------------------------------------
diff --git a/include/singa/neuralnet/neuron_layer.h b/include/singa/neuralnet/neuron_layer.h
index e1a63a2..c612aed 100644
--- a/include/singa/neuralnet/neuron_layer.h
+++ b/include/singa/neuralnet/neuron_layer.h
@@ -187,16 +187,15 @@ class GRULayer : public NeuronLayer {
}
const std::vector<Param*> GetParams() const override {
+ std::vector<Param*> params{weight_z_hx_, weight_r_hx_,weight_c_hx_,
+ weight_z_hh_, weight_r_hh_, weight_c_hh_};
+
if (bias_z_ != nullptr && bias_r_ != nullptr && bias_c_ != nullptr) {
- std::vector<Param*> params{weight_z_hx_, weight_r_hx_,weight_c_hx_,
- weight_z_hh_, weight_r_hh_, weight_c_hh_,
- bias_z_, bias_r_, bias_c_};
- return params;
- } else {
- std::vector<Param*> params{weight_z_hx_, weight_r_hx_,weight_c_hx_,
- weight_z_hh_, weight_r_hh_, weight_c_hh_};
- return params;
+ params.push_back(bias_z_);
+ params.push_back(bias_r_);
+ params.push_back(bias_c_);
}
+ return params;
}
private:
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a2f4e468/include/singa/utils/math_blob.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/math_blob.h b/include/singa/utils/math_blob.h
index bdaf914..e151c37 100644
--- a/include/singa/utils/math_blob.h
+++ b/include/singa/utils/math_blob.h
@@ -267,6 +267,8 @@ void Map(const Blob<Dtype> & A, Blob<Dtype> * B) {
} else {
#ifdef USE_GPU
gpu_e_f<Op>(A.count(), A.gpu_data(), B->mutable_gpu_data());
+#else
+ LOG(ERROR) << "Not implemented";
#endif // USE_GPU
}
}
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a2f4e468/src/neuralnet/connection_layer/rnn_dummy.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/connection_layer/rnn_dummy.cc b/src/neuralnet/connection_layer/rnn_dummy.cc
new file mode 100644
index 0000000..865066f
--- /dev/null
+++ b/src/neuralnet/connection_layer/rnn_dummy.cc
@@ -0,0 +1,67 @@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+#include "singa/neuralnet/connection_layer.h"
+#include "singa/utils/math_blob.h"
+
+namespace singa {
+
+void RNNDummyLayer::Setup(const LayerProto& conf,
+ const vector<Layer*>& srclayers) {
+ Layer::Setup(conf, srclayers);
+ dynamic_src_ = AddPrefixSuffix(unroll_index(), partition_id(),
+ conf.rnn_dummy_conf().dynamic_srclayer());
+ LOG(ERROR) << dynamic_src_;
+ vector<int> shape;
+ for (int s : conf.rnn_dummy_conf().shape())
+ shape.push_back(s);
+ integer_ = conf.rnn_dummy_conf().integer();
+ low_ = conf.rnn_dummy_conf().low();
+ high_ = conf.rnn_dummy_conf().high();
+ // if no src layer, then it will generate data by itself based on shape
+ // and random range
+ if (srclayers.size() == 0) {
+ CHECK(shape.size());
+ CHECK_NE(low_, high_);
+ data_.Reshape(shape);
+ srclayer_ = nullptr;
+ } else {
+ srclayer_ = srclayers.at(0);
+ data_.ReshapeLike(srclayer_->data(this));
+ data_.ShareData(srclayer_->mutable_data(this), false);
+ }
+}
+
+void RNNDummyLayer::ComputeFeature(int flag, const vector<Layer*>& srclayers) {
+ if (srclayers.size() == 0) {
+ SampleUniform(low_, high_, &data_);
+ if (integer_) {
+ for (int i = 0; i < data_.count(); i ++) {
+ data_.mutable_cpu_data()[i] = floor(data_.cpu_data()[i]);
+ }
+ }
+ } else if (srclayer_ != srclayers.at(0)) {
+ srclayer_ = srclayers.at(0);
+ data_.ShareData(srclayer_->mutable_data(this), false);
+ }
+}
+} // namespace singa
+
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a2f4e468/src/neuralnet/input_layer/char_rnn.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/input_layer/char_rnn.cc b/src/neuralnet/input_layer/char_rnn.cc
index cc13b1b..8a56711 100644
--- a/src/neuralnet/input_layer/char_rnn.cc
+++ b/src/neuralnet/input_layer/char_rnn.cc
@@ -69,6 +69,7 @@ void CharRNNInputLayer::ComputeFeature(int flag,
// decide the start pos of each instance in one mini-batch
int max_offset = buf_.length() / batchsize_;
CHECK_GT(max_offset, unroll_len_);
+ LOG(ERROR) << "Max iteration per epoch = " << max_offset / unroll_len_;
for (int i = 0; i < batchsize_; i ++) {
start_.push_back(i * max_offset);
}
@@ -77,7 +78,7 @@ void CharRNNInputLayer::ComputeFeature(int flag,
for (int l = 0; l < unroll_len_ + 1; l++) {
float* ptr = datavec_[l]->mutable_cpu_data();
for (int i = 0; i < batchsize_; i++) {
- ptr[i] = static_cast<float>(char2index_.at(buf_[start_[i] + l]));
+ ptr[i] = static_cast<float>(char2index_.at(buf_[start_[i] + offset_ + l]));
}
}
offset_ += unroll_len_;
@@ -87,9 +88,6 @@ void CharRNNInputLayer::ComputeFeature(int flag,
// std::shuffle(start_.begin(), start_.end(), g);
offset_ = 0;
// return -1;
- } else {
- // return 0;
}
}
-
} // namespace singa
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a2f4e468/src/neuralnet/input_layer/onehot.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/input_layer/onehot.cc b/src/neuralnet/input_layer/onehot.cc
new file mode 100644
index 0000000..056656a
--- /dev/null
+++ b/src/neuralnet/input_layer/onehot.cc
@@ -0,0 +1,40 @@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+#include "singa/neuralnet/input_layer.h"
+
+namespace singa {
+void OneHotLayer::Setup(const LayerProto& conf,
+ const vector<Layer*>& srclayers) {
+ InputLayer::Setup(conf, srclayers);
+ batchsize_ = srclayers.at(0)->data(unroll_index()).shape(0);
+ dim_ = 101 ; // proto.onehot_conf().vocab_size();
+ data_.Reshape(batchsize_, dim_);
+}
+
+void OneHotLayer::ComputeFeature(int flag, const vector<Layer*>& srclayers) {
+ float* ptr = data_.mutable_cpu_data();
+ memset(ptr, 0, sizeof(float) * data_.count());
+ const float* idx = srclayers[0]->data(unroll_index()).cpu_data();
+ for (int i = 0; i < batchsize_; i++) {
+ ptr[i * dim_ + static_cast<int>(idx[i])] = 1;
+ }
+}
+} // namespace singa
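A minimal illustration of OneHotLayer::ComputeFeature (using dim_ = 4 for brevity; the layer above hardcodes dim_ = 101 in place of a vocab_size config): an input index of 3 yields the row [0, 0, 0, 1], which is exactly how the test CSV rows earlier in this thread encode their inputs.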
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a2f4e468/src/neuralnet/neuralnet.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuralnet.cc b/src/neuralnet/neuralnet.cc
index 49978a1..b045e06 100644
--- a/src/neuralnet/neuralnet.cc
+++ b/src/neuralnet/neuralnet.cc
@@ -19,13 +19,12 @@
*
*************************************************************/
-#include "singa/neuralnet/neuralnet.h"
+#include "singa/neuralnet/neuralnet.h"
+#include <unordered_map>
#include <algorithm>
#include <queue>
#include "singa/utils/singleton.h"
-#include <unordered_map>
-using namespace std;
namespace singa {
@@ -60,7 +59,7 @@ const NetProto NetConfPreprocess(const NetProto& conf) {
}
NeuralNet* NeuralNet::Create(const NetProto& net_conf, Phase phase,
- int npartitions) {
+ int npartitions) {
const NetProto& full_net_conf = NetConfPreprocess(net_conf);
NetProto conf = full_net_conf;
conf.clear_layer();
@@ -99,21 +98,21 @@ NeuralNet* NeuralNet::Create(const NetProto& net_conf, Phase phase,
layer_conf->set_partition_dim(net_conf.partition_dim());
}
// LOG(INFO) << "Before unrolling: \n" << conf.DebugString();
- conf = Unrolling (conf);
+ conf = Unrolling(conf);
// Copy shared parameters for sharing param conf
std::vector<ParamProto*> shares;
std::unordered_map<string, ParamProto*> name2param;
- for (int index = 0; index < conf.layer_size();index ++) {
- LayerProto* layer = conf.mutable_layer(index);
- for (int i = 0; i < layer->param_size(); i++) {
- ParamProto* param = layer->mutable_param(i);
+ for (int index = 0; index < conf.layer_size(); index++) {
+ LayerProto* layer = conf.mutable_layer(index);
+ for (int i = 0; i < layer->param_size(); i++) {
+ ParamProto* param = layer->mutable_param(i);
CHECK(name2param.find(param->name()) == name2param.end())
<< "Repeated param = " << param->name();
- name2param[param->name()] = param;
- if (param->has_share_from() && param->share_from() != "")
- shares.push_back(param);
- }
+ name2param[param->name()] = param;
+ if (param->has_share_from() && param->share_from() != "")
+ shares.push_back(param);
+ }
}
for (auto param : shares) {
const std::string from = param->share_from();
@@ -135,32 +134,30 @@ const NetProto NeuralNet::Unrolling(const NetProto& net_conf) {
NetProto conf;
std::vector<std::vector<int>> layer_groups;
- std::unordered_map<string,int> org_layer_names;
+ std::unordered_map<string, int> org_layer_names;
for (int index = 0; index < net_conf.layer_size(); index ++) {
const LayerProto& org_layer = net_conf.layer(index);
- org_layer_names[org_layer.name()] = index; // layer_name -> index
+ org_layer_names[org_layer.name()] = index; // layer_name -> index
std::vector<int> layer_group;
- for (int i = 0; i < org_layer.unroll_len(); i ++) { // unroll
+ for (int i = 0; i < org_layer.unroll_len(); i ++) { // unroll
LayerProto* unroll_layer = conf.add_layer();
- unroll_layer->CopyFrom(org_layer); // create a new layer conf
- // if (org_layer.unroll_len() > 1) {
- // update layer names
- std::stringstream sstm;
- sstm << i << '#' << unroll_layer->name();
- unroll_layer->set_name(sstm.str());
- unroll_layer->set_unroll_index(i);
- // update layer parameter sharing
- for (int j = 0; j < unroll_layer->param_size(); j ++) {
- ParamProto* param = unroll_layer->mutable_param(j);
- if (i > 0) {
- param->set_share_from("0#" + param->name());
- }
- std::stringstream sstm1;
- sstm1 << i << '#' << param->name();
- param->set_name(sstm1.str());
+ unroll_layer->CopyFrom(org_layer); // create a new layer conf
+ // update layer names
+ std::stringstream sstm;
+ sstm << i << '#' << unroll_layer->name();
+ unroll_layer->set_name(sstm.str());
+ unroll_layer->set_unroll_index(i);
+ // update layer parameter sharing
+ for (int j = 0; j < unroll_layer->param_size(); j ++) {
+ ParamProto* param = unroll_layer->mutable_param(j);
+ if (i > 0) {
+ param->set_share_from("0#" + param->name());
}
- // }
+ std::stringstream sstm1;
+ sstm1 << i << '#' << param->name();
+ param->set_name(sstm1.str());
+ }
// clear unrolling related fields
unroll_layer->clear_unroll_len();
unroll_layer->clear_unroll_conn_type();
@@ -176,7 +173,7 @@ const NetProto NeuralNet::Unrolling(const NetProto& net_conf) {
for (int index = 0; index < net_conf.layer_size(); index ++) {
const LayerProto& org_layer = net_conf.layer(index);
if (org_layer.srclayers_size() == 0)
- continue; // no src layer
+ continue; // no src layer
for (int i = 0; i < org_layer.srclayers_size(); i ++) {
const string& org_layer_src = org_layer.srclayers(i);
singa::UnrollConnType unroll_conn_type = kUnrollOneToOne;
@@ -197,7 +194,7 @@ const NetProto NeuralNet::Unrolling(const NetProto& net_conf) {
unroll_layer->add_srclayers(conf.layer(unroll_layer_src).name());
}
} else if (unroll_conn_type == kUnrollOneToOne) {
- if (j < shift) continue; // no need to connect with the src
+ if (j < shift) continue; // no need to connect with the src
int unroll_layer_src = unroll_layer_srcs[j - shift];
unroll_layer->add_srclayers(conf.layer(unroll_layer_src).name());
} else if (unroll_conn_type == kUnrollFirstToLast) {
@@ -209,16 +206,14 @@ const NetProto NeuralNet::Unrolling(const NetProto& net_conf) {
}
}
- //TODO(fanju): add LSTM when it is ready
- if (org_layer.type() == kGRU) { // connect GRU layers
+ // TODO(fanju): add LSTM when it is ready
+ if (org_layer.type() == kGRU) { // connect GRU layers
for (unsigned int j = 1; j < layer_groups[index].size(); j ++) {
LayerProto* unroll_layer = conf.mutable_layer(layer_groups[index][j]);
string srcname = conf.layer(layer_groups[index][j-1]).name();
unroll_layer->add_srclayers(srcname);
- // LOG(ERROR) << "connect " << unroll_layer->name() << " from " << srcname;
}
}
-
}
return conf;
}
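Concretely, the naming scheme above turns a layer "gru1" with unroll_len 50 into copies "0#gru1" ... "49#gru1", and a param such as "z_hx" (name taken from the job.conf example) in copy i > 0 is renamed "i#z_hx" with share_from = "0#z_hx", so every unrolled copy shares the weights of copy 0:

  0#gru1:  param 0#z_hx
  1#gru1:  param 1#z_hx  (share_from: 0#z_hx)
  ...
  49#gru1: param 49#z_hx (share_from: 0#z_hx)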
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a2f4e468/src/neuralnet/neuron_layer/dummy.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuron_layer/dummy.cc b/src/neuralnet/neuron_layer/dummy.cc
index 9ccb179..936bb5e 100644
--- a/src/neuralnet/neuron_layer/dummy.cc
+++ b/src/neuralnet/neuron_layer/dummy.cc
@@ -45,7 +45,6 @@ void DummyLayer::Setup(const LayerProto& proto,
if (proto.dummy_conf().output()) { // use as output layer
output_ = true;
}
-
}
void DummyLayer::ComputeFeature(int flag, const vector<Layer*>& srclayers) {
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a2f4e468/src/neuralnet/neuron_layer/embedding.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuron_layer/embedding.cc b/src/neuralnet/neuron_layer/embedding.cc
index 00e9139..c980c54 100644
--- a/src/neuralnet/neuron_layer/embedding.cc
+++ b/src/neuralnet/neuron_layer/embedding.cc
@@ -65,8 +65,8 @@ void EmbeddingLayer::ComputeFeature(int flag, const vector<Layer*>& srclayers) {
}
}
-void EmbeddingLayer::ComputeGradient(int flag, const vector<Layer*>& srclayers)
-{
+void EmbeddingLayer::ComputeGradient(int flag,
+ const vector<Layer*>& srclayers) {
const float* word_idx = srclayers.at(0)->data(unroll_index()).cpu_data();
auto context = Singleton<Context>::Instance();
if ((flag & kAggGrad) == 0)
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a2f4e468/src/neuralnet/neuron_layer/gru.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuron_layer/gru.cc b/src/neuralnet/neuron_layer/gru.cc
index cf7425b..da2f9c5 100644
--- a/src/neuralnet/neuron_layer/gru.cc
+++ b/src/neuralnet/neuron_layer/gru.cc
@@ -19,14 +19,12 @@
*
*************************************************************/
-#include "singa/neuralnet/neuron_layer.h"
-
#include <glog/logging.h>
+#include "singa/neuralnet/neuron_layer.h"
#include "singa/utils/singleton.h"
#include "singa/utils/math_blob.h"
#include "singa/utils/singa_op.h"
-#include <iostream>
using namespace std;
namespace singa {
@@ -57,10 +55,10 @@ void GRULayer::Setup(const LayerProto& conf,
CHECK_LE(srclayers.size(), 2);
const auto& src = srclayers[0]->data(this);
- batchsize_ = src.shape()[0]; // size of batch
- vdim_ = src.count() / (batchsize_); // dimension of input
+ batchsize_ = src.shape()[0]; // size of batch
+ vdim_ = src.count() / (batchsize_); // dimension of input
- hdim_ = layer_conf_.gru_conf().dim_hidden(); // dimension of hidden state
+ hdim_ = layer_conf_.gru_conf().dim_hidden(); // dimension of hidden state
data_.Reshape(vector<int>{batchsize_, hdim_});
grad_.ReshapeLike(data_);
@@ -77,9 +75,9 @@ void GRULayer::Setup(const LayerProto& conf,
weight_c_hh_ = Param::Create(conf.param(5));
if (conf.param_size() > 6) {
- bias_z_ = Param::Create(conf.param(6));
- bias_r_ = Param::Create(conf.param(7));
- bias_c_ = Param::Create(conf.param(8));
+ bias_z_ = Param::Create(conf.param(6));
+ bias_r_ = Param::Create(conf.param(7));
+ bias_c_ = Param::Create(conf.param(8));
}
weight_z_hx_->Setup(vector<int>{hdim_, vdim_});
@@ -91,168 +89,170 @@ void GRULayer::Setup(const LayerProto& conf,
weight_c_hh_->Setup(vector<int>{hdim_, hdim_});
if (conf.param_size() > 6) {
- bias_z_->Setup(vector<int>{hdim_});
- bias_r_->Setup(vector<int>{hdim_});
- bias_c_->Setup(vector<int>{hdim_});
+ bias_z_->Setup(vector<int>{hdim_});
+ bias_r_->Setup(vector<int>{hdim_});
+ bias_c_->Setup(vector<int>{hdim_});
}
update_gate = new Blob<float>(batchsize_, hdim_);
reset_gate = new Blob<float>(batchsize_, hdim_);
- // reset gate x context
- reset_context = new Blob<float>(batchsize_, hdim_);
new_memory = new Blob<float>(batchsize_, hdim_);
}
void GRULayer::ComputeFeature(int flag,
const vector<Layer*>& srclayers) {
- CHECK_LE(srclayers.size(), 2);
-
- // Do transpose
- Blob<float> *w_z_hx_t = Transpose (weight_z_hx_->data());
- Blob<float> *w_z_hh_t = Transpose (weight_z_hh_->data());
- Blob<float> *w_r_hx_t = Transpose (weight_r_hx_->data());
- Blob<float> *w_r_hh_t = Transpose (weight_r_hh_->data());
- Blob<float> *w_c_hx_t = Transpose (weight_c_hx_->data());
- Blob<float> *w_c_hh_t = Transpose (weight_c_hh_->data());
-
- // Prepare the data input and the context
- const auto& src = srclayers[0]->data(this);
- const Blob<float> *context;
- if (srclayers.size() == 1) { // only have data input
- context = new Blob<float>(batchsize_, hdim_);
- } else { // have data input & context
- context = &srclayers[1]->data(this);
- }
-
- // Compute the update gate
- GEMM(1.0f, 0.0f, src,*w_z_hx_t,update_gate);
- if (bias_z_ != nullptr)
- MVAddRow(1.0f,1.0f,bias_z_->data(),update_gate);
- GEMM(1.0f, 1.0f, *context, *w_z_hh_t, update_gate);
- Map<op::Sigmoid<float>,float>(*update_gate, update_gate);
-
- // Compute the reset gate
- GEMM(1.0f, 0.0f, src, *w_r_hx_t, reset_gate);
- if (bias_r_ != nullptr)
- MVAddRow(1.0f,1.0f, bias_r_->data(),reset_gate);
- GEMM(1.0f, 1.0f, *context, *w_r_hh_t, reset_gate);
- Map<op::Sigmoid<float>,float>(*reset_gate, reset_gate);
-
- // Compute the new memory
- Mult<float>(*reset_gate, *context, reset_context);
- GEMM(1.0f, 0.0f, *reset_context, *w_c_hh_t, new_memory);
- GEMM(1.0f, 1.0f, src, *w_c_hx_t, new_memory);
- if (bias_c_ != nullptr)
- MVAddRow(1.0f, 1.0f, bias_c_->data(), new_memory);
- Map<op::Tanh<float>,float>(*new_memory, new_memory);
-
- Sub(*new_memory, *context, &data_);
+ CHECK_LE(srclayers.size(), 2);
+
+ // Do transpose
+ Blob<float> *w_z_hx_t = Transpose(weight_z_hx_->data());
+ Blob<float> *w_z_hh_t = Transpose(weight_z_hh_->data());
+ Blob<float> *w_r_hx_t = Transpose(weight_r_hx_->data());
+ Blob<float> *w_r_hh_t = Transpose(weight_r_hh_->data());
+ Blob<float> *w_c_hx_t = Transpose(weight_c_hx_->data());
+ Blob<float> *w_c_hh_t = Transpose(weight_c_hh_->data());
+
+ // Prepare the data input and the context
+ const auto& src = srclayers[0]->data(this);
+ const Blob<float> *context;
+ if (srclayers.size() == 1) { // only have data input
+ context = new Blob<float>(batchsize_, hdim_);
+ } else { // have data input & context
+ context = &srclayers[1]->data(this);
+ }
+
+ // Compute the update gate
+ GEMM(1.0f, 0.0f, src, *w_z_hx_t, update_gate);
+ if (bias_z_ != nullptr)
+ MVAddRow(1.0f, 1.0f, bias_z_->data(), update_gate);
+ GEMM(1.0f, 1.0f, *context, *w_z_hh_t, update_gate);
+ Map<op::Sigmoid<float>, float>(*update_gate, update_gate);
+
+ // Compute the reset gate
+ GEMM(1.0f, 0.0f, src, *w_r_hx_t, reset_gate);
+ if (bias_r_ != nullptr)
+ MVAddRow(1.0f, 1.0f, bias_r_->data(), reset_gate);
+ GEMM(1.0f, 1.0f, *context, *w_r_hh_t, reset_gate);
+ Map<op::Sigmoid<float>, float>(*reset_gate, reset_gate);
+
+ // Compute the new memory
+ GEMM(1.0f, 0.0f, src, *w_c_hx_t, new_memory);
+ if (bias_c_ != nullptr)
+ MVAddRow(1.0f, 1.0f, bias_c_->data(), new_memory);
+ Mult<float>(*reset_gate, *new_memory, new_memory);
+ GEMM(1.0f, 1.0f, *context, *w_c_hh_t, new_memory);
+ Map<op::Tanh<float>, float>(*new_memory, new_memory);
+
+
+ Sub(*context, *new_memory, &data_);
Mult(data_, *update_gate, &data_);
- AXPY(1.0f, *context, &data_);
+ Add(data_, *new_memory, &data_);
- // delete the pointers
- if (srclayers.size() == 1)
+ // delete the pointers
+ if (srclayers.size() == 1)
delete context;
- delete w_z_hx_t;
- delete w_z_hh_t;
- delete w_r_hx_t;
- delete w_r_hh_t;
- delete w_c_hx_t;
- delete w_c_hh_t;
+ delete w_z_hx_t;
+ delete w_z_hh_t;
+ delete w_r_hx_t;
+ delete w_r_hh_t;
+ delete w_c_hx_t;
+ delete w_c_hh_t;
}
void GRULayer::ComputeGradient(int flag,
const vector<Layer*>& srclayers) {
- CHECK_LE(srclayers.size(), 2);
- // agg grad from two dst layers
+ CHECK_LE(srclayers.size(), 2);
+ // agg grad from two dst layers, gradvec_[0] is grad_
AXPY(1.0f, *gradvec_[1], &grad_);
- float beta = 1.0f; // agg param gradients
-
- Layer* ilayer = srclayers[0]; // input layer
- Layer* clayer = nullptr; // context layer
- // Prepare the data input and the context
- const Blob<float>& src = ilayer->data(this);
- const Blob<float> *context;
- if (srclayers.size() == 1) { // only have data input
- context = new Blob<float>(batchsize_, hdim_);
- } else { // have data input & context
+ float beta = 1.0f; // agg param gradients
+
+ Layer* ilayer = srclayers[0]; // input layer
+ Layer* clayer = nullptr; // context layer
+ // Prepare the data input and the context
+ const Blob<float>& src = ilayer->data(this);
+ const Blob<float> *context;
+ if (srclayers.size() == 1) { // only have data input
+ context = new Blob<float>(batchsize_, hdim_);
+ } else { // have data input & context
clayer = srclayers[1];
- context = &(clayer->data(this));
- }
-
- // Prepare gradient of output neurons
- Blob<float> *grad_t = Transpose (grad_);
-
- // Compute intermediate gradients which are used for other computations
- Blob<float> dugatedz (batchsize_, hdim_);
- Map<singa::op::SigmoidGrad<float>, float>(*update_gate, &dugatedz);
- Blob<float> drgatedr (batchsize_, hdim_);
- Map<singa::op::SigmoidGrad<float>, float>(*reset_gate, &drgatedr);
- Blob<float> dnewmdc (batchsize_, hdim_);
- Map<singa::op::TanhGrad<float>, float>(*new_memory, &dnewmdc);
-
- Blob<float> dLdz (batchsize_, hdim_);
- Sub<float>(*new_memory, *context, &dLdz);
- Mult<float>(dLdz, grad_, &dLdz);
- Mult<float>(dLdz, dugatedz, &dLdz);
-
- Blob<float> dLdc (batchsize_,hdim_);
- Mult(grad_, *update_gate, &dLdc);
- Mult(dLdc, dnewmdc, &dLdc);
-
- Blob<float> reset_dLdc (batchsize_,hdim_);
- GEMM(1.0f, 0.0f, dLdc, weight_c_hh_->data(), &reset_dLdc);
-
- Blob<float> dLdr (batchsize_, hdim_);
- Mult(reset_dLdc, *context, &dLdr);
- Mult(dLdr, drgatedr, &dLdr);
-
- // Compute gradients for parameters of update gate
- Blob<float> *dLdz_t = Transpose(dLdz);
- GEMM(1.0f, beta, *dLdz_t, src, weight_z_hx_->mutable_grad());
- GEMM(1.0f, beta, *dLdz_t, *context, weight_z_hh_->mutable_grad());
- if (bias_z_ != nullptr)
- MVSumRow<float>(1.0f, beta, dLdz, bias_z_->mutable_grad());
- delete dLdz_t;
-
- // Compute gradients for parameters of reset gate
- Blob<float> *dLdr_t = Transpose(dLdr);
- GEMM(1.0f, beta, *dLdr_t, src, weight_r_hx_->mutable_grad());
- GEMM(1.0f, beta, *dLdr_t, *context, weight_r_hh_->mutable_grad());
- if (bias_r_ != nullptr)
- MVSumRow(1.0f, beta, dLdr, bias_r_->mutable_grad());
- delete dLdr_t;
-
- // Compute gradients for parameters of new memory
- Blob<float> *dLdc_t = Transpose(dLdc);
- GEMM(1.0f, beta, *dLdc_t, src, weight_c_hx_->mutable_grad());
- GEMM(1.0f, beta, *dLdc_t, *reset_context, weight_c_hh_->mutable_grad());
- if (bias_c_ != nullptr)
- MVSumRow(1.0f, beta, dLdc, bias_c_->mutable_grad());
- delete dLdc_t;
-
- // Compute gradients for data input layer
- if (srclayers[0]->mutable_grad(this) != nullptr) {
- GEMM(1.0f,0.0f, dLdc, weight_c_hx_->data(), ilayer->mutable_grad(this));
- GEMM(1.0f,1.0f, dLdz, weight_z_hx_->data(), ilayer->mutable_grad(this));
- GEMM(1.0f,1.0f, dLdr, weight_r_hx_->data(), ilayer->mutable_grad(this));
- }
-
- if (clayer != nullptr && clayer->mutable_grad(this) != nullptr) {
- // Compute gradients for context layer
- Mult(reset_dLdc, *reset_gate, clayer->mutable_grad(this));
- GEMM(1.0f, 1.0f, dLdr, weight_r_hh_->data(), clayer->mutable_grad(this));
- GEMM(1.0f, 1.0f, dLdz, weight_z_hh_->data(), clayer->mutable_grad(this));
- AXPY(-1.0f, *update_gate, clayer->mutable_grad(this));
+ context = &(clayer->data(this));
+ }
+
+ // Compute intermediate gradients which are used for other computations
+ Blob<float> dugatedz(batchsize_, hdim_);
+ Map<singa::op::SigmoidGrad<float>, float>(*update_gate, &dugatedz);
+ Blob<float> drgatedr(batchsize_, hdim_);
+ Map<singa::op::SigmoidGrad<float>, float>(*reset_gate, &drgatedr);
+ Blob<float> dnewmdc(batchsize_, hdim_);
+ Map<singa::op::TanhGrad<float>, float>(*new_memory, &dnewmdc);
+
+ Blob<float> dLdz(batchsize_, hdim_);
+ Sub<float>(*context, *new_memory, &dLdz);
+ Mult<float>(dLdz, grad_, &dLdz);
+ Mult<float>(dLdz, dugatedz, &dLdz);
+
+ Blob<float> dLdc(batchsize_, hdim_);
+ Blob<float> z1(batchsize_, hdim_);
+ z1.SetValue(1.0f);
+ AXPY<float>(-1.0f, *update_gate, &z1);
+ Mult(grad_, z1, &dLdc);
+ Mult(dLdc, dnewmdc, &dLdc);
+
+ Blob<float> reset_dLdc(batchsize_, hdim_);
+ Mult(dLdc, *reset_gate, &reset_dLdc);
+
+ Blob<float> dLdr(batchsize_, hdim_);
+ Blob<float> cprev(batchsize_, hdim_);
+ GEMM(1.0f, 0.0f, *context, weight_c_hh_->data().T(), &cprev);
+ Mult(dLdc, cprev, &dLdr);
+ Mult(dLdr, drgatedr, &dLdr);
+
+ // Compute gradients for parameters of update gate
+ Blob<float> *dLdz_t = Transpose(dLdz);
+ GEMM(1.0f, beta, *dLdz_t, src, weight_z_hx_->mutable_grad());
+ GEMM(1.0f, beta, *dLdz_t, *context, weight_z_hh_->mutable_grad());
+ if (bias_z_ != nullptr)
+ MVSumRow<float>(1.0f, beta, dLdz, bias_z_->mutable_grad());
+ delete dLdz_t;
+
+ // Compute gradients for parameters of reset gate
+ Blob<float> *dLdr_t = Transpose(dLdr);
+ GEMM(1.0f, beta, *dLdr_t, src, weight_r_hx_->mutable_grad());
+ GEMM(1.0f, beta, *dLdr_t, *context, weight_r_hh_->mutable_grad());
+ if (bias_r_ != nullptr)
+ MVSumRow(1.0f, beta, dLdr, bias_r_->mutable_grad());
+ delete dLdr_t;
+
+ // Compute gradients for parameters of new memory
+ Blob<float> *dLdc_t = Transpose(dLdc);
+ GEMM(1.0f, beta, *dLdc_t, src, weight_c_hx_->mutable_grad());
+ if (bias_c_ != nullptr)
+ MVSumRow(1.0f, beta, dLdc, bias_c_->mutable_grad());
+ delete dLdc_t;
+
+ Blob<float> *reset_dLdc_t = Transpose(reset_dLdc);
+ GEMM(1.0f, beta, *reset_dLdc_t, *context, weight_c_hh_->mutable_grad());
+ delete reset_dLdc_t;
+
+ // Compute gradients for data input layer
+ if (srclayers[0]->mutable_grad(this) != nullptr) {
+ GEMM(1.0f, 0.0f, dLdc, weight_c_hx_->data(), ilayer->mutable_grad(this));
+ GEMM(1.0f, 1.0f, dLdz, weight_z_hx_->data(), ilayer->mutable_grad(this));
+ GEMM(1.0f, 1.0f, dLdr, weight_r_hx_->data(), ilayer->mutable_grad(this));
+ }
+
+ if (clayer != nullptr && clayer->mutable_grad(this) != nullptr) {
+ // Compute gradients for context layer
+ GEMM(1.0f, 0.0f, reset_dLdc, weight_c_hh_->data(),
+ clayer->mutable_grad(this));
+ GEMM(1.0f, 1.0f, dLdr, weight_r_hh_->data(), clayer->mutable_grad(this));
+ GEMM(1.0f, 1.0f, dLdz, weight_z_hh_->data(), clayer->mutable_grad(this));
+ Add(clayer->grad(this), *update_gate, clayer->mutable_grad(this));
// LOG(ERROR) << "grad to prev gru " << Asum(clayer->grad(this));
- }
+ }
- if (srclayers.size() == 1)
+ if (srclayers.size() == 1)
delete context;
- else
- context = NULL;
- delete grad_t;
}
} // namespace singa
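Reading the rewritten ComputeFeature and ComputeGradient together, the forward pass now computes (restating the code above in math, with \odot the element-wise product, x the input, and h_{t-1} the context):

    z   = \sigma(x W_{zx}^{\top} + h_{t-1} W_{zh}^{\top} + b_z)
    r   = \sigma(x W_{rx}^{\top} + h_{t-1} W_{rh}^{\top} + b_r)
    c   = \tanh(r \odot (x W_{cx}^{\top} + b_c) + h_{t-1} W_{ch}^{\top})
    h_t = z \odot (h_{t-1} - c) + c = z \odot h_{t-1} + (1 - z) \odot c

and the backward pass materializes the intermediate gradients

    \partial L/\partial z = \partial L/\partial h_t \odot (h_{t-1} - c) \odot z \odot (1 - z)
    \partial L/\partial c = \partial L/\partial h_t \odot (1 - z) \odot (1 - c^2)
    \partial L/\partial r = (\partial L/\partial c \odot (h_{t-1} W_{ch}^{\top})) \odot r \odot (1 - r)

before folding them into the parameter and src-layer gradients via GEMM.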
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a2f4e468/src/neuralnet/output_layer/char_rnn.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/output_layer/char_rnn.cc b/src/neuralnet/output_layer/char_rnn.cc
new file mode 100644
index 0000000..c3f1733
--- /dev/null
+++ b/src/neuralnet/output_layer/char_rnn.cc
@@ -0,0 +1,51 @@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+#include <algorithm>
+#include <iostream>
+#include <fstream>
+#include "singa/neuralnet/output_layer.h"
+
+namespace singa {
+
+void CharRNNOutputLayer::Setup(const LayerProto& proto,
+ const vector<Layer*>& srclayers) {
+ CHECK_EQ(srclayers.size(), 1);
+ OutputLayer::Setup(proto, srclayers);
+ std::ifstream fin;
+ const string path = proto.char_rnn_conf().vocab_path();
+ fin.open(path);
+ CHECK(fin.is_open()) << "Can't open vocab_path = " << path;
+ std::stringstream stream;
+ stream << fin.rdbuf();
+ vocab_ = stream.str();
+ fin.close();
+}
+
+void CharRNNOutputLayer::ComputeFeature(int flag,
+ const vector<Layer*>& srclayers) {
+ const float* dptr = srclayers[0]->data(this).cpu_data();
+ for (int i = 0; i < srclayers[0]->data(this).shape(0); i++) {
+ std::cout<<vocab_[static_cast<int>(dptr[i])];
+ }
+}
+
+} // namespace singa
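ComputeFeature above assumes the source layer already emits one character index per batch row (e.g., from an argsort or sampling layer), so decoding is a plain lookup into the vocab string. A self-contained sketch with made-up values:

    #include <iostream>
    #include <string>

    int main() {
      std::string vocab = "abcdefgh";  // hypothetical vocabulary string
      float indices[] = {2, 0, 7};     // hypothetical predictions, one per row
      for (float idx : indices)
        std::cout << vocab[static_cast<int>(idx)];  // prints "cah"
      return 0;
    }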
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a2f4e468/src/utils/updater.cc
----------------------------------------------------------------------
diff --git a/src/utils/updater.cc b/src/utils/updater.cc
index 200670a..1b3e26c 100644
--- a/src/utils/updater.cc
+++ b/src/utils/updater.cc
@@ -140,6 +140,9 @@ void SGDUpdater::Update(int step, Param* param, float grad_scale) {
/***********************Nesterov******************************/
void NesterovUpdater::Update(int step, Param* param, float grad_scale) {
+ if (clip_high_ > clip_low_)
+ Clip(clip_low_, clip_high_, param);
+
Shape<1> s = Shape1(param->size());
Tensor<cpu, 1> data(param->mutable_cpu_data(), s);
Tensor<cpu, 1> grad(param->mutable_cpu_grad(), s);
@@ -181,6 +184,9 @@ void RMSPropUpdater::Init(const UpdaterProto& proto) {
}
void RMSPropUpdater::Update(int step, Param* param, float grad_scale) {
+ if (clip_high_ > clip_low_)
+ Clip(clip_low_, clip_high_, param);
+
Shape<1> s=Shape1(param->size());
Tensor<cpu, 1> data(param->mutable_cpu_data(), s);
Tensor<cpu, 1> grad(param->mutable_cpu_grad(), s);
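Both updaters now clamp parameter gradients into [clip_low_, clip_high_] before applying the update. The Clip body is not part of this excerpt; an element-wise CPU version would plausibly look like this (a sketch, not the committed code):

    void Updater::Clip(const float low, const float high, Param* param) {
      float* grad = param->mutable_cpu_grad();
      for (int i = 0; i < param->size(); i++) {
        if (grad[i] > high) grad[i] = high;     // clamp large positive values
        else if (grad[i] < low) grad[i] = low;  // clamp large negative values
      }
    }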
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a2f4e468/src/worker.cc
----------------------------------------------------------------------
diff --git a/src/worker.cc b/src/worker.cc
index abe74e7..2afa8b0 100644
--- a/src/worker.cc
+++ b/src/worker.cc
@@ -433,7 +433,8 @@ void BPTTWorker::Backward(int step, NeuralNet* net) {
for (auto it = layers.rbegin(); it != layers.rend(); it++) {
Layer* layer = *it;
if (layer->partition_id() == id_) {
- layer->ComputeGradient(kTrain | kBackward | kAggGrad, net->srclayers(layer));
+ layer->ComputeGradient(kTrain | kBackward | kAggGrad,
+ net->srclayers(layer));
// LOG(ERROR) << layer->name() << " backward";
if (job_conf_.debug() && DisplayNow(step) && grp_id_ == 0)
label[layer->name()] = layer->ToString(true, kTrain | kBackward);
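The phase arguments here are bit flags OR-ed into one int, which is why layers such as EmbeddingLayer can test (flag & kAggGrad) to decide between overwriting and accumulating gradients. A minimal illustration (the enum values below are assumptions, not taken from the source):

    enum Flag { kTrain = 1, kBackward = 2, kAggGrad = 4 };  // hypothetical values

    int flag = kTrain | kBackward | kAggGrad;  // composed by the BPTT worker
    bool aggregate = (flag & kAggGrad) != 0;   // true: add into shared grad blobs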
[06/10] incubator-singa git commit: SINGA-120 - Implemented GRU and
BPTT: Made changes for debugging the tokenizer
Posted by wa...@apache.org.
SINGA-120 - Implemented GRU and BPTT: Made changes for debugging the tokenizer
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/b4b6a385
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/b4b6a385
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/b4b6a385
Branch: refs/heads/master
Commit: b4b6a38568a5b22829fa3500dc7621e2719bb62b
Parents: d43af9f
Author: Ju Fan <fa...@gmail.com>
Authored: Fri Jan 1 10:53:11 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Wed Jan 6 01:53:05 2016 +0800
----------------------------------------------------------------------
include/singa/utils/context.h | 4 ++--
include/singa/utils/tokenizer.h | 1 +
2 files changed, 3 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b4b6a385/include/singa/utils/context.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/context.h b/include/singa/utils/context.h
index a2bc64f..8e7bbb8 100644
--- a/include/singa/utils/context.h
+++ b/include/singa/utils/context.h
@@ -154,9 +154,9 @@ class Context {
*/
std::mt19937* rand_generator(const std::thread::id& tid) {
if (rand_generator_.find(tid) == rand_generator_.end()) {
- CHECK(seed_.find(tid) != seed_.end());
+ // CHECK(seed_.find(tid) != seed_.end());
auto seed = static_cast<unsigned>(seed_[tid]);
- if (seed_[tid] == -1)
+ if (seed_.find(tid) == seed_.end() || seed_.at(tid) == -1)
seed = std::chrono::system_clock::now().time_since_epoch().count();
rand_generator_[tid] = new std::mt19937(seed);
}
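One subtlety: seed_[tid] in the line above default-inserts an entry via operator[], so the following find() can no longer detect a genuinely missing tid. A sketch that looks up before indexing (an editorial illustration, assuming seed_ is a map-like container of int seeds):

    auto it = seed_.find(tid);
    unsigned seed;
    if (it == seed_.end() || it->second == -1)  // no seed registered for tid
      seed = std::chrono::system_clock::now().time_since_epoch().count();
    else
      seed = static_cast<unsigned>(it->second);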
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b4b6a385/include/singa/utils/tokenizer.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/tokenizer.h b/include/singa/utils/tokenizer.h
index c66e0af..9637c75 100644
--- a/include/singa/utils/tokenizer.h
+++ b/include/singa/utils/tokenizer.h
@@ -37,6 +37,7 @@ namespace singa {
* t >> x; // x is wes
* cout << (t >> x); // print 0.
*/
+
class Tokenizer {
public:
Tokenizer(const std::string& str, const std::string& sep): start_(0),
[09/10] incubator-singa git commit: SINGA-120 - Implemented GRU and
BPTT
Posted by wa...@apache.org.
SINGA-120 - Implemented GRU and BPTT
Add input layers for the char rnn example.
Fix the bug in worker.cc for flag setting in ComputeGradient.
Runs with GPU; loss decreases slowly to 3 per unit.
TODO: add RNNDummyLayer and train with RMSProp.
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/959ef705
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/959ef705
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/959ef705
Branch: refs/heads/master
Commit: 959ef705a66b20b474dfad3e85a9f35635e8690f
Parents: 1f03f9d
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Sat Jan 2 22:54:20 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Wed Jan 6 01:55:08 2016 +0800
----------------------------------------------------------------------
Makefile.am | 4 +
include/singa/neuralnet/input_layer.h | 38 ++++
include/singa/neuralnet/layer.h | 6 +
include/singa/neuralnet/neuralnet.h | 11 +
include/singa/neuralnet/neuron_layer.h | 50 +++++
include/singa/utils/common.h | 5 +
include/singa/utils/context.h | 6 +
include/singa/utils/math_blob.h | 15 ++
include/singa/utils/param.h | 6 +-
include/singa/utils/updater.h | 7 +-
include/singa/worker.h | 31 ++-
src/driver.cc | 5 +
src/neuralnet/input_layer/char_rnn.cc | 95 +++++++++
src/neuralnet/input_layer/rnn_label.cc | 35 ++++
src/neuralnet/neuralnet.cc | 243 +++++++++++++----------
src/neuralnet/neuron_layer/embedding.cc | 98 +++++++++
src/neuralnet/neuron_layer/gru.cc | 115 +++++------
src/neuralnet/neuron_layer/inner_product.cc | 13 +-
src/proto/job.proto | 42 +++-
src/stub.cc | 2 +
src/test/test_gru_layer.cc | 1 -
src/test/test_math.cc | 1 -
src/utils/common.cc | 26 +++
src/utils/param.cc | 17 +-
src/utils/updater.cc | 17 ++
src/worker.cc | 84 +++++++-
26 files changed, 787 insertions(+), 186 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/959ef705/Makefile.am
----------------------------------------------------------------------
diff --git a/Makefile.am b/Makefile.am
index aa88348..d2b2aa8 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -75,12 +75,15 @@ SINGA_SRCS := src/driver.cc \
src/neuralnet/connection_layer/concate.cc \
src/neuralnet/connection_layer/slice.cc \
src/neuralnet/connection_layer/split.cc \
+ src/neuralnet/input_layer/char_rnn.cc \
+ src/neuralnet/input_layer/onehot.cc \
src/neuralnet/input_layer/csv.cc \
src/neuralnet/input_layer/image_preprocess.cc \
src/neuralnet/input_layer/prefetch.cc \
src/neuralnet/input_layer/record.cc \
src/neuralnet/input_layer/deprecated.cc \
src/neuralnet/input_layer/store.cc \
+ src/neuralnet/input_layer/rnn_label.cc \
src/neuralnet/output_layer/accuracy.cc \
src/neuralnet/output_layer/argsort.cc \
src/neuralnet/output_layer/csv.cc \
@@ -91,6 +94,7 @@ SINGA_SRCS := src/driver.cc \
src/neuralnet/neuron_layer/convolution.cc \
src/neuralnet/neuron_layer/dropout.cc \
src/neuralnet/neuron_layer/dummy.cc \
+ src/neuralnet/neuron_layer/embedding.cc \
src/neuralnet/neuron_layer/inner_product.cc \
src/neuralnet/neuron_layer/lrn.cc \
src/neuralnet/neuron_layer/pooling.cc \
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/959ef705/include/singa/neuralnet/input_layer.h
----------------------------------------------------------------------
diff --git a/include/singa/neuralnet/input_layer.h b/include/singa/neuralnet/input_layer.h
index 2825d65..e701eec 100644
--- a/include/singa/neuralnet/input_layer.h
+++ b/include/singa/neuralnet/input_layer.h
@@ -162,6 +162,44 @@ class PrefetchLayer : public Layer {
std::thread thread_;
};
+class OneHotLayer : public InputLayer {
+ public:
+ void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override;
+ void ComputeFeature(int flag, const vector<Layer*>& srclayers);
+
+ private:
+ int batchsize_, dim_;
+};
+
+/**
+ * Read the ASCII file as a large string, used for RNN models where each
+ * character is a single input to the unrolled RNN layer.
+ * The max string length is string::max_size().
+ */
+class CharRNNInputLayer : public InputLayer {
+ public:
+ void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override;
+ void ComputeFeature(int flag, const vector<Layer*>& srclayers);
+
+ private:
+ int batchsize_ = 0, unroll_len_ = 1;
+ unsigned offset_ = 0;
+ string path_, vocab_path_;
+ string buf_;
+ vector<int> start_;
+ std::unordered_map<char, int> char2index_;
+};
+
+/**
+ * Label layer for fetching labels from the src input layer for RNN models.
+ * The i-th unrolled layer fetches labels from the input layer via data(i+1).
+ * Particularly, it shares data_ Blob with data(i+1) of its src layer.
+ */
+class RNNLabelLayer : public InputLayer {
+ public:
+ void Setup(const LayerProto& proto, const vector<Layer*>& srclayers);
+ void ComputeFeature(int flag, const vector<Layer*>& srclayers);
+};
/****************Deprecated layers******************/
/**
* @deprecated please use the StoreInputLayer.
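OneHotLayer's implementation (onehot.cc) is not included in this excerpt; under the usual one-hot semantics it would expand each index in the source blob into a dim_-wide indicator row, roughly as follows (a plausible sketch only; the names src, dst, batchsize, and dim are assumed):

    #include <cstring>

    // Sketch: expand batchsize indices into one-hot rows of width dim.
    void ToOneHot(const float* src, float* dst, int batchsize, int dim) {
      for (int i = 0; i < batchsize; i++) {
        float* row = dst + i * dim;
        std::memset(row, 0, dim * sizeof(float));  // zero the whole row
        row[static_cast<int>(src[i])] = 1.0f;      // mark the index'th entry
      }
    }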
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/959ef705/include/singa/neuralnet/layer.h
----------------------------------------------------------------------
diff --git a/include/singa/neuralnet/layer.h b/include/singa/neuralnet/layer.h
index 28ab92e..f4738fa 100644
--- a/include/singa/neuralnet/layer.h
+++ b/include/singa/neuralnet/layer.h
@@ -174,6 +174,12 @@ class Layer {
*/
inline const std::string& name() const { return layer_conf_.name(); }
/**
+ * Return the index of the unrolled layer within the unrolling group, which
+ * should be in [0, max_unrolling_length).
+ */
+ inline const int unroll_index() const { return layer_conf_.unroll_index(); }
+
+ /**
* @return a const ref for Blob vector storing feature values of this layer.
*/
virtual const vector<Blob<float>*>& data() const {
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/959ef705/include/singa/neuralnet/neuralnet.h
----------------------------------------------------------------------
diff --git a/include/singa/neuralnet/neuralnet.h b/include/singa/neuralnet/neuralnet.h
index be8f5c8..33ad38c 100644
--- a/include/singa/neuralnet/neuralnet.h
+++ b/include/singa/neuralnet/neuralnet.h
@@ -109,6 +109,15 @@ class NeuralNet {
<< "layer (" << layer->name() << " ) has no source layers";
return src_map_.at(layer);
}
+ Layer* last_unroll_layer(const Layer* layer) const {
+ auto pos = layer->name().find("#");
+ if (pos == std::string::npos)
+ return nullptr;
+ string last_name = std::to_string(unroll_len_) + layer->name().substr(pos);
+ CHECK(name2layer_.find(last_name) != name2layer_.end())
+ << "layer name = " << last_name << " has no unroll layers";
+ return name2layer_.at(last_name);
+ }
inline Param* paramid2param(int id) const { return paramid2param_.at(id); }
/**
@@ -137,6 +146,7 @@ class NeuralNet {
* prepare data structures, e.g., params_, layers_, etc.
*/
void PrepareDataStructures();
+ void PrepareDataStructures(const NetProto& proto);
/**
* add split layers, due to connections to multiple dst-layers
*/
@@ -149,6 +159,7 @@ class NeuralNet {
int npartitions);
protected:
+ int unroll_len_ = 1;
std::vector<Layer*> layers_;
std::vector<Param*> params_;
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/959ef705/include/singa/neuralnet/neuron_layer.h
----------------------------------------------------------------------
diff --git a/include/singa/neuralnet/neuron_layer.h b/include/singa/neuralnet/neuron_layer.h
index 3f126ab..e587e38 100644
--- a/include/singa/neuralnet/neuron_layer.h
+++ b/include/singa/neuralnet/neuron_layer.h
@@ -131,12 +131,60 @@ class DummyLayer: public NeuronLayer {
bool output_ = false; // use as output layer
};
+/**
+ * Embedding layer that converts an array of index ID into a matrix.
+ *
+ * Each index ID corresponds to a word (or feature) vector in the vocabulary
+ * matrix maintained by the embedding layer.
+ * The index ID ranges within [0, |D|), where |D| is the size of the vocabulary,
+ * i.e., the number of rows of the vocabulary matrix.
+ * If the index is -1, it is treated as a padding word, and a feature vector
+ * with all values 0 is constructed and inserted into the feature Blob.
+ * Users handle special words by themselves. For example, the index 0 could be
+ * the starting word/symbol of a sentence, and the index 1 could be the ending
+ * word/symbol of a sentence.
+ */
+class EmbeddingLayer : public NeuronLayer {
+ public:
+ ~EmbeddingLayer() {
+ delete vocab_;
+ }
+ void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override;
+ void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
+ void ComputeGradient(int flag, const vector<Layer*>& srclayers) override;
+ const std::vector<Param*> GetParams() const override {
+ std::vector<Param*> params;
+ params.push_back(vocab_);
+ return params;
+ }
+
+ private:
+ int vocab_size_, feature_dim_, batchsize_;
+ //!< the vocabulary matrix to be learned
+ Param *vocab_;
+};
+
class GRULayer : public NeuronLayer {
public:
~GRULayer();
void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override;
void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
void ComputeGradient(int flag, const vector<Layer*>& srclayers) override;
+ ConnectionType dst_layer_connection() const override {
+ return kOneToMany;
+ }
+ Blob<float>* mutable_grad(const Layer* from) override {
+ if (typeid(*from) == typeid(GRULayer))
+ return gradvec_[1];
+ else
+ return gradvec_[0];
+ }
+ const Blob<float>& grad(const Layer* from) override {
+ if (typeid(*from) == typeid(GRULayer))
+ return *gradvec_[1];
+ else
+ return *gradvec_[0];
+ }
const std::vector<Param*> GetParams() const override {
if (bias_z_ != nullptr && bias_r_ != nullptr && bias_c_ != nullptr) {
@@ -156,6 +204,8 @@ class GRULayer : public NeuronLayer {
int vdim_, hdim_; // dimensions
Blob<float> *update_gate, *reset_gate, *new_memory;
+ //!< the gru layer connects to two dst layers, hence needs two grad blobs.
+ Blob<float> aux_grad_;
Param *weight_z_hx_, *weight_z_hh_, *bias_z_; // update gate
Param *weight_r_hx_, *weight_r_hh_, *bias_r_; // reset gate
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/959ef705/include/singa/utils/common.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/common.h b/include/singa/utils/common.h
index afbe954..0bcec58 100644
--- a/include/singa/utils/common.h
+++ b/include/singa/utils/common.h
@@ -155,6 +155,11 @@ void WriteProtoToBinaryFile(const Message& proto, const char* filename);
* Write a string (e.g., graph representation of a net) into a text file.
*/
void WriteStringToTextFile(const string& filename, const string& context);
+
+/**
+ * Parse metric pairs (key = value[, key = value]) from string
+ */
+const vector<std::pair<string, float>> GetMetricFromString(const string& disp);
} // namespace singa
#endif // SINGA_UTILS_COMMON_H_
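GetMetricFromString's body is not shown in this excerpt; a self-contained sketch of parsing the documented "key = value[, key = value]" format (the function name ParseMetrics is hypothetical, for illustration only):

    #include <sstream>
    #include <string>
    #include <utility>
    #include <vector>

    std::vector<std::pair<std::string, float>> ParseMetrics(
        const std::string& disp) {
      std::vector<std::pair<std::string, float>> metrics;
      std::stringstream ss(disp);
      std::string item;
      while (std::getline(ss, item, ',')) {        // split on commas
        size_t eq = item.find('=');
        if (eq == std::string::npos) continue;     // skip malformed pairs
        std::string key = item.substr(0, eq);
        key.erase(0, key.find_first_not_of(' '));  // trim leading spaces
        key.erase(key.find_last_not_of(' ') + 1);  // trim trailing spaces
        metrics.emplace_back(key, std::stof(item.substr(eq + 1)));
      }
      return metrics;
    }

    // ParseMetrics("loss = 2.73, accuracy = 0.41")
    //   -> {{"loss", 2.73f}, {"accuracy", 0.41f}}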
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/959ef705/include/singa/utils/context.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/context.h b/include/singa/utils/context.h
index 8e7bbb8..b1128c1 100644
--- a/include/singa/utils/context.h
+++ b/include/singa/utils/context.h
@@ -100,6 +100,12 @@ class Context {
}
/**
+ * @return the device ID of the current thread.
+ */
+ int device_id() {
+ return device_id(std::this_thread::get_id());
+ }
+ /**
* @return the ID of the device attached to a given CPU thread, or -1 if this
* thread has not been attached to a GPU device.
*/
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/959ef705/include/singa/utils/math_blob.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/math_blob.h b/include/singa/utils/math_blob.h
index 125509a..bdaf914 100644
--- a/include/singa/utils/math_blob.h
+++ b/include/singa/utils/math_blob.h
@@ -712,6 +712,21 @@ void Softmax(int nb_rows, const Blob<Dtype>& A, Blob<Dtype>* B) {
#endif // USE_GPU
}
}
+
+template<typename Dtype>
+void Zero(Blob<Dtype>* B) {
+ auto context = Singleton<Context>::Instance();
+ int device = context->device_id(std::this_thread::get_id());
+ if (device == -1) {
+ B->SetValue(0);
+ } else {
+#ifdef USE_GPU
+ cudaMemset(B->mutable_gpu_data(), 0, B->count() * sizeof(float));
+#else
+ LOG(FATAL) << "Not implemented";
+#endif // USE_GPU
+ }
+}
} // end of namespace singa
#endif // SINGA_UTILS_MATH_BLOB_H_
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/959ef705/include/singa/utils/param.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/param.h b/include/singa/utils/param.h
index 415490e..9930710 100644
--- a/include/singa/utils/param.h
+++ b/include/singa/utils/param.h
@@ -146,7 +146,11 @@ class Param {
* @param cpu_only if true, share only cpu memory (used for training with
* multi-gpu cards); else, share both cpu and gpu memory.
*/
- void ShareFrom(Param* other, bool cpu_only);
+ void ShareDataFrom(Param* other, bool cpu_only);
+ /**
+ * Share both data and grad from another Param.
+ */
+ void ShareFrom(Param* other);
/**
* Init param values from checkpoint blob.
*/
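The rename separates two sharing modes that neuralnet.cc uses further down in this commit: replicas created by layer partitioning share only the value blob, while unrolled RNN copies share both value and gradient so that BPTT accumulates into one buffer:

    // partition replicas of one origin layer: share data only
    params.at(i)->ShareDataFrom(owner_params.at(i), true);
    // unrolled copies ("i#w" -> owner "0#w"): share both data and grad
    param->ShareFrom(owner);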
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/959ef705/include/singa/utils/updater.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/updater.h b/include/singa/utils/updater.h
index 6413a80..575ab86 100644
--- a/include/singa/utils/updater.h
+++ b/include/singa/utils/updater.h
@@ -7,9 +7,9 @@
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
-*
+*
* http://www.apache.org/licenses/LICENSE-2.0
-*
+*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -93,12 +93,13 @@ class Updater {
virtual void Init(const UpdaterProto &proto);
virtual void Update(int step, Param* param, float grad_scale) = 0;
-
+ void Clip(const float low, const float high, Param* param);
protected:
UpdaterProto proto_;
LRGenerator* lr_gen_;
float weight_decay_;
float momentum_;
+ float clip_low_, clip_high_;
};
class SGDUpdater : public Updater {
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/959ef705/include/singa/worker.h
----------------------------------------------------------------------
diff --git a/include/singa/worker.h b/include/singa/worker.h
index 8738c27..34c8000 100644
--- a/include/singa/worker.h
+++ b/include/singa/worker.h
@@ -165,7 +165,7 @@ class Worker {
* @param prefix display prefix, e.g., 'Train step 100', 'Test step 90'.
* @param net display layers from this neural net.
*/
- void Display(int flag, const std::string& prefix, NeuralNet* net);
+ virtual void Display(int flag, const std::string& prefix, NeuralNet* net);
/**
* Put Param values to server.
*
@@ -284,10 +284,35 @@ class BPWorker: public Worker {
public:
void TrainOneBatch(int step, NeuralNet* net) override;
void TestOneBatch(int step, Phase phase, NeuralNet* net) override;
- void Forward(int step, Phase phase, NeuralNet* net);
- void Backward(int step, NeuralNet* net);
+ virtual void Forward(int step, Phase phase, NeuralNet* net);
+ virtual void Backward(int step, NeuralNet* net);
};
+/**
+ * Subclass of Worker that implements BPTT (Backpropagation through time)
+ * algorithm for computing gradients of RNN models.
+ * Max BPTT/unrolling length is configured by users.
+ */
+class BPTTWorker: public BPWorker {
+ public:
+ void Forward(int step, Phase phase, NeuralNet* net) override;
+ void Backward(int step, NeuralNet* net) override;
+ void Display(int flag, const std::string& prefix, NeuralNet* net) override;
+
+ private:
+ /*
+ * indicator used in truncated BPTT, which feeds the hidden state of the last
+ * unrolled unit to the first unit in Forward() for the next iteration.
+ * Currently, the last hidden state is always fed to the first unit.
+ */
+ bool full_state_ = false;
+ //!< indicator for the start of a new pass over the dataset.
+ bool begin_ = false;
+};
+/**
+ * Subclass of Worker that implements the Contrastive Divergence algorithm for
+ * computing the gradients of parameters of energy models.
+ */
class CDWorker: public Worker {
public:
void TrainOneBatch(int step, NeuralNet* net) override;
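Because the unrolled copies share one underlying Param, BPTT accumulates the per-step gradients into a single buffer:

    \partial L / \partial W = \sum_{i=0}^{T-1} \partial L / \partial W^{(i)}

where W^{(i)} is the copy used at unroll index i and T is the unroll length; the kAggGrad flag tells each layer to add into, rather than overwrite, the shared gradient blob.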
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/959ef705/src/driver.cc
----------------------------------------------------------------------
diff --git a/src/driver.cc b/src/driver.cc
index 5e0772b..21968bb 100644
--- a/src/driver.cc
+++ b/src/driver.cc
@@ -71,6 +71,9 @@ void Driver::Init(int argc, char **argv) {
RegisterLayer<ImagePreprocessLayer, int>(kImagePreprocess);
RegisterLayer<RecordOutputLayer, int>(kRecordOutput);
RegisterLayer<CSVOutputLayer, int>(kCSVOutput);
+ RegisterLayer<CharRNNInputLayer, int>(kCharRNN);
+ RegisterLayer<RNNLabelLayer, int>(kRNNLabel);
+ RegisterLayer<OneHotLayer, int>(kOneHot);
// connection layers
RegisterLayer<BridgeDstLayer, int>(kBridgeDst);
@@ -84,6 +87,7 @@ void Driver::Init(int argc, char **argv) {
RegisterLayer<ConvolutionLayer, int>(kConvolution);
RegisterLayer<CConvolutionLayer, int>(kCConvolution);
RegisterLayer<CPoolingLayer, int>(kCPooling);
+ RegisterLayer<EmbeddingLayer, int>(kEmbedding);
#ifdef USE_CUDNN
RegisterLayer<CudnnActivationLayer, int>(kCudnnActivation);
@@ -135,6 +139,7 @@ void Driver::Init(int argc, char **argv) {
// register workers
RegisterWorker<BPWorker>(kBP);
+ RegisterWorker<BPTTWorker>(kBPTT);
RegisterWorker<CDWorker>(kCD);
// register params
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/959ef705/src/neuralnet/input_layer/char_rnn.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/input_layer/char_rnn.cc b/src/neuralnet/input_layer/char_rnn.cc
new file mode 100644
index 0000000..cc13b1b
--- /dev/null
+++ b/src/neuralnet/input_layer/char_rnn.cc
@@ -0,0 +1,95 @@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+#include <sstream>
+#include <fstream>
+#include "singa/neuralnet/input_layer.h"
+namespace singa {
+
+void CharRNNInputLayer::Setup(const LayerProto& conf,
+ const vector<Layer*>& srclayers) {
+ InputLayer::Setup(conf, srclayers);
+ batchsize_ = conf.char_rnn_conf().batchsize();
+ path_ = conf.char_rnn_conf().path();
+ vocab_path_ = conf.char_rnn_conf().vocab_path();
+ unroll_len_ = conf.char_rnn_conf().unroll_len();
+ datavec_.clear();
+ // each unroll layer has a input blob
+ for (int i = 0; i <= unroll_len_; i++) {
+ datavec_.push_back(new Blob<float>(batchsize_));
+ }
+}
+
+void CharRNNInputLayer::ComputeFeature(int flag,
+ const vector<Layer*>& srclayers) {
+ if (buf_.size() == 0) {
+
+ // read the vocab
+ {
+ std::ifstream fin;
+ fin.open(vocab_path_);
+ CHECK(fin.is_open()) << "Can't open vocab_path = " << vocab_path_;
+ std::stringstream stream;
+ stream << fin.rdbuf();
+ string vocab = stream.str();
+ LOG(ERROR) << "Vocab_size = " << vocab.length();
+ for (char c : vocab)
+ char2index_[c] = char2index_.size() - 1;
+ fin.close();
+ }
+
+ // read the whole text file
+ {
+ std::ifstream fin;
+ fin.open(path_);
+ CHECK(fin.is_open()) << "Can't open filepath = " << path_;
+ std::stringstream stream;
+ stream << fin.rdbuf();
+ buf_ = stream.str();
+ fin.close();
+ }
+
+ // decide the start pos of each instance in one mini-batch
+ int max_offset = buf_.length() / batchsize_;
+ CHECK_GT(max_offset, unroll_len_);
+ for (int i = 0; i < batchsize_; i ++) {
+ start_.push_back(i * max_offset);
+ }
+ }
+
+ for (int l = 0; l < unroll_len_ + 1; l++) {
+ float* ptr = datavec_[l]->mutable_cpu_data();
+ for (int i = 0; i < batchsize_; i++) {
+ ptr[i] = static_cast<float>(char2index_.at(buf_[start_[i] + l]));
+ }
+ }
+ offset_ += unroll_len_;
+ if (offset_ >= buf_.length() / batchsize_) {
+// unsigned seed = std::chrono::system_clock::now().time_since_epoch().count();
+// std::mt19937 g(seed);
+// std::shuffle(start_.begin(), start_.end(), g);
+ offset_ = 0;
+ // return -1;
+ } else {
+ // return 0;
+ }
+}
+
+} // namespace singa
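The batching scheme above gives every batch row its own contiguous slice of the text. With B = batchsize_ and U = unroll_len_, row i starts at

    start_i = i \cdot \lfloor |buf\_| / B \rfloor,  \qquad  pos(i, l) = start_i + l,  \quad  l = 0, \dots, U

so one call fills unroll position l of every row, and the extra (U+1)-th blob holds the characters one step ahead, which RNNLabelLayer reads as next-character labels via data(unroll_index() + 1).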
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/959ef705/src/neuralnet/input_layer/rnn_label.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/input_layer/rnn_label.cc b/src/neuralnet/input_layer/rnn_label.cc
new file mode 100644
index 0000000..4924d87
--- /dev/null
+++ b/src/neuralnet/input_layer/rnn_label.cc
@@ -0,0 +1,35 @@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+#include "singa/neuralnet/input_layer.h"
+namespace singa {
+void RNNLabelLayer::Setup(const LayerProto& proto,
+ const vector<Layer*>& srclayers) {
+ InputLayer::Setup(proto, srclayers);
+ aux_data_.resize(srclayers[0]->data(unroll_index() + 1).shape(0));
+}
+void RNNLabelLayer::ComputeFeature(int flag, const vector<Layer*>& srclayers) {
+ const float* input = srclayers[0]->data(unroll_index() + 1).cpu_data();
+ for (unsigned i = 0; i < aux_data_.size(); i++) {
+ aux_data_[i] = static_cast<int>(input[i]);
+ }
+}
+} // namespace singa
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/959ef705/src/neuralnet/neuralnet.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuralnet.cc b/src/neuralnet/neuralnet.cc
index 6bb0ecd..f9579b1 100644
--- a/src/neuralnet/neuralnet.cc
+++ b/src/neuralnet/neuralnet.cc
@@ -33,10 +33,36 @@ using std::map;
using std::string;
using std::vector;
+/**
+ * Check the user-defined net config and do some preprocessing, e.g., assigning
+ * names to params.
+ * TODO(wangwei) implement the following functions.
+ * 1. layer and param names should not include '@', '+' or '#'. '@<suffix>'
+ * is used for identifying the layer location/partition. '<prefix>#' is used
+ * for identifying the unrolled Param in RNN models.
+ * 2. assign names to unnamed Params, e.g., p<param_id>+<layer_name>.
+ */
+const NetProto NetConfPreprocess(const NetProto& conf) {
+ /*
+ string param_name = "$";
+ // if user does not name the param, then name it based on layer name.
+ if (param->name() == "") {
+ param->set_name(layer->name() + param_name);
+ param_name += "$";
+ }
+ */
+ NetProto proto = conf;
+ for (int i = 0; i < proto.layer_size(); i++) {
+ if (!proto.layer(i).has_unroll_len())
+ proto.mutable_layer(i)->set_unroll_len(proto.unroll_len());
+ }
+ return proto;
+}
+
NeuralNet* NeuralNet::Create(const NetProto& net_conf, Phase phase,
int npartitions) {
- NetProto conf;
- conf.CopyFrom(net_conf);
+ const NetProto& full_net_conf = NetConfPreprocess(net_conf);
+ NetProto conf = full_net_conf;
conf.clear_layer();
// flag=0: neither exclude nor include field appears
// flag=1: exclude field appears
@@ -45,25 +71,19 @@ NeuralNet* NeuralNet::Create(const NetProto& net_conf, Phase phase,
// exclude layers according to phase
// exclude field is deprecated
// please use include field instead
- for (const auto& layer : net_conf.layer()) {
+ for (const auto& layer : full_net_conf.layer()) {
bool include = true;
for (auto p : layer.exclude()) {
// check whether both exclude and include field
// appear in the same .conf file
- CHECK(flag == 0 || flag == 1)
- << "include and exclude field should not simultaneously"
- << " appear in the same .conf file";
+ CHECK(flag == 0 || flag == 1) << "Don't use include and exclude together";
if (p == phase)
include = false;
flag = 1;
}
// neural net only include the specified layer in the include field
for (auto p : layer.include()) {
- // check whether both exclude and include field
- // appear in the same .conf file
- CHECK(flag == 0 || flag == 2)
- << "include and exclude field should not simultaneously"
- << " appear in the same .conf file";
+ CHECK(flag == 0 || flag == 2) << "Don't use include and exclude together";
if (p == phase) {
include = true;
break;
@@ -78,21 +98,19 @@ NeuralNet* NeuralNet::Create(const NetProto& net_conf, Phase phase,
if (!layer_conf->has_partition_dim())
layer_conf->set_partition_dim(net_conf.partition_dim());
}
- //LOG(INFO) << "Before unrolling: \n" << conf.DebugString();
+ // LOG(INFO) << "Before unrolling: \n" << conf.DebugString();
conf = Unrolling (conf);
// Copy shared parameters for sharing param conf
- std::unordered_map<string, ParamProto*> name2param;
std::vector<ParamProto*> shares;
+ std::unordered_map<string, ParamProto*> name2param;
for (int index = 0; index < conf.layer_size();index ++) {
LayerProto* layer = conf.mutable_layer(index);
for (int i = 0; i < layer->param_size(); i++) {
ParamProto* param = layer->mutable_param(i);
- if (param->has_name() && param->name() != "") {
- CHECK(name2param.find(param->name()) == name2param.end())
- << "param name is repeated: " << param->name();
+ CHECK(name2param.find(param->name()) == name2param.end())
+ << "Repeated param = " << param->name();
name2param[param->name()] = param;
- }
if (param->has_share_from() && param->share_from() != "")
shares.push_back(param);
}
@@ -101,99 +119,108 @@ NeuralNet* NeuralNet::Create(const NetProto& net_conf, Phase phase,
const std::string from = param->share_from();
const std::string name = param->name();
CHECK(name2param.find(from) != name2param.end())
- << "can't find param " << from;
+ << "can't find share_from = " << from;
// CopyFrom will overwrite the name and share_from fields
param->CopyFrom(*name2param.at(from));
param->set_name(name);
param->set_share_from(from);
}
LOG(INFO) << "Initial NeuralNet Config is\n" << conf.DebugString();
- // TODO(wangwei) create net based on net type, e.g., directed, undirected, etc
+ // TODO(wangwei) create net based on net type, e.g., directed, undirected.
return new NeuralNet(conf, npartitions);
}
const NetProto NeuralNet::Unrolling(const NetProto& net_conf) {
- // Step 1: Unroll each layer & set parameter sharing
- NetProto conf;
+ // Step 1: Unroll each layer & set parameter sharing
+ NetProto conf;
- std::vector<std::vector<int>> layer_groups;
- std::unordered_map<string,int> org_layer_names;
- for (int index = 0; index < net_conf.layer_size(); index ++) {
- const LayerProto& org_layer = net_conf.layer(index);
- org_layer_names[org_layer.name()] = index; // layer_name -> index
+ std::vector<std::vector<int>> layer_groups;
+ std::unordered_map<string,int> org_layer_names;
+ for (int index = 0; index < net_conf.layer_size(); index ++) {
+ const LayerProto& org_layer = net_conf.layer(index);
+ org_layer_names[org_layer.name()] = index; // layer_name -> index
- std::vector<int> layer_group;
- for (int i = 0; i < org_layer.unroll_len(); i ++) { // unroll
- LayerProto* unroll_layer = conf.add_layer();
- unroll_layer->CopyFrom(org_layer); // create a new layer conf
- if (org_layer.unroll_len() > 1) {
- // update layer names
- std::stringstream sstm;
- sstm << unroll_layer->name() << "_" << i;
- unroll_layer->set_name(sstm.str());
- // update layer parameter sharing
- for (int j = 0; j < unroll_layer->param_size(); j ++) {
- ParamProto* param = unroll_layer->mutable_param(j);
- if (i == 0) continue; // no need to rename parameters in the i-th unrolled layer
- if (!param->has_share_from() || param->share_from() == "") {// not shared from others
- param->set_share_from(param->name());
- }
- std::stringstream sstm1;
- sstm1 << param->name() << "_" << i;
- param->set_name(sstm1.str());
- }
- }
- // clear unrolling related fields
- unroll_layer->clear_unroll_len();
- unroll_layer->clear_unroll_conn_type();
- unroll_layer->clear_shift();
- unroll_layer->clear_srclayers();
+ std::vector<int> layer_group;
+ for (int i = 0; i < org_layer.unroll_len(); i ++) { // unroll
+ LayerProto* unroll_layer = conf.add_layer();
+ unroll_layer->CopyFrom(org_layer); // create a new layer conf
+ if (org_layer.unroll_len() > 1) {
+ // update layer names
+ std::stringstream sstm;
+ sstm << i << '#' << unroll_layer->name();
+ unroll_layer->set_name(sstm.str());
+ unroll_layer->set_unroll_index(i);
+ // update layer parameter sharing
+ for (int j = 0; j < unroll_layer->param_size(); j ++) {
+ ParamProto* param = unroll_layer->mutable_param(j);
+ if (i > 0) {
+ param->set_share_from("0#" + param->name());
+ }
+ std::stringstream sstm1;
+ sstm1 << i << '#' << param->name();
+ param->set_name(sstm1.str());
+ }
+ }
+ // clear unrolling related fields
+ unroll_layer->clear_unroll_len();
+ unroll_layer->clear_unroll_conn_type();
+ unroll_layer->clear_shift();
+ unroll_layer->clear_srclayers();
- layer_group.push_back(conf.layer_size() - 1);
- }
- layer_groups.push_back(layer_group);
- }
- // Step 2: Connect unrolled layers by setting `srclayers`
- for (int index = 0; index < net_conf.layer_size(); index ++) {
- const LayerProto& org_layer = net_conf.layer(index);
- if (org_layer.srclayers_size() == 0) continue; // no src layer
- //TODO(fanju): add LSTM when it is ready
- if (org_layer.type() == kGRU) { // connect GRU layers
- for (unsigned int j = 1; j < layer_groups[index].size(); j ++) {
- LayerProto* unroll_layer = conf.mutable_layer(layer_groups[index][j]);
- unroll_layer->add_srclayers(conf.layer(layer_groups[index][j-1]).name());
- }
- }
- for (int i = 0; i < org_layer.srclayers_size(); i ++) {
- const string& org_layer_src = org_layer.srclayers(i);
+ layer_group.push_back(conf.layer_size() - 1);
+ // LOG(ERROR) << "unrolling layer " << unroll_layer->name();
+ }
+ layer_groups.push_back(layer_group);
+ }
+ // Step 2: Connect unrolled layers by setting `srclayers`
+ for (int index = 0; index < net_conf.layer_size(); index ++) {
+ const LayerProto& org_layer = net_conf.layer(index);
+ if (org_layer.srclayers_size() == 0)
+ continue; // no src layer
+ for (int i = 0; i < org_layer.srclayers_size(); i ++) {
+ const string& org_layer_src = org_layer.srclayers(i);
+ singa::UnrollConnType unroll_conn_type = kUnrollOneToOne;
+ if (i < org_layer.unroll_conn_type_size())
+ unroll_conn_type = org_layer.unroll_conn_type(i);
+ unsigned int shift = 0;
+ if (i < org_layer.shift_size())
+ shift = org_layer.shift(i);
- singa::UnrollConnType unroll_conn_type = kUnrollOneToOne; // Default value
- if (i < org_layer.unroll_conn_type_size()) unroll_conn_type = org_layer.unroll_conn_type(i);
- unsigned int shift = 0; // Default shift value
- if (i < org_layer.shift_size()) shift = org_layer.shift(i);
+ const std::vector<int> unroll_layer_srcs
+ = layer_groups[org_layer_names[org_layer_src]];
- const std::vector<int> unroll_layer_srcs = layer_groups[org_layer_names[org_layer_src]];
+ for (unsigned int j = 0; j < layer_groups[index].size(); j ++) {
+ LayerProto* unroll_layer = conf.mutable_layer(layer_groups[index][j]);
+ // Update src layers of `unroll_layer` by considering the types
+ if (unroll_conn_type == kUnrollOneToAll) {
+ for (int unroll_layer_src : unroll_layer_srcs) {
+ unroll_layer->add_srclayers(conf.layer(unroll_layer_src).name());
+ }
+ } else if (unroll_conn_type == kUnrollOneToOne) {
+ if (j < shift) continue; // no need to connect with the src
+ int unroll_layer_src = unroll_layer_srcs[j - shift];
+ unroll_layer->add_srclayers(conf.layer(unroll_layer_src).name());
+ } else if (unroll_conn_type == kUnrollFirstToLast) {
+ if (j > 0) break;
+ int unroll_layer_src =
+ unroll_layer_srcs[unroll_layer_srcs.size() - 1];
+ unroll_layer->add_srclayers(conf.layer(unroll_layer_src).name());
+ }
+ }
+ }
- for (unsigned int j = 0; j < layer_groups[index].size(); j ++) {
- LayerProto* unroll_layer = conf.mutable_layer(layer_groups[index][j]);
- // Update src layers of `unroll_layer` by considering the types
- if (unroll_conn_type == kUnrollOneToAll) {
- for (int unroll_layer_src : unroll_layer_srcs) {
- unroll_layer->add_srclayers(conf.layer(unroll_layer_src).name());
- }
- } else if (unroll_conn_type == kUnrollOneToOne) {
- if (j < shift) continue; // no need to connect with the src
- int unroll_layer_src = unroll_layer_srcs[j - shift];
- unroll_layer->add_srclayers(conf.layer(unroll_layer_src).name());
- } else if (unroll_conn_type == kUnrollFirstToLast) {
- if (j > 0) break;
- int unroll_layer_src = unroll_layer_srcs[unroll_layer_srcs.size() - 1];
- unroll_layer->add_srclayers(conf.layer(unroll_layer_src).name());
- }
- }
- }
- }
- return conf;
+ //TODO(fanju): add LSTM when it is ready
+ if (org_layer.type() == kGRU) { // connect GRU layers
+ for (unsigned int j = 1; j < layer_groups[index].size(); j ++) {
+ LayerProto* unroll_layer = conf.mutable_layer(layer_groups[index][j]);
+ string srcname = conf.layer(layer_groups[index][j-1]).name();
+ unroll_layer->add_srclayers(srcname);
+ // LOG(ERROR) << "connect " << unroll_layer->name() << " from " << srcname;
+ }
+ }
+
+ }
+ return conf;
}
@@ -202,10 +229,12 @@ NeuralNet::NeuralNet(NetProto netproto, int npartitions) {
auto graph = CreateGraph(netproto, npartitions);
CreateNetFromGraph(graph);
PrepareDataStructures();
+
for (Node* node : graph->nodes())
delete static_cast<LayerProto*>(node->proto);
delete graph;
LOG(INFO) << "NeuralNet Constructed";
+ unroll_len_ = netproto.unroll_len();
}
NeuralNet::~NeuralNet() {
@@ -243,7 +272,7 @@ void NeuralNet::ShareParamsFrom(NeuralNet* other, bool cpu_only) {
const auto& params = layer->GetParams();
CHECK_EQ(params.size(), otherparams.size());
for (size_t i = 0; i < params.size(); i++) {
- params[i]->ShareFrom(otherparams[i], cpu_only);
+ params[i]->ShareDataFrom(otherparams[i], cpu_only);
}
}
}
@@ -442,6 +471,7 @@ Graph* NeuralNet::CreateGraph(const NetProto& netproto, int npartitions) {
proto->set_num_partitions(npartitions);
Node* node = graph->AddNode(nodename, layer.name(), i, proto);
nodes.push_back(node);
+ // TODO(wangwei) update param name
}
name2nodes[layer.name()] = nodes;
name2proto[layer.name()] = &layer;
@@ -526,14 +556,8 @@ void NeuralNet::CreateNetFromGraph(Graph* graph) {
layer->Setup(*(static_cast<LayerProto*>(node->proto)), srclayers(layer));
DLOG(INFO) << "constructing graph: " << layer->name();
layerinfo[layer->name()] = IntVecToString(layer->data(nullptr).shape());
- string param_name = "$";
for (auto param : layer->GetParams()) {
param->set_id(paramid++);
- // if user does not name the param, then name it based on layer name.
- if (param->name() == "") {
- param->set_name(layer->name() + param_name);
- param_name += "$";
- }
}
if (layer->partition_dim() == 0)
share_param_layers[node->origin].push_back(layer);
@@ -556,12 +580,25 @@ void NeuralNet::CreateNetFromGraph(Graph* graph) {
const string share_from = param->share_from();
if (param->share_from() != "") {
if (name2param.find(share_from) != name2param.end()) {
- param->ShareFrom(name2param.at(param->share_from()), false);
+ param->ShareDataFrom(name2param.at(param->share_from()), false);
} else {
LOG(FATAL) << "No param with the name (share_from) " << share_from;
}
}
}
+
+ // share params due to layer unrolling
+ for (auto & entry : name2param) {
+ Param* param = entry.second;
+ auto pos = param->name().find("#");
+ if (pos != std::string::npos && param->owner() != param->id()) {
+ string from = "0" + param->name().substr(pos);
+ CHECK(name2param.find(from) != name2param.end())
+ << "Can't find owner = " << from << " for param = " << param->name();
+ Param* owner = name2param.at(from);
+ param->ShareFrom(owner);
+ }
+ }
// share Params for layers generated (partitioned) from the same origin layer
for (auto & entry : share_param_layers) {
const auto& owner = entry.second.begin();
@@ -570,7 +607,7 @@ void NeuralNet::CreateNetFromGraph(Graph* graph) {
auto params = (*it)->GetParams();
CHECK_EQ(params.size(), owner_params.size());
for (size_t i = 0; i < params.size(); i++)
- params.at(i)->ShareFrom(owner_params.at(i), true);
+ params.at(i)->ShareDataFrom(owner_params.at(i), true);
}
}
}
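
The param-sharing loop above keys on the '#' separator that the unrolling code prepends to names ("<unroll_index>#<name>"): every unrolled copy shares from the copy at unroll index 0. A minimal standalone sketch of that owner lookup (hypothetical helper, not SINGA API):

    #include <string>
    // "2#w_z_hx" -> "0#w_z_hx"; names without '#' own themselves.
    std::string OwnerName(const std::string& name) {
      auto pos = name.find('#');
      return pos == std::string::npos ? name : "0" + name.substr(pos);
    }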
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/959ef705/src/neuralnet/neuron_layer/embedding.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuron_layer/embedding.cc b/src/neuralnet/neuron_layer/embedding.cc
new file mode 100644
index 0000000..00e9139
--- /dev/null
+++ b/src/neuralnet/neuron_layer/embedding.cc
@@ -0,0 +1,98 @@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+#include "singa/neuralnet/neuron_layer.h"
+#include "singa/utils/math_addr.h"
+#include "singa/utils/math_blob.h"
+#include "singa/utils/singleton.h"
+#include "singa/utils/context.h"
+
+namespace singa {
+
+void EmbeddingLayer::Setup(const LayerProto& conf,
+ const vector<Layer*>& srclayers) {
+ NeuronLayer::Setup(conf, srclayers);
+ vocab_size_ = conf.embedding_conf().vocab_size();
+ feature_dim_ = conf.embedding_conf().feature_dim();
+ vocab_ = Param::Create(conf.param(0));
+ vocab_->Setup(vector<int>{vocab_size_, feature_dim_});
+ batchsize_ = srclayers.at(0)->data(unroll_index()).shape(0);
+ data_.Reshape(batchsize_, feature_dim_);
+ grad_.ReshapeLike(data_);
+}
+
+void EmbeddingLayer::ComputeFeature(int flag, const vector<Layer*>& srclayers) {
+ const float* word_idx = srclayers.at(0)->data(unroll_index()).cpu_data();
+ int device = Singleton<Context>::Instance()->device_id();
+ if (device == -1) {
+ const float* src = vocab_->data().cpu_data();
+ float* dst = data_.mutable_cpu_data();
+ for (int i = 0; i < batchsize_; i++) {
+ memcpy(dst + i * feature_dim_,
+ src + static_cast<int>(word_idx[i]) * feature_dim_,
+ feature_dim_ * sizeof(float));
+ }
+ } else {
+#ifdef USE_GPU
+ const float* src = vocab_->data().gpu_data();
+ float* dst = data_.mutable_gpu_data();
+ for (int i = 0; i < batchsize_; i++) {
+ cudaMemcpy(dst + i * feature_dim_,
+ src + static_cast<int>(word_idx[i]) * feature_dim_,
+ feature_dim_ * sizeof(float), cudaMemcpyDefault);
+ }
+#else
+ LOG(FATAL) << "Not implemented";
+#endif
+ }
+}
+
+void EmbeddingLayer::ComputeGradient(int flag, const vector<Layer*>& srclayers)
+{
+ const float* word_idx = srclayers.at(0)->data(unroll_index()).cpu_data();
+ auto context = Singleton<Context>::Instance();
+ if ((flag & kAggGrad) == 0)
+ Zero(vocab_->mutable_grad());
+
+ if (context->device_id() == -1) {
+ const float* src = grad_.cpu_data();
+ float* dst = vocab_->mutable_grad()->mutable_cpu_data();
+ memset(dst, 0 , sizeof(float) * grad_.count());
+ for (int i = 0; i < batchsize_; i++) {
+ cpu_axpy(feature_dim_, 1.0f, src + i * feature_dim_,
+ dst + static_cast<int>(word_idx[i]) * feature_dim_);
+ }
+ } else {
+#ifdef USE_GPU
+ const float* src = grad_.gpu_data();
+ float* dst = vocab_->mutable_grad()->mutable_gpu_data();
+ for (int i = 0; i < batchsize_; i++) {
+ gpu_axpy(context->cublas_handle(), grad_.count(), 1.0f,
+ src + i * feature_dim_,
+ dst + static_cast<int>(word_idx[i]) * feature_dim_);
+ }
+#else
+ LOG(FATAL) << "Not implemented";
+#endif
+ }
+}
+
+} // namespace singa
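
Functionally, the embedding forward pass is a row gather (output row i copies vocab row word_idx[i]) and the backward pass a row scatter-add into the vocab gradient. A hedged CPU-only sketch of the same idea, independent of the SINGA types above (names hypothetical):

    #include <cstring>
    // Gather: out[i] = vocab[idx[i]], one row of width dim per instance.
    void EmbedFwd(const float* vocab, const int* idx, float* out,
                  int batch, int dim) {
      for (int i = 0; i < batch; i++)
        std::memcpy(out + i * dim, vocab + idx[i] * dim, dim * sizeof(float));
    }
    // Scatter-add: dvocab[idx[i]] += dout[i]; repeated ids accumulate.
    void EmbedBwd(const float* dout, const int* idx, float* dvocab,
                  int batch, int dim) {
      for (int i = 0; i < batch; i++)
        for (int k = 0; k < dim; k++)
          dvocab[idx[i] * dim + k] += dout[i * dim + k];
    }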
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/959ef705/src/neuralnet/neuron_layer/gru.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuron_layer/gru.cc b/src/neuralnet/neuron_layer/gru.cc
index 45d7873..9ba5a50 100644
--- a/src/neuralnet/neuron_layer/gru.cc
+++ b/src/neuralnet/neuron_layer/gru.cc
@@ -64,6 +64,8 @@ void GRULayer::Setup(const LayerProto& conf,
data_.Reshape(vector<int>{batchsize_, hdim_});
grad_.ReshapeLike(data_);
+ // one for grad from dst GRU, one for grad from upper layer
+ gradvec_.push_back(new Blob<float>(grad_.shape()));
// Initialize the parameters
weight_z_hx_ = Param::Create(conf.param(0));
@@ -74,7 +76,7 @@ void GRULayer::Setup(const LayerProto& conf,
weight_r_hh_ = Param::Create(conf.param(4));
weight_c_hh_ = Param::Create(conf.param(5));
- if (conf.gru_conf().bias_term()) {
+ if (conf.param_size() > 6) {
bias_z_ = Param::Create(conf.param(6));
bias_r_ = Param::Create(conf.param(7));
bias_c_ = Param::Create(conf.param(8));
@@ -88,7 +90,7 @@ void GRULayer::Setup(const LayerProto& conf,
weight_r_hh_->Setup(vector<int>{hdim_, hdim_});
weight_c_hh_->Setup(vector<int>{hdim_, hdim_});
- if (conf.gru_conf().bias_term()) {
+ if (conf.param_size() > 6) {
bias_z_->Setup(vector<int>{hdim_});
bias_r_->Setup(vector<int>{hdim_});
bias_c_->Setup(vector<int>{hdim_});
@@ -97,7 +99,6 @@ void GRULayer::Setup(const LayerProto& conf,
update_gate = new Blob<float>(batchsize_, hdim_);
reset_gate = new Blob<float>(batchsize_, hdim_);
new_memory = new Blob<float>(batchsize_, hdim_);
-
}
void GRULayer::ComputeFeature(int flag,
@@ -105,11 +106,11 @@ void GRULayer::ComputeFeature(int flag,
CHECK_LE(srclayers.size(), 2);
// Do transpose
- Blob<float> *w_z_hx_t = Transpose (weight_z_hx_->data());
+ Blob<float> *w_z_hx_t = Transpose (weight_z_hx_->data());
Blob<float> *w_z_hh_t = Transpose (weight_z_hh_->data());
- Blob<float> *w_r_hx_t = Transpose (weight_r_hx_->data());
+ Blob<float> *w_r_hx_t = Transpose (weight_r_hx_->data());
Blob<float> *w_r_hh_t = Transpose (weight_r_hh_->data());
- Blob<float> *w_c_hx_t = Transpose (weight_c_hx_->data());
+ Blob<float> *w_c_hx_t = Transpose (weight_c_hx_->data());
Blob<float> *w_c_hh_t = Transpose (weight_c_hh_->data());
// Prepare the data input and the context
@@ -123,49 +124,34 @@ void GRULayer::ComputeFeature(int flag,
// Compute the update gate
GEMM(1.0f, 0.0f, src,*w_z_hx_t,update_gate);
- if (bias_z_ != nullptr)
+ if (bias_z_ != nullptr)
MVAddRow(1.0f,1.0f,bias_z_->data(),update_gate);
- Blob<float> zprev (batchsize_,hdim_);
- GEMM(1.0f, 0.0f, *context,*w_z_hh_t, &zprev);
- Add<float>(*update_gate, zprev, update_gate);
+ GEMM(1.0f, 1.0f, *context, *w_z_hh_t, update_gate);
Map<op::Sigmoid<float>,float>(*update_gate, update_gate);
// Compute the reset gate
GEMM(1.0f, 0.0f, src,*w_r_hx_t,reset_gate);
if (bias_r_ != nullptr)
MVAddRow(1.0f,1.0f,bias_r_->data(),reset_gate);
- Blob<float> rprev (batchsize_, hdim_);
- GEMM(1.0f, 0.0f, *context, *w_r_hh_t, &rprev);
- Add<float>(*reset_gate, rprev, reset_gate);
+ GEMM(1.0f, 1.0f, *context, *w_r_hh_t, reset_gate);
Map<op::Sigmoid<float>,float>(*reset_gate, reset_gate);
// Compute the new memory
GEMM(1.0f, 0.0f, src, *w_c_hx_t, new_memory);
if (bias_c_ != nullptr)
MVAddRow(1.0f,1.0f,bias_c_->data(), new_memory);
- Blob<float> cprev (batchsize_, hdim_);
- GEMM(1.0f, 0.0f, *context, *w_c_hh_t, &cprev);
- //Blob<float> new_cprev (batchsize_, hdim_);
- Mult<float>(*reset_gate, cprev, &cprev);
- Add<float>(*new_memory, cprev, new_memory);
+ Mult<float>(*reset_gate, *new_memory, new_memory);
+ GEMM(1.0f, 1.0f, *context, *w_c_hh_t, new_memory);
Map<op::Tanh<float>,float>(*new_memory, new_memory);
- // Compute data - new memory part
- Blob<float> z1 (batchsize_,hdim_);
- for (int i = 0; i < z1.count(); i ++) {
- z1.mutable_cpu_data()[i] = 1.0f; // generate a matrix with ones
- }
- AXPY<float>(-1.0f, *update_gate, &z1);
- Mult<float>(z1, *new_memory, &data_);
- // Compute data - context part
- Blob<float> data_prev (batchsize_, hdim_);
- Mult<float>(*update_gate,*context,&data_prev);
- Add<float>(data_, data_prev, &data_);
+ Sub(*context, *new_memory, &data_);
+ Mult(data_, *update_gate, &data_);
+ Add(data_, *new_memory, &data_);
// delete the pointers
- if (srclayers.size() == 1) delete context;
- else context = NULL;
+ if (srclayers.size() == 1)
+ delete context;
delete w_z_hx_t;
delete w_z_hh_t;
@@ -178,14 +164,20 @@ void GRULayer::ComputeFeature(int flag,
void GRULayer::ComputeGradient(int flag,
const vector<Layer*>& srclayers) {
CHECK_LE(srclayers.size(), 2);
+ // agg grad from two dst layers
+ AXPY(1.0f, *gradvec_[1], &grad_);
+ float beta = 1.0f; // agg param gradients
+ Layer* ilayer = srclayers[0]; // input layer
+ Layer* clayer = nullptr; // context layer
// Prepare the data input and the context
- const Blob<float>& src = srclayers[0]->data(this);
+ const Blob<float>& src = ilayer->data(this);
const Blob<float> *context;
if (srclayers.size() == 1) { // only have data input
context = new Blob<float>(batchsize_, hdim_);
} else { // have data input & context
- context = &srclayers[1]->data(this);
+ clayer = srclayers[1];
+ context = &(clayer->data(this));
}
// Prepare gradient of output neurons
@@ -197,7 +189,7 @@ void GRULayer::ComputeGradient(int flag,
Blob<float> drgatedr (batchsize_, hdim_);
Map<singa::op::SigmoidGrad<float>, float>(*reset_gate, &drgatedr);
Blob<float> dnewmdc (batchsize_, hdim_);
- Map<singa::op::TanhGrad<float>, float>(*new_memory,&dnewmdc);
+ Map<singa::op::TanhGrad<float>, float>(*new_memory, &dnewmdc);
Blob<float> dLdz (batchsize_, hdim_);
Sub<float>(*context, *new_memory, &dLdz);
@@ -206,9 +198,7 @@ void GRULayer::ComputeGradient(int flag,
Blob<float> dLdc (batchsize_,hdim_);
Blob<float> z1 (batchsize_,hdim_);
- for (int i = 0; i < z1.count(); i ++) {
- z1.mutable_cpu_data()[i] = 1.0f; // generate a matrix with ones
- }
+ z1.SetValue(1.0f);
AXPY<float>(-1.0f, *update_gate, &z1);
Mult(grad_,z1,&dLdc);
Mult(dLdc,dnewmdc,&dLdc);
@@ -218,57 +208,58 @@ void GRULayer::ComputeGradient(int flag,
Blob<float> dLdr (batchsize_, hdim_);
Blob<float> cprev (batchsize_, hdim_);
- Blob<float> *w_c_hh_t = Transpose(weight_c_hh_->data());
- GEMM(1.0f,0.0f,*context,*w_c_hh_t, &cprev);
- delete w_c_hh_t;
- Mult(dLdc,cprev,&dLdr);
- Mult(dLdr,drgatedr,&dLdr);
-
+ GEMM(1.0f, 0.0f, *context, weight_c_hh_->data().T(), &cprev);
+ Mult(dLdc, cprev, &dLdr);
+ Mult(dLdr, drgatedr, &dLdr);
// Compute gradients for parameters of update gate
Blob<float> *dLdz_t = Transpose(dLdz);
- GEMM(1.0f,0.0f,*dLdz_t,src,weight_z_hx_->mutable_grad());
- GEMM(1.0f,0.0f,*dLdz_t,*context,weight_z_hh_->mutable_grad());
+ GEMM(1.0f, beta, *dLdz_t, src, weight_z_hx_->mutable_grad());
+ GEMM(1.0f, beta, *dLdz_t, *context, weight_z_hh_->mutable_grad());
if (bias_z_ != nullptr)
- MVSumRow<float>(1.0f,0.0f,dLdz,bias_z_->mutable_grad());
+ MVSumRow<float>(1.0f, beta, dLdz, bias_z_->mutable_grad());
delete dLdz_t;
// Compute gradients for parameters of reset gate
Blob<float> *dLdr_t = Transpose(dLdr);
- GEMM(1.0f,0.0f,*dLdr_t,src,weight_r_hx_->mutable_grad());
- GEMM(1.0f,0.0f,*dLdr_t,*context,weight_r_hh_->mutable_grad());
+ GEMM(1.0f, beta, *dLdr_t, src, weight_r_hx_->mutable_grad());
+ GEMM(1.0f, beta, *dLdr_t, *context, weight_r_hh_->mutable_grad());
if (bias_r_ != nullptr)
- MVSumRow(1.0f,0.0f,dLdr,bias_r_->mutable_grad());
+ MVSumRow(1.0f, beta, dLdr, bias_r_->mutable_grad());
delete dLdr_t;
// Compute gradients for parameters of new memory
Blob<float> *dLdc_t = Transpose(dLdc);
- GEMM(1.0f,0.0f,*dLdc_t,src,weight_c_hx_->mutable_grad());
+ GEMM(1.0f, beta, *dLdc_t, src,weight_c_hx_->mutable_grad());
if (bias_c_ != nullptr)
- MVSumRow(1.0f,0.0f,dLdc,bias_c_->mutable_grad());
+ MVSumRow(1.0f, beta, dLdc, bias_c_->mutable_grad());
delete dLdc_t;
Blob<float> *reset_dLdc_t = Transpose(reset_dLdc);
- GEMM(1.0f,0.0f,*reset_dLdc_t,*context,weight_c_hh_->mutable_grad());
+ GEMM(1.0f, beta, *reset_dLdc_t, *context, weight_c_hh_->mutable_grad());
delete reset_dLdc_t;
// Compute gradients for data input layer
if (srclayers[0]->mutable_grad(this) != nullptr) {
- GEMM(1.0f,0.0f,dLdc,weight_c_hx_->data(),srclayers[0]->mutable_grad(this));
- GEMM(1.0f,1.0f,dLdz,weight_z_hx_->data(),srclayers[0]->mutable_grad(this));
- GEMM(1.0f,1.0f,dLdr,weight_r_hx_->data(), srclayers[0]->mutable_grad(this));
+ GEMM(1.0f,0.0f,dLdc, weight_c_hx_->data(), ilayer->mutable_grad(this));
+ GEMM(1.0f,1.0f,dLdz, weight_z_hx_->data(), ilayer->mutable_grad(this));
+ GEMM(1.0f,1.0f,dLdr, weight_r_hx_->data(), ilayer->mutable_grad(this));
}
- if (srclayers.size() > 1 && srclayers[1]->mutable_grad(this) != nullptr) {
+ if (clayer != nullptr && clayer->mutable_grad(this) != nullptr) {
// Compute gradients for context layer
- GEMM(1.0f,0.0f,reset_dLdc,weight_c_hh_->data(), srclayers[1]->mutable_grad(this));
- GEMM(1.0f,1.0f,dLdr, weight_r_hh_->data(), srclayers[1]->mutable_grad(this));
- GEMM(1.0f,1.0f,dLdz,weight_z_hh_->data(), srclayers[1]->mutable_grad(this));
- Add(srclayers[1]->grad(this), *update_gate, srclayers[1]->mutable_grad(this));
+ GEMM(1.0f, 0.0f, reset_dLdc, weight_c_hh_->data(),
+ clayer->mutable_grad(this));
+ GEMM(1.0f, 1.0f, dLdr, weight_r_hh_->data(), clayer->mutable_grad(this));
+ GEMM(1.0f, 1.0f, dLdz, weight_z_hh_->data(), clayer->mutable_grad(this));
+ Add(clayer->grad(this), *update_gate, clayer->mutable_grad(this));
+ // LOG(ERROR) << "grad to prev gru " << Asum(clayer->grad(this));
}
- if (srclayers.size() == 1) delete context;
- else context = NULL;
+ if (srclayers.size() == 1)
+ delete context;
+ else
+ context = NULL;
delete grad_t;
}
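
A hedged reading of ComputeFeature as of this revision, with x_t the input batch, h_{t-1} the context, and \odot denoting element-wise multiplication:

    z_t = \sigma(x_t W_{z,hx}^T + h_{t-1} W_{z,hh}^T + b_z)
    r_t = \sigma(x_t W_{r,hx}^T + h_{t-1} W_{r,hh}^T + b_r)
    c_t = \tanh(r_t \odot (x_t W_{c,hx}^T + b_c) + h_{t-1} W_{c,hh}^T)
    h_t = z_t \odot h_{t-1} + (1 - z_t) \odot c_t

Note the placement of r_t; commit [08/10] later in this message revises the c_t term to the char-rnn formulation.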
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/959ef705/src/neuralnet/neuron_layer/inner_product.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuron_layer/inner_product.cc b/src/neuralnet/neuron_layer/inner_product.cc
index f50afba..1e5e93e 100644
--- a/src/neuralnet/neuron_layer/inner_product.cc
+++ b/src/neuralnet/neuron_layer/inner_product.cc
@@ -66,12 +66,17 @@ void InnerProductLayer::ComputeFeature(int flag,
void InnerProductLayer::ComputeGradient(int flag,
const vector<Layer*>& srclayers) {
-
- MVSumRow(1.0f, 0.0f, grad_, bias_->mutable_grad());
+ float beta = 0.0f;
+ if (flag & kAggGrad)
+ beta = 1.0f;
+ MVSumRow(1.0f, beta, grad_, bias_->mutable_grad());
if (transpose_)
- MMDot(srclayers[0]->data(this).T(), grad_, weight_->mutable_grad());
+ GEMM(1.0f, beta, srclayers[0]->data(this).T(), grad_,
+ weight_->mutable_grad());
else
- MMDot(grad_.T(), srclayers[0]->data(this), weight_->mutable_grad());
+ GEMM(1.0f, beta, grad_.T(), srclayers[0]->data(this),
+ weight_->mutable_grad());
+
if (srclayers[0]->mutable_grad(this) != nullptr) {
if (transpose_)
MMDot(grad_, weight_->data().T(), srclayers[0]->mutable_grad(this));
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/959ef705/src/proto/job.proto
----------------------------------------------------------------------
diff --git a/src/proto/job.proto b/src/proto/job.proto
index 7cdc287..e520eba 100644
--- a/src/proto/job.proto
+++ b/src/proto/job.proto
@@ -120,6 +120,14 @@ message NetProto {
repeated LayerProto layer = 1;
// partitioning type for parallelism
optional int32 partition_dim = 20 [default = 0];
+ // Each layer corresponds to a group of unrolled layers, used in RNN models
+ repeated LayerGroupProto layer_group = 21;
+ optional int32 unroll_len = 22 [default = 1];
+}
+
+message LayerGroupProto {
+ // names of the layers that belong to the same group
+ repeated string layer = 1;
}
message UpdaterProto {
@@ -139,6 +147,9 @@ message UpdaterProto {
// used to avoid divide by 0, i.e. x/(y+delta)
optional float delta = 35 [default = 0.00000001];
+ optional float clip_low = 36 [default = 0];
+ optional float clip_high = 37 [default = 0];
+
extensions 101 to 200;
}
@@ -195,10 +206,11 @@ message LayerProto {
// share data and grad blob with the single src layer, e.g., relu layer can
// share blobs from conv layer. It is useful for saving memory space.
optional bool share_src_blobs = 22 [default = false];
-
+ // for unrolling layers in RNN models
optional int32 unroll_len = 23 [default = 1];
- repeated UnrollConnType unroll_conn_type = 24;
- repeated int32 shift = 25;
+ optional int32 unroll_index = 24 [default = 0];
+ repeated UnrollConnType unroll_conn_type = 25;
+ repeated int32 shift = 26;
// overrides the partition dimension for neural net
optional int32 partition_dim = 60 [default = -1];
@@ -215,6 +227,7 @@ message LayerProto {
optional MnistProto mnist_conf = 192;
optional RGBImageProto rgbimage_conf = 193;
optional DataProto sharddata_conf = 194;
+ optional CharRNNProto char_rnn_conf = 195;
// configuration for neuron layers id range [200, 300)
optional ActivationProto activation_conf = 200;
@@ -228,6 +241,7 @@ message LayerProto {
optional ReLUProto relu_conf = 211;
optional SoftmaxProto softmax_conf = 214;
optional GRUProto gru_conf = 215;
+ optional EmbeddingProto embedding_conf = 216;
// configuration for loss layers, id range [300, 400)
optional SoftmaxLossProto softmaxloss_conf = 301;
@@ -354,7 +368,19 @@ message StoreProto {
optional int32 random_skip = 11 [default = 0];
optional bool has_label = 12 [default = true];
}
+message CharRNNProto {
+ optional string path = 1;
+ optional string vocab_path = 2;
+ // num of chars to read per instance, should = NetProto::unroll_len
+ optional int32 unroll_len = 3 [default = 50];
+ optional int32 batchsize = 4 [default = 1];
+}
+message EmbeddingProto {
+ optional int32 vocab_size = 1 [default = 0];
+ optional int32 feature_dim = 2 [default = 100];
+
+}
message SoftmaxLossProto {
// computing accuracy against topk results
optional int32 topk = 1 [default = 1];
@@ -572,6 +598,8 @@ enum AlgType {
kBP = 1;
// Contrastive Divergence algorithm for RBM, DBM, etc.
kCD = 2;
+ // BPTT for training RNN models
+ kBPTT = 3;
// For user defined algorithm.
kUserAlg = 104;
}
@@ -590,6 +618,9 @@ enum LayerType {
kMnist = 192; // deprecated
kRGBImage = 193; // deprecated
kShardData = 194; // deprecated
+ kCharRNN = 195;
+ kRNNLabel = 196;
+ kOneHot = 197;
/*
* Neuron layers
@@ -610,6 +641,8 @@ enum LayerType {
kSigmoid = 213;
kSoftmax = 214;
kGRU = 215;
+ kEmbedding = 216;
+
// cudnn v3
kCudnnConv = 250;
kCudnnPool = 251;
@@ -678,6 +711,9 @@ enum Phase {
kBackward = 64;
kLoss = 128;
kDeploy = 256;
+
+ // used to aggregate parameter gradients when Param is shared
+ kAggGrad = 512;
}
enum ParamType {
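
A hypothetical job.conf fragment exercising the new unrolling fields, assembled only from the proto definitions above (names, values, and layer wiring are illustrative, not a tested config):

    neuralnet {
      unroll_len: 50
      layer {
        name: "data"
        type: kCharRNN
        char_rnn_conf {
          path: "input.txt"         # hypothetical corpus path
          vocab_path: "vocab.txt"
          unroll_len: 50            # should equal NetProto::unroll_len
          batchsize: 32
        }
      }
      layer {
        name: "gru"
        type: kGRU
        unroll_len: 50
        srclayers: "data"
      }
    }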
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/959ef705/src/stub.cc
----------------------------------------------------------------------
diff --git a/src/stub.cc b/src/stub.cc
index 7c0ec90..c06128c 100644
--- a/src/stub.cc
+++ b/src/stub.cc
@@ -83,6 +83,8 @@ const std::unordered_map<int, ParamEntry*> CreateParamShard(
int grp = entry.first;
int wstart = grp2workers[grp].first, wend = grp2workers[grp].second;
for (auto layer : entry.second->layers()) {
+ if (layer->unroll_index() > 0)
+ continue;
int partition = layer->partition_id();
bool local = partition >= wstart && partition < wend;
for (auto param : layer->GetParams()) {
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/959ef705/src/test/test_gru_layer.cc
----------------------------------------------------------------------
diff --git a/src/test/test_gru_layer.cc b/src/test/test_gru_layer.cc
index 296b795..e432ae1 100644
--- a/src/test/test_gru_layer.cc
+++ b/src/test/test_gru_layer.cc
@@ -239,7 +239,6 @@ TEST_F(GRULayerTest, ComputeFeature) {
singa::GRULayer gru_layer_2;
gru_layer_2.Setup(gru2_conf, std::vector<singa::Layer*>{&in_layer_2, &gru_layer_1});
-
for (unsigned int i = 0; i < gru_layer_2.GetParams().size(); i ++) {
gru_layer_2.GetParams()[i]->InitValues();
}
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/959ef705/src/test/test_math.cc
----------------------------------------------------------------------
diff --git a/src/test/test_math.cc b/src/test/test_math.cc
index 2e7deec..2627b2e 100644
--- a/src/test/test_math.cc
+++ b/src/test/test_math.cc
@@ -286,7 +286,6 @@ TEST(MathTest, TestAxpyGPU) {
TEST(MathTest, TestDotGPU) {
float A[12];
float B[12];
-
for (int i = 0; i < 12; i++) {
A[i] = i - 1;
B[i] = i + 1;
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/959ef705/src/utils/common.cc
----------------------------------------------------------------------
diff --git a/src/utils/common.cc b/src/utils/common.cc
index 928d4bb..eefdb5c 100644
--- a/src/utils/common.cc
+++ b/src/utils/common.cc
@@ -570,4 +570,30 @@ void WriteStringToTextFile(const string& filename, const string& context) {
ofs.flush();
ofs.close();
}
+
+
+const vector<std::pair<string, float>> GetMetricFromString(const string& disp) {
+ size_t pos = 0;
+ vector<string> terms;
+ while (pos != string::npos) {
+ auto next = disp.find_first_of(" ,", pos); // delimiter: space or comma
+ if (next != string::npos) {
+ terms.push_back(disp.substr(pos, next - pos));
+ pos = disp.find_first_not_of(" ,", next + 1);
+ } else {
+ break;
+ }
+ }
+ if (pos != string::npos)
+ terms.push_back(disp.substr(pos));
+ vector<std::pair<string, float>> ret;
+ for (unsigned i = 0; i < terms.size(); i++) {
+ if (terms[i] == "=") {
+ CHECK_GE(i, 1);
+ CHECK_LT(i, terms.size() - 1) << "terms[i] = " << terms[i];
+ ret.push_back(std::make_pair(terms[i-1], std::stof(terms[i + 1])));
+ }
+ }
+ return ret;
+}
} // namespace singa
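
A hedged usage sketch for the new parser; the display-string format ("name = value" pairs separated by spaces or commas) is inferred from the tokenizer above:

    // Parses "accuracy = 0.5023, loss = 1.7421" into
    // {("accuracy", 0.5023f), ("loss", 1.7421f)}.
    const auto metrics =
        singa::GetMetricFromString("accuracy = 0.5023, loss = 1.7421");
    for (const auto& m : metrics)
      LOG(INFO) << m.first << " = " << m.second;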
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/959ef705/src/utils/param.cc
----------------------------------------------------------------------
diff --git a/src/utils/param.cc b/src/utils/param.cc
index bdae72f..95396bc 100644
--- a/src/utils/param.cc
+++ b/src/utils/param.cc
@@ -166,7 +166,12 @@ void Param::InitValues(int version) {
set_version(version);
}
-void Param::ShareFrom(Param* other, bool cpu_only) {
+void Param::ShareDataFrom(Param* other, bool cpu_only) {
+ if (this == other) {
+ LOG(WARNING) << "No need to share Param with itself";
+ return;
+ }
+
proto_.set_owner(other->owner());
CHECK_EQ(data_.count(), other->data_.count());
data_.ShareData(&(other->data_), cpu_only);
@@ -183,6 +188,16 @@ void Param::ShareFrom(Param* other, bool cpu_only) {
pending_update_.resize(other->pending_update_.size());
}
+void Param::ShareFrom(Param* other) {
+ if (this == other) {
+ LOG(WARNING) << "No need to share Param with itself";
+ return;
+ }
+
+ ShareDataFrom(other, false);
+ grad_.ShareData(&(other->grad_), false);
+}
+
void Param::FromProto(const BlobProto& blob) {
data_.FromProto(blob);
}
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/959ef705/src/utils/updater.cc
----------------------------------------------------------------------
diff --git a/src/utils/updater.cc b/src/utils/updater.cc
index 21608fa..a9f70c0 100644
--- a/src/utils/updater.cc
+++ b/src/utils/updater.cc
@@ -101,9 +101,24 @@ void Updater::Init(const UpdaterProto& proto) {
momentum_ = proto.momentum();
weight_decay_ = proto.weight_decay();
lr_gen_ = LRGenerator::Create(proto.learning_rate());
+ clip_low_ = proto.clip_low();
+ clip_high_ = proto.clip_high();
+}
+
+void Updater::Clip(const float low, const float high, Param* param) {
+ Blob<float>* grad = param->mutable_grad();
+ float* ptr = grad->mutable_cpu_data();
+ for (int i = 0; i < grad->count(); i++) {
+ if (ptr[i] > high)
+ ptr[i] = high;
+ else if (ptr[i] < low)
+ ptr[i] = low;
+ }
}
void SGDUpdater::Update(int step, Param* param, float grad_scale) {
+ if (clip_high_ > clip_low_)
+ Clip(clip_low_, clip_high_, param);
Shape<1> s = Shape1(param->size());
Tensor<cpu, 1> data(param->mutable_cpu_data(), s);
Tensor<cpu, 1> grad(param->mutable_cpu_grad(), s);
@@ -143,6 +158,8 @@ void NesterovUpdater::Update(int step, Param* param, float grad_scale) {
}
/***********************AdaGrad******************************/
void AdaGradUpdater::Update(int step, Param* param, float grad_scale) {
+ if (clip_high_ > clip_low_)
+ Clip(clip_low_, clip_high_, param);
Shape<1> s = Shape1(param->size());
Tensor<cpu, 1> data(param->mutable_cpu_data(), s);
Tensor<cpu, 1> grad(param->mutable_cpu_grad(), s);
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/959ef705/src/worker.cc
----------------------------------------------------------------------
diff --git a/src/worker.cc b/src/worker.cc
index 8495b5c..4e1dc75 100644
--- a/src/worker.cc
+++ b/src/worker.cc
@@ -29,6 +29,7 @@
#include "singa/utils/factory.h"
#include "singa/utils/singleton.h"
#include "singa/utils/context.h"
+#include "singa/utils/math_blob.h"
namespace singa {
@@ -214,7 +215,6 @@ void Worker::InitNetParams(const JobProto& job_conf, NeuralNet* net) {
}
}
-
void Worker::Checkpoint(int step, const std::string& folder, NeuralNet* net) {
BlobProtos bps;
for (auto layer : net->layers()) {
@@ -338,7 +338,7 @@ void BPWorker::Forward(int step, Phase phase, NeuralNet* net) {
map<string, string> label;
for (auto& layer : net->layers()) {
if (layer->partition_id() == id_) {
- if (phase == kTrain) {
+ if (phase == kTrain && layer->unroll_index() == 0) {
// wait until param is updated
for (Param* p : layer->GetParams()) {
Collect(step, p);
@@ -346,7 +346,7 @@ void BPWorker::Forward(int step, Phase phase, NeuralNet* net) {
}
// DLOG(ERROR) << "Forward " << layer->name();
layer->ComputeFeature(phase | kForward, net->srclayers(layer));
- if (job_conf_.debug() && grp_id_ == 0)
+ if (job_conf_.debug() && DisplayNow(step) && grp_id_ == 0)
label[layer->name()] = layer->ToString(true, phase | kForward);
}
}
@@ -364,7 +364,7 @@ void BPWorker::Backward(int step, NeuralNet* net) {
Layer* layer = *it;
if (layer->partition_id() == id_) {
layer->ComputeGradient(kTrain | kBackward, net->srclayers(layer));
- if (job_conf_.debug() && grp_id_ == 0)
+ if (job_conf_.debug() && DisplayNow(step) && grp_id_ == 0)
label[layer->name()] = layer->ToString(true, kTrain | kBackward);
for (Param* p : layer->GetParams())
Update(step, p);
@@ -377,6 +377,82 @@ void BPWorker::Backward(int step, NeuralNet* net) {
}
}
+/***************************BPTTWorker*********************************/
+void BPTTWorker::Forward(int step, Phase phase, NeuralNet* net) {
+ map<string, string> label;
+ for (auto& layer : net->layers()) {
+ if (layer->partition_id() == id_) {
+ if (phase == kTrain && layer->unroll_index() == 0) {
+ // wait until param is updated
+ for (Param* p : layer->GetParams()) {
+ Collect(step, p);
+ Zero(p->mutable_grad());
+ }
+ }
+ vector<Layer*> src = net->srclayers(layer);
+ // if full-state RNN and not at the start of a new pass over the dataset,
+ // feed the hidden state of the last unit to the first unit.
+ if (layer->unroll_index() == 0 && full_state_ && !begin_) {
+ Layer* last = net->last_unroll_layer(layer);
+ if (last != layer) {
+ src.push_back(last);
+ }
+ }
+ // LOG(ERROR) << layer->name() << " forward";
+ // int ret =
+ layer->ComputeFeature(phase | kForward, src);
+ /*
+ if ((phase & Phase::kTrain) && ret == Status::kEnd)
+ begin_ = true;
+ */
+
+ if (job_conf_.debug() && DisplayNow(step) && grp_id_ == 0)
+ label[layer->name()] = layer->ToString(true, phase | kForward);
+ }
+ }
+ if (label.size()) {
+ const string path = Cluster::Get()->vis_folder() + "/fp-step"
+ + std::to_string(step) +"-loc" + std::to_string(id_) + ".json";
+ WriteStringToTextFile(path, net->ToGraph(false).ToJson(label));
+ }
+}
+
+void BPTTWorker::Backward(int step, NeuralNet* net) {
+ map<string, string> label;
+ auto& layers = net->layers();
+ for (auto it = layers.rbegin(); it != layers.rend(); it++) {
+ Layer* layer = *it;
+ if (layer->partition_id() == id_) {
+ layer->ComputeGradient(kTrain | kBackward | kAggGrad, net->srclayers(layer));
+ // LOG(ERROR) << layer->name() << " backward";
+ if (job_conf_.debug() && DisplayNow(step) && grp_id_ == 0)
+ label[layer->name()] = layer->ToString(true, kTrain | kBackward);
+ // unrolled layers share parameter data and grad, just update the 1st one
+ if (layer->unroll_index() == 0)
+ for (Param* p : layer->GetParams())
+ Update(step, p);
+ }
+ }
+ if (label.size()) {
+ const string path = Cluster::Get()->vis_folder() + "/bp-step"
+ + std::to_string(step) + "-loc" + std::to_string(id_) + ".json";
+ WriteStringToTextFile(path, net->ToGraph(false).Reverse().ToJson(label));
+ }
+}
+void BPTTWorker::Display(int flag, const std::string& prefix, NeuralNet* net) {
+ std::unordered_map<string, float> perf;
+ for (auto layer : net->layers()) {
+ if (layer->partition_id() == id_) {
+ const string& disp = layer->ToString(false, flag);
+ for (const auto& entry : GetMetricFromString(disp))
+ perf[entry.first] += entry.second;
+ }
+ }
+ string disp = prefix + " ";
+ for (const auto& entry : perf)
+ disp += entry.first + " = " + std::to_string(entry.second) + ", ";
+ LOG(ERROR) << disp;
+}
/****************************CDWorker**********************************/
void CDWorker::TrainOneBatch(int step, NeuralNet* net) {
const auto& layers = net->layers();
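
The BPTT path above leans on the new kAggGrad phase bit: unrolled copies share both param data and grad, Forward zeroes the shared grad once at unroll index 0, Backward lets every copy accumulate into it, and only the copy at unroll index 0 calls Update. The accumulation idiom, mirroring the InnerProductLayer change earlier in this message (a sketch, not a new API):

    // beta = 0 overwrites the grad blob; beta = 1 accumulates across
    // unrolled steps when kAggGrad is set in the phase flag.
    float beta = (flag & kAggGrad) ? 1.0f : 0.0f;
    GEMM(1.0f, beta, grad_.T(), srclayers[0]->data(this),
         weight_->mutable_grad());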
[02/10] incubator-singa git commit: SINGA-120 - Implemented GRU and
BPTT: Fixed bugs of returning null
Posted by wa...@apache.org.
SINGA-120 - Implemented GRU and BPTT: Fixed bugs of returning null
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/d43af9fe
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/d43af9fe
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/d43af9fe
Branch: refs/heads/master
Commit: d43af9fef5889da55c22d56487b91989e841daa8
Parents: c440e4a
Author: Ju Fan <fa...@gmail.com>
Authored: Fri Jan 1 10:52:20 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Wed Jan 6 01:53:05 2016 +0800
----------------------------------------------------------------------
include/singa/neuralnet/neuron_layer.h | 33 ++++++++++++++++++++++++++++-
1 file changed, 32 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d43af9fe/include/singa/neuralnet/neuron_layer.h
----------------------------------------------------------------------
diff --git a/include/singa/neuralnet/neuron_layer.h b/include/singa/neuralnet/neuron_layer.h
index b02aa7d..3f126ab 100644
--- a/include/singa/neuralnet/neuron_layer.h
+++ b/include/singa/neuralnet/neuron_layer.h
@@ -120,16 +120,47 @@ class DropoutLayer : public NeuronLayer {
* Use it as output layer, it will generate random grad;
* Use it as neuron layer, it will replicate data and grad.
*/
-class DummyLayer: public Layer {
+class DummyLayer: public NeuronLayer {
public:
void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override;
void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
void ComputeGradient(int flag, const vector<Layer*>& srclayers) override;
+
private:
bool input_ = false; // use as input layer
bool output_ = false; // use as output layer
};
+class GRULayer : public NeuronLayer {
+ public:
+ ~GRULayer();
+ void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override;
+ void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
+ void ComputeGradient(int flag, const vector<Layer*>& srclayers) override;
+
+ const std::vector<Param*> GetParams() const override {
+ if (bias_z_ != nullptr && bias_r_ != nullptr && bias_c_ != nullptr) {
+ std::vector<Param*> params{weight_z_hx_, weight_r_hx_,weight_c_hx_,
+ weight_z_hh_, weight_r_hh_, weight_c_hh_,
+ bias_z_, bias_r_, bias_c_};
+ return params;
+ } else {
+ std::vector<Param*> params{weight_z_hx_, weight_r_hx_,weight_c_hx_,
+ weight_z_hh_, weight_r_hh_, weight_c_hh_};
+ return params;
+ }
+ }
+
+ private:
+ int batchsize_; // batch size
+ int vdim_, hdim_; // dimensions
+
+ Blob<float> *update_gate, *reset_gate, *new_memory;
+
+ Param *weight_z_hx_, *weight_z_hh_, *bias_z_; // update gate
+ Param *weight_r_hx_, *weight_r_hh_, *bias_r_; // reset gate
+ Param *weight_c_hx_, *weight_c_hh_, *bias_c_; // new memory
+};
/**
* Layer that applies linear transformations as
[08/10] incubator-singa git commit: SINGA-120 - Implemented GRU and
BPTT
Posted by wa...@apache.org.
SINGA-120 - Implemented GRU and BPTT
Change new memory computation formula following char-rnn (i.e., element-wise multiplication before matrix multiplication)
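In equations (a hedged reading of the gru.cc hunk below, \odot = element-wise multiplication), the change moves the reset gate from the pre-activation onto the previous hidden state, and flips which term the update gate weights:

    before:  c_t = \tanh(r_t \odot (x_t W_{c,hx}^T + b_c) + h_{t-1} W_{c,hh}^T)
             h_t = z_t \odot h_{t-1} + (1 - z_t) \odot c_t
    after:   c_t = \tanh(x_t W_{c,hx}^T + (r_t \odot h_{t-1}) W_{c,hh}^T + b_c)
             h_t = z_t \odot c_t + (1 - z_t) \odot h_{t-1}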
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/6a4c9960
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/6a4c9960
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/6a4c9960
Branch: refs/heads/master
Commit: 6a4c9960e0795aeac6df282d7190b6f93b305c58
Parents: 959ef70
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Tue Jan 5 18:14:46 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Wed Jan 6 01:55:08 2016 +0800
----------------------------------------------------------------------
Makefile.am | 2 +
include/singa/neuralnet/connection_layer.h | 28 +++++++++++++
include/singa/neuralnet/layer.h | 14 +++++++
include/singa/neuralnet/neuron_layer.h | 2 +-
include/singa/neuralnet/output_layer.h | 15 +++++++
include/singa/utils/updater.h | 8 ++--
src/driver.cc | 6 ++-
src/neuralnet/neuralnet.cc | 9 ++---
src/neuralnet/neuron_layer/gru.cc | 52 +++++++++++--------------
src/proto/job.proto | 14 +++++++
src/utils/updater.cc | 28 ++++++-------
src/worker.cc | 16 ++++++--
12 files changed, 133 insertions(+), 61 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6a4c9960/Makefile.am
----------------------------------------------------------------------
diff --git a/Makefile.am b/Makefile.am
index d2b2aa8..7ae4537 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -75,6 +75,7 @@ SINGA_SRCS := src/driver.cc \
src/neuralnet/connection_layer/concate.cc \
src/neuralnet/connection_layer/slice.cc \
src/neuralnet/connection_layer/split.cc \
+ src/neuralnet/connection_layer/rnn_dummy.cc \
src/neuralnet/input_layer/char_rnn.cc \
src/neuralnet/input_layer/onehot.cc \
src/neuralnet/input_layer/csv.cc \
@@ -88,6 +89,7 @@ SINGA_SRCS := src/driver.cc \
src/neuralnet/output_layer/argsort.cc \
src/neuralnet/output_layer/csv.cc \
src/neuralnet/output_layer/record.cc \
+ src/neuralnet/output_layer/char_rnn.cc \
src/neuralnet/loss_layer/euclidean.cc \
src/neuralnet/loss_layer/softmax.cc \
src/neuralnet/neuron_layer/activation.cc \
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6a4c9960/include/singa/neuralnet/connection_layer.h
----------------------------------------------------------------------
diff --git a/include/singa/neuralnet/connection_layer.h b/include/singa/neuralnet/connection_layer.h
index a18f458..481d991 100644
--- a/include/singa/neuralnet/connection_layer.h
+++ b/include/singa/neuralnet/connection_layer.h
@@ -153,6 +153,34 @@ class SplitLayer : public ConnectionLayer {
Layer2Index layer_idx_;
};
+/**
+ * Dummy layer for RNN models, which provides input for other layers.
+ *
+ * In particular, it is used in the test phase of RNN models to connect other
+ * layers and avoid cycles in the neural net config.
+ */
+class RNNDummyLayer : public ConnectionLayer {
+ public:
+ void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override;
+ void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
+ void ComputeGradient(int flag, const vector<Layer*>& srclayers) {
+ LOG(FATAL) << "Not implemented";
+ }
+
+ const string srclayer(int step) const {
+ if (step > 0)
+ return dynamic_src_;
+ else
+ return "";
+ }
+
+ private:
+ string dynamic_src_;
+ float low_, high_;
+ bool integer_;
+ Layer* srclayer_;
+};
+
} // namespace singa
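
The proto fields backing RNNDummyLayer appear in the job.proto hunk later in this message; a hypothetical configuration for test-time sampling (layer names, shapes, and ranges are made up):

    layer {
      name: "input"
      type: kRNNDummy
      rnn_dummy_conf {
        dynamic_srclayer: "0#output"  # hypothetical unrolled output layer
        shape: 1                      # batchsize 1 at sampling time
        shape: 1
        integer: true                 # draw integer char ids
        low: 0
        high: 100                     # hypothetical vocab size
      }
    }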
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6a4c9960/include/singa/neuralnet/layer.h
----------------------------------------------------------------------
diff --git a/include/singa/neuralnet/layer.h b/include/singa/neuralnet/layer.h
index f4738fa..c1612a2 100644
--- a/include/singa/neuralnet/layer.h
+++ b/include/singa/neuralnet/layer.h
@@ -36,6 +36,20 @@ using std::string;
// TODO(wangwei) make AuxType a template argument for Layer.
using AuxType = int;
+
+inline const string AddUnrollingPrefix(int unroll_idx, const string& name) {
+ return std::to_string(unroll_idx) + "#" + name;
+}
+inline const string AddPartitionSuffix(int partition_idx, const string& name) {
+ return name + "@" + std::to_string(partition_idx);
+}
+
+
+inline const string AddPrefixSuffix(int unroll_idx, int partition_idx,
+ const string& name) {
+ return std::to_string(unroll_idx) + "#" + name + "@" +
+ std::to_string(partition_idx);
+}
/**
* Base layer class.
*
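
Hedged examples of the three naming helpers just added; the '#' and '@' separators match the unrolling and partitioning code elsewhere in this series:

    AddUnrollingPrefix(2, "gru");    // -> "2#gru"
    AddPartitionSuffix(0, "gru");    // -> "gru@0"
    AddPrefixSuffix(2, 0, "gru");    // -> "2#gru@0"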
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6a4c9960/include/singa/neuralnet/neuron_layer.h
----------------------------------------------------------------------
diff --git a/include/singa/neuralnet/neuron_layer.h b/include/singa/neuralnet/neuron_layer.h
index e587e38..e1a63a2 100644
--- a/include/singa/neuralnet/neuron_layer.h
+++ b/include/singa/neuralnet/neuron_layer.h
@@ -203,7 +203,7 @@ class GRULayer : public NeuronLayer {
int batchsize_; // batch size
int vdim_, hdim_; // dimensions
- Blob<float> *update_gate, *reset_gate, *new_memory;
+ Blob<float> *update_gate, *reset_gate, *new_memory, *reset_context;
//!< gru layer connects to two dst layers, hence needs two grad blobs.
Blob<float> aux_grad_;
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6a4c9960/include/singa/neuralnet/output_layer.h
----------------------------------------------------------------------
diff --git a/include/singa/neuralnet/output_layer.h b/include/singa/neuralnet/output_layer.h
index c7e5d6a..9071f33 100644
--- a/include/singa/neuralnet/output_layer.h
+++ b/include/singa/neuralnet/output_layer.h
@@ -80,5 +80,20 @@ class RecordOutputLayer : public OutputLayer {
int inst_ = 0; //!< instance No.
io::Store* store_ = nullptr;
};
+
+/**
+ * Output layer for char rnn model, which convert sample id back to char and
+ * dump to stdout.
+ */
+class CharRNNOutputLayer : public OutputLayer {
+ public:
+ void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override;
+
+ void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
+
+ private:
+ string vocab_;
+};
+
} // namespace singa
#endif // SINGA_NEURALNET_OUTPUT_LAYER_H_
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6a4c9960/include/singa/utils/updater.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/updater.h b/include/singa/utils/updater.h
index 575ab86..7fec78c 100644
--- a/include/singa/utils/updater.h
+++ b/include/singa/utils/updater.h
@@ -118,18 +118,16 @@ class NesterovUpdater : public Updater {
void Update(int step, Param* param, float grad_scale) override;
};
-/*
class RMSPropUpdater : public Updater {
public:
- virtual void Update(int step, Param* param, float grad_scale);
+ void Init(const UpdaterProto &proto) override;
+ void Update(int step, Param* param, float grad_scale) override;
protected:
- float base_lr_;
- float delta_;
float rho_;
- float weight_decay_;
};
+/*
class AdaDeltaUpdater : public Updater {
public:
virtual void Update(int step, Param* param, float grad_scale);
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6a4c9960/src/driver.cc
----------------------------------------------------------------------
diff --git a/src/driver.cc b/src/driver.cc
index 21968bb..1e4929f 100644
--- a/src/driver.cc
+++ b/src/driver.cc
@@ -74,6 +74,7 @@ void Driver::Init(int argc, char **argv) {
RegisterLayer<CharRNNInputLayer, int>(kCharRNN);
RegisterLayer<RNNLabelLayer, int>(kRNNLabel);
RegisterLayer<OneHotLayer, int>(kOneHot);
+ RegisterLayer<CharRNNOutputLayer, int>(kCharRNNOutput);
// connection layers
RegisterLayer<BridgeDstLayer, int>(kBridgeDst);
@@ -81,6 +82,7 @@ void Driver::Init(int argc, char **argv) {
RegisterLayer<ConcateLayer, int>(kConcate);
RegisterLayer<SliceLayer, int>(kSlice);
RegisterLayer<SplitLayer, int>(kSplit);
+ RegisterLayer<RNNDummyLayer, int>(kRNNDummy);
RegisterLayer<AccuracyLayer, int>(kAccuracy);
RegisterLayer<ArgSortLayer, int>(kArgSort);
@@ -125,7 +127,7 @@ void Driver::Init(int argc, char **argv) {
// register updaters
RegisterUpdater<AdaGradUpdater>(kAdaGrad);
RegisterUpdater<NesterovUpdater>(kNesterov);
- // TODO(wangwei) RegisterUpdater<kRMSPropUpdater>(kRMSProp);
+ RegisterUpdater<RMSPropUpdater>(kRMSProp);
RegisterUpdater<SGDUpdater>(kSGD);
// register learning rate change methods
@@ -198,6 +200,8 @@ void Driver::Test(const JobProto& job_conf) {
auto worker = Worker::Create(job_conf.train_one_batch());
worker->Setup(0, 0, job_conf, nullptr, nullptr, nullptr);
auto net = NeuralNet::Create(job_conf.neuralnet(), kTest, 1);
+ WriteStringToTextFile(Cluster::Get()->vis_folder() + "/test_net.json",
+ net->ToGraph(true).ToJson());
vector<string> paths;
for (const auto& p : job_conf.checkpoint_path())
paths.push_back(p);
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6a4c9960/src/neuralnet/neuralnet.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuralnet.cc b/src/neuralnet/neuralnet.cc
index f9579b1..49978a1 100644
--- a/src/neuralnet/neuralnet.cc
+++ b/src/neuralnet/neuralnet.cc
@@ -144,7 +144,7 @@ const NetProto NeuralNet::Unrolling(const NetProto& net_conf) {
for (int i = 0; i < org_layer.unroll_len(); i ++) { // unroll
LayerProto* unroll_layer = conf.add_layer();
unroll_layer->CopyFrom(org_layer); // create a new layer conf
- if (org_layer.unroll_len() > 1) {
+ // if (org_layer.unroll_len() > 1) {
// update layer names
std::stringstream sstm;
sstm << i << '#' << unroll_layer->name();
@@ -160,7 +160,7 @@ const NetProto NeuralNet::Unrolling(const NetProto& net_conf) {
sstm1 << i << '#' << param->name();
param->set_name(sstm1.str());
}
- }
+ // }
// clear unrolling related fields
unroll_layer->clear_unroll_len();
unroll_layer->clear_unroll_conn_type();
@@ -257,6 +257,7 @@ void NeuralNet::Load(const vector<string>& paths,
ReadProtoFromBinaryFile(path.c_str(), &bps);
for (int i = 0; i < bps.name_size(); i++) {
if (params.find(bps.name(i)) != params.end()) {
+ // LOG(ERROR) << "Loading param = " << bps.name(i);
params.at(bps.name(i))->FromProto(bps.blob(i));
params.at(bps.name(i))->set_version(bps.version(i));
}
@@ -458,12 +459,10 @@ Graph* NeuralNet::CreateGraph(const NetProto& netproto, int npartitions) {
map<string, const LayerProto*> name2proto;
for (const LayerProto& layer : net_w_connection.layer()) {
vector<Node*> nodes;
- char suffix[4];
for (int i = 0; i < npartitions; i++) {
LayerProto *proto = new LayerProto(layer);
- snprintf(suffix, sizeof(suffix), "%02d", i);
// differentiate partitions
- string nodename = layer.name() + "@" + string(suffix);
+ string nodename = layer.name() + "@" + std::to_string(i);
proto->set_name(nodename);
proto->set_type(layer.type());
proto->set_partition_dim(layer.partition_dim());
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6a4c9960/src/neuralnet/neuron_layer/gru.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuron_layer/gru.cc b/src/neuralnet/neuron_layer/gru.cc
index 9ba5a50..cf7425b 100644
--- a/src/neuralnet/neuron_layer/gru.cc
+++ b/src/neuralnet/neuron_layer/gru.cc
@@ -98,6 +98,8 @@ void GRULayer::Setup(const LayerProto& conf,
update_gate = new Blob<float>(batchsize_, hdim_);
reset_gate = new Blob<float>(batchsize_, hdim_);
+ // reset gate x context
+ reset_context = new Blob<float>(batchsize_, hdim_);
new_memory = new Blob<float>(batchsize_, hdim_);
}
@@ -130,24 +132,23 @@ void GRULayer::ComputeFeature(int flag,
Map<op::Sigmoid<float>,float>(*update_gate, update_gate);
// Compute the reset gate
- GEMM(1.0f, 0.0f, src,*w_r_hx_t,reset_gate);
+ GEMM(1.0f, 0.0f, src, *w_r_hx_t, reset_gate);
if (bias_r_ != nullptr)
- MVAddRow(1.0f,1.0f,bias_r_->data(),reset_gate);
+ MVAddRow(1.0f,1.0f, bias_r_->data(),reset_gate);
GEMM(1.0f, 1.0f, *context, *w_r_hh_t, reset_gate);
Map<op::Sigmoid<float>,float>(*reset_gate, reset_gate);
// Compute the new memory
- GEMM(1.0f, 0.0f, src, *w_c_hx_t, new_memory);
+ Mult<float>(*reset_gate, *context, reset_context);
+ GEMM(1.0f, 0.0f, *reset_context, *w_c_hh_t, new_memory);
+ GEMM(1.0f, 1.0f, src, *w_c_hx_t, new_memory);
if (bias_c_ != nullptr)
- MVAddRow(1.0f,1.0f,bias_c_->data(), new_memory);
- Mult<float>(*reset_gate, *new_memory, new_memory);
- GEMM(1.0f, 1.0f, *context, *w_c_hh_t, new_memory);
+ MVAddRow(1.0f, 1.0f, bias_c_->data(), new_memory);
Map<op::Tanh<float>,float>(*new_memory, new_memory);
-
- Sub(*context, *new_memory, &data_);
+ Sub(*new_memory, *context, &data_);
Mult(data_, *update_gate, &data_);
- Add(data_, *new_memory, &data_);
+ AXPY(1.0f, *context, &data_);
// delete the pointers
if (srclayers.size() == 1)
@@ -192,24 +193,19 @@ void GRULayer::ComputeGradient(int flag,
Map<singa::op::TanhGrad<float>, float>(*new_memory, &dnewmdc);
Blob<float> dLdz (batchsize_, hdim_);
- Sub<float>(*context, *new_memory, &dLdz);
+ Sub<float>(*new_memory, *context, &dLdz);
Mult<float>(dLdz, grad_, &dLdz);
Mult<float>(dLdz, dugatedz, &dLdz);
Blob<float> dLdc (batchsize_,hdim_);
- Blob<float> z1 (batchsize_,hdim_);
- z1.SetValue(1.0f);
- AXPY<float>(-1.0f, *update_gate, &z1);
- Mult(grad_,z1,&dLdc);
- Mult(dLdc,dnewmdc,&dLdc);
+ Mult(grad_, *update_gate, &dLdc);
+ Mult(dLdc, dnewmdc, &dLdc);
Blob<float> reset_dLdc (batchsize_,hdim_);
- Mult(dLdc, *reset_gate, &reset_dLdc);
+ GEMM(1.0f, 0.0f, dLdc, weight_c_hh_->data(), &reset_dLdc);
Blob<float> dLdr (batchsize_, hdim_);
- Blob<float> cprev (batchsize_, hdim_);
- GEMM(1.0f, 0.0f, *context, weight_c_hh_->data().T(), &cprev);
- Mult(dLdc, cprev, &dLdr);
+ Mult(reset_dLdc, *context, &dLdr);
Mult(dLdr, drgatedr, &dLdr);
// Compute gradients for parameters of update gate
@@ -230,29 +226,25 @@ void GRULayer::ComputeGradient(int flag,
// Compute gradients for parameters of new memory
Blob<float> *dLdc_t = Transpose(dLdc);
- GEMM(1.0f, beta, *dLdc_t, src,weight_c_hx_->mutable_grad());
+ GEMM(1.0f, beta, *dLdc_t, src, weight_c_hx_->mutable_grad());
+ GEMM(1.0f, beta, *dLdc_t, *reset_context, weight_c_hh_->mutable_grad());
if (bias_c_ != nullptr)
MVSumRow(1.0f, beta, dLdc, bias_c_->mutable_grad());
delete dLdc_t;
- Blob<float> *reset_dLdc_t = Transpose(reset_dLdc);
- GEMM(1.0f, beta, *reset_dLdc_t, *context, weight_c_hh_->mutable_grad());
- delete reset_dLdc_t;
-
// Compute gradients for data input layer
if (srclayers[0]->mutable_grad(this) != nullptr) {
- GEMM(1.0f,0.0f,dLdc, weight_c_hx_->data(), ilayer->mutable_grad(this));
- GEMM(1.0f,1.0f,dLdz, weight_z_hx_->data(), ilayer->mutable_grad(this));
- GEMM(1.0f,1.0f,dLdr, weight_r_hx_->data(), ilayer->mutable_grad(this));
+ GEMM(1.0f,0.0f, dLdc, weight_c_hx_->data(), ilayer->mutable_grad(this));
+ GEMM(1.0f,1.0f, dLdz, weight_z_hx_->data(), ilayer->mutable_grad(this));
+ GEMM(1.0f,1.0f, dLdr, weight_r_hx_->data(), ilayer->mutable_grad(this));
}
if (clayer != nullptr && clayer->mutable_grad(this) != nullptr) {
// Compute gradients for context layer
- GEMM(1.0f, 0.0f, reset_dLdc, weight_c_hh_->data(),
- clayer->mutable_grad(this));
+ Mult(reset_dLdc, *reset_gate, clayer->mutable_grad(this));
GEMM(1.0f, 1.0f, dLdr, weight_r_hh_->data(), clayer->mutable_grad(this));
GEMM(1.0f, 1.0f, dLdz, weight_z_hh_->data(), clayer->mutable_grad(this));
- Add(clayer->grad(this), *update_gate, clayer->mutable_grad(this));
+ AXPY(-1.0f, *update_gate, clayer->mutable_grad(this));
// LOG(ERROR) << "grad to prev gru " << Asum(clayer->grad(this));
}
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6a4c9960/src/proto/job.proto
----------------------------------------------------------------------
diff --git a/src/proto/job.proto b/src/proto/job.proto
index e520eba..28a3a68 100644
--- a/src/proto/job.proto
+++ b/src/proto/job.proto
@@ -253,6 +253,7 @@ message LayerProto {
optional ConcateProto concate_conf = 502;
optional SliceProto slice_conf = 503;
optional SplitProto split_conf = 504;
+ optional RNNDummyProto rnn_dummy_conf = 505;
extensions 1001 to 1100;
}
@@ -456,6 +457,17 @@ message DummyProto {
repeated int32 shape = 3;
}
+message RNNDummyProto {
+ optional string dynamic_srclayer = 1;
+ // if shape is set, randomly generate the data blob
+ repeated int32 shape = 2;
+ // if integer is true, generate integer data
+ optional bool integer = 3 [default = false];
+ // range of the random generation
+ optional float low = 4 [default = 0];
+ optional float high = 5 [default = 0];
+}
+
// Message that stores parameters used by DropoutLayer
message DropoutProto {
// dropout ratio
@@ -667,6 +679,7 @@ enum LayerType {
kArgSort = 401;
kCSVOutput = 402;
kRecordOutput = 403;
+ kCharRNNOutput = 404;
/*
* Connection layers
@@ -677,6 +690,7 @@ enum LayerType {
kConcate = 502;
kSlice = 503;
kSplit = 504;
+ kRNNDummy = 505;
/*
* User defined layer
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6a4c9960/src/utils/updater.cc
----------------------------------------------------------------------
diff --git a/src/utils/updater.cc b/src/utils/updater.cc
index a9f70c0..200670a 100644
--- a/src/utils/updater.cc
+++ b/src/utils/updater.cc
@@ -174,31 +174,27 @@ void AdaGradUpdater::Update(int step, Param* param, float grad_scale) {
data -= lr * grad / (F<sqrtop>(history, proto_.delta()));
}
-/***********************RMSProp******************************
-void RMSPropUpdater::Init(const UpdaterProto& proto){
+/***********************RMSProp******************************/
+void RMSPropUpdater::Init(const UpdaterProto& proto) {
Updater::Init(proto);
- base_lr_ = proto.base_lr();
- CHECK_GT(base_lr_, 0);
- delta_ = proto.delta();
rho_ = proto.rmsprop_conf().rho();
- weight_decay_ = proto.weight_decay();
}
-void RMSPropUpdater::Update(int step, Param* param, float grad_scale){
+void RMSPropUpdater::Update(int step, Param* param, float grad_scale) {
Shape<1> s=Shape1(param->size());
Tensor<cpu, 1> data(param->mutable_cpu_data(), s);
Tensor<cpu, 1> grad(param->mutable_cpu_grad(), s);
Tensor<cpu, 1> history(param->mutable_cpu_history(), s);
- history=history*rho_+(1-rho_)*F<op::square>(grad*grad_scale);
- float lr=GetLearningRate(step)*param->lr_scale();
- float wd=weight_decay_*param->wd_scale();
- if(wd>0){ // L2 regularization
- grad+=data*wd;
- }
- data-=lr*grad/(F<op::sqrtop>(history,delta_));
+ float lr = lr_gen_->Get(step) * param->lr_scale();
+ float wd = weight_decay_ * param->wd_scale();
+ if (grad_scale != 1.f)
+ grad *= grad_scale;
+ if (wd > 0) // L2 regularization, should be done after timing grad_scale
+ grad += data * wd;
+ history = history * rho_ + (1 - rho_) * F<square>(grad);
+ data -= lr * grad / (F<sqrtop>(history, proto_.delta()));
}
-
-***********************AdaDelta******************************
+/***********************AdaDelta******************************
void AdaDeltaUpdater::Init(const UpdaterProto& proto){
Updater::Init(proto);
delta_=proto.delta();
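
The rewritten update matches the standard RMSProp rule; with g_t the gradient after grad_scale and L2 weight decay are applied, and assuming F<sqrtop>(h, \delta) computes \sqrt{h + \delta}:

    h_t      = \rho h_{t-1} + (1 - \rho) g_t^2          (element-wise)
    \theta_t = \theta_{t-1} - \eta_t g_t / \sqrt{h_t + \delta}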
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6a4c9960/src/worker.cc
----------------------------------------------------------------------
diff --git a/src/worker.cc b/src/worker.cc
index 4e1dc75..abe74e7 100644
--- a/src/worker.cc
+++ b/src/worker.cc
@@ -390,13 +390,24 @@ void BPTTWorker::Forward(int step, Phase phase, NeuralNet* net) {
}
}
vector<Layer*> src = net->srclayers(layer);
+ if ((phase & kTest) && typeid(*layer) == typeid(RNNDummyLayer)) {
+ CHECK_LE(src.size(), 1);
+ auto dummy = dynamic_cast<RNNDummyLayer*>(layer);
+ Layer* srclayer = net->name2layer(dummy->srclayer(step));
+ if (step > 0)
+ CHECK(srclayer != nullptr);
+ if (srclayer != nullptr) {
+ src.clear();
+ src.push_back(srclayer);
+ }
+ }
// if full-state RNN and not at the start of a new pass over the dataset,
// feed the hidden state of the last unit to the first unit.
if (layer->unroll_index() == 0 && full_state_ && !begin_) {
Layer* last = net->last_unroll_layer(layer);
- if (last != layer) {
+ CHECK(last != nullptr);
+ if (last != layer || (phase & kTest))
src.push_back(last);
- }
}
// LOG(ERROR) << layer->name() << " forward";
// int ret =
@@ -405,7 +416,6 @@ void BPTTWorker::Forward(int step, Phase phase, NeuralNet* net) {
if ((phase & Phase::kTrain) && ret == Status::kEnd)
begin_ = true;
*/
-
if (job_conf_.debug() && DisplayNow(step) && grp_id_ == 0)
label[layer->name()] = layer->ToString(true, phase | kForward);
}
[03/10] incubator-singa git commit: SINGA-120 - Implemented GRU and
BPTT: Fixed bugs regarding the Map function
Posted by wa...@apache.org.
SINGA-120 - Implemented GRU and BPTT: Fixed bugs regarding the Map function
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/c440e4a4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/c440e4a4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/c440e4a4
Branch: refs/heads/master
Commit: c440e4a491a970ae6af8eaf5e371ee6b2509af15
Parents: 777dfb6
Author: Ju Fan <fa...@gmail.com>
Authored: Fri Jan 1 10:51:17 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Wed Jan 6 01:53:05 2016 +0800
----------------------------------------------------------------------
include/singa/utils/math_blob.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c440e4a4/include/singa/utils/math_blob.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/math_blob.h b/include/singa/utils/math_blob.h
index 34e67ad..125509a 100644
--- a/include/singa/utils/math_blob.h
+++ b/include/singa/utils/math_blob.h
@@ -280,7 +280,7 @@ template<typename Op, typename Dtype>
void Map(const Blob<Dtype> & A, const Blob<Dtype> & B, Blob<Dtype> * C) {
CHECK_EQ(A.count(), B.count()) << "Blobs must have the same size";
CHECK_EQ(A.count(), C->count()) << "Blobs must have the same size";
- cpu_e_f<Op>(A.count(), A.cpu_data(), B.cpu_data(), C->mutable_cpu_data());
+ //cpu_e_f<Op>(A.count(), A.cpu_data(), B.cpu_data(), C->mutable_cpu_data());
auto context = Singleton<Context>::Instance();
int device = context->device_id(std::this_thread::get_id());
if (device == -1) {
[07/10] incubator-singa git commit: SINGA-120 - Implemented GRU and
BPTT: 1) Implemented the unrolling function for BPTT;
2) Added tests for unrolling
Posted by wa...@apache.org.
SINGA-120 - Implemented GRU and BPTT: 1) Implemented the unrolling function for BPTT; 2) Added tests for unrolling
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/777dfb6a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/777dfb6a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/777dfb6a
Branch: refs/heads/master
Commit: 777dfb6a6fc4058b35368dfbe1fba4a27a93e828
Parents: 473c985
Author: Ju Fan <fa...@gmail.com>
Authored: Fri Jan 1 10:50:20 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Wed Jan 6 01:53:05 2016 +0800
----------------------------------------------------------------------
include/singa/neuralnet/neuralnet.h | 1 +
src/neuralnet/neuralnet.cc | 121 ++++++++--
src/test/test_connection_layers.cc | 8 +-
src/test/test_unrolling.cc | 398 +++++++++++++++++++++++++++++++
4 files changed, 510 insertions(+), 18 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/777dfb6a/include/singa/neuralnet/neuralnet.h
----------------------------------------------------------------------
diff --git a/include/singa/neuralnet/neuralnet.h b/include/singa/neuralnet/neuralnet.h
index bc1a7d8..be8f5c8 100644
--- a/include/singa/neuralnet/neuralnet.h
+++ b/include/singa/neuralnet/neuralnet.h
@@ -58,6 +58,7 @@ class NeuralNet {
static NeuralNet* Create(const NetProto& net_conf, Phase phase,
int npartitions);
+ static const NetProto Unrolling(const NetProto& net_conf);
/**
* construct the net structure from protocol buffer.
* @param netproto neural net config
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/777dfb6a/src/neuralnet/neuralnet.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuralnet.cc b/src/neuralnet/neuralnet.cc
index 9cdaff4..6bb0ecd 100644
--- a/src/neuralnet/neuralnet.cc
+++ b/src/neuralnet/neuralnet.cc
@@ -24,6 +24,8 @@
#include <algorithm>
#include <queue>
#include "singa/utils/singleton.h"
+#include <unordered_map>
+using namespace std;
namespace singa {
@@ -36,9 +38,6 @@ NeuralNet* NeuralNet::Create(const NetProto& net_conf, Phase phase,
NetProto conf;
conf.CopyFrom(net_conf);
conf.clear_layer();
- // for sharing param conf
- std::unordered_map<string, ParamProto*> name2param;
- std::vector<ParamProto*> shares;
// flag=0: neither exclude nor include field appears
// flag=1: exclude field appears
// flag=2: include field appears
@@ -78,16 +77,25 @@ NeuralNet* NeuralNet::Create(const NetProto& net_conf, Phase phase,
// using net partition if layer partition is not set
if (!layer_conf->has_partition_dim())
layer_conf->set_partition_dim(net_conf.partition_dim());
- for (int i = 0; i < layer_conf->param_size(); i++) {
- ParamProto* param = layer_conf->mutable_param(i);
- if (param->has_name() && param->name() != "") {
- CHECK(name2param.find(param->name()) == name2param.end())
- << "param name is repeated: " << param->name();
- name2param[param->name()] = param;
- }
- if (param->has_share_from() && param->share_from() != "")
- shares.push_back(param);
- }
+ }
+ //LOG(INFO) << "Before unrolling: \n" << conf.DebugString();
+ conf = Unrolling(conf);
+
+ // Collect param confs to resolve parameter sharing (share_from)
+ std::unordered_map<string, ParamProto*> name2param;
+ std::vector<ParamProto*> shares;
+ for (int index = 0; index < conf.layer_size(); index++) {
+ LayerProto* layer = conf.mutable_layer(index);
+ for (int i = 0; i < layer->param_size(); i++) {
+ ParamProto* param = layer->mutable_param(i);
+ if (param->has_name() && param->name() != "") {
+ CHECK(name2param.find(param->name()) == name2param.end())
+ << "param name is repeated: " << param->name();
+ name2param[param->name()] = param;
+ }
+ if (param->has_share_from() && param->share_from() != "")
+ shares.push_back(param);
+ }
}
for (auto param : shares) {
const std::string from = param->share_from();
@@ -104,6 +112,91 @@ NeuralNet* NeuralNet::Create(const NetProto& net_conf, Phase phase,
return new NeuralNet(conf, npartitions);
}
+const NetProto NeuralNet::Unrolling(const NetProto& net_conf) {
+ // Step 1: Unroll each layer & set parameter sharing
+ NetProto conf;
+
+ std::vector<std::vector<int>> layer_groups;
+ std::unordered_map<string,int> org_layer_names;
+ for (int index = 0; index < net_conf.layer_size(); index++) {
+ const LayerProto& org_layer = net_conf.layer(index);
+ org_layer_names[org_layer.name()] = index; // layer_name -> index
+
+ std::vector<int> layer_group;
+ for (int i = 0; i < org_layer.unroll_len(); i++) { // unroll
+ LayerProto* unroll_layer = conf.add_layer();
+ unroll_layer->CopyFrom(org_layer); // create a new layer conf
+ if (org_layer.unroll_len() > 1) {
+ // update layer names
+ std::stringstream sstm;
+ sstm << unroll_layer->name() << "_" << i;
+ unroll_layer->set_name(sstm.str());
+ // update layer parameter sharing
+ for (int j = 0; j < unroll_layer->param_size(); j++) {
+ ParamProto* param = unroll_layer->mutable_param(j);
+ if (i == 0) continue; // no need to rename parameters in the 0-th unrolled layer
+ if (!param->has_share_from() || param->share_from() == "") { // not shared from others
+ param->set_share_from(param->name());
+ }
+ std::stringstream sstm1;
+ sstm1 << param->name() << "_" << i;
+ param->set_name(sstm1.str());
+ }
+ }
+ // clear unrolling-related fields
+ unroll_layer->clear_unroll_len();
+ unroll_layer->clear_unroll_conn_type();
+ unroll_layer->clear_shift();
+ unroll_layer->clear_srclayers();
+
+ layer_group.push_back(conf.layer_size() - 1);
+ }
+ layer_groups.push_back(layer_group);
+ }
+ // Step 2: Connect unrolled layers by setting `srclayers`
+ for (int index = 0; index < net_conf.layer_size(); index++) {
+ const LayerProto& org_layer = net_conf.layer(index);
+ if (org_layer.srclayers_size() == 0) continue; // no src layer
+ // TODO(fanju): add LSTM when it is ready
+ if (org_layer.type() == kGRU) { // connect GRU layers
+ for (unsigned int j = 1; j < layer_groups[index].size(); j++) {
+ LayerProto* unroll_layer = conf.mutable_layer(layer_groups[index][j]);
+ unroll_layer->add_srclayers(conf.layer(layer_groups[index][j-1]).name());
+ }
+ }
+ for (int i = 0; i < org_layer.srclayers_size(); i++) {
+ const string& org_layer_src = org_layer.srclayers(i);
+
+ singa::UnrollConnType unroll_conn_type = kUnrollOneToOne; // Default value
+ if (i < org_layer.unroll_conn_type_size()) unroll_conn_type = org_layer.unroll_conn_type(i);
+ unsigned int shift = 0; // Default shift value
+ if (i < org_layer.shift_size()) shift = org_layer.shift(i);
+
+ const std::vector<int> unroll_layer_srcs = layer_groups[org_layer_names[org_layer_src]];
+
+ for (unsigned int j = 0; j < layer_groups[index].size(); j++) {
+ LayerProto* unroll_layer = conf.mutable_layer(layer_groups[index][j]);
+ // Update src layers of unroll_layer according to its unroll connection type
+ if (unroll_conn_type == kUnrollOneToAll) {
+ for (int unroll_layer_src : unroll_layer_srcs) {
+ unroll_layer->add_srclayers(conf.layer(unroll_layer_src).name());
+ }
+ } else if (unroll_conn_type == kUnrollOneToOne) {
+ if (j < shift) continue; // no need to connect with the src
+ int unroll_layer_src = unroll_layer_srcs[j - shift];
+ unroll_layer->add_srclayers(conf.layer(unroll_layer_src).name());
+ } else if (unroll_conn_type == kUnrollFirstToLast) {
+ if (j > 0) break;
+ int unroll_layer_src = unroll_layer_srcs[unroll_layer_srcs.size() - 1];
+ unroll_layer->add_srclayers(conf.layer(unroll_layer_src).name());
+ }
+ }
+ }
+ }
+ return conf;
+}
+
+
NeuralNet::NeuralNet(NetProto netproto, int npartitions) {
LOG(INFO) << "Constructing NeuralNet...";
auto graph = CreateGraph(netproto, npartitions);
@@ -260,7 +353,7 @@ NetProto NeuralNet::AddPartitionConnectionLayers(const NetProto& netproto,
* (NO) src_pdim = dst_pdim ?
* (YES) Direct Connection
* (NO) Slice -> Concate
- */
+ */
for (const LayerProto& origin_layer : netproto.layer()) {
LayerProto* dst_layer = name2proto[origin_layer.name()];
int dst_pdim = dst_layer->partition_dim();
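To summarize the connection logic implemented above: kUnrollOneToAll links every clone of the destination layer to every clone of the source; kUnrollOneToOne links destination clone j to source clone j - shift (clones with j < shift get no such link); kUnrollFirstToLast links only clone 0, and only to the last source clone. GRU clones additionally receive the previous clone as an implicit first source. A small sketch of the wiring rule, assuming source and destination are unrolled to the same length, as in the tests below:

#include <iostream>
#include <vector>

enum UnrollConnType { kUnrollOneToOne, kUnrollOneToAll, kUnrollFirstToLast };

// For each destination clone j, return the indices of the source
// clones it reads from, mirroring Step 2 of NeuralNet::Unrolling.
std::vector<std::vector<int>> Connect(int T, UnrollConnType type,
                                      unsigned int shift) {
  std::vector<std::vector<int>> srcs(T);
  for (int j = 0; j < T; ++j) {
    if (type == kUnrollOneToAll) {
      for (int s = 0; s < T; ++s) srcs[j].push_back(s);
    } else if (type == kUnrollOneToOne) {
      if (j >= static_cast<int>(shift)) srcs[j].push_back(j - shift);
    } else if (j == 0) {  // kUnrollFirstToLast
      srcs[j].push_back(T - 1);
    }
  }
  return srcs;
}

int main() {
  // One-to-one with shift 1, as in the embedding <- softmax link of
  // the second test fixture below: clone 1 reads source clone 0, etc.
  auto srcs = Connect(3, kUnrollOneToOne, 1);
  for (int j = 0; j < 3; ++j) {
    std::cout << "dst_" << j << ":";
    for (int s : srcs[j]) std::cout << " src_" << s;
    std::cout << "\n";  // dst_0:  /  dst_1: src_0  /  dst_2: src_1
  }
  return 0;
}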
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/777dfb6a/src/test/test_connection_layers.cc
----------------------------------------------------------------------
diff --git a/src/test/test_connection_layers.cc b/src/test/test_connection_layers.cc
index 5517bde..6529840 100644
--- a/src/test/test_connection_layers.cc
+++ b/src/test/test_connection_layers.cc
@@ -179,8 +179,8 @@ TEST(ConnectionLayerTest, DataSliceTest) {
proto_slice.mutable_slice_conf()->set_num_slices(K);
SliceLayer slice;
slice.Setup(proto_slice, src_slice);
- ASSERT_EQ(slice.data(static_cast<Layer*>(&slice)).shape(0), N / K);
- ASSERT_EQ(slice.data(static_cast<Layer*>(&slice)).shape(1), M);
+ ASSERT_EQ(slice.data(nullptr).shape(0), N / K);
+ ASSERT_EQ(slice.data(nullptr).shape(1), M);
// use dummy as output layers
LayerProto proto_out[K];
@@ -236,8 +236,8 @@ TEST(ConnectionLayerTest, ModelSliceTest) {
proto_slice.mutable_slice_conf()->set_num_slices(K);
SliceLayer slice;
slice.Setup(proto_slice, src_slice);
- ASSERT_EQ(slice.data(static_cast<Layer*>(&slice)).shape(0), N);
- ASSERT_EQ(slice.data(static_cast<Layer*>(&slice)).shape(1), M / K);
+ ASSERT_EQ(slice.data(nullptr).shape(0), N);
+ ASSERT_EQ(slice.data(nullptr).shape(1), M / K);
// use dummy as output layers
LayerProto proto_out[K];
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/777dfb6a/src/test/test_unrolling.cc
----------------------------------------------------------------------
diff --git a/src/test/test_unrolling.cc b/src/test/test_unrolling.cc
new file mode 100644
index 0000000..e32c528
--- /dev/null
+++ b/src/test/test_unrolling.cc
@@ -0,0 +1,398 @@
+/************************************************************
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ *************************************************************/
+#include <string>
+#include <vector>
+#include <fstream>
+#include <iostream>
+using namespace std;
+
+#include "gtest/gtest.h"
+#include "singa/neuralnet/input_layer.h"
+#include "singa/neuralnet/neuron_layer.h"
+#include "singa/neuralnet/neuralnet.h"
+#include "singa/neuralnet/connection_layer.h"
+#include "singa/driver.h"
+#include "singa/proto/job.pb.h"
+#include "singa/utils/common.h"
+
+using namespace singa;
+
+class UnrollingTest: public ::testing::Test {
+protected:
+ virtual void SetUp() {
+ NetProto* net_conf1 = job_conf1.mutable_neuralnet();
+
+ LayerProto* data_layer1 = net_conf1->add_layer();
+ data_layer1->set_name("data");
+ data_layer1->set_type(kRecordInput);
+
+ LayerProto* embedding_layer1 = net_conf1->add_layer();
+ embedding_layer1->set_name("embedding");
+ embedding_layer1->set_type(kDummy);
+ embedding_layer1->add_srclayers("data");
+ embedding_layer1->set_unroll_len(3);
+ embedding_layer1->add_unroll_conn_type(kUnrollOneToAll);
+ embedding_layer1->add_shift(0);
+
+ LayerProto* gru_layer1 = net_conf1->add_layer();
+ gru_layer1->set_name("gru");
+ gru_layer1->set_type(kGRU);
+ gru_layer1->add_srclayers("embedding");
+ gru_layer1->mutable_gru_conf()->set_dim_hidden(20);
+ gru_layer1->mutable_gru_conf()->set_bias_term(false);
+ gru_layer1->add_param()->set_name("w_z_hx");
+ gru_layer1->add_param()->set_name("w_r_hx");
+ gru_layer1->add_param()->set_name("w_c_hx");
+ gru_layer1->add_param()->set_name("w_z_hh");
+ gru_layer1->add_param()->set_name("w_r_hh");
+ gru_layer1->add_param()->set_name("w_c_hh");
+ gru_layer1->set_unroll_len(3);
+ gru_layer1->add_unroll_conn_type(kUnrollOneToOne);
+ gru_layer1->add_shift(0);
+
+ LayerProto* out_layer1 = net_conf1->add_layer();
+ out_layer1->set_name("out");
+ out_layer1->set_type(kInnerProduct);
+ out_layer1->add_srclayers("gru");
+ out_layer1->mutable_innerproduct_conf()->set_num_output(100);
+ out_layer1->add_param()->set_name("w");
+ out_layer1->add_param()->set_name("b");
+ out_layer1->set_unroll_len(3);
+ out_layer1->add_unroll_conn_type(kUnrollOneToOne);
+ out_layer1->add_shift(0);
+
+ LayerProto* softmax_layer1 = net_conf1->add_layer();
+ softmax_layer1->set_name("softmax");
+ softmax_layer1->set_type(kSoftmax);
+ softmax_layer1->add_srclayers("out");
+ softmax_layer1->set_unroll_len(3);
+ softmax_layer1->add_unroll_conn_type(kUnrollOneToOne);
+ softmax_layer1->add_shift(0);
+
+ LayerProto* loss_layer1 = net_conf1->add_layer();
+ loss_layer1->set_name("loss");
+ loss_layer1->set_type(kSoftmaxLoss);
+ loss_layer1->add_srclayers("softmax");
+ loss_layer1->add_srclayers("data");
+ loss_layer1->set_unroll_len(3);
+ loss_layer1->add_unroll_conn_type(kUnrollOneToOne);
+ loss_layer1->add_shift(0);
+ loss_layer1->add_unroll_conn_type(kUnrollOneToAll);
+ loss_layer1->add_shift(0);
+
+
+ /*
+ * Initialize job conf 2
+ */
+ NetProto* net_conf2 = job_conf2.mutable_neuralnet();
+
+ LayerProto* data_layer2 = net_conf2->add_layer();
+ data_layer2->set_name("data");
+ data_layer2->set_type(kRecordInput);
+
+ LayerProto* embedding_layer2 = net_conf2->add_layer();
+ embedding_layer2->set_name("embedding");
+ embedding_layer2->set_type(kDummy);
+ embedding_layer2->add_srclayers("data");
+ embedding_layer2->add_srclayers("softmax");
+ embedding_layer2->set_unroll_len(3);
+ embedding_layer2->add_unroll_conn_type(kUnrollOneToAll);
+ embedding_layer2->add_shift(0);
+ embedding_layer2->add_unroll_conn_type(kUnrollOneToOne);
+ embedding_layer2->add_shift(1);
+
+ LayerProto* gru_layer2 = net_conf2->add_layer();
+ gru_layer2->set_name("gru");
+ gru_layer2->set_type(kGRU);
+ gru_layer2->add_srclayers("embedding");
+ gru_layer2->mutable_gru_conf()->set_dim_hidden(20);
+ gru_layer2->mutable_gru_conf()->set_bias_term(false);
+ gru_layer2->add_param()->set_name("w_z_hx");
+ gru_layer2->add_param()->set_name("w_r_hx");
+ gru_layer2->add_param()->set_name("w_c_hx");
+ gru_layer2->add_param()->set_name("w_z_hh");
+ gru_layer2->add_param()->set_name("w_r_hh");
+ gru_layer2->add_param()->set_name("w_c_hh");
+ gru_layer2->set_unroll_len(3);
+ gru_layer2->add_unroll_conn_type(kUnrollOneToOne);
+ gru_layer2->add_shift(0);
+
+ LayerProto* out_layer2 = net_conf2->add_layer();
+ out_layer2->set_name("out");
+ out_layer2->set_type(kInnerProduct);
+ out_layer2->add_srclayers("gru");
+ out_layer2->mutable_innerproduct_conf()->set_num_output(100);
+ out_layer2->add_param()->set_name("w");
+ out_layer2->add_param()->set_name("b");
+ out_layer2->set_unroll_len(3);
+ out_layer2->add_unroll_conn_type(kUnrollOneToOne);
+ out_layer2->add_shift(0);
+
+ LayerProto* softmax_layer2 = net_conf2->add_layer();
+ softmax_layer2->set_name("softmax");
+ softmax_layer2->set_type(kSoftmax);
+ softmax_layer2->add_srclayers("out");
+ softmax_layer2->set_unroll_len(3);
+ softmax_layer2->add_unroll_conn_type(kUnrollOneToOne);
+ softmax_layer2->add_shift(0);
+
+ LayerProto* loss_layer2 = net_conf2->add_layer();
+ loss_layer2->set_name("loss");
+ loss_layer2->set_type(kSoftmaxLoss);
+ loss_layer2->add_srclayers("softmax");
+ loss_layer2->add_srclayers("data");
+ loss_layer2->set_unroll_len(3);
+ loss_layer2->add_unroll_conn_type(kUnrollOneToOne);
+ loss_layer2->add_shift(0);
+ loss_layer2->add_unroll_conn_type(kUnrollOneToAll);
+ loss_layer2->add_shift(0);
+ }
+
+ singa::JobProto job_conf1;
+ singa::JobProto job_conf2;
+};
+
+TEST_F(UnrollingTest, GRULanguageModelTrain) {
+ NetProto net;
+ net.CopyFrom(job_conf1.neuralnet());
+ NetProto unrolled_net = NeuralNet::Unrolling(net);
+ EXPECT_EQ("data", unrolled_net.layer(0).name());
+
+ EXPECT_EQ("embedding_0", unrolled_net.layer(1).name());
+ EXPECT_EQ(1, unrolled_net.layer(1).srclayers_size());
+ EXPECT_EQ("data", unrolled_net.layer(1).srclayers(0));
+
+ EXPECT_EQ("embedding_1", unrolled_net.layer(2).name());
+ EXPECT_EQ(1, unrolled_net.layer(2).srclayers_size());
+ EXPECT_EQ("data", unrolled_net.layer(2).srclayers(0));
+
+ EXPECT_EQ("embedding_2", unrolled_net.layer(3).name());
+ EXPECT_EQ(1, unrolled_net.layer(3).srclayers_size());
+ EXPECT_EQ("data", unrolled_net.layer(3).srclayers(0));
+
+ EXPECT_EQ("gru_0", unrolled_net.layer(4).name());
+ EXPECT_EQ(1, unrolled_net.layer(4).srclayers_size());
+ EXPECT_EQ("embedding_0", unrolled_net.layer(4).srclayers(0));
+ EXPECT_EQ("w_z_hx", unrolled_net.layer(4).param(0).name());
+ EXPECT_EQ("w_r_hx", unrolled_net.layer(4).param(1).name());
+ EXPECT_EQ("w_c_hx", unrolled_net.layer(4).param(2).name());
+ EXPECT_EQ("w_z_hh", unrolled_net.layer(4).param(3).name());
+ EXPECT_EQ("w_r_hh", unrolled_net.layer(4).param(4).name());
+ EXPECT_EQ("w_c_hh", unrolled_net.layer(4).param(5).name());
+
+ EXPECT_EQ("gru_1", unrolled_net.layer(5).name());
+ EXPECT_EQ(2, unrolled_net.layer(5).srclayers_size());
+ EXPECT_EQ("gru_0", unrolled_net.layer(5).srclayers(0));
+ EXPECT_EQ("embedding_1", unrolled_net.layer(5).srclayers(1));
+ EXPECT_EQ("w_z_hx_1", unrolled_net.layer(5).param(0).name());
+ EXPECT_EQ("w_z_hx", unrolled_net.layer(5).param(0).share_from());
+ EXPECT_EQ("w_r_hx_1", unrolled_net.layer(5).param(1).name());
+ EXPECT_EQ("w_r_hx", unrolled_net.layer(5).param(1).share_from());
+ EXPECT_EQ("w_c_hx_1", unrolled_net.layer(5).param(2).name());
+ EXPECT_EQ("w_c_hx", unrolled_net.layer(5).param(2).share_from());
+ EXPECT_EQ("w_z_hh_1", unrolled_net.layer(5).param(3).name());
+ EXPECT_EQ("w_z_hh", unrolled_net.layer(5).param(3).share_from());
+ EXPECT_EQ("w_r_hh_1", unrolled_net.layer(5).param(4).name());
+ EXPECT_EQ("w_r_hh", unrolled_net.layer(5).param(4).share_from());
+ EXPECT_EQ("w_c_hh_1", unrolled_net.layer(5).param(5).name());
+ EXPECT_EQ("w_c_hh", unrolled_net.layer(5).param(5).share_from());
+
+ EXPECT_EQ("gru_2", unrolled_net.layer(6).name());
+ EXPECT_EQ(2, unrolled_net.layer(6).srclayers_size());
+ EXPECT_EQ("gru_1", unrolled_net.layer(6).srclayers(0));
+ EXPECT_EQ("embedding_2", unrolled_net.layer(6).srclayers(1));
+ EXPECT_EQ("w_z_hx_2", unrolled_net.layer(6).param(0).name());
+ EXPECT_EQ("w_z_hx", unrolled_net.layer(6).param(0).share_from());
+ EXPECT_EQ("w_r_hx_2", unrolled_net.layer(6).param(1).name());
+ EXPECT_EQ("w_r_hx", unrolled_net.layer(6).param(1).share_from());
+ EXPECT_EQ("w_c_hx_2", unrolled_net.layer(6).param(2).name());
+ EXPECT_EQ("w_c_hx", unrolled_net.layer(6).param(2).share_from());
+ EXPECT_EQ("w_z_hh_2", unrolled_net.layer(6).param(3).name());
+ EXPECT_EQ("w_z_hh", unrolled_net.layer(6).param(3).share_from());
+ EXPECT_EQ("w_r_hh_2", unrolled_net.layer(6).param(4).name());
+ EXPECT_EQ("w_r_hh", unrolled_net.layer(6).param(4).share_from());
+ EXPECT_EQ("w_c_hh_2", unrolled_net.layer(6).param(5).name());
+ EXPECT_EQ("w_c_hh", unrolled_net.layer(6).param(5).share_from());
+
+ EXPECT_EQ("out_0", unrolled_net.layer(7).name());
+ EXPECT_EQ(1, unrolled_net.layer(7).srclayers_size());
+ EXPECT_EQ("gru_0", unrolled_net.layer(7).srclayers(0));
+ EXPECT_EQ("w", unrolled_net.layer(7).param(0).name());
+ EXPECT_EQ("b", unrolled_net.layer(7).param(1).name());
+
+ EXPECT_EQ("out_1", unrolled_net.layer(8).name());
+ EXPECT_EQ(1, unrolled_net.layer(8).srclayers_size());
+ EXPECT_EQ("gru_1", unrolled_net.layer(8).srclayers(0));
+ EXPECT_EQ("w_1", unrolled_net.layer(8).param(0).name());
+ EXPECT_EQ("w", unrolled_net.layer(8).param(0).share_from());
+ EXPECT_EQ("b_1", unrolled_net.layer(8).param(1).name());
+ EXPECT_EQ("b", unrolled_net.layer(8).param(1).share_from());
+
+ EXPECT_EQ("out_2", unrolled_net.layer(9).name());
+ EXPECT_EQ(1, unrolled_net.layer(9).srclayers_size());
+ EXPECT_EQ("gru_2", unrolled_net.layer(9).srclayers(0));
+ EXPECT_EQ("w_2", unrolled_net.layer(9).param(0).name());
+ EXPECT_EQ("w", unrolled_net.layer(9).param(0).share_from());
+ EXPECT_EQ("b_2", unrolled_net.layer(9).param(1).name());
+ EXPECT_EQ("b", unrolled_net.layer(9).param(1).share_from());
+
+ EXPECT_EQ("softmax_0", unrolled_net.layer(10).name());
+ EXPECT_EQ(1, unrolled_net.layer(10).srclayers_size());
+ EXPECT_EQ("out_0", unrolled_net.layer(10).srclayers(0));
+
+ EXPECT_EQ("softmax_1", unrolled_net.layer(11).name());
+ EXPECT_EQ(1, unrolled_net.layer(11).srclayers_size());
+ EXPECT_EQ("out_1", unrolled_net.layer(11).srclayers(0));
+
+ EXPECT_EQ("softmax_2", unrolled_net.layer(12).name());
+ EXPECT_EQ(1, unrolled_net.layer(12).srclayers_size());
+ EXPECT_EQ("out_2", unrolled_net.layer(12).srclayers(0));
+
+ EXPECT_EQ("loss_0", unrolled_net.layer(13).name());
+ EXPECT_EQ(2, unrolled_net.layer(13).srclayers_size());
+ EXPECT_EQ("softmax_0", unrolled_net.layer(13).srclayers(0));
+ EXPECT_EQ("data", unrolled_net.layer(13).srclayers(1));
+
+ EXPECT_EQ("loss_1", unrolled_net.layer(14).name());
+ EXPECT_EQ(2, unrolled_net.layer(14).srclayers_size());
+ EXPECT_EQ("softmax_1", unrolled_net.layer(14).srclayers(0));
+ EXPECT_EQ("data", unrolled_net.layer(14).srclayers(1));
+
+ EXPECT_EQ("loss_2", unrolled_net.layer(15).name());
+ EXPECT_EQ(2, unrolled_net.layer(15).srclayers_size());
+ EXPECT_EQ("softmax_2", unrolled_net.layer(15).srclayers(0));
+ EXPECT_EQ("data", unrolled_net.layer(15).srclayers(1));
+}
+
+TEST_F(UnrollingTest, GRULanguageModelTest) {
+ NetProto net;
+ net.CopyFrom(job_conf2.neuralnet());
+ NetProto unrolled_net = NeuralNet::Unrolling(net);
+
+ EXPECT_EQ("data", unrolled_net.layer(0).name());
+
+ EXPECT_EQ("embedding_0", unrolled_net.layer(1).name());
+ EXPECT_EQ(1, unrolled_net.layer(1).srclayers_size());
+ EXPECT_EQ("data", unrolled_net.layer(1).srclayers(0));
+
+ EXPECT_EQ("embedding_1", unrolled_net.layer(2).name());
+ EXPECT_EQ(2, unrolled_net.layer(2).srclayers_size());
+ EXPECT_EQ("data", unrolled_net.layer(2).srclayers(0));
+ EXPECT_EQ("softmax_0", unrolled_net.layer(2).srclayers(1));
+
+ EXPECT_EQ("embedding_2", unrolled_net.layer(3).name());
+ EXPECT_EQ(2, unrolled_net.layer(3).srclayers_size());
+ EXPECT_EQ("data", unrolled_net.layer(3).srclayers(0));
+ EXPECT_EQ("softmax_1", unrolled_net.layer(3).srclayers(1));
+
+ EXPECT_EQ("gru_0", unrolled_net.layer(4).name());
+ EXPECT_EQ(1, unrolled_net.layer(4).srclayers_size());
+ EXPECT_EQ("embedding_0", unrolled_net.layer(4).srclayers(0));
+ EXPECT_EQ("w_z_hx", unrolled_net.layer(4).param(0).name());
+ EXPECT_EQ("w_r_hx", unrolled_net.layer(4).param(1).name());
+ EXPECT_EQ("w_c_hx", unrolled_net.layer(4).param(2).name());
+ EXPECT_EQ("w_z_hh", unrolled_net.layer(4).param(3).name());
+ EXPECT_EQ("w_r_hh", unrolled_net.layer(4).param(4).name());
+ EXPECT_EQ("w_c_hh", unrolled_net.layer(4).param(5).name());
+
+ EXPECT_EQ("gru_1", unrolled_net.layer(5).name());
+ EXPECT_EQ(2, unrolled_net.layer(5).srclayers_size());
+ EXPECT_EQ("gru_0", unrolled_net.layer(5).srclayers(0));
+ EXPECT_EQ("embedding_1", unrolled_net.layer(5).srclayers(1));
+ EXPECT_EQ("w_z_hx_1", unrolled_net.layer(5).param(0).name());
+ EXPECT_EQ("w_z_hx", unrolled_net.layer(5).param(0).share_from());
+ EXPECT_EQ("w_r_hx_1", unrolled_net.layer(5).param(1).name());
+ EXPECT_EQ("w_r_hx", unrolled_net.layer(5).param(1).share_from());
+ EXPECT_EQ("w_c_hx_1", unrolled_net.layer(5).param(2).name());
+ EXPECT_EQ("w_c_hx", unrolled_net.layer(5).param(2).share_from());
+ EXPECT_EQ("w_z_hh_1", unrolled_net.layer(5).param(3).name());
+ EXPECT_EQ("w_z_hh", unrolled_net.layer(5).param(3).share_from());
+ EXPECT_EQ("w_r_hh_1", unrolled_net.layer(5).param(4).name());
+ EXPECT_EQ("w_r_hh", unrolled_net.layer(5).param(4).share_from());
+ EXPECT_EQ("w_c_hh_1", unrolled_net.layer(5).param(5).name());
+ EXPECT_EQ("w_c_hh", unrolled_net.layer(5).param(5).share_from());
+
+ EXPECT_EQ("gru_2", unrolled_net.layer(6).name());
+ EXPECT_EQ(2, unrolled_net.layer(6).srclayers_size());
+ EXPECT_EQ("gru_1", unrolled_net.layer(6).srclayers(0));
+ EXPECT_EQ("embedding_2", unrolled_net.layer(6).srclayers(1));
+ EXPECT_EQ("w_z_hx_2", unrolled_net.layer(6).param(0).name());
+ EXPECT_EQ("w_z_hx", unrolled_net.layer(6).param(0).share_from());
+ EXPECT_EQ("w_r_hx_2", unrolled_net.layer(6).param(1).name());
+ EXPECT_EQ("w_r_hx", unrolled_net.layer(6).param(1).share_from());
+ EXPECT_EQ("w_c_hx_2", unrolled_net.layer(6).param(2).name());
+ EXPECT_EQ("w_c_hx", unrolled_net.layer(6).param(2).share_from());
+ EXPECT_EQ("w_z_hh_2", unrolled_net.layer(6).param(3).name());
+ EXPECT_EQ("w_z_hh", unrolled_net.layer(6).param(3).share_from());
+ EXPECT_EQ("w_r_hh_2", unrolled_net.layer(6).param(4).name());
+ EXPECT_EQ("w_r_hh", unrolled_net.layer(6).param(4).share_from());
+ EXPECT_EQ("w_c_hh_2", unrolled_net.layer(6).param(5).name());
+ EXPECT_EQ("w_c_hh", unrolled_net.layer(6).param(5).share_from());
+
+ EXPECT_EQ("out_0", unrolled_net.layer(7).name());
+ EXPECT_EQ(1, unrolled_net.layer(7).srclayers_size());
+ EXPECT_EQ("gru_0", unrolled_net.layer(7).srclayers(0));
+ EXPECT_EQ("w", unrolled_net.layer(7).param(0).name());
+ EXPECT_EQ("b", unrolled_net.layer(7).param(1).name());
+
+ EXPECT_EQ("out_1", unrolled_net.layer(8).name());
+ EXPECT_EQ(1, unrolled_net.layer(8).srclayers_size());
+ EXPECT_EQ("gru_1", unrolled_net.layer(8).srclayers(0));
+ EXPECT_EQ("w_1", unrolled_net.layer(8).param(0).name());
+ EXPECT_EQ("w", unrolled_net.layer(8).param(0).share_from());
+ EXPECT_EQ("b_1", unrolled_net.layer(8).param(1).name());
+ EXPECT_EQ("b", unrolled_net.layer(8).param(1).share_from());
+
+ EXPECT_EQ("out_2", unrolled_net.layer(9).name());
+ EXPECT_EQ(1, unrolled_net.layer(9).srclayers_size());
+ EXPECT_EQ("gru_2", unrolled_net.layer(9).srclayers(0));
+ EXPECT_EQ("w_2", unrolled_net.layer(9).param(0).name());
+ EXPECT_EQ("w", unrolled_net.layer(9).param(0).share_from());
+ EXPECT_EQ("b_2", unrolled_net.layer(9).param(1).name());
+ EXPECT_EQ("b", unrolled_net.layer(9).param(1).share_from());
+
+ EXPECT_EQ("softmax_0", unrolled_net.layer(10).name());
+ EXPECT_EQ(1, unrolled_net.layer(10).srclayers_size());
+ EXPECT_EQ("out_0", unrolled_net.layer(10).srclayers(0));
+
+ EXPECT_EQ("softmax_1", unrolled_net.layer(11).name());
+ EXPECT_EQ(1, unrolled_net.layer(11).srclayers_size());
+ EXPECT_EQ("out_1", unrolled_net.layer(11).srclayers(0));
+
+ EXPECT_EQ("softmax_2", unrolled_net.layer(12).name());
+ EXPECT_EQ(1, unrolled_net.layer(12).srclayers_size());
+ EXPECT_EQ("out_2", unrolled_net.layer(12).srclayers(0));
+
+ EXPECT_EQ("loss_0", unrolled_net.layer(13).name());
+ EXPECT_EQ(2, unrolled_net.layer(13).srclayers_size());
+ EXPECT_EQ("softmax_0", unrolled_net.layer(13).srclayers(0));
+ EXPECT_EQ("data", unrolled_net.layer(13).srclayers(1));
+
+ EXPECT_EQ("loss_1", unrolled_net.layer(14).name());
+ EXPECT_EQ(2, unrolled_net.layer(14).srclayers_size());
+ EXPECT_EQ("softmax_1", unrolled_net.layer(14).srclayers(0));
+ EXPECT_EQ("data", unrolled_net.layer(14).srclayers(1));
+
+ EXPECT_EQ("loss_2", unrolled_net.layer(15).name());
+ EXPECT_EQ(2, unrolled_net.layer(15).srclayers_size());
+ EXPECT_EQ("softmax_2", unrolled_net.layer(15).srclayers(0));
+ EXPECT_EQ("data", unrolled_net.layer(15).srclayers(1));
+}
[05/10] incubator-singa git commit: SINGA-120 - Implemented GRU and
BPTT: Updated the dummy layer to fix bugs
Posted by wa...@apache.org.
SINGA-120 - Implemented GRU and BPTT: Updated the dummy layer to fix bugs
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/1f03f9d8
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/1f03f9d8
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/1f03f9d8
Branch: refs/heads/master
Commit: 1f03f9d8486cb85e55c585121b2a8838bae4ed44
Parents: b4b6a38
Author: Ju Fan <fa...@gmail.com>
Authored: Fri Jan 1 10:54:43 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Wed Jan 6 01:53:05 2016 +0800
----------------------------------------------------------------------
src/neuralnet/neuron_layer/dummy.cc | 17 ++++++++++-------
1 file changed, 10 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
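The fix replaces the file-scope std::random_device/std::mt19937 globals with the per-thread generator owned by the Context singleton, so concurrent worker threads no longer share (and race on) one unsynchronized generator. A minimal sketch of that pattern, with a thread_local standing in for SINGA's Context bookkeeping:

#include <iostream>
#include <random>

// Illustrative stand-in for Context::rand_generator(): one generator
// per thread, created and seeded on first use in that thread.
std::mt19937* rand_generator() {
  thread_local std::mt19937 gen{std::random_device{}()};
  return &gen;
}

int main() {
  std::uniform_real_distribution<float> dis(0, 1);
  auto gen = rand_generator();
  std::cout << dis(*gen) << "\n";  // a sample in [0, 1)
  return 0;
}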
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/1f03f9d8/src/neuralnet/neuron_layer/dummy.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuron_layer/dummy.cc b/src/neuralnet/neuron_layer/dummy.cc
index 11ce3ec..9ccb179 100644
--- a/src/neuralnet/neuron_layer/dummy.cc
+++ b/src/neuralnet/neuron_layer/dummy.cc
@@ -22,12 +22,14 @@
#include <glog/logging.h>
#include "singa/neuralnet/neuron_layer.h"
#include "singa/utils/math_blob.h"
+#include "singa/utils/context.h"
+#include "singa/utils/singleton.h"
namespace singa {
void DummyLayer::Setup(const LayerProto& proto,
const vector<Layer*>& srclayers) {
- Layer::Setup(proto, srclayers);
+ NeuronLayer::Setup(proto, srclayers);
if (proto.dummy_conf().input()) { // use as input layer
CHECK_EQ(srclayers.size(), 0);
input_ = true;
@@ -43,27 +45,28 @@ void DummyLayer::Setup(const LayerProto& proto,
if (proto.dummy_conf().output()) { // use as output layer
output_ = true;
}
-}
-std::random_device rd;
-std::mt19937 gen(rd());
-std::uniform_real_distribution<> dis(0, 1);
+}
void DummyLayer::ComputeFeature(int flag, const vector<Layer*>& srclayers) {
+ std::uniform_real_distribution<float> dis(0, 1);
+ auto gen = Singleton<Context>::Instance()->rand_generator();
if (input_) {
// randomly init data with [0,1] values
for (int i = 0; i < data_.count(); ++i)
- data_.mutable_cpu_data()[i] = dis(gen);
+ data_.mutable_cpu_data()[i] = dis(*gen);
}
if (srclayers.size() > 0)
Copy(srclayers[0]->data(this), &data_);
}
void DummyLayer::ComputeGradient(int flag, const vector<Layer*>& srclayers) {
+ std::uniform_real_distribution<float> dis(0, 1);
+ auto gen = Singleton<Context>::Instance()->rand_generator();
if (output_) {
// randomly init gradients with [0,1] values
for (int i = 0; i < data_.count(); ++i)
- grad_.mutable_cpu_data()[i] = dis(gen);
+ grad_.mutable_cpu_data()[i] = dis(*gen);
}
if (srclayers.size() > 0)
Copy(grad_, srclayers[0]->mutable_grad(this));