Posted to commits@singa.apache.org by wa...@apache.org on 2016/01/05 19:10:30 UTC

[01/10] incubator-singa git commit: SINGA-120 - Implemented GRU and BPTT: 1) Added the implementation of the GRU model; 2) Added a test for GRU functions

Repository: incubator-singa
Updated Branches:
  refs/heads/master bb75a0be5 -> a2f4e4680


SINGA-120 - Implemented GRU and BPTT: 1) Added the implementation of the GRU model; 2) Added a test for GRU functions


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/ddf4e79a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/ddf4e79a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/ddf4e79a

Branch: refs/heads/master
Commit: ddf4e79aff5d8616f6758df18056b9443761405d
Parents: bb75a0b
Author: Ju Fan <fa...@gmail.com>
Authored: Fri Jan 1 10:41:59 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Wed Jan 6 01:50:48 2016 +0800

----------------------------------------------------------------------
 src/neuralnet/neuron_layer/gru.cc | 275 +++++++++++++++++++++++++++++++
 src/test/test_gru_layer.cc        | 286 +++++++++++++++++++++++++++++++++
 2 files changed, 561 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ddf4e79a/src/neuralnet/neuron_layer/gru.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuron_layer/gru.cc b/src/neuralnet/neuron_layer/gru.cc
new file mode 100644
index 0000000..45d7873
--- /dev/null
+++ b/src/neuralnet/neuron_layer/gru.cc
@@ -0,0 +1,275 @@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*   http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied.  See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+#include "singa/neuralnet/neuron_layer.h"
+
+#include <glog/logging.h>
+#include "singa/utils/singleton.h"
+#include "singa/utils/math_blob.h"
+#include "singa/utils/singa_op.h"
+
+#include <iostream>
+using namespace std;
+
+namespace singa {
+
+using std::vector;
+
+GRULayer::~GRULayer() {
+  delete weight_z_hx_;
+  delete weight_z_hh_;
+  delete bias_z_;
+
+  delete weight_r_hx_;
+  delete weight_r_hh_;
+  delete bias_r_;
+
+  delete weight_c_hx_;
+  delete weight_c_hh_;
+  delete bias_c_;
+
+  delete update_gate;
+  delete reset_gate;
+  delete new_memory;
+}
+
+void GRULayer::Setup(const LayerProto& conf,
+    const vector<Layer*>& srclayers) {
+  Layer::Setup(conf, srclayers);
+  CHECK_LE(srclayers.size(), 2);
+  const auto& src = srclayers[0]->data(this);
+
+  batchsize_ = src.shape()[0]; // size of batch
+  vdim_ = src.count() / (batchsize_); // dimension of input
+
+  hdim_ = layer_conf_.gru_conf().dim_hidden(); // dimension of hidden state
+
+  data_.Reshape(vector<int>{batchsize_, hdim_});
+  grad_.ReshapeLike(data_);
+
+  // Initialize the parameters
+  weight_z_hx_ = Param::Create(conf.param(0));
+  weight_r_hx_ = Param::Create(conf.param(1));
+  weight_c_hx_ = Param::Create(conf.param(2));
+
+  weight_z_hh_ = Param::Create(conf.param(3));
+  weight_r_hh_ = Param::Create(conf.param(4));
+  weight_c_hh_ = Param::Create(conf.param(5));
+
+  if (conf.gru_conf().bias_term()) {
+	  bias_z_ = Param::Create(conf.param(6));
+	  bias_r_ = Param::Create(conf.param(7));
+	  bias_c_ = Param::Create(conf.param(8));
+  }
+
+  weight_z_hx_->Setup(vector<int>{hdim_, vdim_});
+  weight_r_hx_->Setup(vector<int>{hdim_, vdim_});
+  weight_c_hx_->Setup(vector<int>{hdim_, vdim_});
+
+  weight_z_hh_->Setup(vector<int>{hdim_, hdim_});
+  weight_r_hh_->Setup(vector<int>{hdim_, hdim_});
+  weight_c_hh_->Setup(vector<int>{hdim_, hdim_});
+
+  if (conf.gru_conf().bias_term()) {
+	  bias_z_->Setup(vector<int>{hdim_});
+	  bias_r_->Setup(vector<int>{hdim_});
+	  bias_c_->Setup(vector<int>{hdim_});
+  }
+
+  update_gate = new Blob<float>(batchsize_, hdim_);
+  reset_gate = new Blob<float>(batchsize_, hdim_);
+  new_memory = new Blob<float>(batchsize_, hdim_);
+
+}
+
+void GRULayer::ComputeFeature(int flag,
+    const vector<Layer*>& srclayers) {
+	CHECK_LE(srclayers.size(), 2);
+
+	// Do transpose
+	Blob<float> *w_z_hx_t = Transpose (weight_z_hx_->data());
+	Blob<float> *w_z_hh_t = Transpose (weight_z_hh_->data());
+	Blob<float> *w_r_hx_t = Transpose (weight_r_hx_->data());
+	Blob<float> *w_r_hh_t = Transpose (weight_r_hh_->data());
+	Blob<float> *w_c_hx_t = Transpose (weight_c_hx_->data());
+	Blob<float> *w_c_hh_t = Transpose (weight_c_hh_->data());
+
+	// Prepare the data input and the context
+	const auto& src = srclayers[0]->data(this);
+	const Blob<float> *context;
+	if (srclayers.size() == 1) { // only have data input
+		context = new Blob<float>(batchsize_, hdim_);
+	} else { // have data input & context
+		context = &srclayers[1]->data(this);
+	}
+
+	// Compute the update gate
+	GEMM(1.0f, 0.0f, src,*w_z_hx_t,update_gate);
+	if (bias_z_ != nullptr)
+		MVAddRow(1.0f,1.0f,bias_z_->data(),update_gate);
+	Blob<float> zprev (batchsize_,hdim_);
+	GEMM(1.0f, 0.0f, *context,*w_z_hh_t, &zprev);
+	Add<float>(*update_gate, zprev, update_gate);
+	Map<op::Sigmoid<float>,float>(*update_gate, update_gate);
+
+	// Compute the reset gate
+	GEMM(1.0f, 0.0f, src,*w_r_hx_t,reset_gate);
+	if (bias_r_ != nullptr)
+		MVAddRow(1.0f,1.0f,bias_r_->data(),reset_gate);
+	Blob<float> rprev (batchsize_, hdim_);
+	GEMM(1.0f, 0.0f, *context, *w_r_hh_t, &rprev);
+	Add<float>(*reset_gate, rprev, reset_gate);
+	Map<op::Sigmoid<float>,float>(*reset_gate, reset_gate);
+
+	// Compute the new memory
+	GEMM(1.0f, 0.0f, src, *w_c_hx_t, new_memory);
+	if (bias_c_ != nullptr)
+		MVAddRow(1.0f,1.0f,bias_c_->data(), new_memory);
+	Blob<float> cprev (batchsize_, hdim_);
+	GEMM(1.0f, 0.0f, *context, *w_c_hh_t, &cprev);
+	//Blob<float> new_cprev (batchsize_, hdim_);
+	Mult<float>(*reset_gate, cprev, &cprev);
+	Add<float>(*new_memory, cprev, new_memory);
+	Map<op::Tanh<float>,float>(*new_memory, new_memory);
+
+	// Compute data - new memory part
+	Blob<float> z1 (batchsize_,hdim_);
+	for (int i = 0; i < z1.count(); i ++) {
+		z1.mutable_cpu_data()[i] = 1.0f; // generate a matrix with ones
+	}
+	AXPY<float>(-1.0f, *update_gate, &z1);
+	Mult<float>(z1, *new_memory, &data_);
+
+	// Compute data - context part
+	Blob<float> data_prev (batchsize_, hdim_);
+	Mult<float>(*update_gate,*context,&data_prev);
+	Add<float>(data_, data_prev, &data_);
+
+	// delete the pointers
+	if (srclayers.size() == 1) delete context;
+	else context = NULL;
+
+	delete w_z_hx_t;
+	delete w_z_hh_t;
+	delete w_r_hx_t;
+	delete w_r_hh_t;
+	delete w_c_hx_t;
+	delete w_c_hh_t;
+}
+
+void GRULayer::ComputeGradient(int flag,
+    const vector<Layer*>& srclayers) {
+	CHECK_LE(srclayers.size(), 2);
+
+	// Prepare the data input and the context
+	const Blob<float>& src = srclayers[0]->data(this);
+	const Blob<float> *context;
+	if (srclayers.size() == 1) { // only have data input
+		context = new Blob<float>(batchsize_, hdim_);
+	} else { // have data input & context
+		context = &srclayers[1]->data(this);
+	}
+
+	// Prepare gradient of output neurons
+	Blob<float> *grad_t = Transpose (grad_);
+
+	// Compute intermediate gradients which are used for other computations
+	Blob<float> dugatedz (batchsize_, hdim_);
+	Map<singa::op::SigmoidGrad<float>, float>(*update_gate, &dugatedz);
+	Blob<float> drgatedr (batchsize_, hdim_);
+	Map<singa::op::SigmoidGrad<float>, float>(*reset_gate, &drgatedr);
+	Blob<float> dnewmdc (batchsize_, hdim_);
+	Map<singa::op::TanhGrad<float>, float>(*new_memory,&dnewmdc);
+
+	Blob<float> dLdz (batchsize_, hdim_);
+	Sub<float>(*context, *new_memory, &dLdz);
+	Mult<float>(dLdz, grad_, &dLdz);
+	Mult<float>(dLdz, dugatedz, &dLdz);
+
+	Blob<float> dLdc (batchsize_,hdim_);
+	Blob<float> z1 (batchsize_,hdim_);
+	for (int i = 0; i < z1.count(); i ++) {
+		z1.mutable_cpu_data()[i] = 1.0f; // generate a matrix with ones
+	}
+	AXPY<float>(-1.0f, *update_gate, &z1);
+	Mult(grad_,z1,&dLdc);
+	Mult(dLdc,dnewmdc,&dLdc);
+
+	Blob<float> reset_dLdc (batchsize_,hdim_);
+	Mult(dLdc, *reset_gate, &reset_dLdc);
+
+	Blob<float> dLdr (batchsize_, hdim_);
+	Blob<float> cprev (batchsize_, hdim_);
+	Blob<float> *w_c_hh_t = Transpose(weight_c_hh_->data());
+	GEMM(1.0f,0.0f,*context,*w_c_hh_t, &cprev);
+	delete w_c_hh_t;
+	Mult(dLdc,cprev,&dLdr);
+	Mult(dLdr,drgatedr,&dLdr);
+
+
+	// Compute gradients for parameters of update gate
+	Blob<float> *dLdz_t = Transpose(dLdz);
+	GEMM(1.0f,0.0f,*dLdz_t,src,weight_z_hx_->mutable_grad());
+	GEMM(1.0f,0.0f,*dLdz_t,*context,weight_z_hh_->mutable_grad());
+	if (bias_z_ != nullptr)
+		MVSumRow<float>(1.0f,0.0f,dLdz,bias_z_->mutable_grad());
+	delete dLdz_t;
+
+	// Compute gradients for parameters of reset gate
+	Blob<float> *dLdr_t = Transpose(dLdr);
+	GEMM(1.0f,0.0f,*dLdr_t,src,weight_r_hx_->mutable_grad());
+	GEMM(1.0f,0.0f,*dLdr_t,*context,weight_r_hh_->mutable_grad());
+	if (bias_r_ != nullptr)
+		MVSumRow(1.0f,0.0f,dLdr,bias_r_->mutable_grad());
+	delete dLdr_t;
+
+	// Compute gradients for parameters of new memory
+	Blob<float> *dLdc_t = Transpose(dLdc);
+	GEMM(1.0f,0.0f,*dLdc_t,src,weight_c_hx_->mutable_grad());
+	if (bias_c_ != nullptr)
+		MVSumRow(1.0f,0.0f,dLdc,bias_c_->mutable_grad());
+	delete dLdc_t;
+
+	Blob<float> *reset_dLdc_t = Transpose(reset_dLdc);
+	GEMM(1.0f,0.0f,*reset_dLdc_t,*context,weight_c_hh_->mutable_grad());
+	delete reset_dLdc_t;
+
+	// Compute gradients for data input layer
+	if (srclayers[0]->mutable_grad(this) != nullptr) {
+		GEMM(1.0f,0.0f,dLdc,weight_c_hx_->data(),srclayers[0]->mutable_grad(this));
+		GEMM(1.0f,1.0f,dLdz,weight_z_hx_->data(),srclayers[0]->mutable_grad(this));
+		GEMM(1.0f,1.0f,dLdr,weight_r_hx_->data(), srclayers[0]->mutable_grad(this));
+	}
+
+	if (srclayers.size() > 1 && srclayers[1]->mutable_grad(this) != nullptr) {
+		// Compute gradients for context layer
+		GEMM(1.0f,0.0f,reset_dLdc,weight_c_hh_->data(), srclayers[1]->mutable_grad(this));
+		GEMM(1.0f,1.0f,dLdr, weight_r_hh_->data(), srclayers[1]->mutable_grad(this));
+		GEMM(1.0f,1.0f,dLdz,weight_z_hh_->data(), srclayers[1]->mutable_grad(this));
+		Add(srclayers[1]->grad(this), *update_gate, srclayers[1]->mutable_grad(this));
+	}
+
+	if (srclayers.size() == 1) delete context;
+	else context = NULL;
+	delete grad_t;
+}
+
+}  // namespace singa
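
For reference, the forward pass in ComputeFeature above computes the standard GRU equations; in the notation below (which is not from the commit), W_* are the input-to-hidden weights (weight_*_hx_), U_* the hidden-to-hidden weights (weight_*_hh_), x_t the data input and h_{t-1} the context:

  z_t = \sigma(W_z x_t + U_z h_{t-1} + b_z)
  r_t = \sigma(W_r x_t + U_r h_{t-1} + b_r)
  \tilde{h}_t = \tanh(W_c x_t + r_t \odot (U_c h_{t-1}) + b_c)
  h_t = (1 - z_t) \odot \tilde{h}_t + z_t \odot h_{t-1}

ComputeGradient backpropagates through the same expressions; for example, the dLdz blob corresponds to \partial L / \partial z_t = \partial L / \partial h_t \odot (h_{t-1} - \tilde{h}_t) \odot \sigma'(z_t).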

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ddf4e79a/src/test/test_gru_layer.cc
----------------------------------------------------------------------
diff --git a/src/test/test_gru_layer.cc b/src/test/test_gru_layer.cc
new file mode 100644
index 0000000..296b795
--- /dev/null
+++ b/src/test/test_gru_layer.cc
@@ -0,0 +1,286 @@
+/************************************************************
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ *************************************************************/
+#include <string>
+#include <vector>
+#include <fstream>
+#include <iostream>
+using namespace std;
+
+
+#include "gtest/gtest.h"
+#include "singa/neuralnet/neuron_layer.h"
+#include "singa/neuralnet/input_layer.h"
+#include "singa/driver.h"
+#include "singa/proto/job.pb.h"
+
+using namespace singa;
+
+class GRULayerTest: public ::testing::Test {
+protected:
+	virtual void SetUp() {
+		// Initialize the settings for the first input-layer
+		std::string path1 = "src/test/gru-in-1.csv"; // path of a csv file
+		std::ofstream ofs1(path1, std::ofstream::out);
+		ASSERT_TRUE(ofs1.is_open());
+		ofs1 << "0,0,0,1\n";
+		ofs1 << "0,0,1,0\n";
+		ofs1.close();
+		auto conf1 = in1_conf.mutable_store_conf();
+		conf1->set_path(path1);
+		conf1->set_batchsize(2);
+		conf1->add_shape(4);
+		conf1->set_backend("textfile");
+		conf1->set_has_label(false);
+
+
+		// Initialize the settings for the second input-layer
+		std::string path2 = "src/test/gru-in-2.csv"; // path of a csv file
+		std::ofstream ofs2(path2, std::ofstream::out);
+		ASSERT_TRUE(ofs2.is_open());
+		ofs2 << "0,1,0,0\n";
+		ofs2 << "1,0,0,0\n";
+		ofs2.close();
+		auto conf2 = in2_conf.mutable_store_conf();
+		conf2->set_path(path2);
+
+		conf2->set_batchsize(2);
+		conf2->add_shape(4);
+		conf2->set_backend("textfile");
+		conf2->set_has_label(false);
+
+
+		gru1_conf.mutable_gru_conf() -> set_dim_hidden(2);
+		gru1_conf.mutable_gru_conf() -> set_bias_term(true);
+		for (int i = 0; i < 9; i ++) {
+			gru1_conf.add_param();
+		}
+
+
+		gru1_conf.mutable_param(0)->set_name("wzhx1");
+		gru1_conf.mutable_param(0)->set_type(kParam);
+		gru1_conf.mutable_param(0)->mutable_init()->set_type(kConstant);
+		gru1_conf.mutable_param(0)->mutable_init()->set_value(0.5f);
+
+		gru1_conf.mutable_param(1)->set_name("wrhx1");
+		gru1_conf.mutable_param(1)->set_type(kParam);
+		gru1_conf.mutable_param(1)->mutable_init()->set_type(kConstant);
+		gru1_conf.mutable_param(1)->mutable_init()->set_value(0.5f);
+
+		gru1_conf.mutable_param(2)->set_name("wchx1");
+		gru1_conf.mutable_param(2)->set_type(kParam);
+		gru1_conf.mutable_param(2)->mutable_init()->set_type(kConstant);
+		gru1_conf.mutable_param(2)->mutable_init()->set_value(0.5f);
+
+		gru1_conf.mutable_param(3)->set_name("wzhh1");
+		gru1_conf.mutable_param(3)->set_type(kParam);
+		gru1_conf.mutable_param(3)->mutable_init()->set_type(kConstant);
+		gru1_conf.mutable_param(3)->mutable_init()->set_value(0.5f);
+
+		gru1_conf.mutable_param(4)->set_name("wrhh1");
+		gru1_conf.mutable_param(4)->set_type(kParam);
+		gru1_conf.mutable_param(4)->mutable_init()->set_type(kConstant);
+		gru1_conf.mutable_param(4)->mutable_init()->set_value(0.5f);
+
+		gru1_conf.mutable_param(5)->set_name("wchh1");
+		gru1_conf.mutable_param(5)->set_type(kParam);
+		gru1_conf.mutable_param(5)->mutable_init()->set_type(kConstant);
+		gru1_conf.mutable_param(5)->mutable_init()->set_value(0.5f);
+
+		gru1_conf.mutable_param(6)->set_name("bz1");
+		gru1_conf.mutable_param(6)->set_type(kParam);
+		gru1_conf.mutable_param(6)->mutable_init()->set_type(kConstant);
+		gru1_conf.mutable_param(6)->mutable_init()->set_value(0.5f);
+
+		gru1_conf.mutable_param(7)->set_name("br1");
+		gru1_conf.mutable_param(7)->set_type(kParam);
+		gru1_conf.mutable_param(7)->mutable_init()->set_type(kConstant);
+		gru1_conf.mutable_param(7)->mutable_init()->set_value(0.5f);
+
+		gru1_conf.mutable_param(8)->set_name("bc1");
+		gru1_conf.mutable_param(8)->set_type(kParam);
+		gru1_conf.mutable_param(8)->mutable_init()->set_type(kConstant);
+		gru1_conf.mutable_param(8)->mutable_init()->set_value(0.5f);
+
+		gru2_conf.mutable_gru_conf() -> set_dim_hidden(2);
+		gru2_conf.mutable_gru_conf() -> set_bias_term(true);
+		for (int i = 0; i < 9; i ++) {
+			gru2_conf.add_param();
+		}
+
+		gru2_conf.mutable_param(0)->set_name("wzhx2");
+		gru2_conf.mutable_param(0)->set_type(kParam);
+		gru2_conf.mutable_param(0)->mutable_init()->set_type(kConstant);
+		gru2_conf.mutable_param(0)->mutable_init()->set_value(0.5f);
+
+		gru2_conf.mutable_param(1)->set_name("wrhx2");
+		gru2_conf.mutable_param(1)->set_type(kParam);
+		gru2_conf.mutable_param(1)->mutable_init()->set_type(kConstant);
+		gru2_conf.mutable_param(1)->mutable_init()->set_value(0.5f);
+
+		gru2_conf.mutable_param(2)->set_name("wchx2");
+		gru2_conf.mutable_param(2)->set_type(kParam);
+		gru2_conf.mutable_param(2)->mutable_init()->set_type(kConstant);
+		gru2_conf.mutable_param(2)->mutable_init()->set_value(0.5f);
+
+		gru2_conf.mutable_param(3)->set_name("wzhh2");
+		gru2_conf.mutable_param(3)->set_type(kParam);
+		gru2_conf.mutable_param(3)->mutable_init()->set_type(kConstant);
+		gru2_conf.mutable_param(3)->mutable_init()->set_value(0.5f);
+
+		gru2_conf.mutable_param(4)->set_name("wrhh2");
+		gru2_conf.mutable_param(4)->set_type(kParam);
+		gru2_conf.mutable_param(4)->mutable_init()->set_type(kConstant);
+		gru2_conf.mutable_param(4)->mutable_init()->set_value(0.5f);
+
+		gru2_conf.mutable_param(5)->set_name("wchh2");
+		gru2_conf.mutable_param(5)->set_type(kParam);
+		gru2_conf.mutable_param(5)->mutable_init()->set_type(kConstant);
+		gru2_conf.mutable_param(5)->mutable_init()->set_value(0.5f);
+
+		gru2_conf.mutable_param(6)->set_name("bz2");
+		gru2_conf.mutable_param(6)->set_type(kParam);
+		gru2_conf.mutable_param(6)->mutable_init()->set_type(kConstant);
+		gru2_conf.mutable_param(6)->mutable_init()->set_value(0.5f);
+
+		gru2_conf.mutable_param(7)->set_name("br2");
+		gru2_conf.mutable_param(7)->set_type(kParam);
+		gru2_conf.mutable_param(7)->mutable_init()->set_type(kConstant);
+		gru2_conf.mutable_param(7)->mutable_init()->set_value(0.5f);
+
+		gru2_conf.mutable_param(8)->set_name("bc2");
+		gru2_conf.mutable_param(8)->set_type(kParam);
+		gru2_conf.mutable_param(8)->mutable_init()->set_type(kConstant);
+		gru2_conf.mutable_param(8)->mutable_init()->set_value(0.5f);
+
+	}
+	singa::LayerProto in1_conf;
+	singa::LayerProto in2_conf;
+	singa::LayerProto gru1_conf;
+	singa::LayerProto gru2_conf;
+};
+
+TEST_F(GRULayerTest, Setup) {
+	singa::Driver driver;
+	//driver.RegisterLayer<GRULayer, int> (kGRU);
+	driver.RegisterParam<Param>(0);
+	driver.RegisterParamGenerator<UniformGen>(kUniform);
+	driver.RegisterParamGenerator<ParamGenerator>(kConstant);
+
+	singa::CSVInputLayer in_layer_1;
+	singa::CSVInputLayer in_layer_2;
+
+	in_layer_1.Setup(in1_conf, std::vector<singa::Layer*> { });
+	EXPECT_EQ(2, static_cast<int>(in_layer_1.aux_data().size()));
+	EXPECT_EQ(8, in_layer_1.data(nullptr).count());
+
+	in_layer_2.Setup(in2_conf, std::vector<singa::Layer*>{ });
+	EXPECT_EQ(2, static_cast<int>(in_layer_2.aux_data().size()));
+	EXPECT_EQ(8, in_layer_2.data(nullptr).count());
+
+	singa::GRULayer gru_layer_1;
+	gru_layer_1.Setup(gru1_conf, std::vector<singa::Layer*>{&in_layer_1});
+	//EXPECT_EQ(2, gru_layer_1.hdim());
+	//EXPECT_EQ(4, gru_layer_1.vdim());
+
+	for (unsigned int i = 0; i < gru_layer_1.GetParams().size(); i ++) {
+		gru_layer_1.GetParams()[i]->InitValues();
+	}
+	EXPECT_EQ (0.5, gru_layer_1.GetParams()[0]->data().cpu_data()[0]);
+	//cout << "gru_layer_1: " << gru_layer_1.GetParams()[0]->data().cpu_data()[0] << endl;
+
+	singa::GRULayer gru_layer_2;
+	gru_layer_2.Setup(gru2_conf, std::vector<singa::Layer*>{&in_layer_2, &gru_layer_1});
+	//EXPECT_EQ(2, gru_layer_2.hdim());
+	//EXPECT_EQ(4, gru_layer_2.vdim());
+	for (unsigned int i = 0; i < gru_layer_2.GetParams().size(); i ++) {
+		gru_layer_2.GetParams()[i]->InitValues();
+	}
+	EXPECT_EQ (0.5, gru_layer_2.GetParams()[0]->data().cpu_data()[0]);
+}
+
+
+TEST_F(GRULayerTest, ComputeFeature) {
+	singa::CSVInputLayer in_layer_1;
+	singa::CSVInputLayer in_layer_2;
+
+	in_layer_1.Setup(in1_conf, std::vector<singa::Layer*> { });
+	in_layer_1.ComputeFeature(singa::kTrain, std::vector<singa::Layer*> { });
+	in_layer_2.Setup(in2_conf, std::vector<singa::Layer*>{ });
+	in_layer_2.ComputeFeature(singa::kTrain, std::vector<singa::Layer*> { });
+
+
+	singa::GRULayer gru_layer_1;
+	gru_layer_1.Setup(gru1_conf, std::vector<singa::Layer*>{&in_layer_1});
+	for (unsigned int i = 0; i < gru_layer_1.GetParams().size(); i ++) {
+		gru_layer_1.GetParams()[i]->InitValues();
+	}
+	gru_layer_1.ComputeFeature(singa::kTrain, std::vector<singa::Layer*>{&in_layer_1});
+	for (int i = 0; i < gru_layer_1.data(nullptr).count(); i ++) {
+		EXPECT_GT(0.000001,abs(0.204824-gru_layer_1.data(nullptr).cpu_data()[i]));
+	}
+
+	singa::GRULayer gru_layer_2;
+	gru_layer_2.Setup(gru2_conf, std::vector<singa::Layer*>{&in_layer_2, &gru_layer_1});
+
+	for (unsigned int i = 0; i < gru_layer_2.GetParams().size(); i ++) {
+		gru_layer_2.GetParams()[i]->InitValues();
+	}
+	gru_layer_2.ComputeFeature(singa::kTrain, std::vector<singa::Layer*>{&in_layer_2, &gru_layer_1});
+	for (int i = 0; i < gru_layer_2.data(nullptr).count(); i ++) {
+		EXPECT_GT(0.000001,abs(0.346753-gru_layer_2.data(nullptr).cpu_data()[i]));
+	}
+}
+
+
+TEST_F(GRULayerTest, ComputeGradient) {
+	singa::CSVInputLayer in_layer_1;
+	singa::CSVInputLayer in_layer_2;
+
+	in_layer_1.Setup(in1_conf, std::vector<singa::Layer*> { });
+	in_layer_1.ComputeFeature(singa::kTrain, std::vector<singa::Layer*> { });
+	in_layer_2.Setup(in2_conf, std::vector<singa::Layer*>{ });
+	in_layer_2.ComputeFeature(singa::kTrain, std::vector<singa::Layer*> { });
+
+
+	singa::GRULayer gru_layer_1;
+	gru_layer_1.Setup(gru1_conf, std::vector<singa::Layer*>{&in_layer_1});
+	for (unsigned int i = 0; i < gru_layer_1.GetParams().size(); i ++) {
+		gru_layer_1.GetParams()[i]->InitValues();
+	}
+	gru_layer_1.ComputeFeature(singa::kTrain, std::vector<singa::Layer*>{&in_layer_1});
+
+
+	singa::GRULayer gru_layer_2;
+	gru_layer_2.Setup(gru2_conf, std::vector<singa::Layer*>{&in_layer_2, &gru_layer_1});
+	for (unsigned int i = 0; i < gru_layer_2.GetParams().size(); i ++) {
+		gru_layer_2.GetParams()[i]->InitValues();
+	}
+	gru_layer_2.ComputeFeature(singa::kTrain, std::vector<singa::Layer*>{&in_layer_2, &gru_layer_1});
+
+	// For test purpose, we set dummy values for gru_layer_2.grad_
+	for (int i = 0; i < gru_layer_2.grad(nullptr).count(); i ++) {
+		gru_layer_2.mutable_grad(nullptr)->mutable_cpu_data()[i] = 1.0f;
+	}
+	gru_layer_2.ComputeGradient(singa::kTrain, std::vector<singa::Layer*>{&in_layer_2, &gru_layer_1});
+
+	gru_layer_1.ComputeGradient(singa::kTrain, std::vector<singa::Layer*>{&in_layer_1});
+
+}
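
As a sanity check on the value asserted above for gru_layer_1 (0.204824), the following standalone sketch, which is not part of the commit, reproduces it by hand under the test's setup: every weight and bias is the constant 0.5, the input row is one-hot, the hidden dimension is 2, and the initial context is zero.

#include <cmath>
#include <cstdio>

int main() {
  const double w = 0.5, b = 0.5;   // constant initialization used by the test
  const double x_dot_w = 0.5;      // one-hot input row, so W*x = 0.5 per hidden unit
  const double h_prev = 0.0;       // gru_layer_1 has no context layer

  // Both hidden units see identical inputs, so one scalar per quantity suffices;
  // 2 * w * h_prev accounts for the two hidden units of the context.
  double z = 1.0 / (1.0 + std::exp(-(x_dot_w + 2 * w * h_prev + b)));  // update gate
  double r = z;                                                        // reset gate (same inputs)
  double c = std::tanh(x_dot_w + b + r * (2 * w * h_prev));            // new memory
  double h = (1.0 - z) * c + z * h_prev;                               // layer output

  std::printf("h = %.6f\n", h);    // prints h = 0.204824
  return 0;
}

Feeding this h back in as h_prev for the second layer (with its own one-hot input, so 2 * w * h_prev = h) gives 0.346753, matching the second assertion.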


[04/10] incubator-singa git commit: SINGA-120 - Implemented GRU and BPTT: 1) Updated Driver.cc to register GRU; 2) Updated job.proto to include configuration of GRU; 3) Updated configure.ac to fix some compilation errors

Posted by wa...@apache.org.
SINGA-120 - Implemented GRU and BPTT: 1) Updated Driver.cc to register GRU; 2) Updated job.proto to include configuration of GRU; 3) Updated configure.ac to fix some compilation errors


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/473c9858
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/473c9858
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/473c9858

Branch: refs/heads/master
Commit: 473c9858aafd4cd15e6f6316deaeca1f1c0dfffc
Parents: ddf4e79
Author: Ju Fan <fa...@gmail.com>
Authored: Fri Jan 1 10:46:51 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Wed Jan 6 01:53:05 2016 +0800

----------------------------------------------------------------------
 Makefile.am         | 38 ++++++++++++++++++++------------------
 Makefile.gpu        |  6 +++---
 configure.ac        |  4 ++--
 src/driver.cc       |  1 +
 src/proto/job.proto | 27 +++++++++++++++++++++++++++
 5 files changed, 53 insertions(+), 23 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/473c9858/Makefile.am
----------------------------------------------------------------------
diff --git a/Makefile.am b/Makefile.am
index 3c282e3..aa88348 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -19,7 +19,6 @@
 #*
 #*************************************************************/
 
-
 ACLOCAL_AMFLAGS = -I config
 AUTOMAKE_OPTIONS = foreign subdir-objects
 
@@ -33,7 +32,7 @@ CFLAGS = $(DEBUG)
 CXXFLAGS = $(DEBUG)
 #AC_CXXFLAGS = $(DEBUG)
 
-INCLUDES = -I$(top_srcdir)/include 
+INCLUDES = -I$(top_srcdir)/include
 
 PROTOS := $(top_srcdir)/src/proto/singa.proto \
           $(top_srcdir)/src/proto/job.proto \
@@ -50,7 +49,7 @@ PROTO_PYS := tool/python/pb2/singa_pb2.py \
 
 CUDA_SRCS := src/utils/math_kernel.cu
 CUDA_OBJS := src/utils/math_kernel.o
-CUDA_HDRS := include/singa/utils/math_kernel.h 
+CUDA_HDRS := include/singa/utils/math_kernel.h
 
 CUDNN_SRCS := src/neuralnet/loss_layer/cudnn_softmaxloss.cc \
 			  src/neuralnet/neuron_layer/cudnn_softmax.cc \
@@ -63,9 +62,9 @@ PY_SRCS := tool/python/singa/driver_wrap.cxx \
 		   src/driver.cc
 
 HDFS_SRCS := src/io/hdfsfile.cc \
-			 src/io/hdfsfile_store.cc 
+			 src/io/hdfsfile_store.cc
 HDFS_HDRS := include/singa/io/hdfsfile.h \
-			 include/singa/io/hdfsfile_store.h 
+			 include/singa/io/hdfsfile_store.h
 
 SINGA_SRCS := src/driver.cc \
               src/server.cc \
@@ -96,6 +95,7 @@ SINGA_SRCS := src/driver.cc \
               src/neuralnet/neuron_layer/lrn.cc \
               src/neuralnet/neuron_layer/pooling.cc \
               src/neuralnet/neuron_layer/rbm.cc \
+              src/neuralnet/neuron_layer/gru.cc \
               src/neuralnet/neuron_layer/relu.cc \
               src/neuralnet/neuron_layer/sigmoid.cc \
               src/neuralnet/neuron_layer/softmax.cc \
@@ -170,7 +170,9 @@ TEST_SRCS := include/gtest/gtest_main.cc \
 						 src/test/test_store.cc \
 						 src/test/test_connection_layers.cc \
 						 src/test/test_record_input_layer.cc \
-						 src/test/test_csv_input_layer.cc
+						 src/test/test_csv_input_layer.cc \
+             					 src/test/test_gru_layer.cc \
+						 src/test/test_unrolling.cc
 
 #EXTRA_PROGRAMS = $(PROGS)
 EXTRA_PROGRAMS = singatest test
@@ -186,7 +188,7 @@ py_LTLIBRARIES = $(PY_PROGS)
 #lib_LTLIBRARIES = libsinga.la
 libsinga_la_SOURCES = $(PROTO_SRCS) $(SINGA_SRCS)
 libsinga_la_CXXFLAGS = $(DEFAULT_FLAGS) -msse3 -fpermissive -I$(top_srcdir)/include
-libsinga_la_LDFLAGS = 
+libsinga_la_LDFLAGS =
 if LMDB
 libsinga_la_CXXFLAGS += -DUSE_LMDB
 endif
@@ -194,7 +196,7 @@ if DCUDA
 libsinga_la_SOURCES += $(CUDA_SRCS) $(CUDA_HDRS)
 libsinga_la_CXXFLAGS += $(CUDA_CFLAGS)
 libsinga_la_LDFLAGS += $(CUDA_LDFLAGS) $(CUDA_LIBS) -lsingagpu
-libsinga_la_LIBADD = libsingagpu.so 
+libsinga_la_LIBADD = libsingagpu.so
 endif
 
 if DCUDNN
@@ -215,11 +217,11 @@ singa_CXXFLAGS = $(DEFAULT_FLAGS) -MMD -I$(top_srcdir)/include
 singa_LDFLAGS = -lsinga \
                 -lglog  \
                 -lprotobuf \
-                -lrt \
+                #-lrt \
                 -lopenblas \
                 -lzmq \
                 -lczmq \
-                -lzookeeper_mt 
+                -lzookeeper_mt
 if LMDB
 singa_LDFLAGS += -llmdb
 endif
@@ -248,10 +250,10 @@ singatool_CXXFLAGS = -Wall -pthread -fPIC -std=c++11 -MMD -Wno-unknown-pragmas \
 singatool_LDFLAGS = -lsinga \
                     -lglog  \
                     -lprotobuf \
-                    -lzookeeper_mt 
+                    -lzookeeper_mt
 
 #if DCUDA
-#singatool_SOURCES += $(CUDA_SRCS) $(CUDA_HDRS)  
+#singatool_SOURCES += $(CUDA_SRCS) $(CUDA_HDRS)
 #singatool_CXXFLAGS += $(CUDA_CFLAGS)
 #singatool_LDFLAGS += $(CUDA_LDFLAGS) $(CUDA_LIBS)
 #endif
@@ -283,7 +285,7 @@ singatest_LDADD = ./libgtest.la
 singatest_LDFLAGS = -lsinga \
                 -lglog  \
                 -lprotobuf \
-                -lrt \
+                #-lrt \
                 -lopenblas \
                 -lzmq \
                 -lczmq \
@@ -293,7 +295,7 @@ if LMDB
 singatest_LDFLAGS += -llmdb
 endif
 
-if DCUDA 
+if DCUDA
 singatest_SOURCES += $(CUDA_SRCS) $(CUDA_HDRS)
 singatest_CXXFLAGS += $(CUDA_CFLAGS)
 singatest_LDFLAGS += $(CUDA_LDFLAGS) $(CUDA_LIBS)
@@ -310,13 +312,13 @@ _driver_la_CXXFLAGS = $(DEFAULT_FLAGS) $(MSHADOW_FLAGS) -I$(top_srcdir)/include
 _driver_la_LDFLAGS = -lsinga -module -shared $(PYLIBS) -avoid-version -rpath $(pydir)
 
 if DCUDA
-_driver_la_CXXFLAGS += $(CUDA_CFLAGS) 
-_driver_la_LDFLAGS += $(CUDA_LDFLAGS) $(CUDA_LIBS) 
+_driver_la_CXXFLAGS += $(CUDA_CFLAGS)
+_driver_la_LDFLAGS += $(CUDA_LDFLAGS) $(CUDA_LIBS)
 endif
 
 if DCUDNN
 _driver_la_CXXFLAGS += $(CUDNN_CFLAGS)
-_driver_la_LDFLAGS += $(CUDNN_LDFLAGS) $(CUDNN_LIBS) 
+_driver_la_LDFLAGS += $(CUDNN_LDFLAGS) $(CUDNN_LIBS)
 endif
 
 clean-local:
@@ -348,7 +350,7 @@ rat:
 	$(NVCC) $(MSHADOW_FLAGS) --shared -Xcompiler -fPIC $(CUDA_CFLAGS) $(CUDA_LDFLAGS) $(CUDA_LIBS) -I$(top_srcdir)/include -std=c++11 -G -c -o $@ $<
 
 libsingagpu.so: $(CUDA_OBJS)
-	$(NVCC)  -o libsingagpu.so -shared -Xcompiler -fPIC $(CUDA_OBJS)  $(CUDA_CFLAGS) $(CUDA_LDFLAGS) $(CUDA_LIBS) -I$(top_srcdir)/include -std=c++11 -G 
+	$(NVCC)  -o libsingagpu.so -shared -Xcompiler -fPIC $(CUDA_OBJS)  $(CUDA_CFLAGS) $(CUDA_LDFLAGS) $(CUDA_LIBS) -I$(top_srcdir)/include -std=c++11 -G
 
 install-pyLTLIBRARIES: $(py_LTLIBRARIES)
 	touch tool/python/singa/__init__.py

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/473c9858/Makefile.gpu
----------------------------------------------------------------------
diff --git a/Makefile.gpu b/Makefile.gpu
index c35a445..11d6827 100644
--- a/Makefile.gpu
+++ b/Makefile.gpu
@@ -20,7 +20,7 @@
 
 ###################User Config Varaibles #############################
 # third-party library installation folder
-HOME_DIR := /media/hd1/home/wangwei/local
+HOME_DIR := /home/wangwei/local
 
 # must config the cudnn folder if using cudnn
 CUDNN_DIR := $(HOME_DIR)/cudnn
@@ -30,7 +30,7 @@ CUDA_DIR := /usr/local/cuda
 # Lib folder for system and external libs. You may need to change it.
 LIBRARY_DIRS := $(HOME_DIR)/lib64 $(HOME_DIR)/lib $(CUDNN_DIR)/lib64 $(CUDA_DIR)/lib64 $(CUDA_DIR)/lib
 # Header folder for system and external libs. You may need to change it.
-INCLUDE_DIRS := ./include $(HOME_DIR)/include $(CUDNN_DIR)/include $(CUDA_DIR)/include 
+INCLUDE_DIRS := ./include $(HOME_DIR)/include $(CUDNN_DIR)/include $(CUDA_DIR)/include
 # g++ location, should support c++11, tested with 4.8.1
 CXX := g++
 CUCXX := nvcc
@@ -85,7 +85,7 @@ TEST_CUDA_SRCS :=$(shell find src/test/ -maxdepth 1 -name "*.cu")
 TEST_CUDA_OBJS := $(sort $(addprefix $(BUILD_DIR)/, $(TEST_CUDA_SRCS:.cu=.o)))
 -include $(TEST_CUDA_OBJS:%.o=%.P)
 
-SINGA_CUDA_SRCS :=$(shell find src/ -name "*.cu")
+SINGA_CUDA_SRCS := $(shell find src/ \( -path "src/test" \) -prune -o \( -name "*.cu" -type f \) -print )
 SINGA_CUDA_OBJS := $(sort $(addprefix $(BUILD_DIR)/, $(SINGA_CUDA_SRCS:.cu=.o)))
 -include $(SINGA_CUDA_OBJS:%.o=%.P)
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/473c9858/configure.ac
----------------------------------------------------------------------
diff --git a/configure.ac b/configure.ac
index 82db55a..2377fa9 100644
--- a/configure.ac
+++ b/configure.ac
@@ -224,10 +224,10 @@ if test x"$enable_python" != x"no"; then
     AC_CHECK_PROG(PY_CHECK,python,yes)
     if test x"$PY_CHECK" != x"yes"; then
         AC_MSG_ERROR([Cannot find command "python". Please intall before make.])
-    else    
+    else
         PYLIBS="-lpython`python -V 2>&1 | awk '{print substr($2,1,3)}'`"
     fi
-fi        
+fi
 AC_SUBST(PYLIBS)
 
 AC_ARG_WITH([python],

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/473c9858/src/driver.cc
----------------------------------------------------------------------
diff --git a/src/driver.cc b/src/driver.cc
index c17ea82..5e0772b 100644
--- a/src/driver.cc
+++ b/src/driver.cc
@@ -112,6 +112,7 @@ void Driver::Init(int argc, char **argv) {
   RegisterLayer<SoftmaxLossLayer, int>(kSoftmaxLoss);
   RegisterLayer<STanhLayer, int>(kSTanh);
   RegisterLayer<SoftmaxLayer, int>(kSoftmax);
+  RegisterLayer<GRULayer, int>(kGRU);
 
 #ifdef USE_LMDB
   RegisterLayer<LMDBDataLayer, int>(kLMDBData);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/473c9858/src/proto/job.proto
----------------------------------------------------------------------
diff --git a/src/proto/job.proto b/src/proto/job.proto
index db55987..7cdc287 100644
--- a/src/proto/job.proto
+++ b/src/proto/job.proto
@@ -195,6 +195,11 @@ message LayerProto {
   // share data and grad blob with the single src layer, e.g., relu layer can
   // share blobs from conv layer. It is useful for saving memory space.
   optional bool share_src_blobs = 22 [default = false];
+
+  optional int32 unroll_len = 23 [default = 1];
+  repeated UnrollConnType unroll_conn_type = 24;
+  repeated int32 shift = 25;
+
   // overrides the partition dimension for neural net
   optional int32 partition_dim = 60 [default = -1];
   // names of parameters shared from other layers
@@ -222,6 +227,7 @@ message LayerProto {
   optional RBMProto rbm_conf = 209;
   optional ReLUProto relu_conf = 211;
   optional SoftmaxProto softmax_conf = 214;
+  optional GRUProto gru_conf = 215;
 
   // configuration for loss layers, id range [300, 400)
   optional SoftmaxLossProto softmaxloss_conf = 301;
@@ -436,6 +442,15 @@ message RBMProto {
   optional bool gaussian = 3 [default = false]; // use gaussian sampling or not
 }
 
+// Message that stores parameters used by GRULayer
+message GRUProto {
+  // dimension of hidden state for the layer
+  required int32 dim_hidden = 1;
+  // use bias vector or not
+  optional bool bias_term = 2 [default = true];
+}
+
+
 // Message that stores parameters used by InnerProductLayer
 message InnerProductProto {
   // number of outputs for the layer
@@ -594,6 +609,7 @@ enum LayerType {
   kSTanh = 212;
   kSigmoid = 213;
   kSoftmax = 214;
+  kGRU = 215;
   // cudnn v3
   kCudnnConv = 250;
   kCudnnPool = 251;
@@ -706,3 +722,14 @@ enum InitMethod {
   // For user defined init method
   kUserInit = 101;
 }
+
+enum UnrollConnType {
+  // i-th unrolled layer <- (i - shift)-th src unrolled layer
+  kUnrollOneToOne = 1;
+  // i-th unrolled layer <- all src unrolled layers
+  kUnrollOneToAll = 2;
+  // i-th unrolled layer <- last unrolled src layer
+  kUnrollFirstToLast = 3;
+  // customized connection type defined by src_conn
+  kUnrollCustomized = 4;
+}
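
To make the new connection types concrete, here is a minimal standalone C++ sketch (illustrative only, not from the commit; the function name is hypothetical) of how the two variants used in the example configurations resolve the sources of the i-th unrolled layer, mirroring the comments above:

#include <vector>

// Returns the indices of the unrolled copies of a source layer that the
// i-th unrolled copy of a layer connects to.
std::vector<int> UnrolledSrcIndices(bool one_to_all, int i, int shift,
                                    int src_unroll_len) {
  std::vector<int> srcs;
  if (one_to_all) {
    // kUnrollOneToAll: connect to every unrolled copy of the source layer
    for (int j = 0; j < src_unroll_len; ++j)
      srcs.push_back(j);
  } else {
    // kUnrollOneToOne: connect to the (i - shift)-th copy, if it exists
    if (i - shift >= 0)
      srcs.push_back(i - shift);
  }
  return srcs;
}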


[10/10] incubator-singa git commit: SINGA-120 - Implemented GRU and BPTT

Posted by wa...@apache.org.
SINGA-120 - Implemented GRU and BPTT

Change back to r * (h x U) for the new memory computation;
training loss drops to about 2.8 per character.
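
In equation form (notation mine, matching the gru.cc code in the first commit of this series), this keeps the reset gate outside the hidden-to-hidden product:

  \tilde{h}_t = \tanh(W_c x_t + r_t \odot (U_c h_{t-1}) + b_c)

rather than applying it to the hidden state first, \tilde{h}_t = \tanh(W_c x_t + U_c (r_t \odot h_{t-1}) + b_c).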


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/a2f4e468
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/a2f4e468
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/a2f4e468

Branch: refs/heads/master
Commit: a2f4e4680bb7b5dc5077a064a757698e30cc5e13
Parents: 6a4c996
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Wed Jan 6 00:35:41 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Wed Jan 6 02:03:50 2016 +0800

----------------------------------------------------------------------
 examples/char-rnn/data.py                   |   8 +
 examples/char-rnn/job.conf                  | 250 +++++++++++++++++++
 examples/char-rnn/sample.conf               | 212 ++++++++++++++++
 include/singa/neuralnet/neuron_layer.h      |  15 +-
 include/singa/utils/math_blob.h             |   2 +
 src/neuralnet/connection_layer/rnn_dummy.cc |  67 +++++
 src/neuralnet/input_layer/char_rnn.cc       |   6 +-
 src/neuralnet/input_layer/onehot.cc         |  40 +++
 src/neuralnet/neuralnet.cc                  |  73 +++---
 src/neuralnet/neuron_layer/dummy.cc         |   1 -
 src/neuralnet/neuron_layer/embedding.cc     |   4 +-
 src/neuralnet/neuron_layer/gru.cc           | 298 +++++++++++------------
 src/neuralnet/output_layer/char_rnn.cc      |  51 ++++
 src/utils/updater.cc                        |   6 +
 src/worker.cc                               |   3 +-
 15 files changed, 832 insertions(+), 204 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a2f4e468/examples/char-rnn/data.py
----------------------------------------------------------------------
diff --git a/examples/char-rnn/data.py b/examples/char-rnn/data.py
new file mode 100644
index 0000000..4b1c28c
--- /dev/null
+++ b/examples/char-rnn/data.py
@@ -0,0 +1,8 @@
+# pls get linux_input.txt from http://cs.stanford.edu/people/karpathy/char-rnn/
+data = open('linux_input.txt', 'r').read() # should be simple plain text file
+chars = list(set(data))
+data_size, vocab_size = len(data), len(chars)
+print 'data has %d characters, %d unique.' % (data_size, vocab_size)
+with open('vocab.txt', 'w') as fd:
+  fd.write("".join(chars))
+  fd.flush()

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a2f4e468/examples/char-rnn/job.conf
----------------------------------------------------------------------
diff --git a/examples/char-rnn/job.conf b/examples/char-rnn/job.conf
new file mode 100644
index 0000000..2e1c761
--- /dev/null
+++ b/examples/char-rnn/job.conf
@@ -0,0 +1,250 @@
+name:"char-rnn"
+train_steps: 100000
+disp_freq: 100
+#debug: true
+gpu: 0
+train_one_batch {
+  alg: kBPTT
+}
+
+updater {
+  type: kRMSProp
+  rmsprop_conf {
+    rho: 0.95
+  }
+  learning_rate {
+    type: kStep
+    base_lr: 0.002
+    step_conf {
+      gamma: 0.97
+      change_freq: 2000
+    }
+  }
+  clip_low: -5
+  clip_high: 5
+}
+
+neuralnet {
+  unroll_len: 50
+  layer {
+    name: "data"
+    type: kCharRNN
+    unroll_len: 1
+    char_rnn_conf {
+      path: "examples/char-rnn/linux_input.txt"
+      vocab_path:"examples/char-rnn/vocab.txt"
+      batchsize: 50
+      unroll_len: 50
+    }
+  }
+  layer {
+    name: "onehot"
+    type: kOneHot
+    srclayers: "data"
+    unroll_conn_type: kUnrollOneToAll
+  }
+
+  layer {
+    name: "label"
+    type: kRNNLabel
+    srclayers: "data"
+    unroll_conn_type: kUnrollOneToAll
+  }
+
+  layer {
+    name: "gru1"
+    type: kGRU
+    srclayers: "onehot"
+    gru_conf {
+      dim_hidden: 512
+    }
+    param {
+      name: "z_hx"
+      init {
+        type: kUniform
+        low: -0.08
+        high: 0.08
+      }
+    }
+    param {
+      name: "r_hx"
+      init {
+        type: kUniform
+        low: -0.08
+        high: 0.08
+      }
+    }
+    param {
+      name: "c_hx"
+      init {
+        type: kUniform
+        low: -0.08
+        high: 0.08
+      }
+    }
+    param {
+      name: "z_hh"
+      init {
+        type: kUniform
+        low: -0.08
+        high: 0.08
+      }
+    }
+    param {
+      name: "r_hh"
+      init {
+        type: kUniform
+        low: -0.08
+        high: 0.08
+      }
+    }
+    param {
+      name: "c_hh"
+      init {
+        type: kUniform
+        low: -0.08
+        high: 0.08
+      }
+    }
+    param {
+      name: "z_b"
+      init {
+        type: kUniform
+        low: -0.08
+        high: 0.08
+      }
+    }
+    param {
+      name: "r_b"
+      init {
+        type: kUniform
+        low: -0.08
+        high: 0.08
+      }
+    }
+    param {
+      name: "c_b"
+      init {
+        type: kUniform
+        low: -0.08
+        high: 0.08
+      }
+    }
+
+  }
+#  layer {
+#    name: "gru2"
+#    type: kGRU
+#    srclayers: "gru1"
+#    gru_conf {
+#      dim_hidden: 512
+#    }
+#    param {
+#      name: "z_hx2"
+#      init {
+#        type: kUniform
+#        low: -0.08
+#        high: 0.08
+#      }
+#    }
+#    param {
+#      name: "r_hx2"
+#      init {
+#        type: kUniform
+#        low: -0.08
+#        high: 0.08
+#      }
+#    }
+#    param {
+#      name: "c_hx2"
+#      init {
+#        type: kUniform
+#        low: -0.08
+#        high: 0.08
+#      }
+#    }
+#    param {
+#      name: "z_hh2"
+#      init {
+#        type: kUniform
+#        low: -0.08
+#        high: 0.08
+#      }
+#    }
+#    param {
+#      name: "r_hh2"
+#      init {
+#        type: kUniform
+#        low: -0.08
+#        high: 0.08
+#      }
+#    }
+#    param {
+#      name: "c_hh2"
+#      init {
+#        type: kUniform
+#        low: -0.08
+#        high: 0.08
+#      }
+#    }
+#    param {
+#      name: "z_b2"
+#      init {
+#        type: kUniform
+#        low: -0.08
+#        high: 0.08
+#      }
+#    }
+#    param {
+#      name: "r_b2"
+#      init {
+#        type: kUniform
+#        low: -0.08
+#        high: 0.08
+#      }
+#    }
+#    param {
+#      name: "c_b2"
+#      init {
+#        type: kUniform
+#        low: -0.08
+#        high: 0.08
+#      }
+#    }
+#  }
+#
+  layer {
+    name: "ip1"
+    type: kInnerProduct
+    srclayers: "gru1"
+    innerproduct_conf {
+      num_output: 101
+    }
+    param {
+      name: "w"
+      init {
+        type: kUniform
+        low: -0.08
+        high: 0.08
+      }
+    }
+    param {
+      name: "b"
+      init {
+        type: kUniform
+        low: -0.08
+        high: 0.08
+      }
+    }
+  }
+  layer {
+    name: "loss"
+    type: kSoftmaxLoss
+    srclayers: "ip1"
+    srclayers: "label"
+  }
+}
+
+cluster {
+  workspace: "examples/char-rnn/"
+}

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a2f4e468/examples/char-rnn/sample.conf
----------------------------------------------------------------------
diff --git a/examples/char-rnn/sample.conf b/examples/char-rnn/sample.conf
new file mode 100644
index 0000000..b15ef9e
--- /dev/null
+++ b/examples/char-rnn/sample.conf
@@ -0,0 +1,212 @@
+name:"char-rnn"
+test_steps: 100
+#debug: true
+gpu: 0
+checkpoint_path: "examples/char-rnn/checkpoint/step2000-worker0"
+train_one_batch {
+  alg: kBPTT
+}
+
+neuralnet {
+  layer {
+    name: "data"
+    type: kRNNDummy
+    rnn_dummy_conf {
+      shape: 1
+      integer: true
+      low: 0
+      high: 101
+      dynamic_srclayer: "argsort"
+    }
+  }
+  layer {
+    name: "onehot"
+    type: kOneHot
+    srclayers: "data"
+  }
+
+  layer {
+    name: "gru1"
+    type: kGRU
+    srclayers: "onehot"
+    gru_conf {
+      dim_hidden: 512
+    }
+    param {
+      name: "z_hx"
+      init {
+        type: kUniform
+        low: -0.08
+        high: 0.08
+      }
+    }
+    param {
+      name: "r_hx"
+      init {
+        type: kUniform
+        low: -0.08
+        high: 0.08
+      }
+    }
+    param {
+      name: "c_hx"
+      init {
+        type: kUniform
+        low: -0.08
+        high: 0.08
+      }
+    }
+    param {
+      name: "z_hh"
+      init {
+        type: kUniform
+        low: -0.08
+        high: 0.08
+      }
+    }
+    param {
+      name: "r_hh"
+      init {
+        type: kUniform
+        low: -0.08
+        high: 0.08
+      }
+    }
+    param {
+      name: "c_hh"
+      init {
+        type: kUniform
+        low: -0.08
+        high: 0.08
+      }
+    }
+    param {
+      name: "z_b"
+      init {
+        type: kUniform
+        low: -0.08
+        high: 0.08
+      }
+    }
+    param {
+      name: "r_b"
+      init {
+        type: kUniform
+        low: -0.08
+        high: 0.08
+      }
+    }
+    param {
+      name: "c_b"
+      init {
+        type: kUniform
+        low: -0.08
+        high: 0.08
+      }
+    }
+  }
+  layer {
+    name: "gru2"
+    type: kGRU
+    srclayers: "gru1"
+    gru_conf {
+      dim_hidden: 512
+    }
+    param {
+      name: "z_hx2"
+      init {
+        type: kUniform
+        low: -0.08
+        high: 0.08
+      }
+    }
+    param {
+      name: "r_hx2"
+      init {
+        type: kUniform
+        low: -0.08
+        high: 0.08
+      }
+    }
+    param {
+      name: "c_hx2"
+      init {
+        type: kUniform
+        low: -0.08
+        high: 0.08
+      }
+    }
+    param {
+      name: "z_hh2"
+      init {
+        type: kUniform
+        low: -0.08
+        high: 0.08
+      }
+    }
+    param {
+      name: "r_hh2"
+      init {
+        type: kUniform
+        low: -0.08
+        high: 0.08
+      }
+    }
+    param {
+      name: "c_hh2"
+      init {
+        type: kUniform
+        low: -0.08
+        high: 0.08
+      }
+    }
+  }
+
+
+  layer {
+    name: "ip1"
+    type: kInnerProduct
+    srclayers: "gru2"
+    innerproduct_conf {
+      num_output: 101
+    }
+    param {
+      name: "w"
+      init {
+        type: kUniform
+        low: -0.08
+        high: 0.08
+      }
+    }
+    param {
+      name: "b"
+      init {
+        type: kUniform
+        low: -0.08
+        high: 0.08
+      }
+    }
+  }
+  layer {
+    name: "softmax"
+    type: kSoftmax
+    srclayers: "ip1"
+  }
+  layer {
+    name: "argsort"
+    type: kArgSort
+    srclayers: "softmax"
+  }
+  layer {
+    name: "sampling"
+    type: kCharRNNOutput
+    srclayers: "argsort"
+    char_rnn_conf {
+      vocab_path: "examples/char-rnn/vocab.txt"
+    }
+  }
+}
+
+cluster {
+  workspace: "examples/char-rnn/"
+}

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a2f4e468/include/singa/neuralnet/neuron_layer.h
----------------------------------------------------------------------
diff --git a/include/singa/neuralnet/neuron_layer.h b/include/singa/neuralnet/neuron_layer.h
index e1a63a2..c612aed 100644
--- a/include/singa/neuralnet/neuron_layer.h
+++ b/include/singa/neuralnet/neuron_layer.h
@@ -187,16 +187,15 @@ class GRULayer : public NeuronLayer {
   }
 
   const std::vector<Param*> GetParams() const override {
+    std::vector<Param*> params{weight_z_hx_, weight_r_hx_,weight_c_hx_,
+      weight_z_hh_, weight_r_hh_, weight_c_hh_};
+
     if (bias_z_ != nullptr && bias_r_ != nullptr && bias_c_ != nullptr) {
-      std::vector<Param*> params{weight_z_hx_, weight_r_hx_,weight_c_hx_,
-        weight_z_hh_, weight_r_hh_, weight_c_hh_,
-        bias_z_, bias_r_, bias_c_};
-      return params;
-    } else {
-      std::vector<Param*> params{weight_z_hx_, weight_r_hx_,weight_c_hx_,
-        weight_z_hh_, weight_r_hh_, weight_c_hh_};
-      return params;
+      params.push_back(bias_z_);
+      params.push_back(bias_r_);
+      params.push_back(bias_c_);
     }
+    return params;
   }
 
  private:

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a2f4e468/include/singa/utils/math_blob.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/math_blob.h b/include/singa/utils/math_blob.h
index bdaf914..e151c37 100644
--- a/include/singa/utils/math_blob.h
+++ b/include/singa/utils/math_blob.h
@@ -267,6 +267,8 @@ void Map(const Blob<Dtype> & A, Blob<Dtype> * B) {
   } else {
 #ifdef USE_GPU
     gpu_e_f<Op>(A.count(), A.gpu_data(), B->mutable_gpu_data());
+#else
+    LOG(ERROR) << "Not implemented";
 #endif  // USE_GPU
   }
 }

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a2f4e468/src/neuralnet/connection_layer/rnn_dummy.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/connection_layer/rnn_dummy.cc b/src/neuralnet/connection_layer/rnn_dummy.cc
new file mode 100644
index 0000000..865066f
--- /dev/null
+++ b/src/neuralnet/connection_layer/rnn_dummy.cc
@@ -0,0 +1,67 @@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*   http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied.  See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+#include "singa/neuralnet/connection_layer.h"
+#include "singa/utils/math_blob.h"
+
+namespace singa {
+
+void RNNDummyLayer::Setup(const LayerProto& conf,
+                       const vector<Layer*>& srclayers) {
+  Layer::Setup(conf, srclayers);
+  dynamic_src_ = AddPrefixSuffix(unroll_index(), partition_id(),
+      conf.rnn_dummy_conf().dynamic_srclayer());
+  LOG(ERROR) << dynamic_src_;
+  vector<int> shape;
+  for (int s : conf.rnn_dummy_conf().shape())
+    shape.push_back(s);
+  integer_ = conf.rnn_dummy_conf().integer();
+  low_ = conf.rnn_dummy_conf().low();
+  high_ = conf.rnn_dummy_conf().high();
+  // if no src layer, then it will genereate data by itself based on shape
+  // and random range
+  if (srclayers.size() == 0) {
+    CHECK(shape.size());
+    CHECK_NE(low_, high_);
+    data_.Reshape(shape);
+    srclayer_ = nullptr;
+  } else {
+    srclayer_ = srclayers.at(0);
+    data_.ReshapeLike(srclayer_->data(this));
+    data_.ShareData(srclayer_->mutable_data(this), false);
+  }
+}
+
+void RNNDummyLayer::ComputeFeature(int flag, const vector<Layer*>& srclayers) {
+  if (srclayers.size() == 0) {
+    SampleUniform(low_, high_, &data_);
+    if (integer_) {
+      for (int i = 0; i < data_.count(); i ++) {
+        data_.mutable_cpu_data()[i] = floor(data_.cpu_data()[i]);
+      }
+    }
+  } else if (srclayer_ != srclayers.at(0)) {
+    srclayer_ = srclayers.at(0);
+    data_.ShareData(srclayer_->mutable_data(this), false);
+  }
+}
+}  // namespace singa
+

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a2f4e468/src/neuralnet/input_layer/char_rnn.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/input_layer/char_rnn.cc b/src/neuralnet/input_layer/char_rnn.cc
index cc13b1b..8a56711 100644
--- a/src/neuralnet/input_layer/char_rnn.cc
+++ b/src/neuralnet/input_layer/char_rnn.cc
@@ -69,6 +69,7 @@ void CharRNNInputLayer::ComputeFeature(int flag,
     // decide the start pos of each instance in one mini-batch
     int max_offset = buf_.length() / batchsize_;
     CHECK_GT(max_offset, unroll_len_);
+    LOG(ERROR) << "Max iteration per epoch = " << max_offset / unroll_len_;
     for (int i = 0; i < batchsize_; i ++) {
       start_.push_back(i * max_offset);
     }
@@ -77,7 +78,7 @@ void CharRNNInputLayer::ComputeFeature(int flag,
   for (int l = 0; l < unroll_len_ + 1; l++) {
     float* ptr = datavec_[l]->mutable_cpu_data();
     for (int i = 0; i < batchsize_; i++) {
-      ptr[i] = static_cast<float>(char2index_.at(buf_[start_[i] + l]));
+      ptr[i] = static_cast<float>(char2index_.at(buf_[start_[i] + offset_ + l]));
     }
   }
   offset_ += unroll_len_;
@@ -87,9 +88,6 @@ void CharRNNInputLayer::ComputeFeature(int flag,
 //  std::shuffle(start_.begin(), start_.end(), g);
     offset_ = 0;
     // return -1;
-  } else {
-    // return 0;
   }
 }
-
 }  // namespace singa

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a2f4e468/src/neuralnet/input_layer/onehot.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/input_layer/onehot.cc b/src/neuralnet/input_layer/onehot.cc
new file mode 100644
index 0000000..056656a
--- /dev/null
+++ b/src/neuralnet/input_layer/onehot.cc
@@ -0,0 +1,40 @@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*   http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied.  See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+#include "singa/neuralnet/input_layer.h"
+
+namespace singa {
+void OneHotLayer::Setup(const LayerProto& conf,
+    const vector<Layer*>& srclayers) {
+  InputLayer::Setup(conf, srclayers);
+  batchsize_ = srclayers.at(0)->data(unroll_index()).shape(0);
+  dim_ = 101 ;  // proto.onehot_conf().vocab_size();
+  data_.Reshape(batchsize_, dim_);
+}
+
+void OneHotLayer::ComputeFeature(int flag, const vector<Layer*>& srclayers) {
+  float* ptr = data_.mutable_cpu_data();
+  memset(ptr, 0, sizeof(float) * data_.count());
+  const float* idx = srclayers[0]->data(unroll_index()).cpu_data();
+  for (int i = 0; i < batchsize_; i++) {
+    ptr[i * dim_ + static_cast<int>(idx[i])] = 1;
+  }
+}
+}  // namespace singa

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a2f4e468/src/neuralnet/neuralnet.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuralnet.cc b/src/neuralnet/neuralnet.cc
index 49978a1..b045e06 100644
--- a/src/neuralnet/neuralnet.cc
+++ b/src/neuralnet/neuralnet.cc
@@ -19,13 +19,12 @@
 *
 *************************************************************/
 
-#include "singa/neuralnet/neuralnet.h"
 
+#include "singa/neuralnet/neuralnet.h"
+#include <unordered_map>
 #include <algorithm>
 #include <queue>
 #include "singa/utils/singleton.h"
-#include <unordered_map>
-using namespace std;
 
 namespace singa {
 
@@ -60,7 +59,7 @@ const NetProto NetConfPreprocess(const NetProto& conf) {
 }
 
 NeuralNet* NeuralNet::Create(const NetProto& net_conf, Phase phase,
-                                        int npartitions) {
+    int npartitions) {
   const NetProto& full_net_conf = NetConfPreprocess(net_conf);
   NetProto conf = full_net_conf;
   conf.clear_layer();
@@ -99,21 +98,21 @@ NeuralNet* NeuralNet::Create(const NetProto& net_conf, Phase phase,
       layer_conf->set_partition_dim(net_conf.partition_dim());
   }
   // LOG(INFO) << "Before unrolling: \n" << conf.DebugString();
-  conf = Unrolling (conf);
+  conf = Unrolling(conf);
 
   // Copy shared parameters for sharing param conf
   std::vector<ParamProto*> shares;
   std::unordered_map<string, ParamProto*> name2param;
-  for (int index = 0; index < conf.layer_size();index ++) {
-	  LayerProto* layer = conf.mutable_layer(index);
-	  for (int i = 0; i < layer->param_size(); i++) {
-		  ParamProto* param = layer->mutable_param(i);
+  for (int index = 0; index < conf.layer_size(); index++) {
+    LayerProto* layer = conf.mutable_layer(index);
+    for (int i = 0; i < layer->param_size(); i++) {
+      ParamProto* param = layer->mutable_param(i);
       CHECK(name2param.find(param->name()) == name2param.end())
         << "Repeated param = " << param->name();
-			  name2param[param->name()] = param;
-		  if (param->has_share_from() && param->share_from() != "")
-			  shares.push_back(param);
-	  }
+      name2param[param->name()] = param;
+      if (param->has_share_from() && param->share_from() != "")
+        shares.push_back(param);
+    }
   }
   for (auto param : shares) {
     const std::string from = param->share_from();
@@ -135,32 +134,30 @@ const NetProto NeuralNet::Unrolling(const NetProto& net_conf) {
   NetProto conf;
 
   std::vector<std::vector<int>> layer_groups;
-  std::unordered_map<string,int> org_layer_names;
+  std::unordered_map<string, int> org_layer_names;
   for (int index = 0; index < net_conf.layer_size(); index ++) {
     const LayerProto& org_layer = net_conf.layer(index);
-    org_layer_names[org_layer.name()] = index; // layer_name -> index
+    org_layer_names[org_layer.name()] = index;  // layer_name -> index
 
     std::vector<int> layer_group;
-    for (int i = 0; i < org_layer.unroll_len(); i ++) { // unroll
+    for (int i = 0; i < org_layer.unroll_len(); i ++) {  // unroll
       LayerProto* unroll_layer = conf.add_layer();
-      unroll_layer->CopyFrom(org_layer); // create a new layer conf
-      // if (org_layer.unroll_len() > 1) {
-        // update layer names
-        std::stringstream sstm;
-        sstm << i << '#' << unroll_layer->name();
-        unroll_layer->set_name(sstm.str());
-        unroll_layer->set_unroll_index(i);
-        // update layer parameter sharing
-        for (int j = 0; j < unroll_layer->param_size(); j ++) {
-          ParamProto* param = unroll_layer->mutable_param(j);
-          if (i > 0) {
-            param->set_share_from("0#" + param->name());
-          }
-          std::stringstream sstm1;
-          sstm1 << i << '#' << param->name();
-          param->set_name(sstm1.str());
+      unroll_layer->CopyFrom(org_layer);  // create a new layer conf
+      // update layer names
+      std::stringstream sstm;
+      sstm << i << '#' << unroll_layer->name();
+      unroll_layer->set_name(sstm.str());
+      unroll_layer->set_unroll_index(i);
+      // update layer parameter sharing
+      for (int j = 0; j < unroll_layer->param_size(); j ++) {
+        ParamProto* param = unroll_layer->mutable_param(j);
+        if (i > 0) {
+          param->set_share_from("0#" + param->name());
         }
-      // }
+        std::stringstream sstm1;
+        sstm1 << i << '#' << param->name();
+        param->set_name(sstm1.str());
+      }
       // clear unrolling related fields
       unroll_layer->clear_unroll_len();
       unroll_layer->clear_unroll_conn_type();
@@ -176,7 +173,7 @@ const NetProto NeuralNet::Unrolling(const NetProto& net_conf) {
   for (int index = 0; index < net_conf.layer_size(); index ++) {
     const LayerProto& org_layer = net_conf.layer(index);
     if (org_layer.srclayers_size() == 0)
-      continue; // no src layer
+      continue;   // no src layer
     for (int i = 0; i < org_layer.srclayers_size(); i ++) {
       const string& org_layer_src = org_layer.srclayers(i);
       singa::UnrollConnType unroll_conn_type = kUnrollOneToOne;
@@ -197,7 +194,7 @@ const NetProto NeuralNet::Unrolling(const NetProto& net_conf) {
             unroll_layer->add_srclayers(conf.layer(unroll_layer_src).name());
           }
         } else if (unroll_conn_type == kUnrollOneToOne) {
-          if (j < shift) continue; // no need to connect with the src
+          if (j < shift) continue;  // no need to connect with the src
           int unroll_layer_src = unroll_layer_srcs[j - shift];
           unroll_layer->add_srclayers(conf.layer(unroll_layer_src).name());
         } else if (unroll_conn_type == kUnrollFirstToLast) {
@@ -209,16 +206,14 @@ const NetProto NeuralNet::Unrolling(const NetProto& net_conf) {
       }
     }
 
-    //TODO(fanju): add LSTM when it is ready
-    if (org_layer.type() == kGRU) { // connect GRU layers
+    // TODO(fanju): add LSTM when it is ready
+    if (org_layer.type() == kGRU) {  // connect GRU layers
       for (unsigned int j = 1; j < layer_groups[index].size(); j ++) {
         LayerProto* unroll_layer = conf.mutable_layer(layer_groups[index][j]);
         string srcname = conf.layer(layer_groups[index][j-1]).name();
         unroll_layer->add_srclayers(srcname);
-        // LOG(ERROR) << "connect " << unroll_layer->name() << " from " << srcname;
       }
     }
-
   }
   return conf;
 }

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a2f4e468/src/neuralnet/neuron_layer/dummy.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuron_layer/dummy.cc b/src/neuralnet/neuron_layer/dummy.cc
index 9ccb179..936bb5e 100644
--- a/src/neuralnet/neuron_layer/dummy.cc
+++ b/src/neuralnet/neuron_layer/dummy.cc
@@ -45,7 +45,6 @@ void DummyLayer::Setup(const LayerProto& proto,
   if (proto.dummy_conf().output()) {  // use as output layer
     output_ = true;
   }
-
 }
 
 void DummyLayer::ComputeFeature(int flag, const vector<Layer*>& srclayers) {

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a2f4e468/src/neuralnet/neuron_layer/embedding.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuron_layer/embedding.cc b/src/neuralnet/neuron_layer/embedding.cc
index 00e9139..c980c54 100644
--- a/src/neuralnet/neuron_layer/embedding.cc
+++ b/src/neuralnet/neuron_layer/embedding.cc
@@ -65,8 +65,8 @@ void EmbeddingLayer::ComputeFeature(int flag, const vector<Layer*>& srclayers) {
   }
 }
 
-void EmbeddingLayer::ComputeGradient(int flag, const vector<Layer*>& srclayers)
-{
+void EmbeddingLayer::ComputeGradient(int flag,
+    const vector<Layer*>& srclayers) {
   const float* word_idx = srclayers.at(0)->data(unroll_index()).cpu_data();
   auto context = Singleton<Context>::Instance();
   if ((flag & kAggGrad) == 0)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a2f4e468/src/neuralnet/neuron_layer/gru.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuron_layer/gru.cc b/src/neuralnet/neuron_layer/gru.cc
index cf7425b..da2f9c5 100644
--- a/src/neuralnet/neuron_layer/gru.cc
+++ b/src/neuralnet/neuron_layer/gru.cc
@@ -19,14 +19,12 @@
 *
 *************************************************************/
 
-#include "singa/neuralnet/neuron_layer.h"
-
 #include <glog/logging.h>
+#include "singa/neuralnet/neuron_layer.h"
 #include "singa/utils/singleton.h"
 #include "singa/utils/math_blob.h"
 #include "singa/utils/singa_op.h"
 
-#include <iostream>
 using namespace std;
 
 namespace singa {
@@ -57,10 +55,10 @@ void GRULayer::Setup(const LayerProto& conf,
   CHECK_LE(srclayers.size(), 2);
   const auto& src = srclayers[0]->data(this);
 
-  batchsize_ = src.shape()[0]; // size of batch
-  vdim_ = src.count() / (batchsize_); // dimension of input
+  batchsize_ = src.shape()[0];  // size of batch
+  vdim_ = src.count() / (batchsize_);  // dimension of input
 
-  hdim_ = layer_conf_.gru_conf().dim_hidden(); // dimension of hidden state
+  hdim_ = layer_conf_.gru_conf().dim_hidden();  // dimension of hidden state
 
   data_.Reshape(vector<int>{batchsize_, hdim_});
   grad_.ReshapeLike(data_);
@@ -77,9 +75,9 @@ void GRULayer::Setup(const LayerProto& conf,
   weight_c_hh_ = Param::Create(conf.param(5));
 
   if (conf.param_size() > 6) {
-	  bias_z_ = Param::Create(conf.param(6));
-	  bias_r_ = Param::Create(conf.param(7));
-	  bias_c_ = Param::Create(conf.param(8));
+    bias_z_ = Param::Create(conf.param(6));
+    bias_r_ = Param::Create(conf.param(7));
+    bias_c_ = Param::Create(conf.param(8));
   }
 
   weight_z_hx_->Setup(vector<int>{hdim_, vdim_});
@@ -91,168 +89,170 @@ void GRULayer::Setup(const LayerProto& conf,
   weight_c_hh_->Setup(vector<int>{hdim_, hdim_});
 
   if (conf.param_size() > 6) {
-	  bias_z_->Setup(vector<int>{hdim_});
-	  bias_r_->Setup(vector<int>{hdim_});
-	  bias_c_->Setup(vector<int>{hdim_});
+    bias_z_->Setup(vector<int>{hdim_});
+    bias_r_->Setup(vector<int>{hdim_});
+    bias_c_->Setup(vector<int>{hdim_});
   }
 
   update_gate = new Blob<float>(batchsize_, hdim_);
   reset_gate = new Blob<float>(batchsize_, hdim_);
-  // reset gate x context
-  reset_context = new Blob<float>(batchsize_, hdim_);
   new_memory = new Blob<float>(batchsize_, hdim_);
 }
 
 void GRULayer::ComputeFeature(int flag,
     const vector<Layer*>& srclayers) {
-	CHECK_LE(srclayers.size(), 2);
-
-	// Do transpose
-  Blob<float> *w_z_hx_t = Transpose (weight_z_hx_->data());
-	Blob<float> *w_z_hh_t = Transpose (weight_z_hh_->data());
-  Blob<float> *w_r_hx_t = Transpose (weight_r_hx_->data());
-	Blob<float> *w_r_hh_t = Transpose (weight_r_hh_->data());
-  Blob<float> *w_c_hx_t = Transpose (weight_c_hx_->data());
-	Blob<float> *w_c_hh_t = Transpose (weight_c_hh_->data());
-
-	// Prepare the data input and the context
-	const auto& src = srclayers[0]->data(this);
-	const Blob<float> *context;
-	if (srclayers.size() == 1) { // only have data input
-		context = new Blob<float>(batchsize_, hdim_);
-	} else { // have data input & context
-		context = &srclayers[1]->data(this);
-	}
-
-	// Compute the update gate
-	GEMM(1.0f, 0.0f, src,*w_z_hx_t,update_gate);
- 	if (bias_z_ != nullptr)
-		MVAddRow(1.0f,1.0f,bias_z_->data(),update_gate);
-	GEMM(1.0f, 1.0f, *context, *w_z_hh_t, update_gate);
-	Map<op::Sigmoid<float>,float>(*update_gate, update_gate);
-
-	// Compute the reset gate
-	GEMM(1.0f, 0.0f, src, *w_r_hx_t, reset_gate);
-	if (bias_r_ != nullptr)
-		MVAddRow(1.0f,1.0f, bias_r_->data(),reset_gate);
-	GEMM(1.0f, 1.0f, *context, *w_r_hh_t, reset_gate);
-	Map<op::Sigmoid<float>,float>(*reset_gate, reset_gate);
-
-	// Compute the new memory
-	Mult<float>(*reset_gate, *context, reset_context);
-	GEMM(1.0f, 0.0f, *reset_context, *w_c_hh_t, new_memory);
-	GEMM(1.0f, 1.0f, src, *w_c_hx_t, new_memory);
-	if (bias_c_ != nullptr)
-		MVAddRow(1.0f, 1.0f, bias_c_->data(), new_memory);
-	Map<op::Tanh<float>,float>(*new_memory, new_memory);
-
-  Sub(*new_memory, *context, &data_);
+  CHECK_LE(srclayers.size(), 2);
+
+  // Do transpose
+  Blob<float> *w_z_hx_t = Transpose(weight_z_hx_->data());
+  Blob<float> *w_z_hh_t = Transpose(weight_z_hh_->data());
+  Blob<float> *w_r_hx_t = Transpose(weight_r_hx_->data());
+  Blob<float> *w_r_hh_t = Transpose(weight_r_hh_->data());
+  Blob<float> *w_c_hx_t = Transpose(weight_c_hx_->data());
+  Blob<float> *w_c_hh_t = Transpose(weight_c_hh_->data());
+
+  // Prepare the data input and the context
+  const auto& src = srclayers[0]->data(this);
+  const Blob<float> *context;
+  if (srclayers.size() == 1) {  // only have data input
+    context = new Blob<float>(batchsize_, hdim_);
+  } else {  // have data input & context
+    context = &srclayers[1]->data(this);
+  }
+
+  // Compute the update gate
+  GEMM(1.0f, 0.0f, src, *w_z_hx_t, update_gate);
+  if (bias_z_ != nullptr)
+    MVAddRow(1.0f, 1.0f, bias_z_->data(), update_gate);
+  GEMM(1.0f, 1.0f, *context, *w_z_hh_t, update_gate);
+  Map<op::Sigmoid<float>, float>(*update_gate, update_gate);
+
+  // Compute the reset gate
+  GEMM(1.0f, 0.0f, src, *w_r_hx_t, reset_gate);
+  if (bias_r_ != nullptr)
+    MVAddRow(1.0f, 1.0f, bias_r_->data(), reset_gate);
+  GEMM(1.0f, 1.0f, *context, *w_r_hh_t, reset_gate);
+  Map<op::Sigmoid<float>, float>(*reset_gate, reset_gate);
+
+  // Compute the new memory
+  GEMM(1.0f, 0.0f, src, *w_c_hx_t, new_memory);
+  if (bias_c_ != nullptr)
+    MVAddRow(1.0f, 1.0f, bias_c_->data(), new_memory);
+  Mult<float>(*reset_gate, *new_memory, new_memory);
+  GEMM(1.0f, 1.0f, *context, *w_c_hh_t, new_memory);
+  Map<op::Tanh<float>, float>(*new_memory, new_memory);
+
+
+  Sub(*context, *new_memory, &data_);
   Mult(data_, *update_gate, &data_);
-  AXPY(1.0f, *context, &data_);
+  Add(data_, *new_memory, &data_);
 
-	// delete the pointers
-	if (srclayers.size() == 1)
+  // delete the pointers
+  if (srclayers.size() == 1)
     delete context;
 
-	delete w_z_hx_t;
-	delete w_z_hh_t;
-	delete w_r_hx_t;
-	delete w_r_hh_t;
-	delete w_c_hx_t;
-	delete w_c_hh_t;
+  delete w_z_hx_t;
+  delete w_z_hh_t;
+  delete w_r_hx_t;
+  delete w_r_hh_t;
+  delete w_c_hx_t;
+  delete w_c_hh_t;
 }
 
 void GRULayer::ComputeGradient(int flag,
     const vector<Layer*>& srclayers) {
-	CHECK_LE(srclayers.size(), 2);
-  // agg grad from two dst layers
+  CHECK_LE(srclayers.size(), 2);
+  // agg grad from two dst layers, gradvec_[0] is grad_
   AXPY(1.0f, *gradvec_[1], &grad_);
-  float beta = 1.0f; // agg param gradients
-
-  Layer* ilayer = srclayers[0]; // input layer
-  Layer* clayer = nullptr; // context layer
-	// Prepare the data input and the context
-	const Blob<float>& src = ilayer->data(this);
-	const Blob<float> *context;
-	if (srclayers.size() == 1) { // only have data input
-		context = new Blob<float>(batchsize_, hdim_);
-	} else { // have data input & context
+  float beta = 1.0f;  // agg param gradients
+
+  Layer* ilayer = srclayers[0];  // input layer
+  Layer* clayer = nullptr;  // context layer
+  // Prepare the data input and the context
+  const Blob<float>& src = ilayer->data(this);
+  const Blob<float> *context;
+  if (srclayers.size() == 1) {  // only have data input
+    context = new Blob<float>(batchsize_, hdim_);
+  } else {  // have data input & context
     clayer = srclayers[1];
-		context = &(clayer->data(this));
-	}
-
-	// Prepare gradient of output neurons
-	Blob<float> *grad_t = Transpose (grad_);
-
-	// Compute intermediate gradients which are used for other computations
-	Blob<float> dugatedz (batchsize_, hdim_);
-	Map<singa::op::SigmoidGrad<float>, float>(*update_gate, &dugatedz);
-	Blob<float> drgatedr (batchsize_, hdim_);
-	Map<singa::op::SigmoidGrad<float>, float>(*reset_gate, &drgatedr);
-	Blob<float> dnewmdc (batchsize_, hdim_);
-	Map<singa::op::TanhGrad<float>, float>(*new_memory, &dnewmdc);
-
-	Blob<float> dLdz (batchsize_, hdim_);
-	Sub<float>(*new_memory, *context, &dLdz);
-	Mult<float>(dLdz, grad_, &dLdz);
-	Mult<float>(dLdz, dugatedz, &dLdz);
-
-	Blob<float> dLdc (batchsize_,hdim_);
-	Mult(grad_, *update_gate, &dLdc);
-	Mult(dLdc, dnewmdc, &dLdc);
-
-	Blob<float> reset_dLdc (batchsize_,hdim_);
-  GEMM(1.0f, 0.0f, dLdc, weight_c_hh_->data(), &reset_dLdc);
-
-	Blob<float> dLdr (batchsize_, hdim_);
-	Mult(reset_dLdc, *context, &dLdr);
-	Mult(dLdr, drgatedr, &dLdr);
-
-	// Compute gradients for parameters of update gate
-	Blob<float> *dLdz_t = Transpose(dLdz);
-	GEMM(1.0f, beta, *dLdz_t, src, weight_z_hx_->mutable_grad());
-	GEMM(1.0f, beta, *dLdz_t, *context, weight_z_hh_->mutable_grad());
-	if (bias_z_ != nullptr)
-		MVSumRow<float>(1.0f, beta, dLdz, bias_z_->mutable_grad());
-	delete dLdz_t;
-
-	// Compute gradients for parameters of reset gate
-	Blob<float> *dLdr_t = Transpose(dLdr);
-	GEMM(1.0f, beta, *dLdr_t, src, weight_r_hx_->mutable_grad());
-	GEMM(1.0f, beta, *dLdr_t, *context, weight_r_hh_->mutable_grad());
-	if (bias_r_ != nullptr)
-		MVSumRow(1.0f, beta, dLdr, bias_r_->mutable_grad());
-	delete dLdr_t;
-
-	// Compute gradients for parameters of new memory
-	Blob<float> *dLdc_t = Transpose(dLdc);
-	GEMM(1.0f, beta, *dLdc_t, src, weight_c_hx_->mutable_grad());
-	GEMM(1.0f, beta, *dLdc_t, *reset_context, weight_c_hh_->mutable_grad());
-	if (bias_c_ != nullptr)
-		MVSumRow(1.0f, beta, dLdc, bias_c_->mutable_grad());
-	delete dLdc_t;
-
-	// Compute gradients for data input layer
-	if (srclayers[0]->mutable_grad(this) != nullptr) {
-		GEMM(1.0f,0.0f, dLdc, weight_c_hx_->data(), ilayer->mutable_grad(this));
-		GEMM(1.0f,1.0f, dLdz, weight_z_hx_->data(), ilayer->mutable_grad(this));
-		GEMM(1.0f,1.0f, dLdr, weight_r_hx_->data(), ilayer->mutable_grad(this));
-	}
-
-	if (clayer != nullptr && clayer->mutable_grad(this) != nullptr) {
-		// Compute gradients for context layer
-    Mult(reset_dLdc, *reset_gate, clayer->mutable_grad(this));
-		GEMM(1.0f, 1.0f, dLdr, weight_r_hh_->data(), clayer->mutable_grad(this));
-		GEMM(1.0f, 1.0f, dLdz, weight_z_hh_->data(), clayer->mutable_grad(this));
-		AXPY(-1.0f, *update_gate, clayer->mutable_grad(this));
+    context = &(clayer->data(this));
+  }
+
+  // Compute intermediate gradients which are used for other computations
+  Blob<float> dugatedz(batchsize_, hdim_);
+  Map<singa::op::SigmoidGrad<float>, float>(*update_gate, &dugatedz);
+  Blob<float> drgatedr(batchsize_, hdim_);
+  Map<singa::op::SigmoidGrad<float>, float>(*reset_gate, &drgatedr);
+  Blob<float> dnewmdc(batchsize_, hdim_);
+  Map<singa::op::TanhGrad<float>, float>(*new_memory, &dnewmdc);
+
+  Blob<float> dLdz(batchsize_, hdim_);
+  Sub<float>(*context, *new_memory, &dLdz);
+  Mult<float>(dLdz, grad_, &dLdz);
+  Mult<float>(dLdz, dugatedz, &dLdz);
+
+  Blob<float> dLdc(batchsize_, hdim_);
+  Blob<float> z1(batchsize_, hdim_);
+  z1.SetValue(1.0f);
+  AXPY<float>(-1.0f, *update_gate, &z1);
+  Mult(grad_, z1, &dLdc);
+  Mult(dLdc, dnewmdc, &dLdc);
+
+  Blob<float> reset_dLdc(batchsize_, hdim_);
+  Mult(dLdc, *reset_gate, &reset_dLdc);
+
+  Blob<float> dLdr(batchsize_, hdim_);
+  Blob<float> cprev(batchsize_, hdim_);
+  GEMM(1.0f, 0.0f, *context, weight_c_hh_->data().T(), &cprev);
+  Mult(dLdc, cprev, &dLdr);
+  Mult(dLdr, drgatedr, &dLdr);
+
+  // Compute gradients for parameters of update gate
+  Blob<float> *dLdz_t = Transpose(dLdz);
+  GEMM(1.0f, beta, *dLdz_t, src, weight_z_hx_->mutable_grad());
+  GEMM(1.0f, beta, *dLdz_t, *context, weight_z_hh_->mutable_grad());
+  if (bias_z_ != nullptr)
+    MVSumRow<float>(1.0f, beta, dLdz, bias_z_->mutable_grad());
+  delete dLdz_t;
+
+  // Compute gradients for parameters of reset gate
+  Blob<float> *dLdr_t = Transpose(dLdr);
+  GEMM(1.0f, beta, *dLdr_t, src, weight_r_hx_->mutable_grad());
+  GEMM(1.0f, beta, *dLdr_t, *context, weight_r_hh_->mutable_grad());
+  if (bias_r_ != nullptr)
+    MVSumRow(1.0f, beta, dLdr, bias_r_->mutable_grad());
+  delete dLdr_t;
+
+  // Compute gradients for parameters of new memory
+  Blob<float> *dLdc_t = Transpose(dLdc);
+  GEMM(1.0f, beta, *dLdc_t, src, weight_c_hx_->mutable_grad());
+  if (bias_c_ != nullptr)
+    MVSumRow(1.0f, beta, dLdc, bias_c_->mutable_grad());
+  delete dLdc_t;
+
+  Blob<float> *reset_dLdc_t = Transpose(reset_dLdc);
+  GEMM(1.0f, beta, *reset_dLdc_t, *context, weight_c_hh_->mutable_grad());
+  delete reset_dLdc_t;
+
+  // Compute gradients for data input layer
+  if (srclayers[0]->mutable_grad(this) != nullptr) {
+    GEMM(1.0f, 0.0f, dLdc, weight_c_hx_->data(), ilayer->mutable_grad(this));
+    GEMM(1.0f, 1.0f, dLdz, weight_z_hx_->data(), ilayer->mutable_grad(this));
+    GEMM(1.0f, 1.0f, dLdr, weight_r_hx_->data(), ilayer->mutable_grad(this));
+  }
+
+  if (clayer != nullptr && clayer->mutable_grad(this) != nullptr) {
+    // Compute gradients for context layer
+    GEMM(1.0f, 0.0f, reset_dLdc, weight_c_hh_->data(),
+        clayer->mutable_grad(this));
+    GEMM(1.0f, 1.0f, dLdr, weight_r_hh_->data(), clayer->mutable_grad(this));
+    GEMM(1.0f, 1.0f, dLdz, weight_z_hh_->data(), clayer->mutable_grad(this));
+    Add(clayer->grad(this), *update_gate, clayer->mutable_grad(this));
     // LOG(ERROR) << "grad to prev gru " << Asum(clayer->grad(this));
-	}
+  }
 
-	if (srclayers.size() == 1)
+  if (srclayers.size() == 1)
     delete context;
-	else
-    context = NULL;
-	delete grad_t;
 }
 
 }  // namespace singa

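A sketch of the math the updated GRULayer now implements (LaTeX-style notation, read off the GEMM/Mult/Map calls in the diff above; x is the data input, h_{t-1} the context blob from the previous unrolled unit, and W_{*x}, W_{*h} the hdim x vdim and hdim x hdim weight matrices):

    z_t = \sigma(x W_{zx}^T + h_{t-1} W_{zh}^T + b_z)              % update gate
    r_t = \sigma(x W_{rx}^T + h_{t-1} W_{rh}^T + b_r)              % reset gate
    c_t = \tanh(r_t \odot (x W_{cx}^T + b_c) + h_{t-1} W_{ch}^T)   % new memory
    h_t = z_t \odot (h_{t-1} - c_t) + c_t
        = z_t \odot h_{t-1} + (1 - z_t) \odot c_t

Assuming SigmoidGrad and TanhGrad return z(1 - z) and 1 - c^2 of the already-activated blobs (the usual convention), the intermediate blobs in ComputeGradient correspond to

    dL/da_z = dL/dh_t \odot (h_{t-1} - c_t) \odot z_t \odot (1 - z_t)
    dL/da_c = dL/dh_t \odot (1 - z_t) \odot (1 - c_t^2)
    dL/da_r = (dL/da_c \odot (h_{t-1} W_{ch}^T)) \odot r_t \odot (1 - r_t)

where a_z, a_r, a_c are the pre-activation values and dL/dh_t is the sum of the gradients arriving from the layer's two destination layers (grad_ plus gradvec_[1], aggregated by the AXPY call at the top of ComputeGradient).
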
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a2f4e468/src/neuralnet/output_layer/char_rnn.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/output_layer/char_rnn.cc b/src/neuralnet/output_layer/char_rnn.cc
new file mode 100644
index 0000000..c3f1733
--- /dev/null
+++ b/src/neuralnet/output_layer/char_rnn.cc
@@ -0,0 +1,51 @@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*   http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied.  See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+#include <algorithm>
+#include <iostream>
+#include <fstream>
+#include "singa/neuralnet/output_layer.h"
+
+namespace singa {
+
+void CharRNNOutputLayer::Setup(const LayerProto& proto,
+    const vector<Layer*>& srclayers) {
+  CHECK_EQ(srclayers.size(), 1);
+  OutputLayer::Setup(proto, srclayers);
+  std::ifstream fin;
+  const string path = proto.char_rnn_conf().vocab_path();
+  fin.open(path);
+  CHECK(fin.is_open()) << "Can't open vocab_path = " << path;
+  std::stringstream stream;
+  stream << fin.rdbuf();
+  vocab_ = stream.str();
+  fin.close();
+}
+
+void CharRNNOutputLayer::ComputeFeature(int flag,
+    const vector<Layer*>& srclayers) {
+  const float* dptr =  srclayers[0]->data(this).cpu_data();
+  for (int i = 0; i < srclayers[0]->data(this).shape(0); i++) {
+    std::cout<<vocab_[static_cast<int>(dptr[i])];
+  }
+}
+
+}  // namespace singa

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a2f4e468/src/utils/updater.cc
----------------------------------------------------------------------
diff --git a/src/utils/updater.cc b/src/utils/updater.cc
index 200670a..1b3e26c 100644
--- a/src/utils/updater.cc
+++ b/src/utils/updater.cc
@@ -140,6 +140,9 @@ void SGDUpdater::Update(int step, Param* param, float grad_scale) {
 
 /***********************Nesterov******************************/
 void NesterovUpdater::Update(int step, Param* param, float grad_scale) {
+  if (clip_high_ > clip_low_)
+    Clip(clip_low_, clip_high_, param);
+
   Shape<1> s = Shape1(param->size());
   Tensor<cpu, 1> data(param->mutable_cpu_data(), s);
   Tensor<cpu, 1> grad(param->mutable_cpu_grad(), s);
@@ -181,6 +184,9 @@ void RMSPropUpdater::Init(const UpdaterProto& proto) {
 }
 
 void RMSPropUpdater::Update(int step, Param* param, float grad_scale) {
+  if (clip_high_ > clip_low_)
+    Clip(clip_low_, clip_high_, param);
+
   Shape<1> s=Shape1(param->size());
   Tensor<cpu, 1> data(param->mutable_cpu_data(), s);
   Tensor<cpu, 1> grad(param->mutable_cpu_grad(), s);

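The two hunks above only add the call sites; Updater::Clip and the clip_low_/clip_high_ members are declared in include/singa/utils/updater.h further down in this thread, and the implementation is not part of this excerpt. As a rough, hypothetical sketch of what such a clip step does, namely clamping every gradient entry into [low, high] before the update rule runs:

    #include <algorithm>
    #include <vector>

    // Hypothetical stand-in for Updater::Clip(clip_low_, clip_high_, param).
    // In SINGA the values would come from the Param's gradient buffer; a plain
    // vector keeps the sketch self-contained.
    void ClipGradient(float low, float high, std::vector<float>* grad) {
      for (float& g : *grad)
        g = std::max(low, std::min(high, g));  // clamp each entry to [low, high]
    }

Both updaters guard the call with clip_high_ > clip_low_, so clipping stays inactive unless a valid range is configured.
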
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a2f4e468/src/worker.cc
----------------------------------------------------------------------
diff --git a/src/worker.cc b/src/worker.cc
index abe74e7..2afa8b0 100644
--- a/src/worker.cc
+++ b/src/worker.cc
@@ -433,7 +433,8 @@ void BPTTWorker::Backward(int step, NeuralNet* net) {
   for (auto it = layers.rbegin(); it != layers.rend(); it++) {
     Layer* layer = *it;
     if (layer->partition_id() == id_) {
-      layer->ComputeGradient(kTrain | kBackward | kAggGrad, net->srclayers(layer));
+      layer->ComputeGradient(kTrain | kBackward | kAggGrad,
+          net->srclayers(layer));
       // LOG(ERROR) << layer->name() << " backward";
       if (job_conf_.debug() && DisplayNow(step) && grp_id_ == 0)
         label[layer->name()] = layer->ToString(true, kTrain | kBackward);


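The kAggGrad bit added to the ComputeGradient flag above is what lets BPTT accumulate parameter gradients across unrolled layers that share one Param: for a weight W shared by T unrolled copies,

    dL/dW = \sum_{t=0}^{T-1} dL/dW_t

so each unrolled layer adds its contribution instead of overwriting it. This shows up as the beta = 1.0f accumulation in the GEMM calls of the gru.cc diff above and as the (flag & kAggGrad) check in embedding.cc.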

[06/10] incubator-singa git commit: SINGA-120 - Implemented GRU and BPTT: Made changes to debug the tokenizer

Posted by wa...@apache.org.
SINGA-120 - Implemented GRU and BPTT: Made changes to debug the tokenizer


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/b4b6a385
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/b4b6a385
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/b4b6a385

Branch: refs/heads/master
Commit: b4b6a38568a5b22829fa3500dc7621e2719bb62b
Parents: d43af9f
Author: Ju Fan <fa...@gmail.com>
Authored: Fri Jan 1 10:53:11 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Wed Jan 6 01:53:05 2016 +0800

----------------------------------------------------------------------
 include/singa/utils/context.h   | 4 ++--
 include/singa/utils/tokenizer.h | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b4b6a385/include/singa/utils/context.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/context.h b/include/singa/utils/context.h
index a2bc64f..8e7bbb8 100644
--- a/include/singa/utils/context.h
+++ b/include/singa/utils/context.h
@@ -154,9 +154,9 @@ class Context {
    */
   std::mt19937* rand_generator(const std::thread::id& tid) {
     if (rand_generator_.find(tid) == rand_generator_.end()) {
-      CHECK(seed_.find(tid) != seed_.end());
+      // CHECK(seed_.find(tid) != seed_.end());
       auto seed = static_cast<unsigned>(seed_[tid]);
-      if (seed_[tid] == -1)
+      if (seed_.find(tid) == seed_.end() || seed_.at(tid) == -1)
         seed = std::chrono::system_clock::now().time_since_epoch().count();
       rand_generator_[tid] = new std::mt19937(seed);
     }

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b4b6a385/include/singa/utils/tokenizer.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/tokenizer.h b/include/singa/utils/tokenizer.h
index c66e0af..9637c75 100644
--- a/include/singa/utils/tokenizer.h
+++ b/include/singa/utils/tokenizer.h
@@ -37,6 +37,7 @@ namespace singa {
  * t >> x; // x is wes
  * cout << (t >> x); // print 0.
  */
+
 class Tokenizer {
  public:
   Tokenizer(const std::string& str, const std::string& sep): start_(0),


[09/10] incubator-singa git commit: SINGA-120 - Implemented GRU and BPTT

Posted by wa...@apache.org.
SINGA-120 - Implemented GRU and BPTT

Add input layers for char rnn example.

Fix the bug in worker.cc for flag setting in ComputeGradient

Ran with GPU; loss decreases slowly to 3 per unit.
TODO: add RNNDummyLayer and train with RMSProp.


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/959ef705
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/959ef705
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/959ef705

Branch: refs/heads/master
Commit: 959ef705a66b20b474dfad3e85a9f35635e8690f
Parents: 1f03f9d
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Sat Jan 2 22:54:20 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Wed Jan 6 01:55:08 2016 +0800

----------------------------------------------------------------------
 Makefile.am                                 |   4 +
 include/singa/neuralnet/input_layer.h       |  38 ++++
 include/singa/neuralnet/layer.h             |   6 +
 include/singa/neuralnet/neuralnet.h         |  11 +
 include/singa/neuralnet/neuron_layer.h      |  50 +++++
 include/singa/utils/common.h                |   5 +
 include/singa/utils/context.h               |   6 +
 include/singa/utils/math_blob.h             |  15 ++
 include/singa/utils/param.h                 |   6 +-
 include/singa/utils/updater.h               |   7 +-
 include/singa/worker.h                      |  31 ++-
 src/driver.cc                               |   5 +
 src/neuralnet/input_layer/char_rnn.cc       |  95 +++++++++
 src/neuralnet/input_layer/rnn_label.cc      |  35 ++++
 src/neuralnet/neuralnet.cc                  | 243 +++++++++++++----------
 src/neuralnet/neuron_layer/embedding.cc     |  98 +++++++++
 src/neuralnet/neuron_layer/gru.cc           | 115 +++++------
 src/neuralnet/neuron_layer/inner_product.cc |  13 +-
 src/proto/job.proto                         |  42 +++-
 src/stub.cc                                 |   2 +
 src/test/test_gru_layer.cc                  |   1 -
 src/test/test_math.cc                       |   1 -
 src/utils/common.cc                         |  26 +++
 src/utils/param.cc                          |  17 +-
 src/utils/updater.cc                        |  17 ++
 src/worker.cc                               |  84 +++++++-
 26 files changed, 787 insertions(+), 186 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/959ef705/Makefile.am
----------------------------------------------------------------------
diff --git a/Makefile.am b/Makefile.am
index aa88348..d2b2aa8 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -75,12 +75,15 @@ SINGA_SRCS := src/driver.cc \
               src/neuralnet/connection_layer/concate.cc \
               src/neuralnet/connection_layer/slice.cc \
               src/neuralnet/connection_layer/split.cc \
+              src/neuralnet/input_layer/char_rnn.cc \
+              src/neuralnet/input_layer/onehot.cc \
               src/neuralnet/input_layer/csv.cc \
               src/neuralnet/input_layer/image_preprocess.cc \
               src/neuralnet/input_layer/prefetch.cc \
               src/neuralnet/input_layer/record.cc \
               src/neuralnet/input_layer/deprecated.cc \
               src/neuralnet/input_layer/store.cc \
+              src/neuralnet/input_layer/rnn_label.cc \
               src/neuralnet/output_layer/accuracy.cc \
               src/neuralnet/output_layer/argsort.cc \
               src/neuralnet/output_layer/csv.cc \
@@ -91,6 +94,7 @@ SINGA_SRCS := src/driver.cc \
               src/neuralnet/neuron_layer/convolution.cc \
               src/neuralnet/neuron_layer/dropout.cc \
               src/neuralnet/neuron_layer/dummy.cc \
+              src/neuralnet/neuron_layer/embedding.cc \
               src/neuralnet/neuron_layer/inner_product.cc \
               src/neuralnet/neuron_layer/lrn.cc \
               src/neuralnet/neuron_layer/pooling.cc \

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/959ef705/include/singa/neuralnet/input_layer.h
----------------------------------------------------------------------
diff --git a/include/singa/neuralnet/input_layer.h b/include/singa/neuralnet/input_layer.h
index 2825d65..e701eec 100644
--- a/include/singa/neuralnet/input_layer.h
+++ b/include/singa/neuralnet/input_layer.h
@@ -162,6 +162,44 @@ class PrefetchLayer : public Layer {
   std::thread thread_;
 };
 
+class OneHotLayer : public InputLayer {
+ public:
+  void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override;
+  void ComputeFeature(int flag, const vector<Layer*>& srclayers);
+
+ private:
+  int batchsize_, dim_;
+};
+
+/**
+ * Read the ASCII file as a large string used for RNN models, where each
+ * character is a single input to the unrolled RNN layer.
+ * The max string length is string::max_size().
+ */
+class CharRNNInputLayer : public InputLayer {
+ public:
+  void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override;
+  void ComputeFeature(int flag, const vector<Layer*>& srclayers);
+
+ private:
+  int batchsize_ = 0, unroll_len_ = 1;
+  unsigned offset_ = 0;
+  string path_, vocab_path_;
+  string buf_;
+  vector<int> start_;
+  std::unordered_map<char, int> char2index_;
+};
+
+/**
+ * Label layer for fetching labels from the src input layer for RNN models.
+ * The i-th unrolled layer fetches its label from the input layer via data(i+1).
+ * Particularly, it shares data_ Blob with data(i+1) of its src layer.
+ */
+class RNNLabelLayer : public InputLayer {
+ public:
+  void Setup(const LayerProto& proto, const vector<Layer*>& srclayers);
+  void ComputeFeature(int flag, const vector<Layer*>& srclayers);
+};
 /****************Deprecated layers******************/
 /**
  * @deprecated please use the StoreInputLayer.

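To make the CharRNNInputLayer/RNNLabelLayer comments above concrete: the input layer fills unroll_len + 1 blobs per mini-batch, the i-th unrolled unit reads blob i as its input character, and RNNLabelLayer hands it blob i + 1 as the label, i.e., the next character. A minimal, self-contained sketch of that alignment (the text and unroll length below are made up for illustration):

    #include <cstdio>
    #include <string>

    int main() {
      std::string buf = "hello singa";  // stands in for the training text
      int unroll_len = 4;               // stands in for char_rnn_conf.unroll_len
      int start = 0;                    // start offset of one batch instance
      // step l consumes character l and is trained to predict character l + 1
      for (int l = 0; l < unroll_len; l++)
        std::printf("step %d: input '%c' -> label '%c'\n",
                    l, buf[start + l], buf[start + l + 1]);
      return 0;
    }
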
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/959ef705/include/singa/neuralnet/layer.h
----------------------------------------------------------------------
diff --git a/include/singa/neuralnet/layer.h b/include/singa/neuralnet/layer.h
index 28ab92e..f4738fa 100644
--- a/include/singa/neuralnet/layer.h
+++ b/include/singa/neuralnet/layer.h
@@ -174,6 +174,12 @@ class Layer {
    */
   inline const std::string& name() const { return layer_conf_.name(); }
   /**
+   * Return the index of the unrolled layer within the unrolling group, which
+   * should be in [0, max_unrolling_length).
+   */
+  inline const int unroll_index() const { return layer_conf_.unroll_index(); }
+
+  /**
    * @return a const ref for Blob vector storing feature values of this layer.
    */
   virtual const vector<Blob<float>*>& data() const {

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/959ef705/include/singa/neuralnet/neuralnet.h
----------------------------------------------------------------------
diff --git a/include/singa/neuralnet/neuralnet.h b/include/singa/neuralnet/neuralnet.h
index be8f5c8..33ad38c 100644
--- a/include/singa/neuralnet/neuralnet.h
+++ b/include/singa/neuralnet/neuralnet.h
@@ -109,6 +109,15 @@ class NeuralNet {
       << "layer (" << layer->name() << " ) has no source layers";
     return src_map_.at(layer);
   }
+  Layer* last_unroll_layer(const Layer* layer) const {
+    auto pos = layer->name().find("#");
+    if (pos == std::string::npos)
+      return nullptr;
+    string last_name = std::to_string(unroll_len_) + layer->name().substr(pos);
+    CHECK(name2layer_.find(last_name) != name2layer_.end())
+      << "layer name = " << last_name << " has no unroll layers";
+    return name2layer_.at(last_name);
+  }
   inline Param* paramid2param(int id) const { return paramid2param_.at(id); }
 
   /**
@@ -137,6 +146,7 @@ class NeuralNet {
    * prepare data structures, e.g., params_, layers_, etc.
    */
   void PrepareDataStructures();
+  void PrepareDataStructures(const NetProto& proto);
   /**
    * add split layers, due to connections to multiple dst-layers
    */
@@ -149,6 +159,7 @@ class NeuralNet {
                                         int npartitions);
 
  protected:
+  int unroll_len_ = 1;
   std::vector<Layer*> layers_;
   std::vector<Param*> params_;
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/959ef705/include/singa/neuralnet/neuron_layer.h
----------------------------------------------------------------------
diff --git a/include/singa/neuralnet/neuron_layer.h b/include/singa/neuralnet/neuron_layer.h
index 3f126ab..e587e38 100644
--- a/include/singa/neuralnet/neuron_layer.h
+++ b/include/singa/neuralnet/neuron_layer.h
@@ -131,12 +131,60 @@ class DummyLayer: public NeuronLayer {
   bool output_ = false;  // use as output layer
 };
 
+/**
+ * Embedding layer that converts an array of index ID into a matrix.
+ *
+ * Each index ID corresponds to a word (or feature) vector in the vocabulary
+ * matrix maintained by the embedding layer.
+ * The index ID ranges within [0, |D|), where |D| is the size of the vocabulary,
+ * i.e., the number of rows of the vocabulary matrix.
+ * If the index is -1, it denotes a padding word; a feature vector with
+ * all values 0 will be constructed and inserted into the feature Blob.
+ * Users handle special words by themselves. For example, the index 0 could be
+ * the starting word/symbol of a sentence, the index 1 could be the ending
+ * word/symbol of a sentence.
+ */
+class EmbeddingLayer : public NeuronLayer {
+ public:
+  ~EmbeddingLayer() {
+    delete vocab_;
+  }
+  void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override;
+  void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
+  void ComputeGradient(int flag, const vector<Layer*>& srclayers) override;
+  const std::vector<Param*> GetParams() const override {
+    std::vector<Param*> params;
+    params.push_back(vocab_);
+    return params;
+  }
+
+ private:
+  int vocab_size_, feature_dim_, batchsize_;
+  //!< the vocabulary matrix to be learned
+  Param *vocab_;
+};
+
 class GRULayer : public NeuronLayer {
  public:
   ~GRULayer();
   void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override;
   void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
   void ComputeGradient(int flag, const vector<Layer*>& srclayers) override;
+  ConnectionType dst_layer_connection() const override {
+    return kOneToMany;
+  }
+  Blob<float>* mutable_grad(const Layer* from) override {
+    if (typeid(*from) == typeid(GRULayer))
+      return gradvec_[1];
+    else
+      return gradvec_[0];
+  }
+  const Blob<float>& grad(const Layer* from) override {
+    if (typeid(*from) == typeid(GRULayer))
+      return *gradvec_[1];
+    else
+      return *gradvec_[0];
+  }
 
   const std::vector<Param*> GetParams() const override {
     if (bias_z_ != nullptr && bias_r_ != nullptr && bias_c_ != nullptr) {
@@ -156,6 +204,8 @@ class GRULayer : public NeuronLayer {
   int vdim_, hdim_; // dimensions
 
   Blob<float> *update_gate, *reset_gate, *new_memory;
+  //!< A GRU layer connects to two dst layers, hence it needs two grad blobs.
+  Blob<float> aux_grad_;
 
   Param *weight_z_hx_, *weight_z_hh_, *bias_z_; // update gate
   Param *weight_r_hx_, *weight_r_hh_, *bias_r_; // reset gate

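The mutable_grad/grad overrides added to GRULayer above exist because an unrolled GRU unit has two destination layers: the next GRU unit in the chain and whatever else consumes its hidden state (e.g., an inner-product/output stack). Each destination writes into its own gradient blob, and backpropagation needs their sum,

    dL/dh_t = dL/dh_t|_(non-GRU dst, gradvec_[0]) + dL/dh_t|_(GRU_{t+1}, gradvec_[1])

which is exactly the AXPY(1.0f, *gradvec_[1], &grad_) aggregation at the start of GRULayer::ComputeGradient in the gru.cc diff earlier in this mail.
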
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/959ef705/include/singa/utils/common.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/common.h b/include/singa/utils/common.h
index afbe954..0bcec58 100644
--- a/include/singa/utils/common.h
+++ b/include/singa/utils/common.h
@@ -155,6 +155,11 @@ void WriteProtoToBinaryFile(const Message& proto, const char* filename);
  * Write a string (e.g., graph reprensetation of a net) into a text file.
  */
 void WriteStringToTextFile(const string& filename, const string& context);
+
+/**
+ * Parse metric pairs (key = value[, key = value]) from string
+ */
+const vector<std::pair<string, float>> GetMetricFromString(const string& disp);
 }  // namespace singa
 
 #endif  // SINGA_UTILS_COMMON_H_

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/959ef705/include/singa/utils/context.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/context.h b/include/singa/utils/context.h
index 8e7bbb8..b1128c1 100644
--- a/include/singa/utils/context.h
+++ b/include/singa/utils/context.h
@@ -100,6 +100,12 @@ class Context {
   }
 
   /**
+   * @return the device ID of the current thread.
+   */
+  int device_id() {
+    return device_id(std::this_thread::get_id());
+  }
+  /**
    * @return the ID of the device attached to a given CPU thread, or -1 if this
   * thread has not been attached to a GPU device.
    */

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/959ef705/include/singa/utils/math_blob.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/math_blob.h b/include/singa/utils/math_blob.h
index 125509a..bdaf914 100644
--- a/include/singa/utils/math_blob.h
+++ b/include/singa/utils/math_blob.h
@@ -712,6 +712,21 @@ void Softmax(int nb_rows, const Blob<Dtype>& A, Blob<Dtype>* B) {
 #endif  // USE_GPU
   }
 }
+
+template<typename Dtype>
+void Zero(Blob<Dtype>* B) {
+  auto context = Singleton<Context>::Instance();
+  int device = context->device_id(std::this_thread::get_id());
+  if (device == -1) {
+    B->SetValue(0);
+  } else {
+#ifdef USE_GPU
+    cudaMemset(B->mutable_gpu_data(), 0, B->count() * sizeof(float));
+#else
+    LOG(FATAL) << "Not implemented";
+#endif  // USE_GPU
+  }
+}
 }  // end of namespace singa
 
 #endif  // SINGA_UTILS_MATH_BLOB_H_

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/959ef705/include/singa/utils/param.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/param.h b/include/singa/utils/param.h
index 415490e..9930710 100644
--- a/include/singa/utils/param.h
+++ b/include/singa/utils/param.h
@@ -146,7 +146,11 @@ class Param {
    * @param cpu_only if true, share only cpu memory (used for training with
    * multi-gpu cards); else, share both cpu and gpu memory.
    */
-  void ShareFrom(Param* other, bool cpu_only);
+  void ShareDataFrom(Param* other, bool cpu_only);
+  /**
+   * Share both data and grad from other param
+   */
+  void ShareFrom(Param* other);
   /**
    * Init param values from checkpoint blob.
    */

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/959ef705/include/singa/utils/updater.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/updater.h b/include/singa/utils/updater.h
index 6413a80..575ab86 100644
--- a/include/singa/utils/updater.h
+++ b/include/singa/utils/updater.h
@@ -7,9 +7,9 @@
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
-* 
+*
 *   http://www.apache.org/licenses/LICENSE-2.0
-* 
+*
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -93,12 +93,13 @@ class Updater {
 
   virtual void Init(const UpdaterProto &proto);
   virtual void Update(int step, Param* param, float grad_scale) = 0;
-
+  void Clip(const float low, const float high, Param* param);
  protected:
   UpdaterProto proto_;
   LRGenerator* lr_gen_;
   float weight_decay_;
   float momentum_;
+  float clip_low_, clip_high_;
 };
 
 class SGDUpdater : public Updater {

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/959ef705/include/singa/worker.h
----------------------------------------------------------------------
diff --git a/include/singa/worker.h b/include/singa/worker.h
index 8738c27..34c8000 100644
--- a/include/singa/worker.h
+++ b/include/singa/worker.h
@@ -165,7 +165,7 @@ class Worker {
    * @param prefix display prefix, e.g., 'Train step 100', 'Test step 90'.
    * @param net display layers from this neural net.
    */
-  void Display(int flag, const std::string& prefix, NeuralNet* net);
+  virtual void Display(int flag, const std::string& prefix, NeuralNet* net);
   /**
    * Put Param values to server.
    *
@@ -284,10 +284,35 @@ class BPWorker: public Worker {
  public:
   void TrainOneBatch(int step, NeuralNet* net) override;
   void TestOneBatch(int step, Phase phase, NeuralNet* net) override;
-  void Forward(int step, Phase phase, NeuralNet* net);
-  void Backward(int step, NeuralNet* net);
+  virtual void Forward(int step, Phase phase, NeuralNet* net);
+  virtual void Backward(int step, NeuralNet* net);
 };
 
+/**
+ * Subclass of Worker that implements the BPTT (backpropagation through time)
+ * algorithm for computing gradients of RNN models.
+ * Max BPTT/unrolling length is configured by users.
+ */
+class BPTTWorker: public BPWorker {
+ public:
+  void Forward(int step, Phase phase, NeuralNet* net) override;
+  void Backward(int step, NeuralNet* net) override;
+  void Display(int flag, const std::string& prefix, NeuralNet* net) override;
+
+ private:
+  /*
+   * indicator used in truncated BPTT, which feeds the hidden state of the last
+   * unrolled unit to the first unit in Forward() for the next iteration.
+   * Currently it always feeds the last hidden state to the first unit.
+   */
+  bool full_state_ = false;
+  //!< indicator used for the starting of a new pass of the dataset.
+  bool begin_ = false;
+};
+/**
+ * Subclass of Worker that implements the Contrastive Divergence algorithm for
+ * computing the gradients of parameters of energy models.
+ */
 class CDWorker: public Worker {
  public:
   void TrainOneBatch(int step, NeuralNet* net) override;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/959ef705/src/driver.cc
----------------------------------------------------------------------
diff --git a/src/driver.cc b/src/driver.cc
index 5e0772b..21968bb 100644
--- a/src/driver.cc
+++ b/src/driver.cc
@@ -71,6 +71,9 @@ void Driver::Init(int argc, char **argv) {
   RegisterLayer<ImagePreprocessLayer, int>(kImagePreprocess);
   RegisterLayer<RecordOutputLayer, int>(kRecordOutput);
   RegisterLayer<CSVOutputLayer, int>(kCSVOutput);
+  RegisterLayer<CharRNNInputLayer, int>(kCharRNN);
+  RegisterLayer<RNNLabelLayer, int>(kRNNLabel);
+  RegisterLayer<OneHotLayer, int>(kOneHot);
 
   // connection layers
   RegisterLayer<BridgeDstLayer, int>(kBridgeDst);
@@ -84,6 +87,7 @@ void Driver::Init(int argc, char **argv) {
   RegisterLayer<ConvolutionLayer, int>(kConvolution);
   RegisterLayer<CConvolutionLayer, int>(kCConvolution);
   RegisterLayer<CPoolingLayer, int>(kCPooling);
+  RegisterLayer<EmbeddingLayer, int>(kEmbedding);
 
 #ifdef USE_CUDNN
   RegisterLayer<CudnnActivationLayer, int>(kCudnnActivation);
@@ -135,6 +139,7 @@ void Driver::Init(int argc, char **argv) {
 
   // register workers
   RegisterWorker<BPWorker>(kBP);
+  RegisterWorker<BPTTWorker>(kBPTT);
   RegisterWorker<CDWorker>(kCD);
 
   // register params

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/959ef705/src/neuralnet/input_layer/char_rnn.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/input_layer/char_rnn.cc b/src/neuralnet/input_layer/char_rnn.cc
new file mode 100644
index 0000000..cc13b1b
--- /dev/null
+++ b/src/neuralnet/input_layer/char_rnn.cc
@@ -0,0 +1,95 @@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*   http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied.  See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+#include <sstream>
+#include <fstream>
+#include "singa/neuralnet/input_layer.h"
+namespace singa {
+
+void CharRNNInputLayer::Setup(const LayerProto& conf,
+    const vector<Layer*>& srclayers) {
+  InputLayer::Setup(conf, srclayers);
+  batchsize_ = conf.char_rnn_conf().batchsize();
+  path_ = conf.char_rnn_conf().path();
+  vocab_path_ = conf.char_rnn_conf().vocab_path();
+  unroll_len_ = conf.char_rnn_conf().unroll_len();
+  datavec_.clear();
+  // each unroll layer has a input blob
+  for (int i = 0; i <= unroll_len_; i++) {
+    datavec_.push_back(new Blob<float>(batchsize_));
+  }
+}
+
+void CharRNNInputLayer::ComputeFeature(int flag,
+    const vector<Layer*>& srclayers) {
+  if (buf_.size() == 0) {
+
+    // read the vocab
+    {
+      std::ifstream fin;
+      fin.open(vocab_path_);
+      CHECK(fin.is_open()) << "Can't open vocab_path = " << vocab_path_;
+      std::stringstream stream;
+      stream << fin.rdbuf();
+      string vocab = stream.str();
+      LOG(ERROR) << "Vocab_size = " << vocab.length();
+      for (char c : vocab)
+        char2index_[c] = char2index_.size() - 1;
+      fin.close();
+    }
+
+    // read the whole text file
+    {
+      std::ifstream fin;
+      fin.open(path_);
+      CHECK(fin.is_open()) << "Can't open filepath = " << path_;
+      std::stringstream stream;
+      stream << fin.rdbuf();
+      buf_ = stream.str();
+      fin.close();
+    }
+
+    // decide the start pos of each instance in one mini-batch
+    int max_offset = buf_.length() / batchsize_;
+    CHECK_GT(max_offset, unroll_len_);
+    for (int i = 0; i < batchsize_; i ++) {
+      start_.push_back(i * max_offset);
+    }
+  }
+
+  for (int l = 0; l < unroll_len_ + 1; l++) {
+    float* ptr = datavec_[l]->mutable_cpu_data();
+    for (int i = 0; i < batchsize_; i++) {
+      ptr[i] = static_cast<float>(char2index_.at(buf_[start_[i] + l]));
+    }
+  }
+  offset_ += unroll_len_;
+  if (offset_ >= buf_.length() / batchsize_) {
+//  unsigned seed = std::chrono::system_clock::now().time_since_epoch().count();
+//  std::mt19937 g(seed);
+//  std::shuffle(start_.begin(), start_.end(), g);
+    offset_ = 0;
+    // return -1;
+  } else {
+    // return 0;
+  }
+}
+
+}  // namespace singa

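The ComputeFeature above builds char2index_ by giving each character of the vocab file its position in that file, then converts every character of the training text into a float index per unrolled blob. A small, self-contained sketch of the mapping (hypothetical vocab string; the next index is computed before insertion so the result does not depend on argument evaluation order):

    #include <cstdio>
    #include <string>
    #include <unordered_map>

    int main() {
      std::string vocab = "ahinselog ";  // stands in for the vocab file content
      std::unordered_map<char, int> char2index;
      for (char c : vocab)
        // insert c with the next free index; a duplicate char keeps its index
        char2index.emplace(c, static_cast<int>(char2index.size()));
      std::printf("index of 's' = %d\n", char2index.at('s'));
      return 0;
    }
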
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/959ef705/src/neuralnet/input_layer/rnn_label.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/input_layer/rnn_label.cc b/src/neuralnet/input_layer/rnn_label.cc
new file mode 100644
index 0000000..4924d87
--- /dev/null
+++ b/src/neuralnet/input_layer/rnn_label.cc
@@ -0,0 +1,35 @@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*   http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied.  See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+#include "singa/neuralnet/input_layer.h"
+namespace singa {
+void RNNLabelLayer::Setup(const LayerProto& proto,
+    const vector<Layer*>& srclayers) {
+  InputLayer::Setup(proto, srclayers);
+  aux_data_.resize(srclayers[0]->data(unroll_index() + 1).shape(0));
+}
+void RNNLabelLayer::ComputeFeature(int flag, const vector<Layer*>& srclayers) {
+  const float* input = srclayers[0]->data(unroll_index() + 1).cpu_data();
+  for (unsigned i = 0; i < aux_data_.size(); i++) {
+    aux_data_[i] = static_cast<int>(input[i]);
+  }
+}
+}  // namespace singa

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/959ef705/src/neuralnet/neuralnet.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuralnet.cc b/src/neuralnet/neuralnet.cc
index 6bb0ecd..f9579b1 100644
--- a/src/neuralnet/neuralnet.cc
+++ b/src/neuralnet/neuralnet.cc
@@ -33,10 +33,36 @@ using std::map;
 using std::string;
 using std::vector;
 
+/**
+ * Check the user-defined net config and do some preprocessing, e.g., assign
+ * names to params.
+ * TODO(wangwei) implement the following functions.
+ * 1. layer and param names should not include '@', '+' and '#'. '@<suffix>'
+ * is used for identifying layer location/partition. '<prefix>#' is used for
+ * identifying the unrolled Param in RNN models.
+ * 2. assign names to unnamed Param, e.g., p<param_id>+<layer_name>.
+ */
+const NetProto NetConfPreprocess(const NetProto& conf) {
+  /*
+  string param_name = "$";
+  // if user does not name the param, then name it based on layer name.
+  if (param->name() == "") {
+    param->set_name(layer->name() + param_name);
+    param_name += "$";
+  }
+  */
+  NetProto proto = conf;
+  for (int i = 0; i < proto.layer_size(); i++) {
+    if (!proto.layer(i).has_unroll_len())
+      proto.mutable_layer(i)->set_unroll_len(proto.unroll_len());
+  }
+  return proto;
+}
+
 NeuralNet* NeuralNet::Create(const NetProto& net_conf, Phase phase,
                                         int npartitions) {
-  NetProto conf;
-  conf.CopyFrom(net_conf);
+  const NetProto& full_net_conf = NetConfPreprocess(net_conf);
+  NetProto conf = full_net_conf;
   conf.clear_layer();
   // flag=0: neither exclude nor include field appears
   // flag=1: exclude field appears
@@ -45,25 +71,19 @@ NeuralNet* NeuralNet::Create(const NetProto& net_conf, Phase phase,
   // exclude layers according to phase
   // exclude field is deprecated
   // please use include field instead
-  for (const auto& layer : net_conf.layer()) {
+  for (const auto& layer : full_net_conf.layer()) {
     bool include = true;
     for (auto p : layer.exclude()) {
       // check whether both exclude and include field
       // appear in the same .conf file
-      CHECK(flag == 0 || flag == 1)
-        << "include and exclude field should not simultaneously"
-        << " appear in the same .conf file";
+      CHECK(flag == 0 || flag == 1) << "Don't use include and exclude together";
       if (p == phase)
         include = false;
       flag = 1;
     }
     // neural net only include the specified layer in the include field
     for (auto p : layer.include()) {
-      // check whether both exclude and include field
-      // appear in the same .conf file
-      CHECK(flag == 0 || flag == 2)
-        << "include and exclude field should not simultaneously"
-        << " appear in the same .conf file";
+      CHECK(flag == 0 || flag == 2) << "Don't use include and exclude together";
       if (p == phase) {
         include = true;
         break;
@@ -78,21 +98,19 @@ NeuralNet* NeuralNet::Create(const NetProto& net_conf, Phase phase,
     if (!layer_conf->has_partition_dim())
       layer_conf->set_partition_dim(net_conf.partition_dim());
   }
-  //LOG(INFO) << "Before unrolling: \n" << conf.DebugString();
+  // LOG(INFO) << "Before unrolling: \n" << conf.DebugString();
   conf = Unrolling (conf);
 
   // Copy shared parameters for sharing param conf
-  std::unordered_map<string, ParamProto*> name2param;
   std::vector<ParamProto*> shares;
+  std::unordered_map<string, ParamProto*> name2param;
   for (int index = 0; index < conf.layer_size();index ++) {
 	  LayerProto* layer = conf.mutable_layer(index);
 	  for (int i = 0; i < layer->param_size(); i++) {
 		  ParamProto* param = layer->mutable_param(i);
-		  if (param->has_name() && param->name() != "") {
-			  CHECK(name2param.find(param->name()) == name2param.end())
-	        		  << "param name is repeated: " << param->name();
+      CHECK(name2param.find(param->name()) == name2param.end())
+        << "Repeated param = " << param->name();
 			  name2param[param->name()] = param;
-		  }
 		  if (param->has_share_from() && param->share_from() != "")
 			  shares.push_back(param);
 	  }
@@ -101,99 +119,108 @@ NeuralNet* NeuralNet::Create(const NetProto& net_conf, Phase phase,
     const std::string from = param->share_from();
     const std::string name = param->name();
     CHECK(name2param.find(from) != name2param.end())
-      << "can't find param " << from;
+      << "can't find share_from = " << from;
     // CopyFrom will overwrite the name and share_from fields
     param->CopyFrom(*name2param.at(from));
     param->set_name(name);
     param->set_share_from(from);
   }
   LOG(INFO) << "Initial NeuralNet Config is\n" << conf.DebugString();
-  // TODO(wangwei) create net based on net type, e.g., directed, undirected, etc
+  // TODO(wangwei) create net based on net type, e.g., directed, undirected.
   return new NeuralNet(conf, npartitions);
 }
 
 const NetProto NeuralNet::Unrolling(const NetProto& net_conf) {
-	// Step 1: Unroll each layer & set parameter sharing
-	NetProto conf;
+  // Step 1: Unroll each layer & set parameter sharing
+  NetProto conf;
 
-	std::vector<std::vector<int>> layer_groups;
-	std::unordered_map<string,int> org_layer_names;
-	for (int index = 0; index < net_conf.layer_size(); index ++) {
-		const LayerProto& org_layer = net_conf.layer(index);
-		org_layer_names[org_layer.name()] = index; // layer_name -> index
+  std::vector<std::vector<int>> layer_groups;
+  std::unordered_map<string,int> org_layer_names;
+  for (int index = 0; index < net_conf.layer_size(); index ++) {
+    const LayerProto& org_layer = net_conf.layer(index);
+    org_layer_names[org_layer.name()] = index; // layer_name -> index
 
-		std::vector<int> layer_group;
-		for (int i = 0; i < org_layer.unroll_len(); i ++) { // unroll
-			LayerProto* unroll_layer = conf.add_layer();
-			unroll_layer->CopyFrom(org_layer); // create a new layer conf
-			if (org_layer.unroll_len() > 1) {
-				// update layer names
-				std::stringstream sstm;
-				sstm << unroll_layer->name() << "_" << i;
-				unroll_layer->set_name(sstm.str());
-				// update layer parameter sharing
-				for (int j = 0; j < unroll_layer->param_size(); j ++) {
-					ParamProto* param = unroll_layer->mutable_param(j);
-					if (i == 0) continue; // no need to rename parameters in the i-th unrolled layer
-					if (!param->has_share_from() || param->share_from() == "") {// not shared from others
-						param->set_share_from(param->name());
-					}
-					std::stringstream sstm1;
-					sstm1 << param->name() << "_" << i;
-					param->set_name(sstm1.str());
-				}
-			}
-			// clear unrolling related fields
-			unroll_layer->clear_unroll_len();
-			unroll_layer->clear_unroll_conn_type();
-			unroll_layer->clear_shift();
-			unroll_layer->clear_srclayers();
+    std::vector<int> layer_group;
+    for (int i = 0; i < org_layer.unroll_len(); i ++) { // unroll
+      LayerProto* unroll_layer = conf.add_layer();
+      unroll_layer->CopyFrom(org_layer); // create a new layer conf
+      if (org_layer.unroll_len() > 1) {
+        // update layer names
+        std::stringstream sstm;
+        sstm << i << '#' << unroll_layer->name();
+        unroll_layer->set_name(sstm.str());
+        unroll_layer->set_unroll_index(i);
+        // update layer parameter sharing
+        for (int j = 0; j < unroll_layer->param_size(); j ++) {
+          ParamProto* param = unroll_layer->mutable_param(j);
+          if (i > 0) {
+            param->set_share_from("0#" + param->name());
+          }
+          std::stringstream sstm1;
+          sstm1 << i << '#' << param->name();
+          param->set_name(sstm1.str());
+        }
+      }
+      // clear unrolling related fields
+      unroll_layer->clear_unroll_len();
+      unroll_layer->clear_unroll_conn_type();
+      unroll_layer->clear_shift();
+      unroll_layer->clear_srclayers();
 
-			layer_group.push_back(conf.layer_size() - 1);
-		}
-		layer_groups.push_back(layer_group);
-	}
-	// Step 2: Connect unrolled layers by setting `srclayers`
-	for (int index = 0; index < net_conf.layer_size(); index ++) {
-		const LayerProto& org_layer = net_conf.layer(index);
-		if (org_layer.srclayers_size() == 0) continue; // no src layer
-		//TODO(fanju): add LSTM when it is ready
-		if (org_layer.type() == kGRU) { // connect GRU layers
-			for (unsigned int j = 1; j < layer_groups[index].size(); j ++) {
-				LayerProto* unroll_layer = conf.mutable_layer(layer_groups[index][j]);
-				unroll_layer->add_srclayers(conf.layer(layer_groups[index][j-1]).name());
-			}
-		}
-		for (int i = 0; i < org_layer.srclayers_size(); i ++) {
-			const string& org_layer_src = org_layer.srclayers(i);
+      layer_group.push_back(conf.layer_size() - 1);
+      // LOG(ERROR) << "unrolling layer " << unroll_layer->name();
+    }
+    layer_groups.push_back(layer_group);
+  }
+  // Step 2: Connect unrolled layers by setting `srclayers`
+  for (int index = 0; index < net_conf.layer_size(); index ++) {
+    const LayerProto& org_layer = net_conf.layer(index);
+    if (org_layer.srclayers_size() == 0)
+      continue; // no src layer
+    for (int i = 0; i < org_layer.srclayers_size(); i ++) {
+      const string& org_layer_src = org_layer.srclayers(i);
+      singa::UnrollConnType unroll_conn_type = kUnrollOneToOne;
+      if (i < org_layer.unroll_conn_type_size())
+        unroll_conn_type = org_layer.unroll_conn_type(i);
+      unsigned int shift = 0;
+      if (i < org_layer.shift_size())
+        shift = org_layer.shift(i);
 
-			singa::UnrollConnType unroll_conn_type = kUnrollOneToOne; // Default value
-			if (i < org_layer.unroll_conn_type_size()) unroll_conn_type = org_layer.unroll_conn_type(i);
-			unsigned int shift = 0; // Default shift value
-			if (i < org_layer.shift_size()) shift = org_layer.shift(i);
+      const std::vector<int> unroll_layer_srcs
+        = layer_groups[org_layer_names[org_layer_src]];
 
-			const std::vector<int> unroll_layer_srcs = layer_groups[org_layer_names[org_layer_src]];
+      for (unsigned int j = 0; j < layer_groups[index].size(); j ++) {
+        LayerProto* unroll_layer = conf.mutable_layer(layer_groups[index][j]);
+        // Update src layers of `unroll_layer` by considering the types
+        if (unroll_conn_type == kUnrollOneToAll) {
+          for (int unroll_layer_src : unroll_layer_srcs) {
+            unroll_layer->add_srclayers(conf.layer(unroll_layer_src).name());
+          }
+        } else if (unroll_conn_type == kUnrollOneToOne) {
+          if (j < shift) continue; // no need to connect with the src
+          int unroll_layer_src = unroll_layer_srcs[j - shift];
+          unroll_layer->add_srclayers(conf.layer(unroll_layer_src).name());
+        } else if (unroll_conn_type == kUnrollFirstToLast) {
+          if (j > 0) break;
+          int unroll_layer_src =
+            unroll_layer_srcs[unroll_layer_srcs.size() - 1];
+          unroll_layer->add_srclayers(conf.layer(unroll_layer_src).name());
+        }
+      }
+    }
 
-			for (unsigned int j = 0; j < layer_groups[index].size(); j ++) {
-				LayerProto* unroll_layer = conf.mutable_layer(layer_groups[index][j]);
-				// Update src layers of `unroll_layer` by considering the types
-				if (unroll_conn_type == kUnrollOneToAll) {
-					for (int unroll_layer_src : unroll_layer_srcs) {
-						unroll_layer->add_srclayers(conf.layer(unroll_layer_src).name());
-					}
-				} else if (unroll_conn_type == kUnrollOneToOne) {
-					if (j < shift) continue; // no need to connect with the src
-					int unroll_layer_src = unroll_layer_srcs[j - shift];
-					unroll_layer->add_srclayers(conf.layer(unroll_layer_src).name());
-				} else if (unroll_conn_type == kUnrollFirstToLast) {
-					if (j > 0) break;
-					int unroll_layer_src = unroll_layer_srcs[unroll_layer_srcs.size() - 1];
-					unroll_layer->add_srclayers(conf.layer(unroll_layer_src).name());
-				}
-			}
-		}
-	}
-	return conf;
+    //TODO(fanju): add LSTM when it is ready
+    if (org_layer.type() == kGRU) { // connect GRU layers
+      for (unsigned int j = 1; j < layer_groups[index].size(); j ++) {
+        LayerProto* unroll_layer = conf.mutable_layer(layer_groups[index][j]);
+        string srcname = conf.layer(layer_groups[index][j-1]).name();
+        unroll_layer->add_srclayers(srcname);
+        // LOG(ERROR) << "connect " << unroll_layer->name() << " from " << srcname;
+      }
+    }
+
+  }
+  return conf;
 }
 
 
@@ -202,10 +229,12 @@ NeuralNet::NeuralNet(NetProto netproto, int npartitions) {
   auto graph = CreateGraph(netproto, npartitions);
   CreateNetFromGraph(graph);
   PrepareDataStructures();
+
   for (Node* node : graph->nodes())
     delete static_cast<LayerProto*>(node->proto);
   delete graph;
   LOG(INFO) << "NeuralNet Constructed";
+  unroll_len_ = netproto.unroll_len();
 }
 
 NeuralNet::~NeuralNet() {
@@ -243,7 +272,7 @@ void NeuralNet::ShareParamsFrom(NeuralNet* other, bool cpu_only) {
       const auto& params = layer->GetParams();
       CHECK_EQ(params.size(), otherparams.size());
       for (size_t i = 0; i < params.size(); i++) {
-        params[i]->ShareFrom(otherparams[i], cpu_only);
+        params[i]->ShareDataFrom(otherparams[i], cpu_only);
       }
     }
   }
@@ -442,6 +471,7 @@ Graph* NeuralNet::CreateGraph(const NetProto& netproto, int npartitions) {
       proto->set_num_partitions(npartitions);
       Node* node = graph->AddNode(nodename, layer.name(), i, proto);
       nodes.push_back(node);
+      // TODO(wangwei) update param name
     }
     name2nodes[layer.name()] = nodes;
     name2proto[layer.name()] = &layer;
@@ -526,14 +556,8 @@ void NeuralNet::CreateNetFromGraph(Graph* graph) {
     layer->Setup(*(static_cast<LayerProto*>(node->proto)), srclayers(layer));
     DLOG(INFO) << "constructing graph: " << layer->name();
     layerinfo[layer->name()] = IntVecToString(layer->data(nullptr).shape());
-    string param_name = "$";
     for (auto param : layer->GetParams()) {
       param->set_id(paramid++);
-      // if user does not name the param, then name it based on layer name.
-      if (param->name() == "") {
-        param->set_name(layer->name() + param_name);
-        param_name += "$";
-      }
     }
     if (layer->partition_dim() == 0)
       share_param_layers[node->origin].push_back(layer);
@@ -556,12 +580,25 @@ void NeuralNet::CreateNetFromGraph(Graph* graph) {
     const string share_from = param->share_from();
     if (param->share_from() != "") {
       if (name2param.find(share_from) != name2param.end()) {
-        param->ShareFrom(name2param.at(param->share_from()), false);
+        param->ShareDataFrom(name2param.at(param->share_from()), false);
       } else {
         LOG(FATAL) << "No param with the name (share_from) " << share_from;
       }
     }
   }
+
+  // share params due to layer unrolling
+  for (auto & entry : name2param) {
+    Param* param = entry.second;
+    auto pos = param->name().find("#");
+    if (pos != std::string::npos && param->owner() != param->id()) {
+      string from = "0" + param->name().substr(pos);
+      CHECK(name2param.find(from) != name2param.end())
+        << "Can't find owner = " << from << " for param = " << param->name();
+      Param* owner = name2param.at(from);
+      param->ShareFrom(owner);
+    }
+  }
   // share Params for layers generated (partitioned) from the same origin layer
   for (auto & entry : share_param_layers) {
     const auto& owner = entry.second.begin();
@@ -570,7 +607,7 @@ void NeuralNet::CreateNetFromGraph(Graph* graph) {
       auto params = (*it)->GetParams();
       CHECK_EQ(params.size(), owner_params.size());
       for (size_t i = 0; i < params.size(); i++)
-        params.at(i)->ShareFrom(owner_params.at(i), true);
+        params.at(i)->ShareDataFrom(owner_params.at(i), true);
     }
   }
 }
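
For reference, a minimal standalone sketch (an illustration only, not part of the patch; the layer and param names are hypothetical) of the unrolled naming and sharing convention used by Unrolling() above: copies for unroll step i get the prefix "i#", and params of steps i > 0 share data from the step-0 copy.

// Standalone C++ illustration of the "i#<name>" unrolling convention.
#include <iostream>
#include <string>

int main() {
  const std::string layer = "gru";      // hypothetical layer name
  const std::string param = "w_z_hx";   // hypothetical param name
  const int unroll_len = 3;
  for (int i = 0; i < unroll_len; ++i) {
    std::string lname = std::to_string(i) + "#" + layer;  // e.g. "1#gru"
    std::string pname = std::to_string(i) + "#" + param;  // e.g. "1#w_z_hx"
    std::string share = (i > 0) ? "0#" + param : "";      // step 0 owns the data
    std::cout << lname << "  " << pname
              << (share.empty() ? "" : "  share_from=" + share) << "\n";
  }
  return 0;
}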

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/959ef705/src/neuralnet/neuron_layer/embedding.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuron_layer/embedding.cc b/src/neuralnet/neuron_layer/embedding.cc
new file mode 100644
index 0000000..00e9139
--- /dev/null
+++ b/src/neuralnet/neuron_layer/embedding.cc
@@ -0,0 +1,98 @@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*   http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied.  See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+#include "singa/neuralnet/neuron_layer.h"
+#include "singa/utils/math_addr.h"
+#include "singa/utils/math_blob.h"
+#include "singa/utils/singleton.h"
+#include "singa/utils/context.h"
+
+namespace singa {
+
+void EmbeddingLayer::Setup(const LayerProto& conf,
+    const vector<Layer*>& srclayers) {
+  NeuronLayer::Setup(conf, srclayers);
+  vocab_size_ = conf.embedding_conf().vocab_size();
+  feature_dim_ = conf.embedding_conf().feature_dim();
+  vocab_ = Param::Create(conf.param(0));
+  vocab_->Setup(vector<int>{vocab_size_, feature_dim_});
+  batchsize_ = srclayers.at(0)->data(unroll_index()).shape(0);
+  data_.Reshape(batchsize_, feature_dim_);
+  grad_.ReshapeLike(data_);
+}
+
+void EmbeddingLayer::ComputeFeature(int flag, const vector<Layer*>& srclayers) {
+  const float* word_idx = srclayers.at(0)->data(unroll_index()).cpu_data();
+  int device = Singleton<Context>::Instance()->device_id();
+  if (device == -1) {
+    const float* src = vocab_->data().cpu_data();
+    float* dst = data_.mutable_cpu_data();
+    for (int i = 0; i < batchsize_; i++) {
+      memcpy(dst + i * feature_dim_,
+          src + static_cast<int>(word_idx[i]) * feature_dim_,
+          feature_dim_ * sizeof(float));
+    }
+  } else {
+#ifdef USE_GPU
+    const float* src = vocab_->data().gpu_data();
+    float* dst = data_.mutable_gpu_data();
+    for (int i = 0; i < batchsize_; i++) {
+      cudaMemcpy(dst + i * feature_dim_,
+          src + static_cast<int>(word_idx[i]) * feature_dim_,
+          feature_dim_ * sizeof(float), cudaMemcpyDefault);
+    }
+#else
+    LOG(FATAL) << "Not implemented";
+#endif
+  }
+}
+
+void EmbeddingLayer::ComputeGradient(int flag, const vector<Layer*>& srclayers)
+{
+  const float* word_idx = srclayers.at(0)->data(unroll_index()).cpu_data();
+  auto context = Singleton<Context>::Instance();
+  if ((flag & kAggGrad) == 0)
+    Zero(vocab_->mutable_grad());
+
+  if (context->device_id() == -1) {
+    const float* src = grad_.cpu_data();
+    float* dst = vocab_->mutable_grad()->mutable_cpu_data();
+    for (int i = 0; i < batchsize_; i++) {
+      cpu_axpy(feature_dim_, 1.0f, src + i * feature_dim_,
+          dst + static_cast<int>(word_idx[i]) * feature_dim_);
+    }
+  } else {
+#ifdef USE_GPU
+    const float* src = grad_.gpu_data();
+    float* dst = vocab_->mutable_grad()->mutable_gpu_data();
+    for (int i = 0; i < batchsize_; i++) {
+      gpu_axpy(context->cublas_handle(), feature_dim_, 1.0f,
+          src + i * feature_dim_,
+          dst + static_cast<int>(word_idx[i]) * feature_dim_);
+    }
+#else
+    LOG(FATAL) << "Not implemented";
+#endif
+  }
+}
+
+}  // namespace singa
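
For reference, a minimal CPU-only sketch (assumption: plain arrays instead of SINGA Blobs and Params; the sizes and values are made up) of what EmbeddingLayer does above: the forward pass copies one vocab row per word id, and the backward pass scatter-adds the output gradient back into the corresponding vocab-gradient rows.

#include <cstring>
#include <iostream>
#include <vector>

int main() {
  const int vocab_size = 4, feature_dim = 2, batchsize = 3;
  std::vector<float> vocab = {0, 0, 1, 1, 2, 2, 3, 3};   // vocab_size x feature_dim
  std::vector<float> word_idx = {2, 0, 2};               // word ids, stored as floats
  std::vector<float> data(batchsize * feature_dim);
  // forward: data[i] = vocab[word_idx[i]]
  for (int i = 0; i < batchsize; ++i)
    std::memcpy(&data[i * feature_dim],
                &vocab[static_cast<int>(word_idx[i]) * feature_dim],
                feature_dim * sizeof(float));
  // backward: vocab_grad[word_idx[i]] += grad[i]
  std::vector<float> grad(batchsize * feature_dim, 1.0f);
  std::vector<float> vocab_grad(vocab_size * feature_dim, 0.0f);
  for (int i = 0; i < batchsize; ++i)
    for (int j = 0; j < feature_dim; ++j)
      vocab_grad[static_cast<int>(word_idx[i]) * feature_dim + j] +=
          grad[i * feature_dim + j];
  std::cout << data[0] << " " << vocab_grad[4] << "\n";  // 2 and 2 (row 2 hit twice)
  return 0;
}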

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/959ef705/src/neuralnet/neuron_layer/gru.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuron_layer/gru.cc b/src/neuralnet/neuron_layer/gru.cc
index 45d7873..9ba5a50 100644
--- a/src/neuralnet/neuron_layer/gru.cc
+++ b/src/neuralnet/neuron_layer/gru.cc
@@ -64,6 +64,8 @@ void GRULayer::Setup(const LayerProto& conf,
 
   data_.Reshape(vector<int>{batchsize_, hdim_});
   grad_.ReshapeLike(data_);
+  // one for grad from dst GRU, one for grad from upper layer
+  gradvec_.push_back(new Blob<float>(grad_.shape()));
 
   // Initialize the parameters
   weight_z_hx_ = Param::Create(conf.param(0));
@@ -74,7 +76,7 @@ void GRULayer::Setup(const LayerProto& conf,
   weight_r_hh_ = Param::Create(conf.param(4));
   weight_c_hh_ = Param::Create(conf.param(5));
 
-  if (conf.gru_conf().bias_term()) {
+  if (conf.param_size() > 6) {
 	  bias_z_ = Param::Create(conf.param(6));
 	  bias_r_ = Param::Create(conf.param(7));
 	  bias_c_ = Param::Create(conf.param(8));
@@ -88,7 +90,7 @@ void GRULayer::Setup(const LayerProto& conf,
   weight_r_hh_->Setup(vector<int>{hdim_, hdim_});
   weight_c_hh_->Setup(vector<int>{hdim_, hdim_});
 
-  if (conf.gru_conf().bias_term()) {
+  if (conf.param_size() > 6) {
 	  bias_z_->Setup(vector<int>{hdim_});
 	  bias_r_->Setup(vector<int>{hdim_});
 	  bias_c_->Setup(vector<int>{hdim_});
@@ -97,7 +99,6 @@ void GRULayer::Setup(const LayerProto& conf,
   update_gate = new Blob<float>(batchsize_, hdim_);
   reset_gate = new Blob<float>(batchsize_, hdim_);
   new_memory = new Blob<float>(batchsize_, hdim_);
-
 }
 
 void GRULayer::ComputeFeature(int flag,
@@ -105,11 +106,11 @@ void GRULayer::ComputeFeature(int flag,
 	CHECK_LE(srclayers.size(), 2);
 
 	// Do transpose
-	Blob<float> *w_z_hx_t = Transpose (weight_z_hx_->data());
+  Blob<float> *w_z_hx_t = Transpose (weight_z_hx_->data());
 	Blob<float> *w_z_hh_t = Transpose (weight_z_hh_->data());
-	Blob<float> *w_r_hx_t = Transpose (weight_r_hx_->data());
+  Blob<float> *w_r_hx_t = Transpose (weight_r_hx_->data());
 	Blob<float> *w_r_hh_t = Transpose (weight_r_hh_->data());
-	Blob<float> *w_c_hx_t = Transpose (weight_c_hx_->data());
+  Blob<float> *w_c_hx_t = Transpose (weight_c_hx_->data());
 	Blob<float> *w_c_hh_t = Transpose (weight_c_hh_->data());
 
 	// Prepare the data input and the context
@@ -123,49 +124,34 @@ void GRULayer::ComputeFeature(int flag,
 
 	// Compute the update gate
 	GEMM(1.0f, 0.0f, src,*w_z_hx_t,update_gate);
-	if (bias_z_ != nullptr)
+ 	if (bias_z_ != nullptr)
 		MVAddRow(1.0f,1.0f,bias_z_->data(),update_gate);
-	Blob<float> zprev (batchsize_,hdim_);
-	GEMM(1.0f, 0.0f, *context,*w_z_hh_t, &zprev);
-	Add<float>(*update_gate, zprev, update_gate);
+	GEMM(1.0f, 1.0f, *context, *w_z_hh_t, update_gate);
 	Map<op::Sigmoid<float>,float>(*update_gate, update_gate);
 
 	// Compute the reset gate
 	GEMM(1.0f, 0.0f, src,*w_r_hx_t,reset_gate);
 	if (bias_r_ != nullptr)
 		MVAddRow(1.0f,1.0f,bias_r_->data(),reset_gate);
-	Blob<float> rprev (batchsize_, hdim_);
-	GEMM(1.0f, 0.0f, *context, *w_r_hh_t, &rprev);
-	Add<float>(*reset_gate, rprev, reset_gate);
+	GEMM(1.0f, 1.0f, *context, *w_r_hh_t, reset_gate);
 	Map<op::Sigmoid<float>,float>(*reset_gate, reset_gate);
 
 	// Compute the new memory
 	GEMM(1.0f, 0.0f, src, *w_c_hx_t, new_memory);
 	if (bias_c_ != nullptr)
 		MVAddRow(1.0f,1.0f,bias_c_->data(), new_memory);
-	Blob<float> cprev (batchsize_, hdim_);
-	GEMM(1.0f, 0.0f, *context, *w_c_hh_t, &cprev);
-	//Blob<float> new_cprev (batchsize_, hdim_);
-	Mult<float>(*reset_gate, cprev, &cprev);
-	Add<float>(*new_memory, cprev, new_memory);
+	Mult<float>(*reset_gate, *new_memory, new_memory);
+	GEMM(1.0f, 1.0f, *context, *w_c_hh_t, new_memory);
 	Map<op::Tanh<float>,float>(*new_memory, new_memory);
 
-	// Compute data - new memory part
-	Blob<float> z1 (batchsize_,hdim_);
-	for (int i = 0; i < z1.count(); i ++) {
-		z1.mutable_cpu_data()[i] = 1.0f; // generate a matrix with ones
-	}
-	AXPY<float>(-1.0f, *update_gate, &z1);
-	Mult<float>(z1, *new_memory, &data_);
 
-	// Compute data - context part
-	Blob<float> data_prev (batchsize_, hdim_);
-	Mult<float>(*update_gate,*context,&data_prev);
-	Add<float>(data_, data_prev, &data_);
+  Sub(*context, *new_memory, &data_);
+  Mult(data_, *update_gate, &data_);
+  Add(data_, *new_memory, &data_);
 
 	// delete the pointers
-	if (srclayers.size() == 1) delete context;
-	else context = NULL;
+	if (srclayers.size() == 1)
+    delete context;
 
 	delete w_z_hx_t;
 	delete w_z_hh_t;
@@ -178,14 +164,20 @@ void GRULayer::ComputeFeature(int flag,
 void GRULayer::ComputeGradient(int flag,
     const vector<Layer*>& srclayers) {
 	CHECK_LE(srclayers.size(), 2);
+  // agg grad from two dst layers
+  AXPY(1.0f, *gradvec_[1], &grad_);
+  float beta = 1.0f; // agg param gradients
 
+  Layer* ilayer = srclayers[0]; // input layer
+  Layer* clayer = nullptr; // context layer
 	// Prepare the data input and the context
-	const Blob<float>& src = srclayers[0]->data(this);
+	const Blob<float>& src = ilayer->data(this);
 	const Blob<float> *context;
 	if (srclayers.size() == 1) { // only have data input
 		context = new Blob<float>(batchsize_, hdim_);
 	} else { // have data input & context
-		context = &srclayers[1]->data(this);
+    clayer = srclayers[1];
+		context = &(clayer->data(this));
 	}
 
 	// Prepare gradient of output neurons
@@ -197,7 +189,7 @@ void GRULayer::ComputeGradient(int flag,
 	Blob<float> drgatedr (batchsize_, hdim_);
 	Map<singa::op::SigmoidGrad<float>, float>(*reset_gate, &drgatedr);
 	Blob<float> dnewmdc (batchsize_, hdim_);
-	Map<singa::op::TanhGrad<float>, float>(*new_memory,&dnewmdc);
+	Map<singa::op::TanhGrad<float>, float>(*new_memory, &dnewmdc);
 
 	Blob<float> dLdz (batchsize_, hdim_);
 	Sub<float>(*context, *new_memory, &dLdz);
@@ -206,9 +198,7 @@ void GRULayer::ComputeGradient(int flag,
 
 	Blob<float> dLdc (batchsize_,hdim_);
 	Blob<float> z1 (batchsize_,hdim_);
-	for (int i = 0; i < z1.count(); i ++) {
-		z1.mutable_cpu_data()[i] = 1.0f; // generate a matrix with ones
-	}
+  z1.SetValue(1.0f);
 	AXPY<float>(-1.0f, *update_gate, &z1);
 	Mult(grad_,z1,&dLdc);
 	Mult(dLdc,dnewmdc,&dLdc);
@@ -218,57 +208,58 @@ void GRULayer::ComputeGradient(int flag,
 
 	Blob<float> dLdr (batchsize_, hdim_);
 	Blob<float> cprev (batchsize_, hdim_);
-	Blob<float> *w_c_hh_t = Transpose(weight_c_hh_->data());
-	GEMM(1.0f,0.0f,*context,*w_c_hh_t, &cprev);
-	delete w_c_hh_t;
-	Mult(dLdc,cprev,&dLdr);
-	Mult(dLdr,drgatedr,&dLdr);
-
+	GEMM(1.0f, 0.0f, *context, weight_c_hh_->data().T(), &cprev);
+	Mult(dLdc, cprev, &dLdr);
+	Mult(dLdr, drgatedr, &dLdr);
 
 	// Compute gradients for parameters of update gate
 	Blob<float> *dLdz_t = Transpose(dLdz);
-	GEMM(1.0f,0.0f,*dLdz_t,src,weight_z_hx_->mutable_grad());
-	GEMM(1.0f,0.0f,*dLdz_t,*context,weight_z_hh_->mutable_grad());
+	GEMM(1.0f, beta, *dLdz_t, src, weight_z_hx_->mutable_grad());
+	GEMM(1.0f, beta, *dLdz_t, *context, weight_z_hh_->mutable_grad());
 	if (bias_z_ != nullptr)
-		MVSumRow<float>(1.0f,0.0f,dLdz,bias_z_->mutable_grad());
+		MVSumRow<float>(1.0f, beta, dLdz, bias_z_->mutable_grad());
 	delete dLdz_t;
 
 	// Compute gradients for parameters of reset gate
 	Blob<float> *dLdr_t = Transpose(dLdr);
-	GEMM(1.0f,0.0f,*dLdr_t,src,weight_r_hx_->mutable_grad());
-	GEMM(1.0f,0.0f,*dLdr_t,*context,weight_r_hh_->mutable_grad());
+	GEMM(1.0f, beta, *dLdr_t, src, weight_r_hx_->mutable_grad());
+	GEMM(1.0f, beta, *dLdr_t, *context, weight_r_hh_->mutable_grad());
 	if (bias_r_ != nullptr)
-		MVSumRow(1.0f,0.0f,dLdr,bias_r_->mutable_grad());
+		MVSumRow(1.0f, beta, dLdr, bias_r_->mutable_grad());
 	delete dLdr_t;
 
 	// Compute gradients for parameters of new memory
 	Blob<float> *dLdc_t = Transpose(dLdc);
-	GEMM(1.0f,0.0f,*dLdc_t,src,weight_c_hx_->mutable_grad());
+	GEMM(1.0f, beta, *dLdc_t, src,weight_c_hx_->mutable_grad());
 	if (bias_c_ != nullptr)
-		MVSumRow(1.0f,0.0f,dLdc,bias_c_->mutable_grad());
+		MVSumRow(1.0f, beta, dLdc, bias_c_->mutable_grad());
 	delete dLdc_t;
 
 	Blob<float> *reset_dLdc_t = Transpose(reset_dLdc);
-	GEMM(1.0f,0.0f,*reset_dLdc_t,*context,weight_c_hh_->mutable_grad());
+	GEMM(1.0f, beta, *reset_dLdc_t, *context, weight_c_hh_->mutable_grad());
 	delete reset_dLdc_t;
 
 	// Compute gradients for data input layer
 	if (srclayers[0]->mutable_grad(this) != nullptr) {
-		GEMM(1.0f,0.0f,dLdc,weight_c_hx_->data(),srclayers[0]->mutable_grad(this));
-		GEMM(1.0f,1.0f,dLdz,weight_z_hx_->data(),srclayers[0]->mutable_grad(this));
-		GEMM(1.0f,1.0f,dLdr,weight_r_hx_->data(), srclayers[0]->mutable_grad(this));
+		GEMM(1.0f,0.0f,dLdc, weight_c_hx_->data(), ilayer->mutable_grad(this));
+		GEMM(1.0f,1.0f,dLdz, weight_z_hx_->data(), ilayer->mutable_grad(this));
+		GEMM(1.0f,1.0f,dLdr, weight_r_hx_->data(), ilayer->mutable_grad(this));
 	}
 
-	if (srclayers.size() > 1 && srclayers[1]->mutable_grad(this) != nullptr) {
+	if (clayer != nullptr && clayer->mutable_grad(this) != nullptr) {
 		// Compute gradients for context layer
-		GEMM(1.0f,0.0f,reset_dLdc,weight_c_hh_->data(), srclayers[1]->mutable_grad(this));
-		GEMM(1.0f,1.0f,dLdr, weight_r_hh_->data(), srclayers[1]->mutable_grad(this));
-		GEMM(1.0f,1.0f,dLdz,weight_z_hh_->data(), srclayers[1]->mutable_grad(this));
-		Add(srclayers[1]->grad(this), *update_gate, srclayers[1]->mutable_grad(this));
+		GEMM(1.0f, 0.0f, reset_dLdc, weight_c_hh_->data(),
+        clayer->mutable_grad(this));
+		GEMM(1.0f, 1.0f, dLdr, weight_r_hh_->data(), clayer->mutable_grad(this));
+		GEMM(1.0f, 1.0f, dLdz, weight_z_hh_->data(), clayer->mutable_grad(this));
+		Add(clayer->grad(this), *update_gate, clayer->mutable_grad(this));
+    // LOG(ERROR) << "grad to prev gru " << Asum(clayer->grad(this));
 	}
 
-	if (srclayers.size() == 1) delete context;
-	else context = NULL;
+	if (srclayers.size() == 1)
+    delete context;
+	else
+    context = NULL;
 	delete grad_t;
 }
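
For reference, the forward pass implemented by this revision of ComputeFeature, written out as equations (a sketch under naming assumptions: x_t is the src input, h_{t-1} the context, W_{zx} stands for weight_z_hx_, W_{zh} for weight_z_hh_, and so on; \odot is element-wise multiplication). The new-memory term is revised again by a later commit in this thread.

\begin{aligned}
z_t &= \sigma\left(x_t W_{zx}^\top + h_{t-1} W_{zh}^\top + b_z\right) \\
r_t &= \sigma\left(x_t W_{rx}^\top + h_{t-1} W_{rh}^\top + b_r\right) \\
\tilde{h}_t &= \tanh\left(r_t \odot \left(x_t W_{cx}^\top + b_c\right) + h_{t-1} W_{ch}^\top\right) \\
h_t &= z_t \odot h_{t-1} + (1 - z_t) \odot \tilde{h}_t
\end{aligned}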
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/959ef705/src/neuralnet/neuron_layer/inner_product.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuron_layer/inner_product.cc b/src/neuralnet/neuron_layer/inner_product.cc
index f50afba..1e5e93e 100644
--- a/src/neuralnet/neuron_layer/inner_product.cc
+++ b/src/neuralnet/neuron_layer/inner_product.cc
@@ -66,12 +66,17 @@ void InnerProductLayer::ComputeFeature(int flag,
 
 void InnerProductLayer::ComputeGradient(int flag,
     const vector<Layer*>& srclayers) {
-
-  MVSumRow(1.0f, 0.0f, grad_, bias_->mutable_grad());
+  float beta = 0.0f;
+  if (flag & kAggGrad)
+    beta = 1.0f;
+  MVSumRow(1.0f, beta, grad_, bias_->mutable_grad());
   if (transpose_)
-    MMDot(srclayers[0]->data(this).T(), grad_, weight_->mutable_grad());
+    GEMM(1.0f, beta, srclayers[0]->data(this).T(), grad_,
+        weight_->mutable_grad());
   else
-    MMDot(grad_.T(), srclayers[0]->data(this), weight_->mutable_grad());
+    GEMM(1.0f, beta, grad_.T(), srclayers[0]->data(this),
+        weight_->mutable_grad());
+
   if (srclayers[0]->mutable_grad(this) != nullptr) {
     if (transpose_)
       MMDot(grad_, weight_->data().T(), srclayers[0]->mutable_grad(this));
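
For reference, a standalone sketch (simplified away from the real Blob/GEMM API; the helper name below is made up) of the kAggGrad convention used here: with the flag set, the layer accumulates into the shared parameter gradient (beta = 1) rather than overwriting it (beta = 0), which BPTT relies on because unrolled layers share a single Param.

#include <iostream>
#include <vector>

enum Flag { kAggGrad = 512 };  // mirrors the Phase::kAggGrad value added in job.proto below

// grad_out = 1.0 * local_grad + beta * grad_out, analogous to GEMM(1.0f, beta, ...)
void AccumulateGrad(int flag, const std::vector<float>& local_grad,
                    std::vector<float>* grad_out) {
  float beta = (flag & kAggGrad) ? 1.0f : 0.0f;
  for (size_t i = 0; i < grad_out->size(); ++i)
    (*grad_out)[i] = local_grad[i] + beta * (*grad_out)[i];
}

int main() {
  std::vector<float> param_grad{0.0f, 0.0f};
  // Two unrolled steps write into the same (shared) parameter gradient.
  AccumulateGrad(kAggGrad, {1.0f, 2.0f}, &param_grad);
  AccumulateGrad(kAggGrad, {0.5f, 0.5f}, &param_grad);
  std::cout << param_grad[0] << " " << param_grad[1] << "\n";  // 1.5 2.5
  return 0;
}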

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/959ef705/src/proto/job.proto
----------------------------------------------------------------------
diff --git a/src/proto/job.proto b/src/proto/job.proto
index 7cdc287..e520eba 100644
--- a/src/proto/job.proto
+++ b/src/proto/job.proto
@@ -120,6 +120,14 @@ message NetProto {
   repeated LayerProto layer = 1;
   // partitioning type for parallelism
   optional int32 partition_dim = 20 [default = 0];
+  // Each layer corresponds to a group of unrolled layers, used in RNN models
+  repeated LayerGroupProto layer_group = 21;
+  optional int32 unroll_len = 22 [default = 1];
+}
+
+message LayerGroupProto {
+  // names of the layers that belong to the same group
+  repeated string layer = 1;
 }
 
 message UpdaterProto {
@@ -139,6 +147,9 @@ message UpdaterProto {
   // used to avoid divide by 0, i.e. x/(y+delta)
   optional float delta = 35 [default = 0.00000001];
 
+  optional float clip_low = 36 [default = 0];
+  optional float clip_high = 37 [default = 0];
+
   extensions 101 to 200;
 }
 
@@ -195,10 +206,11 @@ message LayerProto {
   // share data and grad blob with the single src layer, e.g., relu layer can
   // share blobs from conv layer. It is useful for saving memory space.
   optional bool share_src_blobs = 22 [default = false];
-
+  // for unrolling layers in RNN models
   optional int32 unroll_len = 23 [default = 1];
-  repeated UnrollConnType unroll_conn_type = 24;
-  repeated int32 shift = 25;
+  optional int32 unroll_index = 24 [default = 0];
+  repeated UnrollConnType unroll_conn_type = 25;
+  repeated int32 shift = 26;
 
   // overrides the partition dimension for neural net
   optional int32 partition_dim = 60 [default = -1];
@@ -215,6 +227,7 @@ message LayerProto {
   optional MnistProto mnist_conf = 192;
   optional RGBImageProto rgbimage_conf = 193;
   optional DataProto sharddata_conf = 194;
+  optional CharRNNProto char_rnn_conf = 195;
 
   // configuration for neuron layers id range [200, 300)
   optional ActivationProto activation_conf = 200;
@@ -228,6 +241,7 @@ message LayerProto {
   optional ReLUProto relu_conf = 211;
   optional SoftmaxProto softmax_conf = 214;
   optional GRUProto gru_conf = 215;
+  optional EmbeddingProto embedding_conf = 216;
 
   // configuration for loss layers, id range [300, 400)
   optional SoftmaxLossProto softmaxloss_conf = 301;
@@ -354,7 +368,19 @@ message StoreProto {
   optional int32 random_skip = 11 [default = 0];
   optional bool has_label = 12 [default = true];
 }
+message CharRNNProto {
+  optional string path = 1;
+  optional string vocab_path = 2;
+  // number of chars to read per instance; should equal NetProto::unroll_len
+  optional int32 unroll_len = 3 [default = 50];
+  optional int32 batchsize = 4 [default = 1];
+}
 
+message EmbeddingProto {
+  optional int32 vocab_size = 1 [default = 0];
+  optional int32 feature_dim = 2 [default = 100];
+
+}
 message SoftmaxLossProto {
   // computing accuracy against topk results
   optional int32 topk = 1 [default = 1];
@@ -572,6 +598,8 @@ enum AlgType {
   kBP = 1;
   // Contrastive Divergence algorithm for RBM, DBM, etc.
   kCD = 2;
+  // BPTT for training RNN models
+  kBPTT = 3;
   // For user defined algorithm.
   kUserAlg = 104;
 }
@@ -590,6 +618,9 @@ enum LayerType {
   kMnist = 192;  // deprecated
   kRGBImage = 193;  // deprecated
   kShardData = 194;  // deprecated
+  kCharRNN = 195;
+  kRNNLabel = 196;
+  kOneHot = 197;
 
   /*
    * Neuron layers
@@ -610,6 +641,8 @@ enum LayerType {
   kSigmoid = 213;
   kSoftmax = 214;
   kGRU = 215;
+  kEmbedding = 216;
+
   // cudnn v3
   kCudnnConv = 250;
   kCudnnPool = 251;
@@ -678,6 +711,9 @@ enum Phase {
   kBackward = 64;
   kLoss = 128;
   kDeploy = 256;
+
+  // used to aggregate parameter gradients when Param is shared
+  kAggGrad = 512;
 }
 
 enum ParamType {

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/959ef705/src/stub.cc
----------------------------------------------------------------------
diff --git a/src/stub.cc b/src/stub.cc
index 7c0ec90..c06128c 100644
--- a/src/stub.cc
+++ b/src/stub.cc
@@ -83,6 +83,8 @@ const std::unordered_map<int, ParamEntry*>  CreateParamShard(
     int grp = entry.first;
     int wstart = grp2workers[grp].first, wend = grp2workers[grp].second;
     for (auto layer : entry.second->layers()) {
+      if (layer->unroll_index() > 0)
+        continue;
       int partition = layer->partition_id();
       bool local =  partition >= wstart && partition < wend;
       for (auto param : layer->GetParams()) {

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/959ef705/src/test/test_gru_layer.cc
----------------------------------------------------------------------
diff --git a/src/test/test_gru_layer.cc b/src/test/test_gru_layer.cc
index 296b795..e432ae1 100644
--- a/src/test/test_gru_layer.cc
+++ b/src/test/test_gru_layer.cc
@@ -239,7 +239,6 @@ TEST_F(GRULayerTest, ComputeFeature) {
 
 	singa::GRULayer gru_layer_2;
 	gru_layer_2.Setup(gru2_conf, std::vector<singa::Layer*>{&in_layer_2, &gru_layer_1});
-
 	for (unsigned int i = 0; i < gru_layer_2.GetParams().size(); i ++) {
 		gru_layer_2.GetParams()[i]->InitValues();
 	}

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/959ef705/src/test/test_math.cc
----------------------------------------------------------------------
diff --git a/src/test/test_math.cc b/src/test/test_math.cc
index 2e7deec..2627b2e 100644
--- a/src/test/test_math.cc
+++ b/src/test/test_math.cc
@@ -286,7 +286,6 @@ TEST(MathTest, TestAxpyGPU) {
 TEST(MathTest, TestDotGPU) {
   float A[12];
   float B[12];
-
   for (int i = 0; i < 12; i++) {
     A[i] = i - 1;
     B[i] = i + 1;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/959ef705/src/utils/common.cc
----------------------------------------------------------------------
diff --git a/src/utils/common.cc b/src/utils/common.cc
index 928d4bb..eefdb5c 100644
--- a/src/utils/common.cc
+++ b/src/utils/common.cc
@@ -570,4 +570,30 @@ void WriteStringToTextFile(const string& filename, const string& context) {
   ofs.flush();
   ofs.close();
 }
+
+
+const vector<std::pair<string, float>> GetMetricFromString(const string& disp) {
+  size_t pos = 0;
+  vector<string> terms;
+  while (pos != string::npos) {
+    auto next = disp.find_first_of(" ,", pos);  // delimiter: space or comma
+    if (next != string::npos) {
+      terms.push_back(disp.substr(pos, next - pos));
+      pos = disp.find_first_not_of(" ,", next + 1);
+    } else {
+      break;
+    }
+  }
+  if (pos != string::npos)
+    terms.push_back(disp.substr(pos));
+  vector<std::pair<string, float>> ret;
+  for (unsigned i = 0; i < terms.size(); i++) {
+    if (terms[i] == "=") {
+      CHECK_GE(i, 1);
+      CHECK_LT(i, terms.size() - 1) << "terms[i] = " << terms[i];
+      ret.push_back(std::make_pair(terms[i-1], std::stof(terms[i + 1])));
+    }
+  }
+  return ret;
+}
 }  // namespace singa
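
For reference, a usage sketch of the new helper (assumptions: the declaration is exported via "singa/utils/common.h", and the display string follows the "name = value" format produced by Layer::ToString).

#include <iostream>
#include "singa/utils/common.h"

int main() {
  // Parses "name = value" pairs separated by spaces/commas.
  const auto metrics = singa::GetMetricFromString("loss = 1.25, accuracy = 0.83");
  for (const auto& m : metrics)
    std::cout << m.first << " = " << m.second << "\n";  // loss = 1.25, accuracy = 0.83
  return 0;
}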

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/959ef705/src/utils/param.cc
----------------------------------------------------------------------
diff --git a/src/utils/param.cc b/src/utils/param.cc
index bdae72f..95396bc 100644
--- a/src/utils/param.cc
+++ b/src/utils/param.cc
@@ -166,7 +166,12 @@ void Param::InitValues(int version) {
   set_version(version);
 }
 
-void Param::ShareFrom(Param* other, bool cpu_only) {
+void Param::ShareDataFrom(Param* other, bool cpu_only) {
+  if (this == other) {
+    LOG(WARNING) << "No need to share Param with itself";
+    return;
+  }
+
   proto_.set_owner(other->owner());
   CHECK_EQ(data_.count(), other->data_.count());
   data_.ShareData(&(other->data_), cpu_only);
@@ -183,6 +188,16 @@ void Param::ShareFrom(Param* other, bool cpu_only) {
   pending_update_.resize(other->pending_update_.size());
 }
 
+void Param::ShareFrom(Param* other) {
+  if (this == other) {
+    LOG(WARNING) << "No need to share Param with itself";
+    return;
+  }
+
+  ShareDataFrom(other, false);
+  grad_.ShareData(&(other->grad_), false);
+}
+
 void Param::FromProto(const BlobProto& blob) {
   data_.FromProto(blob);
 }

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/959ef705/src/utils/updater.cc
----------------------------------------------------------------------
diff --git a/src/utils/updater.cc b/src/utils/updater.cc
index 21608fa..a9f70c0 100644
--- a/src/utils/updater.cc
+++ b/src/utils/updater.cc
@@ -101,9 +101,24 @@ void Updater::Init(const UpdaterProto& proto) {
   momentum_ = proto.momentum();
   weight_decay_ = proto.weight_decay();
   lr_gen_ = LRGenerator::Create(proto.learning_rate());
+  clip_low_ = proto.clip_low();
+  clip_high_ = proto.clip_high();
+}
+
+void Updater::Clip(const float low, const float high, Param* param) {
+  Blob<float>* grad = param->mutable_grad();
+  float* ptr = grad->mutable_cpu_data();
+  for (int i = 0; i < grad->count(); i++) {
+    if (ptr[i] > high)
+      ptr[i] = high;
+    else if (ptr[i] < low)
+      ptr[i] = low;
+  }
 }
 
 void SGDUpdater::Update(int step, Param* param, float grad_scale) {
+  if (clip_high_ > clip_low_)
+    Clip(clip_low_, clip_high_, param);
   Shape<1> s = Shape1(param->size());
   Tensor<cpu, 1> data(param->mutable_cpu_data(), s);
   Tensor<cpu, 1> grad(param->mutable_cpu_grad(), s);
@@ -143,6 +158,8 @@ void NesterovUpdater::Update(int step, Param* param, float grad_scale) {
 }
 /***********************AdaGrad******************************/
 void AdaGradUpdater::Update(int step, Param* param, float grad_scale) {
+  if (clip_high_ > clip_low_)
+    Clip(clip_low_, clip_high_, param);
   Shape<1> s = Shape1(param->size());
   Tensor<cpu, 1> data(param->mutable_cpu_data(), s);
   Tensor<cpu, 1> grad(param->mutable_cpu_grad(), s);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/959ef705/src/worker.cc
----------------------------------------------------------------------
diff --git a/src/worker.cc b/src/worker.cc
index 8495b5c..4e1dc75 100644
--- a/src/worker.cc
+++ b/src/worker.cc
@@ -29,6 +29,7 @@
 #include "singa/utils/factory.h"
 #include "singa/utils/singleton.h"
 #include "singa/utils/context.h"
+#include "singa/utils/math_blob.h"
 
 namespace singa {
 
@@ -214,7 +215,6 @@ void Worker::InitNetParams(const JobProto& job_conf, NeuralNet* net) {
   }
 }
 
-
 void Worker::Checkpoint(int step, const std::string& folder, NeuralNet* net) {
   BlobProtos bps;
   for (auto layer : net->layers()) {
@@ -338,7 +338,7 @@ void BPWorker::Forward(int step, Phase phase, NeuralNet* net) {
   map<string, string> label;
   for (auto& layer : net->layers()) {
     if (layer->partition_id() == id_) {
-      if (phase == kTrain) {
+      if (phase == kTrain && layer->unroll_index() == 0) {
         // wait until param is updated
         for (Param* p : layer->GetParams()) {
           Collect(step, p);
@@ -346,7 +346,7 @@ void BPWorker::Forward(int step, Phase phase, NeuralNet* net) {
       }
       // DLOG(ERROR) << "Forward " << layer->name();
       layer->ComputeFeature(phase | kForward, net->srclayers(layer));
-      if (job_conf_.debug() && grp_id_ == 0)
+      if (job_conf_.debug() && DisplayNow(step) && grp_id_ == 0)
         label[layer->name()] = layer->ToString(true, phase | kForward);
     }
   }
@@ -364,7 +364,7 @@ void BPWorker::Backward(int step, NeuralNet* net) {
     Layer* layer = *it;
     if (layer->partition_id() == id_) {
       layer->ComputeGradient(kTrain | kBackward, net->srclayers(layer));
-      if (job_conf_.debug() && grp_id_ == 0)
+      if (job_conf_.debug() && DisplayNow(step) && grp_id_ == 0)
         label[layer->name()] = layer->ToString(true, kTrain | kBackward);
       for (Param* p : layer->GetParams())
         Update(step, p);
@@ -377,6 +377,82 @@ void BPWorker::Backward(int step, NeuralNet* net) {
   }
 }
 
+/***************************BPTTWorker*********************************/
+void BPTTWorker::Forward(int step, Phase phase, NeuralNet* net) {
+  map<string, string> label;
+  for (auto& layer : net->layers()) {
+    if (layer->partition_id() == id_) {
+      if (phase == kTrain && layer->unroll_index() == 0) {
+        // wait until param is updated
+        for (Param* p : layer->GetParams()) {
+          Collect(step, p);
+          Zero(p->mutable_grad());
+        }
+      }
+      vector<Layer*> src = net->srclayers(layer);
+      // if full state rnn and not the starting of a new passing of the dataset,
+      // feed the hidden state of the last unit to the first unit.
+      if (layer->unroll_index() == 0 && full_state_ && !begin_) {
+        Layer* last = net->last_unroll_layer(layer);
+        if (last != layer) {
+          src.push_back(last);
+        }
+      }
+      // LOG(ERROR) << layer->name() << " forward";
+      // int ret =
+      layer->ComputeFeature(phase | kForward, src);
+      /*
+      if ((phase & Phase::kTrain) && ret == Status::kEnd)
+        begin_ = true;
+      */
+
+      if (job_conf_.debug() && DisplayNow(step) && grp_id_ == 0)
+        label[layer->name()] = layer->ToString(true, phase | kForward);
+    }
+  }
+  if (label.size()) {
+    const string path = Cluster::Get()->vis_folder() + "/fp-step"
+      + std::to_string(step) +"-loc" + std::to_string(id_) + ".json";
+    WriteStringToTextFile(path, net->ToGraph(false).ToJson(label));
+  }
+}
+
+void BPTTWorker::Backward(int step, NeuralNet* net) {
+  map<string, string> label;
+  auto& layers = net->layers();
+  for (auto it = layers.rbegin(); it != layers.rend(); it++) {
+    Layer* layer = *it;
+    if (layer->partition_id() == id_) {
+      layer->ComputeGradient(kTrain | kBackward | kAggGrad, net->srclayers(layer));
+      // LOG(ERROR) << layer->name() << " backward";
+      if (job_conf_.debug() && DisplayNow(step) && grp_id_ == 0)
+        label[layer->name()] = layer->ToString(true, kTrain | kBackward);
+      // unrolled layers share parameter data and grad, just update the 1st one
+      if (layer->unroll_index() == 0)
+        for (Param* p : layer->GetParams())
+          Update(step, p);
+    }
+  }
+  if (label.size()) {
+    const string path = Cluster::Get()->vis_folder() + "/bp-step"
+      + std::to_string(step) + "-loc" + std::to_string(id_) + ".json";
+    WriteStringToTextFile(path, net->ToGraph(false).Reverse().ToJson(label));
+  }
+}
+void BPTTWorker::Display(int flag, const std::string& prefix, NeuralNet* net) {
+  std::unordered_map<string, float> perf;
+  for (auto layer : net->layers()) {
+    if (layer->partition_id() == id_) {
+      const string& disp = layer->ToString(false, flag);
+      for (const auto& entry : GetMetricFromString(disp))
+        perf[entry.first] += entry.second;
+    }
+  }
+  string disp = prefix + " ";
+  for (const auto& entry : perf)
+    disp += entry.first + " = " + std::to_string(entry.second) + ", ";
+  LOG(ERROR) << disp;
+}
 /****************************CDWorker**********************************/
 void CDWorker::TrainOneBatch(int step, NeuralNet* net) {
   const auto& layers = net->layers();


[02/10] incubator-singa git commit: SINGA-120 - Implemented GRU and BPTT: Fixed bugs of returning null

Posted by wa...@apache.org.
SINGA-120 - Implemented GRU and BPTT: Fixed bugs of returning null


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/d43af9fe
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/d43af9fe
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/d43af9fe

Branch: refs/heads/master
Commit: d43af9fef5889da55c22d56487b91989e841daa8
Parents: c440e4a
Author: Ju Fan <fa...@gmail.com>
Authored: Fri Jan 1 10:52:20 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Wed Jan 6 01:53:05 2016 +0800

----------------------------------------------------------------------
 include/singa/neuralnet/neuron_layer.h | 33 ++++++++++++++++++++++++++++-
 1 file changed, 32 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d43af9fe/include/singa/neuralnet/neuron_layer.h
----------------------------------------------------------------------
diff --git a/include/singa/neuralnet/neuron_layer.h b/include/singa/neuralnet/neuron_layer.h
index b02aa7d..3f126ab 100644
--- a/include/singa/neuralnet/neuron_layer.h
+++ b/include/singa/neuralnet/neuron_layer.h
@@ -120,16 +120,47 @@ class DropoutLayer : public NeuronLayer {
  * Use it as output layer, it will generate random grad;
  * Use it as neuron layer, it will replicate data and grad.
  */
-class DummyLayer: public Layer {
+class DummyLayer: public NeuronLayer {
  public:
   void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override;
   void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
   void ComputeGradient(int flag, const vector<Layer*>& srclayers) override;
+
  private:
   bool input_ = false;  // use as input layer
   bool output_ = false;  // use as output layer
 };
 
+class GRULayer : public NeuronLayer {
+ public:
+  ~GRULayer();
+  void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override;
+  void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
+  void ComputeGradient(int flag, const vector<Layer*>& srclayers) override;
+
+  const std::vector<Param*> GetParams() const override {
+    if (bias_z_ != nullptr && bias_r_ != nullptr && bias_c_ != nullptr) {
+      std::vector<Param*> params{weight_z_hx_, weight_r_hx_,weight_c_hx_,
+        weight_z_hh_, weight_r_hh_, weight_c_hh_,
+        bias_z_, bias_r_, bias_c_};
+      return params;
+    } else {
+      std::vector<Param*> params{weight_z_hx_, weight_r_hx_,weight_c_hx_,
+        weight_z_hh_, weight_r_hh_, weight_c_hh_};
+      return params;
+    }
+  }
+
+ private:
+  int batchsize_; // batch size
+  int vdim_, hdim_; // dimensions
+
+  Blob<float> *update_gate, *reset_gate, *new_memory;
+
+  Param *weight_z_hx_, *weight_z_hh_, *bias_z_; // update gate
+  Param *weight_r_hx_, *weight_r_hh_, *bias_r_; // reset gate
+  Param *weight_c_hx_, *weight_c_hh_, *bias_c_; // new memory
+};
 
 /**
  * Layer that applies linear transformations as


[08/10] incubator-singa git commit: SINGA-120 - Implemented GRU and BPTT

Posted by wa...@apache.org.
SINGA-120 - Implemented GRU and BPTT

Change the new-memory computation formula to follow char-rnn, i.e., apply the element-wise multiplication with the reset gate before the matrix multiplication (sketched in formulas below).
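
As a sketch (notation assumptions: x_t is the input, h_{t-1} the previous hidden state/context, r_t and z_t the reset and update gates, W_{cx} stands for weight_c_hx_, W_{ch} for weight_c_hh_, and \odot is element-wise multiplication), the revised computation in this commit is:

\begin{aligned}
\tilde{h}_t &= \tanh\left(x_t W_{cx}^\top + (r_t \odot h_{t-1})\, W_{ch}^\top + b_c\right) \\
h_t &= z_t \odot \tilde{h}_t + (1 - z_t) \odot h_{t-1}
\end{aligned}

That is, the reset gate now multiplies h_{t-1} element-wise before the matrix product with W_{ch}, and the update gate now scales the new memory rather than the previous state; the gradient code in the gru.cc diff below is adjusted accordingly.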


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/6a4c9960
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/6a4c9960
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/6a4c9960

Branch: refs/heads/master
Commit: 6a4c9960e0795aeac6df282d7190b6f93b305c58
Parents: 959ef70
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Tue Jan 5 18:14:46 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Wed Jan 6 01:55:08 2016 +0800

----------------------------------------------------------------------
 Makefile.am                                |  2 +
 include/singa/neuralnet/connection_layer.h | 28 +++++++++++++
 include/singa/neuralnet/layer.h            | 14 +++++++
 include/singa/neuralnet/neuron_layer.h     |  2 +-
 include/singa/neuralnet/output_layer.h     | 15 +++++++
 include/singa/utils/updater.h              |  8 ++--
 src/driver.cc                              |  6 ++-
 src/neuralnet/neuralnet.cc                 |  9 ++---
 src/neuralnet/neuron_layer/gru.cc          | 52 +++++++++++--------------
 src/proto/job.proto                        | 14 +++++++
 src/utils/updater.cc                       | 28 ++++++-------
 src/worker.cc                              | 16 ++++++--
 12 files changed, 133 insertions(+), 61 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6a4c9960/Makefile.am
----------------------------------------------------------------------
diff --git a/Makefile.am b/Makefile.am
index d2b2aa8..7ae4537 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -75,6 +75,7 @@ SINGA_SRCS := src/driver.cc \
               src/neuralnet/connection_layer/concate.cc \
               src/neuralnet/connection_layer/slice.cc \
               src/neuralnet/connection_layer/split.cc \
+              src/neuralnet/connection_layer/rnn_dummy.cc \
               src/neuralnet/input_layer/char_rnn.cc \
               src/neuralnet/input_layer/onehot.cc \
               src/neuralnet/input_layer/csv.cc \
@@ -88,6 +89,7 @@ SINGA_SRCS := src/driver.cc \
               src/neuralnet/output_layer/argsort.cc \
               src/neuralnet/output_layer/csv.cc \
               src/neuralnet/output_layer/record.cc \
+              src/neuralnet/output_layer/char_rnn.cc \
               src/neuralnet/loss_layer/euclidean.cc \
               src/neuralnet/loss_layer/softmax.cc \
               src/neuralnet/neuron_layer/activation.cc \

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6a4c9960/include/singa/neuralnet/connection_layer.h
----------------------------------------------------------------------
diff --git a/include/singa/neuralnet/connection_layer.h b/include/singa/neuralnet/connection_layer.h
index a18f458..481d991 100644
--- a/include/singa/neuralnet/connection_layer.h
+++ b/include/singa/neuralnet/connection_layer.h
@@ -153,6 +153,34 @@ class SplitLayer : public ConnectionLayer {
   Layer2Index layer_idx_;
 };
 
+/**
+ * Dummy layer for RNN models, which provides input for other layers.
+ *
+ * In particular, it is used in the test phase of RNN models to connect other
+ * layers and avoid cycles in the neural net config.
+ */
+class RNNDummyLayer : public ConnectionLayer {
+ public:
+  void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override;
+  void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
+  void ComputeGradient(int flag, const vector<Layer*>& srclayers) {
+    LOG(FATAL) << "Not implemented";
+  }
+
+  const string srclayer(int step) const {
+    if (step > 0)
+      return dynamic_src_;
+    else
+      return "";
+  }
+
+ private:
+  string dynamic_src_;
+  float low_, high_;
+  bool integer_;
+  Layer* srclayer_;
+};
+
 
 }  // namespace singa
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6a4c9960/include/singa/neuralnet/layer.h
----------------------------------------------------------------------
diff --git a/include/singa/neuralnet/layer.h b/include/singa/neuralnet/layer.h
index f4738fa..c1612a2 100644
--- a/include/singa/neuralnet/layer.h
+++ b/include/singa/neuralnet/layer.h
@@ -36,6 +36,20 @@ using std::string;
 
 // TODO(wangwei) make AuxType a template argument for Layer.
 using AuxType = int;
+
+inline const string AddUnrollingPrefix(int unroll_idx, const string& name) {
+  return std::to_string(unroll_idx) + "#" + name;
+}
+inline const string AddPartitionSuffix(int partition_idx, const string& name) {
+  return name + "@" + std::to_string(partition_idx);
+}
+
+
+inline const string AddPrefixSuffix(int unroll_idx, int partition_idx,
+    const string& name) {
+  return std::to_string(unroll_idx) + "#" + name + "@" +
+    std::to_string(partition_idx);
+}
 /**
  * Base layer class.
  *
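
For reference, a usage sketch of the naming helpers added above (assumption: built against the patched singa/neuralnet/layer.h; the unroll index goes before '#', the partition index after '@').

#include <iostream>
#include "singa/neuralnet/layer.h"

int main() {
  std::cout << singa::AddUnrollingPrefix(1, "gru") << "\n";   // 1#gru
  std::cout << singa::AddPartitionSuffix(0, "gru") << "\n";   // gru@0
  std::cout << singa::AddPrefixSuffix(1, 0, "gru") << "\n";   // 1#gru@0
  return 0;
}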

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6a4c9960/include/singa/neuralnet/neuron_layer.h
----------------------------------------------------------------------
diff --git a/include/singa/neuralnet/neuron_layer.h b/include/singa/neuralnet/neuron_layer.h
index e587e38..e1a63a2 100644
--- a/include/singa/neuralnet/neuron_layer.h
+++ b/include/singa/neuralnet/neuron_layer.h
@@ -203,7 +203,7 @@ class GRULayer : public NeuronLayer {
   int batchsize_; // batch size
   int vdim_, hdim_; // dimensions
 
-  Blob<float> *update_gate, *reset_gate, *new_memory;
+  Blob<float> *update_gate, *reset_gate, *new_memory, *reset_context;
   //!< the GRU layer connects to two dst layers, hence it needs two grad blobs.
   Blob<float> aux_grad_;
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6a4c9960/include/singa/neuralnet/output_layer.h
----------------------------------------------------------------------
diff --git a/include/singa/neuralnet/output_layer.h b/include/singa/neuralnet/output_layer.h
index c7e5d6a..9071f33 100644
--- a/include/singa/neuralnet/output_layer.h
+++ b/include/singa/neuralnet/output_layer.h
@@ -80,5 +80,20 @@ class RecordOutputLayer : public OutputLayer {
   int inst_ = 0;  //!< instance No.
   io::Store* store_ = nullptr;
 };
+
+/**
+ * Output layer for the char-rnn model, which converts sample ids back to chars
+ * and dumps them to stdout.
+ */
+class CharRNNOutputLayer : public OutputLayer {
+ public:
+  void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override;
+
+  void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
+
+ private:
+  string vocab_;
+};
+
 }  // namespace singa
 #endif  // SINGA_NEURALNET_OUTPUT_LAYER_H_

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6a4c9960/include/singa/utils/updater.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/updater.h b/include/singa/utils/updater.h
index 575ab86..7fec78c 100644
--- a/include/singa/utils/updater.h
+++ b/include/singa/utils/updater.h
@@ -118,18 +118,16 @@ class NesterovUpdater : public Updater {
   void Update(int step, Param* param, float grad_scale) override;
 };
 
-/*
 class RMSPropUpdater : public Updater {
  public:
-  virtual void Update(int step, Param* param, float grad_scale);
+  void Init(const UpdaterProto &proto) override;
+  void Update(int step, Param* param, float grad_scale) override;
 
  protected:
-  float base_lr_;
-  float delta_;
   float rho_;
-  float weight_decay_;
 };
 
+/*
 class AdaDeltaUpdater : public Updater {
  public:
   virtual void Update(int step, Param* param, float grad_scale);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6a4c9960/src/driver.cc
----------------------------------------------------------------------
diff --git a/src/driver.cc b/src/driver.cc
index 21968bb..1e4929f 100644
--- a/src/driver.cc
+++ b/src/driver.cc
@@ -74,6 +74,7 @@ void Driver::Init(int argc, char **argv) {
   RegisterLayer<CharRNNInputLayer, int>(kCharRNN);
   RegisterLayer<RNNLabelLayer, int>(kRNNLabel);
   RegisterLayer<OneHotLayer, int>(kOneHot);
+  RegisterLayer<CharRNNOutputLayer, int>(kCharRNNOutput);
 
   // connection layers
   RegisterLayer<BridgeDstLayer, int>(kBridgeDst);
@@ -81,6 +82,7 @@ void Driver::Init(int argc, char **argv) {
   RegisterLayer<ConcateLayer, int>(kConcate);
   RegisterLayer<SliceLayer, int>(kSlice);
   RegisterLayer<SplitLayer, int>(kSplit);
+  RegisterLayer<RNNDummyLayer, int>(kRNNDummy);
 
   RegisterLayer<AccuracyLayer, int>(kAccuracy);
   RegisterLayer<ArgSortLayer, int>(kArgSort);
@@ -125,7 +127,7 @@ void Driver::Init(int argc, char **argv) {
   // register updaters
   RegisterUpdater<AdaGradUpdater>(kAdaGrad);
   RegisterUpdater<NesterovUpdater>(kNesterov);
-  // TODO(wangwei) RegisterUpdater<kRMSPropUpdater>(kRMSProp);
+  RegisterUpdater<RMSPropUpdater>(kRMSProp);
   RegisterUpdater<SGDUpdater>(kSGD);
 
   // register learning rate change methods
@@ -198,6 +200,8 @@ void Driver::Test(const JobProto& job_conf) {
   auto worker = Worker::Create(job_conf.train_one_batch());
   worker->Setup(0, 0, job_conf, nullptr, nullptr, nullptr);
   auto net = NeuralNet::Create(job_conf.neuralnet(), kTest, 1);
+  WriteStringToTextFile(Cluster::Get()->vis_folder() + "/test_net.json",
+      net->ToGraph(true).ToJson());
   vector<string> paths;
   for (const auto& p : job_conf.checkpoint_path())
     paths.push_back(p);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6a4c9960/src/neuralnet/neuralnet.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuralnet.cc b/src/neuralnet/neuralnet.cc
index f9579b1..49978a1 100644
--- a/src/neuralnet/neuralnet.cc
+++ b/src/neuralnet/neuralnet.cc
@@ -144,7 +144,7 @@ const NetProto NeuralNet::Unrolling(const NetProto& net_conf) {
     for (int i = 0; i < org_layer.unroll_len(); i ++) { // unroll
       LayerProto* unroll_layer = conf.add_layer();
       unroll_layer->CopyFrom(org_layer); // create a new layer conf
-      if (org_layer.unroll_len() > 1) {
+      // if (org_layer.unroll_len() > 1) {
         // update layer names
         std::stringstream sstm;
         sstm << i << '#' << unroll_layer->name();
@@ -160,7 +160,7 @@ const NetProto NeuralNet::Unrolling(const NetProto& net_conf) {
           sstm1 << i << '#' << param->name();
           param->set_name(sstm1.str());
         }
-      }
+      // }
       // clear unrolling related fields
       unroll_layer->clear_unroll_len();
       unroll_layer->clear_unroll_conn_type();
@@ -257,6 +257,7 @@ void NeuralNet::Load(const vector<string>& paths,
     ReadProtoFromBinaryFile(path.c_str(), &bps);
     for (int i = 0; i < bps.name_size(); i++) {
       if (params.find(bps.name(i)) != params.end()) {
+        // LOG(ERROR) << "Loading param = " << bps.name(i);
         params.at(bps.name(i))->FromProto(bps.blob(i));
         params.at(bps.name(i))->set_version(bps.version(i));
       }
@@ -458,12 +459,10 @@ Graph* NeuralNet::CreateGraph(const NetProto& netproto, int npartitions) {
   map<string, const LayerProto*> name2proto;
   for (const LayerProto& layer : net_w_connection.layer()) {
     vector<Node*> nodes;
-    char suffix[4];
     for (int i = 0; i < npartitions; i++) {
       LayerProto *proto = new LayerProto(layer);
-      snprintf(suffix, sizeof(suffix), "%02d", i);
       // differentiate partitions
-      string nodename = layer.name() + "@" + string(suffix);
+      string nodename = layer.name() + "@" + std::to_string(i);
       proto->set_name(nodename);
       proto->set_type(layer.type());
       proto->set_partition_dim(layer.partition_dim());

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6a4c9960/src/neuralnet/neuron_layer/gru.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuron_layer/gru.cc b/src/neuralnet/neuron_layer/gru.cc
index 9ba5a50..cf7425b 100644
--- a/src/neuralnet/neuron_layer/gru.cc
+++ b/src/neuralnet/neuron_layer/gru.cc
@@ -98,6 +98,8 @@ void GRULayer::Setup(const LayerProto& conf,
 
   update_gate = new Blob<float>(batchsize_, hdim_);
   reset_gate = new Blob<float>(batchsize_, hdim_);
+  // reset gate x context
+  reset_context = new Blob<float>(batchsize_, hdim_);
   new_memory = new Blob<float>(batchsize_, hdim_);
 }
 
@@ -130,24 +132,23 @@ void GRULayer::ComputeFeature(int flag,
 	Map<op::Sigmoid<float>,float>(*update_gate, update_gate);
 
 	// Compute the reset gate
-	GEMM(1.0f, 0.0f, src,*w_r_hx_t,reset_gate);
+	GEMM(1.0f, 0.0f, src, *w_r_hx_t, reset_gate);
 	if (bias_r_ != nullptr)
-		MVAddRow(1.0f,1.0f,bias_r_->data(),reset_gate);
+		MVAddRow(1.0f,1.0f, bias_r_->data(),reset_gate);
 	GEMM(1.0f, 1.0f, *context, *w_r_hh_t, reset_gate);
 	Map<op::Sigmoid<float>,float>(*reset_gate, reset_gate);
 
 	// Compute the new memory
-	GEMM(1.0f, 0.0f, src, *w_c_hx_t, new_memory);
+	Mult<float>(*reset_gate, *context, reset_context);
+	GEMM(1.0f, 0.0f, *reset_context, *w_c_hh_t, new_memory);
+	GEMM(1.0f, 1.0f, src, *w_c_hx_t, new_memory);
 	if (bias_c_ != nullptr)
-		MVAddRow(1.0f,1.0f,bias_c_->data(), new_memory);
-	Mult<float>(*reset_gate, *new_memory, new_memory);
-	GEMM(1.0f, 1.0f, *context, *w_c_hh_t, new_memory);
+		MVAddRow(1.0f, 1.0f, bias_c_->data(), new_memory);
 	Map<op::Tanh<float>,float>(*new_memory, new_memory);
 
-
-  Sub(*context, *new_memory, &data_);
+  Sub(*new_memory, *context, &data_);
   Mult(data_, *update_gate, &data_);
-  Add(data_, *new_memory, &data_);
+  AXPY(1.0f, *context, &data_);
 
 	// delete the pointers
 	if (srclayers.size() == 1)
@@ -192,24 +193,19 @@ void GRULayer::ComputeGradient(int flag,
 	Map<singa::op::TanhGrad<float>, float>(*new_memory, &dnewmdc);
 
 	Blob<float> dLdz (batchsize_, hdim_);
-	Sub<float>(*context, *new_memory, &dLdz);
+	Sub<float>(*new_memory, *context, &dLdz);
 	Mult<float>(dLdz, grad_, &dLdz);
 	Mult<float>(dLdz, dugatedz, &dLdz);
 
 	Blob<float> dLdc (batchsize_,hdim_);
-	Blob<float> z1 (batchsize_,hdim_);
-  z1.SetValue(1.0f);
-	AXPY<float>(-1.0f, *update_gate, &z1);
-	Mult(grad_,z1,&dLdc);
-	Mult(dLdc,dnewmdc,&dLdc);
+	Mult(grad_, *update_gate, &dLdc);
+	Mult(dLdc, dnewmdc, &dLdc);
 
 	Blob<float> reset_dLdc (batchsize_,hdim_);
-	Mult(dLdc, *reset_gate, &reset_dLdc);
+  GEMM(1.0f, 0.0f, dLdc, weight_c_hh_->data(), &reset_dLdc);
 
 	Blob<float> dLdr (batchsize_, hdim_);
-	Blob<float> cprev (batchsize_, hdim_);
-	GEMM(1.0f, 0.0f, *context, weight_c_hh_->data().T(), &cprev);
-	Mult(dLdc, cprev, &dLdr);
+	Mult(reset_dLdc, *context, &dLdr);
 	Mult(dLdr, drgatedr, &dLdr);
 
 	// Compute gradients for parameters of update gate
@@ -230,29 +226,25 @@ void GRULayer::ComputeGradient(int flag,
 
 	// Compute gradients for parameters of new memory
 	Blob<float> *dLdc_t = Transpose(dLdc);
-	GEMM(1.0f, beta, *dLdc_t, src,weight_c_hx_->mutable_grad());
+	GEMM(1.0f, beta, *dLdc_t, src, weight_c_hx_->mutable_grad());
+	GEMM(1.0f, beta, *dLdc_t, *reset_context, weight_c_hh_->mutable_grad());
 	if (bias_c_ != nullptr)
 		MVSumRow(1.0f, beta, dLdc, bias_c_->mutable_grad());
 	delete dLdc_t;
 
-	Blob<float> *reset_dLdc_t = Transpose(reset_dLdc);
-	GEMM(1.0f, beta, *reset_dLdc_t, *context, weight_c_hh_->mutable_grad());
-	delete reset_dLdc_t;
-
 	// Compute gradients for data input layer
 	if (srclayers[0]->mutable_grad(this) != nullptr) {
-		GEMM(1.0f,0.0f,dLdc, weight_c_hx_->data(), ilayer->mutable_grad(this));
-		GEMM(1.0f,1.0f,dLdz, weight_z_hx_->data(), ilayer->mutable_grad(this));
-		GEMM(1.0f,1.0f,dLdr, weight_r_hx_->data(), ilayer->mutable_grad(this));
+		GEMM(1.0f,0.0f, dLdc, weight_c_hx_->data(), ilayer->mutable_grad(this));
+		GEMM(1.0f,1.0f, dLdz, weight_z_hx_->data(), ilayer->mutable_grad(this));
+		GEMM(1.0f,1.0f, dLdr, weight_r_hx_->data(), ilayer->mutable_grad(this));
 	}
 
 	if (clayer != nullptr && clayer->mutable_grad(this) != nullptr) {
 		// Compute gradients for context layer
-		GEMM(1.0f, 0.0f, reset_dLdc, weight_c_hh_->data(),
-        clayer->mutable_grad(this));
+    Mult(reset_dLdc, *reset_gate, clayer->mutable_grad(this));
 		GEMM(1.0f, 1.0f, dLdr, weight_r_hh_->data(), clayer->mutable_grad(this));
 		GEMM(1.0f, 1.0f, dLdz, weight_z_hh_->data(), clayer->mutable_grad(this));
-		Add(clayer->grad(this), *update_gate, clayer->mutable_grad(this));
+		AXPY(-1.0f, *update_gate, clayer->mutable_grad(this));
     // LOG(ERROR) << "grad to prev gru " << Asum(clayer->grad(this));
 	}
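
For reference, the cell computed by the updated ComputeFeature above can be written out as a sketch (notation follows the code: W_{*,hx}/W_{*,hh} are the input-to-hidden and hidden-to-hidden weights, and the bias terms appear only when configured):

  \begin{aligned}
  z_t &= \sigma\!\left(x_t W_{z,hx}^{\top} + h_{t-1} W_{z,hh}^{\top} + b_z\right)\\
  r_t &= \sigma\!\left(x_t W_{r,hx}^{\top} + h_{t-1} W_{r,hh}^{\top} + b_r\right)\\
  c_t &= \tanh\!\left(x_t W_{c,hx}^{\top} + (r_t \odot h_{t-1})\, W_{c,hh}^{\top} + b_c\right)\\
  h_t &= h_{t-1} + z_t \odot (c_t - h_{t-1})
  \end{aligned}

The gate gradients used by the updated ComputeGradient then correspond to the code variables dLdz, dLdc and dLdr (with \sigma'(\cdot) and \tanh'(\cdot) the activation derivatives evaluated at the gate values):

  \begin{aligned}
  \mathrm{dLdz} &= \tfrac{\partial L}{\partial h_t} \odot (c_t - h_{t-1}) \odot \sigma'(\cdot)\\
  \mathrm{dLdc} &= \tfrac{\partial L}{\partial h_t} \odot z_t \odot \tanh'(\cdot)\\
  \mathrm{dLdr} &= \left(\mathrm{dLdc}\; W_{c,hh}\right) \odot h_{t-1} \odot \sigma'(\cdot)
  \end{aligned}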
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6a4c9960/src/proto/job.proto
----------------------------------------------------------------------
diff --git a/src/proto/job.proto b/src/proto/job.proto
index e520eba..28a3a68 100644
--- a/src/proto/job.proto
+++ b/src/proto/job.proto
@@ -253,6 +253,7 @@ message LayerProto {
   optional ConcateProto concate_conf = 502;
   optional SliceProto slice_conf = 503;
   optional SplitProto split_conf = 504;
+  optional RNNDummyProto rnn_dummy_conf = 505;
 
   extensions 1001 to 1100;
 }
@@ -456,6 +457,17 @@ message DummyProto {
   repeated int32 shape = 3;
 }
 
+message RNNDummyProto {
+  optional string dynamic_srclayer = 1;
+  // if shape set, random generate the data blob
+  repeated int32 shape = 2;
+  // if integer is true, generate integer data
+  optional bool integer = 3 [default = false];
+  // range of the random generation
+  optional float low = 4 [default = 0];
+  optional float high = 5 [default = 0];
+}
+
 // Message that stores parameters used by DropoutLayer
 message DropoutProto {
   // dropout ratio
@@ -667,6 +679,7 @@ enum LayerType {
   kArgSort = 401;
   kCSVOutput = 402;
   kRecordOutput = 403;
+  kCharRNNOutput = 404;
 
   /*
    * Connection layers
@@ -677,6 +690,7 @@ enum LayerType {
   kConcate = 502;
   kSlice = 503;
   kSplit = 504;
+  kRNNDummy = 505;
 
   /*
    * User defined layer
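
A hypothetical sketch of how an RNNDummy layer could be configured in code (accessor names follow the standard protobuf-generated API for the fields above; the layer name "input", the source layer "softmax" and all numeric values are illustrative only):

  LayerProto* rnn_dummy = net_conf->add_layer();
  rnn_dummy->set_name("input");                  // illustrative name
  rnn_dummy->set_type(kRNNDummy);
  RNNDummyProto* dummy_conf = rnn_dummy->mutable_rnn_dummy_conf();
  dummy_conf->set_dynamic_srclayer("softmax");   // source resolved per step at test time (see the worker.cc change above)
  dummy_conf->add_shape(1);                      // if shape is set, the data blob is randomly generated
  dummy_conf->add_shape(100);
  dummy_conf->set_integer(true);                 // generate integer data
  dummy_conf->set_low(0);                        // lower bound of the random range
  dummy_conf->set_high(100);                     // upper bound of the random range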

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6a4c9960/src/utils/updater.cc
----------------------------------------------------------------------
diff --git a/src/utils/updater.cc b/src/utils/updater.cc
index a9f70c0..200670a 100644
--- a/src/utils/updater.cc
+++ b/src/utils/updater.cc
@@ -174,31 +174,27 @@ void AdaGradUpdater::Update(int step, Param* param, float grad_scale) {
   data -= lr * grad / (F<sqrtop>(history, proto_.delta()));
 }
 
-/***********************RMSProp******************************
-void RMSPropUpdater::Init(const UpdaterProto& proto){
+/***********************RMSProp******************************/
+void RMSPropUpdater::Init(const UpdaterProto& proto) {
   Updater::Init(proto);
-  base_lr_ = proto.base_lr();
-  CHECK_GT(base_lr_, 0);
-  delta_ = proto.delta();
   rho_ = proto.rmsprop_conf().rho();
-  weight_decay_ = proto.weight_decay();
 }
 
-void RMSPropUpdater::Update(int step, Param* param, float grad_scale){
+void RMSPropUpdater::Update(int step, Param* param, float grad_scale) {
   Shape<1> s=Shape1(param->size());
   Tensor<cpu, 1> data(param->mutable_cpu_data(), s);
   Tensor<cpu, 1> grad(param->mutable_cpu_grad(), s);
   Tensor<cpu, 1> history(param->mutable_cpu_history(), s);
-  history=history*rho_+(1-rho_)*F<op::square>(grad*grad_scale);
-  float lr=GetLearningRate(step)*param->lr_scale();
-  float wd=weight_decay_*param->wd_scale();
-  if(wd>0){ // L2 regularization
-    grad+=data*wd;
-  }
-  data-=lr*grad/(F<op::sqrtop>(history,delta_));
+  float lr = lr_gen_->Get(step) * param->lr_scale();
+  float wd = weight_decay_ * param->wd_scale();
+  if (grad_scale != 1.f)
+    grad *= grad_scale;
+  if (wd > 0)  //  L2 regularization, should be done after timing grad_scale
+    grad += data * wd;
+  history = history * rho_ + (1 - rho_) * F<square>(grad);
+  data -= lr * grad / (F<sqrtop>(history, proto_.delta()));
 }
-
-***********************AdaDelta******************************
+/***********************AdaDelta******************************
 void AdaDeltaUpdater::Init(const UpdaterProto& proto){
   Updater::Init(proto);
   delta_=proto.delta();
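
Written out, the rewritten RMSPropUpdater::Update performs the standard per-element RMSProp step (assuming sqrtop(x, \delta) computes \sqrt{x + \delta}):

  \begin{aligned}
  g &\leftarrow s\, g && \text{(grad\_scale } s\text{, if } s \neq 1)\\
  g &\leftarrow g + \lambda\, \theta && \text{(weight decay } \lambda\text{, if } \lambda > 0)\\
  r &\leftarrow \rho\, r + (1 - \rho)\, g^{2}\\
  \theta &\leftarrow \theta - \eta\, g / \sqrt{r + \delta} && \text{(} \eta \text{ from lr\_gen\_, scaled by lr\_scale)}
  \end{aligned}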

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6a4c9960/src/worker.cc
----------------------------------------------------------------------
diff --git a/src/worker.cc b/src/worker.cc
index 4e1dc75..abe74e7 100644
--- a/src/worker.cc
+++ b/src/worker.cc
@@ -390,13 +390,24 @@ void BPTTWorker::Forward(int step, Phase phase, NeuralNet* net) {
         }
       }
       vector<Layer*> src = net->srclayers(layer);
+      if ((phase & kTest) && typeid(*layer) == typeid(RNNDummyLayer)) {
+        CHECK_LE(src.size(), 1);
+        auto dummy = dynamic_cast<RNNDummyLayer*>(layer);
+        Layer* srclayer = net->name2layer(dummy->srclayer(step));
+        if (step > 0)
+          CHECK(srclayer != nullptr);
+        if (srclayer != nullptr) {
+          src.clear();
+          src.push_back(srclayer);
+        }
+      }
       // if full state rnn and not the starting of a new passing of the dataset,
       // feed the hidden state of the last unit to the first unit.
       if (layer->unroll_index() == 0 && full_state_ && !begin_) {
         Layer* last = net->last_unroll_layer(layer);
-        if (last != layer) {
+        CHECK(last != nullptr);
+        if (last != layer || (phase & kTest))
           src.push_back(last);
-        }
       }
       // LOG(ERROR) << layer->name() << " forward";
       // int ret =
@@ -405,7 +416,6 @@ void BPTTWorker::Forward(int step, Phase phase, NeuralNet* net) {
       if ((phase & Phase::kTrain) && ret == Status::kEnd)
         begin_ = true;
       */
-
       if (job_conf_.debug() && DisplayNow(step) && grp_id_ == 0)
         label[layer->name()] = layer->ToString(true, phase | kForward);
     }


[03/10] incubator-singa git commit: SINGA-120 - Implemented GRU and BPTT: Fixed bugs regarding the Map function

Posted by wa...@apache.org.
SINGA-120 - Implemented GRU and BPTT: Fixed bugs regarding the Map function


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/c440e4a4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/c440e4a4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/c440e4a4

Branch: refs/heads/master
Commit: c440e4a491a970ae6af8eaf5e371ee6b2509af15
Parents: 777dfb6
Author: Ju Fan <fa...@gmail.com>
Authored: Fri Jan 1 10:51:17 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Wed Jan 6 01:53:05 2016 +0800

----------------------------------------------------------------------
 include/singa/utils/math_blob.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c440e4a4/include/singa/utils/math_blob.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/math_blob.h b/include/singa/utils/math_blob.h
index 34e67ad..125509a 100644
--- a/include/singa/utils/math_blob.h
+++ b/include/singa/utils/math_blob.h
@@ -280,7 +280,7 @@ template<typename Op, typename Dtype>
 void Map(const Blob<Dtype> & A, const Blob<Dtype> & B, Blob<Dtype> * C) {
   CHECK_EQ(A.count(), B.count()) << "Blobs must have the same size";
   CHECK_EQ(A.count(), C->count()) << "Blobs must have the same size";
-  cpu_e_f<Op>(A.count(), A.cpu_data(), B.cpu_data(), C->mutable_cpu_data());
+  //cpu_e_f<Op>(A.count(), A.cpu_data(), B.cpu_data(), C->mutable_cpu_data());
   auto context = Singleton<Context>::Instance();
   int device = context->device_id(std::this_thread::get_id());
   if (device == -1) {
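
The bug here was that the element-wise op ran unconditionally on the CPU via cpu_e_f before the device dispatch below it. A minimal sketch of the intended shape of the function, with the GPU branch assumed since it is not shown in this hunk:

  template<typename Op, typename Dtype>
  void Map(const Blob<Dtype>& A, const Blob<Dtype>& B, Blob<Dtype>* C) {
    CHECK_EQ(A.count(), B.count()) << "Blobs must have the same size";
    CHECK_EQ(A.count(), C->count()) << "Blobs must have the same size";
    auto context = Singleton<Context>::Instance();
    int device = context->device_id(std::this_thread::get_id());
    if (device == -1) {
      // CPU path: apply Op element-wise over the two input blobs
      cpu_e_f<Op>(A.count(), A.cpu_data(), B.cpu_data(), C->mutable_cpu_data());
    } else {
      // GPU path (assumed): launch the corresponding device kernel here
    }
  }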


[07/10] incubator-singa git commit: SINGA-120 - Implemented GRU and BPTT: 1) Implemented the unrolling function for BPTT; 2) Added tests for unrolling

Posted by wa...@apache.org.
SINGA-120 - Implemented GRU and BPTT: 1) Implemented the unrolling function for BPTT; 2) Added tests for unrolling


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/777dfb6a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/777dfb6a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/777dfb6a

Branch: refs/heads/master
Commit: 777dfb6a6fc4058b35368dfbe1fba4a27a93e828
Parents: 473c985
Author: Ju Fan <fa...@gmail.com>
Authored: Fri Jan 1 10:50:20 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Wed Jan 6 01:53:05 2016 +0800

----------------------------------------------------------------------
 include/singa/neuralnet/neuralnet.h |   1 +
 src/neuralnet/neuralnet.cc          | 121 ++++++++--
 src/test/test_connection_layers.cc  |   8 +-
 src/test/test_unrolling.cc          | 398 +++++++++++++++++++++++++++++++
 4 files changed, 510 insertions(+), 18 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/777dfb6a/include/singa/neuralnet/neuralnet.h
----------------------------------------------------------------------
diff --git a/include/singa/neuralnet/neuralnet.h b/include/singa/neuralnet/neuralnet.h
index bc1a7d8..be8f5c8 100644
--- a/include/singa/neuralnet/neuralnet.h
+++ b/include/singa/neuralnet/neuralnet.h
@@ -58,6 +58,7 @@ class NeuralNet {
   static NeuralNet* Create(const NetProto& net_conf, Phase phase,
                            int npartitions);
 
+  static const NetProto Unrolling(const NetProto& net_conf);
   /**
    * construct the net structure from protocol buffer.
    * @param netproto neural net config

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/777dfb6a/src/neuralnet/neuralnet.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuralnet.cc b/src/neuralnet/neuralnet.cc
index 9cdaff4..6bb0ecd 100644
--- a/src/neuralnet/neuralnet.cc
+++ b/src/neuralnet/neuralnet.cc
@@ -24,6 +24,8 @@
 #include <algorithm>
 #include <queue>
 #include "singa/utils/singleton.h"
+#include <unordered_map>
+using namespace std;
 
 namespace singa {
 
@@ -36,9 +38,6 @@ NeuralNet* NeuralNet::Create(const NetProto& net_conf, Phase phase,
   NetProto conf;
   conf.CopyFrom(net_conf);
   conf.clear_layer();
-  // for sharing param conf
-  std::unordered_map<string, ParamProto*> name2param;
-  std::vector<ParamProto*> shares;
   // flag=0: neither exclude nor include field appears
   // flag=1: exclude field appears
   // flag=2: include field appears
@@ -78,16 +77,25 @@ NeuralNet* NeuralNet::Create(const NetProto& net_conf, Phase phase,
     // using net partition if layer partition is not set
     if (!layer_conf->has_partition_dim())
       layer_conf->set_partition_dim(net_conf.partition_dim());
-    for (int i = 0; i < layer_conf->param_size(); i++) {
-      ParamProto* param = layer_conf->mutable_param(i);
-      if (param->has_name() && param->name() != "") {
-        CHECK(name2param.find(param->name()) == name2param.end())
-          << "param name is repeated: " << param->name();
-        name2param[param->name()] = param;
-      }
-      if (param->has_share_from() && param->share_from() != "")
-        shares.push_back(param);
-    }
+  }
+  //LOG(INFO) << "Before unrolling: \n" << conf.DebugString();
+  conf = Unrolling (conf);
+
+  // Copy shared parameters for sharing param conf
+  std::unordered_map<string, ParamProto*> name2param;
+  std::vector<ParamProto*> shares;
+  for (int index = 0; index < conf.layer_size();index ++) {
+	  LayerProto* layer = conf.mutable_layer(index);
+	  for (int i = 0; i < layer->param_size(); i++) {
+		  ParamProto* param = layer->mutable_param(i);
+		  if (param->has_name() && param->name() != "") {
+			  CHECK(name2param.find(param->name()) == name2param.end())
+	        		  << "param name is repeated: " << param->name();
+			  name2param[param->name()] = param;
+		  }
+		  if (param->has_share_from() && param->share_from() != "")
+			  shares.push_back(param);
+	  }
   }
   for (auto param : shares) {
     const std::string from = param->share_from();
@@ -104,6 +112,91 @@ NeuralNet* NeuralNet::Create(const NetProto& net_conf, Phase phase,
   return new NeuralNet(conf, npartitions);
 }
 
+const NetProto NeuralNet::Unrolling(const NetProto& net_conf) {
+	// Step 1: Unroll each layer & set parameter sharing
+	NetProto conf;
+
+	std::vector<std::vector<int>> layer_groups;
+	std::unordered_map<string,int> org_layer_names;
+	for (int index = 0; index < net_conf.layer_size(); index ++) {
+		const LayerProto& org_layer = net_conf.layer(index);
+		org_layer_names[org_layer.name()] = index; // layer_name -> index
+
+		std::vector<int> layer_group;
+		for (int i = 0; i < org_layer.unroll_len(); i ++) { // unroll
+			LayerProto* unroll_layer = conf.add_layer();
+			unroll_layer->CopyFrom(org_layer); // create a new layer conf
+			if (org_layer.unroll_len() > 1) {
+				// update layer names
+				std::stringstream sstm;
+				sstm << unroll_layer->name() << "_" << i;
+				unroll_layer->set_name(sstm.str());
+				// update layer parameter sharing
+				for (int j = 0; j < unroll_layer->param_size(); j ++) {
+					ParamProto* param = unroll_layer->mutable_param(j);
+					if (i == 0) continue; // no need to rename parameters in the i-th unrolled layer
+					if (!param->has_share_from() || param->share_from() == "") {// not shared from others
+						param->set_share_from(param->name());
+					}
+					std::stringstream sstm1;
+					sstm1 << param->name() << "_" << i;
+					param->set_name(sstm1.str());
+				}
+			}
+			// clear unrolling related fields
+			unroll_layer->clear_unroll_len();
+			unroll_layer->clear_unroll_conn_type();
+			unroll_layer->clear_shift();
+			unroll_layer->clear_srclayers();
+
+			layer_group.push_back(conf.layer_size() - 1);
+		}
+		layer_groups.push_back(layer_group);
+	}
+	// Step 2: Connect unrolled layers by setting `srclayers`
+	for (int index = 0; index < net_conf.layer_size(); index ++) {
+		const LayerProto& org_layer = net_conf.layer(index);
+		if (org_layer.srclayers_size() == 0) continue; // no src layer
+		//TODO(fanju): add LSTM when it is ready
+		if (org_layer.type() == kGRU) { // connect GRU layers
+			for (unsigned int j = 1; j < layer_groups[index].size(); j ++) {
+				LayerProto* unroll_layer = conf.mutable_layer(layer_groups[index][j]);
+				unroll_layer->add_srclayers(conf.layer(layer_groups[index][j-1]).name());
+			}
+		}
+		for (int i = 0; i < org_layer.srclayers_size(); i ++) {
+			const string& org_layer_src = org_layer.srclayers(i);
+
+			singa::UnrollConnType unroll_conn_type = kUnrollOneToOne; // Default value
+			if (i < org_layer.unroll_conn_type_size()) unroll_conn_type = org_layer.unroll_conn_type(i);
+			unsigned int shift = 0; // Default shift value
+			if (i < org_layer.shift_size()) shift = org_layer.shift(i);
+
+			const std::vector<int> unroll_layer_srcs = layer_groups[org_layer_names[org_layer_src]];
+
+			for (unsigned int j = 0; j < layer_groups[index].size(); j ++) {
+				LayerProto* unroll_layer = conf.mutable_layer(layer_groups[index][j]);
+				// Update src layers of `unroll_layer` by considering the types
+				if (unroll_conn_type == kUnrollOneToAll) {
+					for (int unroll_layer_src : unroll_layer_srcs) {
+						unroll_layer->add_srclayers(conf.layer(unroll_layer_src).name());
+					}
+				} else if (unroll_conn_type == kUnrollOneToOne) {
+					if (j < shift) continue; // no need to connect with the src
+					int unroll_layer_src = unroll_layer_srcs[j - shift];
+					unroll_layer->add_srclayers(conf.layer(unroll_layer_src).name());
+				} else if (unroll_conn_type == kUnrollFirstToLast) {
+					if (j > 0) break;
+					int unroll_layer_src = unroll_layer_srcs[unroll_layer_srcs.size() - 1];
+					unroll_layer->add_srclayers(conf.layer(unroll_layer_src).name());
+				}
+			}
+		}
+	}
+	return conf;
+}
+
+
 NeuralNet::NeuralNet(NetProto netproto, int npartitions) {
   LOG(INFO) << "Constructing NeuralNet...";
   auto graph = CreateGraph(netproto, npartitions);
@@ -260,7 +353,7 @@ NetProto NeuralNet::AddPartitionConnectionLayers(const NetProto& netproto,
    *   (NO)  src_pdim = dst_pdim ?
    *           (YES) Direct Connection
    *           (NO)  Slice -> Concate
-   */ 
+   */
   for (const LayerProto& origin_layer : netproto.layer()) {
     LayerProto* dst_layer = name2proto[origin_layer.name()];
     int dst_pdim = dst_layer->partition_dim();
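
A short usage sketch, mirroring test_unrolling.cc below (job_conf here is an illustrative JobProto): unrolling is applied to the net config before the graph is built, so parameter-conf sharing is resolved on the unrolled layers.

  // A layer "gru" with unroll_len(3) is replicated as "gru_0", "gru_1", "gru_2";
  // params of the later replicas are renamed with an "_<i>" suffix and
  // share_from() the replica-0 parameter.
  NetProto unrolled = NeuralNet::Unrolling(job_conf.neuralnet());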

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/777dfb6a/src/test/test_connection_layers.cc
----------------------------------------------------------------------
diff --git a/src/test/test_connection_layers.cc b/src/test/test_connection_layers.cc
index 5517bde..6529840 100644
--- a/src/test/test_connection_layers.cc
+++ b/src/test/test_connection_layers.cc
@@ -179,8 +179,8 @@ TEST(ConnectionLayerTest, DataSliceTest) {
   proto_slice.mutable_slice_conf()->set_num_slices(K);
   SliceLayer slice;
   slice.Setup(proto_slice, src_slice);
-  ASSERT_EQ(slice.data(static_cast<Layer*>(&slice)).shape(0), N / K);
-  ASSERT_EQ(slice.data(static_cast<Layer*>(&slice)).shape(1), M);
+  ASSERT_EQ(slice.data(nullptr).shape(0), N / K);
+  ASSERT_EQ(slice.data(nullptr).shape(1), M);
 
   // use dummy as output layers
   LayerProto proto_out[K];
@@ -236,8 +236,8 @@ TEST(ConnectionLayerTest, ModelSliceTest) {
   proto_slice.mutable_slice_conf()->set_num_slices(K);
   SliceLayer slice;
   slice.Setup(proto_slice, src_slice);
-  ASSERT_EQ(slice.data(static_cast<Layer*>(&slice)).shape(0), N);
-  ASSERT_EQ(slice.data(static_cast<Layer*>(&slice)).shape(1), M / K);
+  ASSERT_EQ(slice.data(nullptr).shape(0), N);
+  ASSERT_EQ(slice.data(nullptr).shape(1), M / K);
 
   // use dummy as output layers
   LayerProto proto_out[K];

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/777dfb6a/src/test/test_unrolling.cc
----------------------------------------------------------------------
diff --git a/src/test/test_unrolling.cc b/src/test/test_unrolling.cc
new file mode 100644
index 0000000..e32c528
--- /dev/null
+++ b/src/test/test_unrolling.cc
@@ -0,0 +1,398 @@
+/************************************************************
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ *************************************************************/
+#include <string>
+#include <vector>
+#include <fstream>
+#include <iostream>
+using namespace std;
+
+#include "gtest/gtest.h"
+#include "singa/neuralnet/input_layer.h"
+#include "singa/neuralnet/neuron_layer.h"
+#include "singa/neuralnet/neuralnet.h"
+#include "singa/neuralnet/connection_layer.h"
+#include "singa/driver.h"
+#include "singa/proto/job.pb.h"
+#include "singa/utils/common.h"
+
+using namespace singa;
+
+class UnrollingTest: public ::testing::Test {
+protected:
+	virtual void SetUp() {
+		NetProto* net_conf1 = job_conf1.mutable_neuralnet();
+
+		LayerProto* data_layer1 = net_conf1->add_layer();
+		data_layer1->set_name("data");
+		data_layer1->set_type(kRecordInput);
+
+		LayerProto* embedding_layer1 = net_conf1->add_layer();
+		embedding_layer1->set_name("embedding");
+		embedding_layer1->set_type(kDummy);
+		embedding_layer1->add_srclayers("data");
+		embedding_layer1->set_unroll_len(3);
+		embedding_layer1->add_unroll_conn_type(kUnrollOneToAll);
+		embedding_layer1->add_shift(0);
+
+		LayerProto* gru_layer1 = net_conf1->add_layer();
+		gru_layer1->set_name("gru");
+		gru_layer1->set_type(kGRU);
+		gru_layer1->add_srclayers("embedding");
+		gru_layer1->mutable_gru_conf()->set_dim_hidden(20);
+		gru_layer1->mutable_gru_conf()->set_bias_term(false);
+		gru_layer1->add_param()->set_name("w_z_hx");
+		gru_layer1->add_param()->set_name("w_r_hx");
+		gru_layer1->add_param()->set_name("w_c_hx");
+		gru_layer1->add_param()->set_name("w_z_hh");
+		gru_layer1->add_param()->set_name("w_r_hh");
+		gru_layer1->add_param()->set_name("w_c_hh");
+		gru_layer1->set_unroll_len(3);
+		gru_layer1->add_unroll_conn_type(kUnrollOneToOne);
+		gru_layer1->add_shift(0);
+
+		LayerProto* out_layer1 = net_conf1->add_layer();
+		out_layer1->set_name("out");
+		out_layer1->set_type(kInnerProduct);
+		out_layer1->add_srclayers("gru");
+		out_layer1->mutable_innerproduct_conf()->set_num_output(100);
+		out_layer1->add_param()->set_name("w");
+		out_layer1->add_param()->set_name("b");
+		out_layer1->set_unroll_len(3);
+		out_layer1->add_unroll_conn_type(kUnrollOneToOne);
+		out_layer1->add_shift(0);
+
+		LayerProto* softmax_layer1 = net_conf1->add_layer();
+		softmax_layer1->set_name("softmax");
+		softmax_layer1->set_type(kSoftmax);
+		softmax_layer1->add_srclayers("out");
+		softmax_layer1->set_unroll_len(3);
+		softmax_layer1->add_unroll_conn_type(kUnrollOneToOne);
+		softmax_layer1->add_shift(0);
+
+		LayerProto* loss_layer1 = net_conf1->add_layer();
+		loss_layer1->set_name("loss");
+		loss_layer1->set_type(kSoftmaxLoss);
+		loss_layer1->add_srclayers("softmax");
+		loss_layer1->add_srclayers("data");
+		loss_layer1->set_unroll_len(3);
+		loss_layer1->add_unroll_conn_type(kUnrollOneToOne);
+		loss_layer1->add_shift(0);
+		loss_layer1->add_unroll_conn_type(kUnrollOneToAll);
+		loss_layer1->add_shift(0);
+
+
+		/*
+		 * Initialize job conf 2
+		 */
+		NetProto* net_conf2 = job_conf2.mutable_neuralnet();
+
+		LayerProto* data_layer2 = net_conf2->add_layer();
+		data_layer2->set_name("data");
+		data_layer2->set_type(kRecordInput);
+
+		LayerProto* embedding_layer2 = net_conf2->add_layer();
+		embedding_layer2->set_name("embedding");
+		embedding_layer2->set_type(kDummy);
+		embedding_layer2->add_srclayers("data");
+		embedding_layer2->add_srclayers("softmax");
+		embedding_layer2->set_unroll_len(3);
+		embedding_layer2->add_unroll_conn_type(kUnrollOneToAll);
+		embedding_layer2->add_shift(0);
+		embedding_layer2->add_unroll_conn_type(kUnrollOneToOne);
+		embedding_layer2->add_shift(1);
+
+		LayerProto* gru_layer2 = net_conf2->add_layer();
+		gru_layer2->set_name("gru");
+		gru_layer2->set_type(kGRU);
+		gru_layer2->add_srclayers("embedding");
+		gru_layer2->mutable_gru_conf()->set_dim_hidden(20);
+		gru_layer2->mutable_gru_conf()->set_bias_term(false);
+		gru_layer2->add_param()->set_name("w_z_hx");
+		gru_layer2->add_param()->set_name("w_r_hx");
+		gru_layer2->add_param()->set_name("w_c_hx");
+		gru_layer2->add_param()->set_name("w_z_hh");
+		gru_layer2->add_param()->set_name("w_r_hh");
+		gru_layer2->add_param()->set_name("w_c_hh");
+		gru_layer2->set_unroll_len(3);
+		gru_layer2->add_unroll_conn_type(kUnrollOneToOne);
+		gru_layer2->add_shift(0);
+
+		LayerProto* out_layer2 = net_conf2->add_layer();
+		out_layer2->set_name("out");
+		out_layer2->set_type(kInnerProduct);
+		out_layer2->add_srclayers("gru");
+		out_layer2->mutable_innerproduct_conf()->set_num_output(100);
+		out_layer2->add_param()->set_name("w");
+		out_layer2->add_param()->set_name("b");
+		out_layer2->set_unroll_len(3);
+		out_layer2->add_unroll_conn_type(kUnrollOneToOne);
+		out_layer2->add_shift(0);
+
+		LayerProto* softmax_layer2 = net_conf2->add_layer();
+		softmax_layer2->set_name("softmax");
+		softmax_layer2->set_type(kSoftmax);
+		softmax_layer2->add_srclayers("out");
+		softmax_layer2->set_unroll_len(3);
+		softmax_layer2->add_unroll_conn_type(kUnrollOneToOne);
+		softmax_layer2->add_shift(0);
+
+		LayerProto* loss_layer2 = net_conf2->add_layer();
+		loss_layer2->set_name("loss");
+		loss_layer2->set_type(kSoftmaxLoss);
+		loss_layer2->add_srclayers("softmax");
+		loss_layer2->add_srclayers("data");
+		loss_layer2->set_unroll_len(3);
+		loss_layer2->add_unroll_conn_type(kUnrollOneToOne);
+		loss_layer2->add_shift(0);
+		loss_layer2->add_unroll_conn_type(kUnrollOneToAll);
+		loss_layer2->add_shift(0);
+	}
+
+	singa::JobProto job_conf1;
+	singa::JobProto job_conf2;
+};
+
+TEST_F(UnrollingTest, GRULanguageModelTrain) {
+	NetProto net;
+	net.CopyFrom(job_conf1.neuralnet());
+	NetProto unrolled_net = NeuralNet::Unrolling(net);
+	EXPECT_EQ("data", unrolled_net.layer(0).name());
+
+	EXPECT_EQ("embedding_0", unrolled_net.layer(1).name());
+	EXPECT_EQ(1, unrolled_net.layer(1).srclayers_size());
+	EXPECT_EQ("data", unrolled_net.layer(1).srclayers(0));
+
+	EXPECT_EQ("embedding_1", unrolled_net.layer(2).name());
+	EXPECT_EQ(1, unrolled_net.layer(2).srclayers_size());
+	EXPECT_EQ("data", unrolled_net.layer(2).srclayers(0));
+
+	EXPECT_EQ("embedding_2", unrolled_net.layer(3).name());
+	EXPECT_EQ(1, unrolled_net.layer(3).srclayers_size());
+	EXPECT_EQ("data", unrolled_net.layer(3).srclayers(0));
+
+	EXPECT_EQ("gru_0", unrolled_net.layer(4).name());
+	EXPECT_EQ(1, unrolled_net.layer(4).srclayers_size());
+	EXPECT_EQ("embedding_0", unrolled_net.layer(4).srclayers(0));
+	EXPECT_EQ("w_z_hx", unrolled_net.layer(4).param(0).name());
+	EXPECT_EQ("w_r_hx", unrolled_net.layer(4).param(1).name());
+	EXPECT_EQ("w_c_hx", unrolled_net.layer(4).param(2).name());
+	EXPECT_EQ("w_z_hh", unrolled_net.layer(4).param(3).name());
+	EXPECT_EQ("w_r_hh", unrolled_net.layer(4).param(4).name());
+	EXPECT_EQ("w_c_hh", unrolled_net.layer(4).param(5).name());
+
+	EXPECT_EQ("gru_1", unrolled_net.layer(5).name());
+	EXPECT_EQ(2, unrolled_net.layer(5).srclayers_size());
+	EXPECT_EQ("gru_0", unrolled_net.layer(5).srclayers(0));
+	EXPECT_EQ("embedding_1", unrolled_net.layer(5).srclayers(1));
+	EXPECT_EQ("w_z_hx_1", unrolled_net.layer(5).param(0).name());
+	EXPECT_EQ("w_z_hx", unrolled_net.layer(5).param(0).share_from());
+	EXPECT_EQ("w_r_hx_1", unrolled_net.layer(5).param(1).name());
+	EXPECT_EQ("w_r_hx", unrolled_net.layer(5).param(1).share_from());
+	EXPECT_EQ("w_c_hx_1", unrolled_net.layer(5).param(2).name());
+	EXPECT_EQ("w_c_hx", unrolled_net.layer(5).param(2).share_from());
+	EXPECT_EQ("w_z_hh_1", unrolled_net.layer(5).param(3).name());
+	EXPECT_EQ("w_z_hh", unrolled_net.layer(5).param(3).share_from());
+	EXPECT_EQ("w_r_hh_1", unrolled_net.layer(5).param(4).name());
+	EXPECT_EQ("w_r_hh", unrolled_net.layer(5).param(4).share_from());
+	EXPECT_EQ("w_c_hh_1", unrolled_net.layer(5).param(5).name());
+	EXPECT_EQ("w_c_hh", unrolled_net.layer(5).param(5).share_from());
+
+	EXPECT_EQ("gru_2", unrolled_net.layer(6).name());
+	EXPECT_EQ(2, unrolled_net.layer(6).srclayers_size());
+	EXPECT_EQ("gru_1", unrolled_net.layer(6).srclayers(0));
+	EXPECT_EQ("embedding_2", unrolled_net.layer(6).srclayers(1));
+	EXPECT_EQ("w_z_hx_2", unrolled_net.layer(6).param(0).name());
+	EXPECT_EQ("w_z_hx", unrolled_net.layer(6).param(0).share_from());
+	EXPECT_EQ("w_r_hx_2", unrolled_net.layer(6).param(1).name());
+	EXPECT_EQ("w_r_hx", unrolled_net.layer(6).param(1).share_from());
+	EXPECT_EQ("w_c_hx_2", unrolled_net.layer(6).param(2).name());
+	EXPECT_EQ("w_c_hx", unrolled_net.layer(6).param(2).share_from());
+	EXPECT_EQ("w_z_hh_2", unrolled_net.layer(6).param(3).name());
+	EXPECT_EQ("w_z_hh", unrolled_net.layer(6).param(3).share_from());
+	EXPECT_EQ("w_r_hh_2", unrolled_net.layer(6).param(4).name());
+	EXPECT_EQ("w_r_hh", unrolled_net.layer(6).param(4).share_from());
+	EXPECT_EQ("w_c_hh_2", unrolled_net.layer(6).param(5).name());
+	EXPECT_EQ("w_c_hh", unrolled_net.layer(6).param(5).share_from());
+
+	EXPECT_EQ("out_0", unrolled_net.layer(7).name());
+	EXPECT_EQ(1, unrolled_net.layer(7).srclayers_size());
+	EXPECT_EQ("gru_0", unrolled_net.layer(7).srclayers(0));
+	EXPECT_EQ("w", unrolled_net.layer(7).param(0).name());
+	EXPECT_EQ("b", unrolled_net.layer(7).param(1).name());
+
+	EXPECT_EQ("out_1", unrolled_net.layer(8).name());
+	EXPECT_EQ(1, unrolled_net.layer(8).srclayers_size());
+	EXPECT_EQ("gru_1", unrolled_net.layer(8).srclayers(0));
+	EXPECT_EQ("w_1", unrolled_net.layer(8).param(0).name());
+	EXPECT_EQ("w", unrolled_net.layer(8).param(0).share_from());
+	EXPECT_EQ("b_1", unrolled_net.layer(8).param(1).name());
+	EXPECT_EQ("b", unrolled_net.layer(8).param(1).share_from());
+
+	EXPECT_EQ("out_2", unrolled_net.layer(9).name());
+	EXPECT_EQ(1, unrolled_net.layer(9).srclayers_size());
+	EXPECT_EQ("gru_2", unrolled_net.layer(9).srclayers(0));
+	EXPECT_EQ("w_2", unrolled_net.layer(9).param(0).name());
+	EXPECT_EQ("w", unrolled_net.layer(9).param(0).share_from());
+	EXPECT_EQ("b_2", unrolled_net.layer(9).param(1).name());
+	EXPECT_EQ("b", unrolled_net.layer(9).param(1).share_from());
+
+	EXPECT_EQ("softmax_0", unrolled_net.layer(10).name());
+	EXPECT_EQ(1, unrolled_net.layer(10).srclayers_size());
+	EXPECT_EQ("out_0", unrolled_net.layer(10).srclayers(0));
+
+	EXPECT_EQ("softmax_1", unrolled_net.layer(11).name());
+	EXPECT_EQ(1, unrolled_net.layer(11).srclayers_size());
+	EXPECT_EQ("out_1", unrolled_net.layer(11).srclayers(0));
+
+	EXPECT_EQ("softmax_2", unrolled_net.layer(12).name());
+	EXPECT_EQ(1, unrolled_net.layer(12).srclayers_size());
+	EXPECT_EQ("out_2", unrolled_net.layer(12).srclayers(0));
+
+	EXPECT_EQ("loss_0", unrolled_net.layer(13).name());
+	EXPECT_EQ(2, unrolled_net.layer(13).srclayers_size());
+	EXPECT_EQ("softmax_0", unrolled_net.layer(13).srclayers(0));
+	EXPECT_EQ("data", unrolled_net.layer(13).srclayers(1));
+
+	EXPECT_EQ("loss_1", unrolled_net.layer(14).name());
+	EXPECT_EQ(2, unrolled_net.layer(14).srclayers_size());
+	EXPECT_EQ("softmax_1", unrolled_net.layer(14).srclayers(0));
+	EXPECT_EQ("data", unrolled_net.layer(14).srclayers(1));
+
+	EXPECT_EQ("loss_2", unrolled_net.layer(15).name());
+	EXPECT_EQ(2, unrolled_net.layer(15).srclayers_size());
+	EXPECT_EQ("softmax_2", unrolled_net.layer(15).srclayers(0));
+	EXPECT_EQ("data", unrolled_net.layer(15).srclayers(1));
+}
+
+TEST_F(UnrollingTest, GRULanguageModelTest) {
+	NetProto net;
+	net.CopyFrom(job_conf2.neuralnet());
+	NetProto unrolled_net = NeuralNet::Unrolling(net);
+
+	EXPECT_EQ("data", unrolled_net.layer(0).name());
+
+	EXPECT_EQ("embedding_0", unrolled_net.layer(1).name());
+	EXPECT_EQ(1, unrolled_net.layer(1).srclayers_size());
+	EXPECT_EQ("data", unrolled_net.layer(1).srclayers(0));
+
+	EXPECT_EQ("embedding_1", unrolled_net.layer(2).name());
+	EXPECT_EQ(2, unrolled_net.layer(2).srclayers_size());
+	EXPECT_EQ("data", unrolled_net.layer(2).srclayers(0));
+	EXPECT_EQ("softmax_0", unrolled_net.layer(2).srclayers(1));
+
+	EXPECT_EQ("embedding_2", unrolled_net.layer(3).name());
+	EXPECT_EQ(2, unrolled_net.layer(3).srclayers_size());
+	EXPECT_EQ("data", unrolled_net.layer(3).srclayers(0));
+	EXPECT_EQ("softmax_1", unrolled_net.layer(3).srclayers(1));
+
+	EXPECT_EQ("gru_0", unrolled_net.layer(4).name());
+	EXPECT_EQ(1, unrolled_net.layer(4).srclayers_size());
+	EXPECT_EQ("embedding_0", unrolled_net.layer(4).srclayers(0));
+	EXPECT_EQ("w_z_hx", unrolled_net.layer(4).param(0).name());
+	EXPECT_EQ("w_r_hx", unrolled_net.layer(4).param(1).name());
+	EXPECT_EQ("w_c_hx", unrolled_net.layer(4).param(2).name());
+	EXPECT_EQ("w_z_hh", unrolled_net.layer(4).param(3).name());
+	EXPECT_EQ("w_r_hh", unrolled_net.layer(4).param(4).name());
+	EXPECT_EQ("w_c_hh", unrolled_net.layer(4).param(5).name());
+
+	EXPECT_EQ("gru_1", unrolled_net.layer(5).name());
+	EXPECT_EQ(2, unrolled_net.layer(5).srclayers_size());
+	EXPECT_EQ("gru_0", unrolled_net.layer(5).srclayers(0));
+	EXPECT_EQ("embedding_1", unrolled_net.layer(5).srclayers(1));
+	EXPECT_EQ("w_z_hx_1", unrolled_net.layer(5).param(0).name());
+	EXPECT_EQ("w_z_hx", unrolled_net.layer(5).param(0).share_from());
+	EXPECT_EQ("w_r_hx_1", unrolled_net.layer(5).param(1).name());
+	EXPECT_EQ("w_r_hx", unrolled_net.layer(5).param(1).share_from());
+	EXPECT_EQ("w_c_hx_1", unrolled_net.layer(5).param(2).name());
+	EXPECT_EQ("w_c_hx", unrolled_net.layer(5).param(2).share_from());
+	EXPECT_EQ("w_z_hh_1", unrolled_net.layer(5).param(3).name());
+	EXPECT_EQ("w_z_hh", unrolled_net.layer(5).param(3).share_from());
+	EXPECT_EQ("w_r_hh_1", unrolled_net.layer(5).param(4).name());
+	EXPECT_EQ("w_r_hh", unrolled_net.layer(5).param(4).share_from());
+	EXPECT_EQ("w_c_hh_1", unrolled_net.layer(5).param(5).name());
+	EXPECT_EQ("w_c_hh", unrolled_net.layer(5).param(5).share_from());
+
+	EXPECT_EQ("gru_2", unrolled_net.layer(6).name());
+	EXPECT_EQ(2, unrolled_net.layer(6).srclayers_size());
+	EXPECT_EQ("gru_1", unrolled_net.layer(6).srclayers(0));
+	EXPECT_EQ("embedding_2", unrolled_net.layer(6).srclayers(1));
+	EXPECT_EQ("w_z_hx_2", unrolled_net.layer(6).param(0).name());
+	EXPECT_EQ("w_z_hx", unrolled_net.layer(6).param(0).share_from());
+	EXPECT_EQ("w_r_hx_2", unrolled_net.layer(6).param(1).name());
+	EXPECT_EQ("w_r_hx", unrolled_net.layer(6).param(1).share_from());
+	EXPECT_EQ("w_c_hx_2", unrolled_net.layer(6).param(2).name());
+	EXPECT_EQ("w_c_hx", unrolled_net.layer(6).param(2).share_from());
+	EXPECT_EQ("w_z_hh_2", unrolled_net.layer(6).param(3).name());
+	EXPECT_EQ("w_z_hh", unrolled_net.layer(6).param(3).share_from());
+	EXPECT_EQ("w_r_hh_2", unrolled_net.layer(6).param(4).name());
+	EXPECT_EQ("w_r_hh", unrolled_net.layer(6).param(4).share_from());
+	EXPECT_EQ("w_c_hh_2", unrolled_net.layer(6).param(5).name());
+	EXPECT_EQ("w_c_hh", unrolled_net.layer(6).param(5).share_from());
+
+	EXPECT_EQ("out_0", unrolled_net.layer(7).name());
+	EXPECT_EQ(1, unrolled_net.layer(7).srclayers_size());
+	EXPECT_EQ("gru_0", unrolled_net.layer(7).srclayers(0));
+	EXPECT_EQ("w", unrolled_net.layer(7).param(0).name());
+	EXPECT_EQ("b", unrolled_net.layer(7).param(1).name());
+
+	EXPECT_EQ("out_1", unrolled_net.layer(8).name());
+	EXPECT_EQ(1, unrolled_net.layer(8).srclayers_size());
+	EXPECT_EQ("gru_1", unrolled_net.layer(8).srclayers(0));
+	EXPECT_EQ("w_1", unrolled_net.layer(8).param(0).name());
+	EXPECT_EQ("w", unrolled_net.layer(8).param(0).share_from());
+	EXPECT_EQ("b_1", unrolled_net.layer(8).param(1).name());
+	EXPECT_EQ("b", unrolled_net.layer(8).param(1).share_from());
+
+	EXPECT_EQ("out_2", unrolled_net.layer(9).name());
+	EXPECT_EQ(1, unrolled_net.layer(9).srclayers_size());
+	EXPECT_EQ("gru_2", unrolled_net.layer(9).srclayers(0));
+	EXPECT_EQ("w_2", unrolled_net.layer(9).param(0).name());
+	EXPECT_EQ("w", unrolled_net.layer(9).param(0).share_from());
+	EXPECT_EQ("b_2", unrolled_net.layer(9).param(1).name());
+	EXPECT_EQ("b", unrolled_net.layer(9).param(1).share_from());
+
+	EXPECT_EQ("softmax_0", unrolled_net.layer(10).name());
+	EXPECT_EQ(1, unrolled_net.layer(10).srclayers_size());
+	EXPECT_EQ("out_0", unrolled_net.layer(10).srclayers(0));
+
+	EXPECT_EQ("softmax_1", unrolled_net.layer(11).name());
+	EXPECT_EQ(1, unrolled_net.layer(11).srclayers_size());
+	EXPECT_EQ("out_1", unrolled_net.layer(11).srclayers(0));
+
+	EXPECT_EQ("softmax_2", unrolled_net.layer(12).name());
+	EXPECT_EQ(1, unrolled_net.layer(12).srclayers_size());
+	EXPECT_EQ("out_2", unrolled_net.layer(12).srclayers(0));
+
+	EXPECT_EQ("loss_0", unrolled_net.layer(13).name());
+	EXPECT_EQ(2, unrolled_net.layer(13).srclayers_size());
+	EXPECT_EQ("softmax_0", unrolled_net.layer(13).srclayers(0));
+	EXPECT_EQ("data", unrolled_net.layer(13).srclayers(1));
+
+	EXPECT_EQ("loss_1", unrolled_net.layer(14).name());
+	EXPECT_EQ(2, unrolled_net.layer(14).srclayers_size());
+	EXPECT_EQ("softmax_1", unrolled_net.layer(14).srclayers(0));
+	EXPECT_EQ("data", unrolled_net.layer(14).srclayers(1));
+
+	EXPECT_EQ("loss_2", unrolled_net.layer(15).name());
+	EXPECT_EQ(2, unrolled_net.layer(15).srclayers_size());
+	EXPECT_EQ("softmax_2", unrolled_net.layer(15).srclayers(0));
+	EXPECT_EQ("data", unrolled_net.layer(15).srclayers(1));
+}


[05/10] incubator-singa git commit: SINGA-120 - Implemented GRU and BPTT: Updated the dummy layer to fix bugs

Posted by wa...@apache.org.
SINGA-120 - Implemented GRU and BPTT: Updated the dummy layer to fix bugs


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/1f03f9d8
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/1f03f9d8
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/1f03f9d8

Branch: refs/heads/master
Commit: 1f03f9d8486cb85e55c585121b2a8838bae4ed44
Parents: b4b6a38
Author: Ju Fan <fa...@gmail.com>
Authored: Fri Jan 1 10:54:43 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Wed Jan 6 01:53:05 2016 +0800

----------------------------------------------------------------------
 src/neuralnet/neuron_layer/dummy.cc | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/1f03f9d8/src/neuralnet/neuron_layer/dummy.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuron_layer/dummy.cc b/src/neuralnet/neuron_layer/dummy.cc
index 11ce3ec..9ccb179 100644
--- a/src/neuralnet/neuron_layer/dummy.cc
+++ b/src/neuralnet/neuron_layer/dummy.cc
@@ -22,12 +22,14 @@
 #include <glog/logging.h>
 #include "singa/neuralnet/neuron_layer.h"
 #include "singa/utils/math_blob.h"
+#include "singa/utils/context.h"
+#include "singa/utils/singleton.h"
 
 namespace singa {
 
 void DummyLayer::Setup(const LayerProto& proto,
                        const vector<Layer*>& srclayers) {
-  Layer::Setup(proto, srclayers);
+  NeuronLayer::Setup(proto, srclayers);
   if (proto.dummy_conf().input()) {  // use as input layer
     CHECK_EQ(srclayers.size(), 0);
     input_ = true;
@@ -43,27 +45,28 @@ void DummyLayer::Setup(const LayerProto& proto,
   if (proto.dummy_conf().output()) {  // use as output layer
     output_ = true;
   }
-}
 
-std::random_device rd;
-std::mt19937 gen(rd());
-std::uniform_real_distribution<> dis(0, 1);
+}
 
 void DummyLayer::ComputeFeature(int flag, const vector<Layer*>& srclayers) {
+  std::uniform_real_distribution<float> dis(0, 1);
+  auto gen = Singleton<Context>::Instance()->rand_generator();
   if (input_) {
     // randomly init data with [0,1] values
     for (int i = 0; i < data_.count(); ++i)
-      data_.mutable_cpu_data()[i] = dis(gen);
+      data_.mutable_cpu_data()[i] = dis(*gen);
   }
   if (srclayers.size() > 0)
     Copy(srclayers[0]->data(this), &data_);
 }
 
 void DummyLayer::ComputeGradient(int flag, const vector<Layer*>& srclayers) {
+  std::uniform_real_distribution<float> dis(0, 1);
+  auto gen = Singleton<Context>::Instance()->rand_generator();
   if (output_) {
     // randomly init data with [0,1] values
     for (int i = 0; i < data_.count(); ++i)
-      grad_.mutable_cpu_data()[i] = dis(gen);
+      grad_.mutable_cpu_data()[i] = dis(*gen);
   }
   if (srclayers.size() > 0)
     Copy(grad_, srclayers[0]->mutable_grad(this));