You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@singa.apache.org by wa...@apache.org on 2016/01/05 19:10:37 UTC
[08/10] incubator-singa git commit: SINGA-120 - Implemented GRU and BPTT
SINGA-120 - Implemented GRU and BPTT
Change the new-memory computation formula to follow char-rnn (i.e., the element-wise multiplication of the reset gate and the context is applied before the matrix multiplication).
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/6a4c9960
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/6a4c9960
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/6a4c9960
Branch: refs/heads/master
Commit: 6a4c9960e0795aeac6df282d7190b6f93b305c58
Parents: 959ef70
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Tue Jan 5 18:14:46 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Wed Jan 6 01:55:08 2016 +0800
----------------------------------------------------------------------
Makefile.am | 2 +
include/singa/neuralnet/connection_layer.h | 28 +++++++++++++
include/singa/neuralnet/layer.h | 14 +++++++
include/singa/neuralnet/neuron_layer.h | 2 +-
include/singa/neuralnet/output_layer.h | 15 +++++++
include/singa/utils/updater.h | 8 ++--
src/driver.cc | 6 ++-
src/neuralnet/neuralnet.cc | 9 ++---
src/neuralnet/neuron_layer/gru.cc | 52 +++++++++++--------------
src/proto/job.proto | 14 +++++++
src/utils/updater.cc | 28 ++++++-------
src/worker.cc | 16 ++++++--
12 files changed, 133 insertions(+), 61 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6a4c9960/Makefile.am
----------------------------------------------------------------------
diff --git a/Makefile.am b/Makefile.am
index d2b2aa8..7ae4537 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -75,6 +75,7 @@ SINGA_SRCS := src/driver.cc \
src/neuralnet/connection_layer/concate.cc \
src/neuralnet/connection_layer/slice.cc \
src/neuralnet/connection_layer/split.cc \
+ src/neuralnet/connection_layer/rnn_dummy.cc \
src/neuralnet/input_layer/char_rnn.cc \
src/neuralnet/input_layer/onehot.cc \
src/neuralnet/input_layer/csv.cc \
@@ -88,6 +89,7 @@ SINGA_SRCS := src/driver.cc \
src/neuralnet/output_layer/argsort.cc \
src/neuralnet/output_layer/csv.cc \
src/neuralnet/output_layer/record.cc \
+ src/neuralnet/output_layer/char_rnn.cc \
src/neuralnet/loss_layer/euclidean.cc \
src/neuralnet/loss_layer/softmax.cc \
src/neuralnet/neuron_layer/activation.cc \
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6a4c9960/include/singa/neuralnet/connection_layer.h
----------------------------------------------------------------------
diff --git a/include/singa/neuralnet/connection_layer.h b/include/singa/neuralnet/connection_layer.h
index a18f458..481d991 100644
--- a/include/singa/neuralnet/connection_layer.h
+++ b/include/singa/neuralnet/connection_layer.h
@@ -153,6 +153,34 @@ class SplitLayer : public ConnectionLayer {
Layer2Index layer_idx_;
};
+/**
+ * Dummy layer for RNN models, which provides input for other layers.
+ *
+ * Particularly, it is used in the test phase of RNN models to connect other
+ * layers and avoid cycles in the neural net config.
+ */
+class RNNDummyLayer : public ConnectionLayer {
+ public:
+ void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override;
+ void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
+ void ComputeGradient(int flag, const vector<Layer*>& srclayers) {
+ LOG(FATAL) << "Not implemented";
+ }
+
+ const string srclayer(int step) const {
+ if (step > 0)
+ return dynamic_src_;
+ else
+ return "";
+ }
+
+ private:
+ string dynamic_src_;
+ float low_, high_;
+ bool integer_;
+ Layer* srclayer_;
+};
+
} // namespace singa
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6a4c9960/include/singa/neuralnet/layer.h
----------------------------------------------------------------------
diff --git a/include/singa/neuralnet/layer.h b/include/singa/neuralnet/layer.h
index f4738fa..c1612a2 100644
--- a/include/singa/neuralnet/layer.h
+++ b/include/singa/neuralnet/layer.h
@@ -36,6 +36,20 @@ using std::string;
// TODO(wangwei) make AuxType a template argument for Layer.
using AuxType = int;
+
+inline const string AddUnrollingPrefix(int unroll_idx, const string& name) {
+ return std::to_string(unroll_idx) + "#" + name;
+}
+inline const string AddPartitionSuffix(int partition_idx, const string& name) {
+ return name + "@" + std::to_string(partition_idx);
+}
+
+
+inline const string AddPrefixSuffix(int unroll_idx, int partition_idx,
+ const string& name) {
+ return std::to_string(unroll_idx) + "#" + name + "@" +
+ std::to_string(partition_idx);
+}
/**
* Base layer class.
*
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6a4c9960/include/singa/neuralnet/neuron_layer.h
----------------------------------------------------------------------
diff --git a/include/singa/neuralnet/neuron_layer.h b/include/singa/neuralnet/neuron_layer.h
index e587e38..e1a63a2 100644
--- a/include/singa/neuralnet/neuron_layer.h
+++ b/include/singa/neuralnet/neuron_layer.h
@@ -203,7 +203,7 @@ class GRULayer : public NeuronLayer {
int batchsize_; // batch size
int vdim_, hdim_; // dimensions
- Blob<float> *update_gate, *reset_gate, *new_memory;
+ Blob<float> *update_gate, *reset_gate, *new_memory, *reset_context;
//!< gru layer connect to two dst layers, hence need to grad blobs.
Blob<float> aux_grad_;
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6a4c9960/include/singa/neuralnet/output_layer.h
----------------------------------------------------------------------
diff --git a/include/singa/neuralnet/output_layer.h b/include/singa/neuralnet/output_layer.h
index c7e5d6a..9071f33 100644
--- a/include/singa/neuralnet/output_layer.h
+++ b/include/singa/neuralnet/output_layer.h
@@ -80,5 +80,20 @@ class RecordOutputLayer : public OutputLayer {
int inst_ = 0; //!< instance No.
io::Store* store_ = nullptr;
};
+
+/**
+ * Output layer for the char-rnn model, which converts the sample id back to a
+ * char and dumps it to stdout.
+ */
+class CharRNNOutputLayer : public OutputLayer {
+ public:
+ void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override;
+
+ void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
+
+ private:
+ string vocab_;
+};
+
} // namespace singa
#endif // SINGA_NEURALNET_OUTPUT_LAYER_H_
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6a4c9960/include/singa/utils/updater.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/updater.h b/include/singa/utils/updater.h
index 575ab86..7fec78c 100644
--- a/include/singa/utils/updater.h
+++ b/include/singa/utils/updater.h
@@ -118,18 +118,16 @@ class NesterovUpdater : public Updater {
void Update(int step, Param* param, float grad_scale) override;
};
-/*
class RMSPropUpdater : public Updater {
public:
- virtual void Update(int step, Param* param, float grad_scale);
+ void Init(const UpdaterProto &proto) override;
+ void Update(int step, Param* param, float grad_scale) override;
protected:
- float base_lr_;
- float delta_;
float rho_;
- float weight_decay_;
};
+/*
class AdaDeltaUpdater : public Updater {
public:
virtual void Update(int step, Param* param, float grad_scale);
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6a4c9960/src/driver.cc
----------------------------------------------------------------------
diff --git a/src/driver.cc b/src/driver.cc
index 21968bb..1e4929f 100644
--- a/src/driver.cc
+++ b/src/driver.cc
@@ -74,6 +74,7 @@ void Driver::Init(int argc, char **argv) {
RegisterLayer<CharRNNInputLayer, int>(kCharRNN);
RegisterLayer<RNNLabelLayer, int>(kRNNLabel);
RegisterLayer<OneHotLayer, int>(kOneHot);
+ RegisterLayer<CharRNNOutputLayer, int>(kCharRNNOutput);
// connection layers
RegisterLayer<BridgeDstLayer, int>(kBridgeDst);
@@ -81,6 +82,7 @@ void Driver::Init(int argc, char **argv) {
RegisterLayer<ConcateLayer, int>(kConcate);
RegisterLayer<SliceLayer, int>(kSlice);
RegisterLayer<SplitLayer, int>(kSplit);
+ RegisterLayer<RNNDummyLayer, int>(kRNNDummy);
RegisterLayer<AccuracyLayer, int>(kAccuracy);
RegisterLayer<ArgSortLayer, int>(kArgSort);
@@ -125,7 +127,7 @@ void Driver::Init(int argc, char **argv) {
// register updaters
RegisterUpdater<AdaGradUpdater>(kAdaGrad);
RegisterUpdater<NesterovUpdater>(kNesterov);
- // TODO(wangwei) RegisterUpdater<kRMSPropUpdater>(kRMSProp);
+ RegisterUpdater<RMSPropUpdater>(kRMSProp);
RegisterUpdater<SGDUpdater>(kSGD);
// register learning rate change methods
@@ -198,6 +200,8 @@ void Driver::Test(const JobProto& job_conf) {
auto worker = Worker::Create(job_conf.train_one_batch());
worker->Setup(0, 0, job_conf, nullptr, nullptr, nullptr);
auto net = NeuralNet::Create(job_conf.neuralnet(), kTest, 1);
+ WriteStringToTextFile(Cluster::Get()->vis_folder() + "/test_net.json",
+ net->ToGraph(true).ToJson());
vector<string> paths;
for (const auto& p : job_conf.checkpoint_path())
paths.push_back(p);
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6a4c9960/src/neuralnet/neuralnet.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuralnet.cc b/src/neuralnet/neuralnet.cc
index f9579b1..49978a1 100644
--- a/src/neuralnet/neuralnet.cc
+++ b/src/neuralnet/neuralnet.cc
@@ -144,7 +144,7 @@ const NetProto NeuralNet::Unrolling(const NetProto& net_conf) {
for (int i = 0; i < org_layer.unroll_len(); i ++) { // unroll
LayerProto* unroll_layer = conf.add_layer();
unroll_layer->CopyFrom(org_layer); // create a new layer conf
- if (org_layer.unroll_len() > 1) {
+ // if (org_layer.unroll_len() > 1) {
// update layer names
std::stringstream sstm;
sstm << i << '#' << unroll_layer->name();
@@ -160,7 +160,7 @@ const NetProto NeuralNet::Unrolling(const NetProto& net_conf) {
sstm1 << i << '#' << param->name();
param->set_name(sstm1.str());
}
- }
+ // }
// clear unrolling related fields
unroll_layer->clear_unroll_len();
unroll_layer->clear_unroll_conn_type();
@@ -257,6 +257,7 @@ void NeuralNet::Load(const vector<string>& paths,
ReadProtoFromBinaryFile(path.c_str(), &bps);
for (int i = 0; i < bps.name_size(); i++) {
if (params.find(bps.name(i)) != params.end()) {
+ // LOG(ERROR) << "Loading param = " << bps.name(i);
params.at(bps.name(i))->FromProto(bps.blob(i));
params.at(bps.name(i))->set_version(bps.version(i));
}
@@ -458,12 +459,10 @@ Graph* NeuralNet::CreateGraph(const NetProto& netproto, int npartitions) {
map<string, const LayerProto*> name2proto;
for (const LayerProto& layer : net_w_connection.layer()) {
vector<Node*> nodes;
- char suffix[4];
for (int i = 0; i < npartitions; i++) {
LayerProto *proto = new LayerProto(layer);
- snprintf(suffix, sizeof(suffix), "%02d", i);
// differentiate partitions
- string nodename = layer.name() + "@" + string(suffix);
+ string nodename = layer.name() + "@" + std::to_string(i);
proto->set_name(nodename);
proto->set_type(layer.type());
proto->set_partition_dim(layer.partition_dim());
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6a4c9960/src/neuralnet/neuron_layer/gru.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuron_layer/gru.cc b/src/neuralnet/neuron_layer/gru.cc
index 9ba5a50..cf7425b 100644
--- a/src/neuralnet/neuron_layer/gru.cc
+++ b/src/neuralnet/neuron_layer/gru.cc
@@ -98,6 +98,8 @@ void GRULayer::Setup(const LayerProto& conf,
update_gate = new Blob<float>(batchsize_, hdim_);
reset_gate = new Blob<float>(batchsize_, hdim_);
+ // reset gate x context
+ reset_context = new Blob<float>(batchsize_, hdim_);
new_memory = new Blob<float>(batchsize_, hdim_);
}
@@ -130,24 +132,23 @@ void GRULayer::ComputeFeature(int flag,
Map<op::Sigmoid<float>,float>(*update_gate, update_gate);
// Compute the reset gate
- GEMM(1.0f, 0.0f, src,*w_r_hx_t,reset_gate);
+ GEMM(1.0f, 0.0f, src, *w_r_hx_t, reset_gate);
if (bias_r_ != nullptr)
- MVAddRow(1.0f,1.0f,bias_r_->data(),reset_gate);
+ MVAddRow(1.0f,1.0f, bias_r_->data(),reset_gate);
GEMM(1.0f, 1.0f, *context, *w_r_hh_t, reset_gate);
Map<op::Sigmoid<float>,float>(*reset_gate, reset_gate);
// Compute the new memory
- GEMM(1.0f, 0.0f, src, *w_c_hx_t, new_memory);
+ Mult<float>(*reset_gate, *context, reset_context);
+ GEMM(1.0f, 0.0f, *reset_context, *w_c_hh_t, new_memory);
+ GEMM(1.0f, 1.0f, src, *w_c_hx_t, new_memory);
if (bias_c_ != nullptr)
- MVAddRow(1.0f,1.0f,bias_c_->data(), new_memory);
- Mult<float>(*reset_gate, *new_memory, new_memory);
- GEMM(1.0f, 1.0f, *context, *w_c_hh_t, new_memory);
+ MVAddRow(1.0f, 1.0f, bias_c_->data(), new_memory);
Map<op::Tanh<float>,float>(*new_memory, new_memory);
-
- Sub(*context, *new_memory, &data_);
+ Sub(*new_memory, *context, &data_);
Mult(data_, *update_gate, &data_);
- Add(data_, *new_memory, &data_);
+ AXPY(1.0f, *context, &data_);
// delete the pointers
if (srclayers.size() == 1)
@@ -192,24 +193,19 @@ void GRULayer::ComputeGradient(int flag,
Map<singa::op::TanhGrad<float>, float>(*new_memory, &dnewmdc);
Blob<float> dLdz (batchsize_, hdim_);
- Sub<float>(*context, *new_memory, &dLdz);
+ Sub<float>(*new_memory, *context, &dLdz);
Mult<float>(dLdz, grad_, &dLdz);
Mult<float>(dLdz, dugatedz, &dLdz);
Blob<float> dLdc (batchsize_,hdim_);
- Blob<float> z1 (batchsize_,hdim_);
- z1.SetValue(1.0f);
- AXPY<float>(-1.0f, *update_gate, &z1);
- Mult(grad_,z1,&dLdc);
- Mult(dLdc,dnewmdc,&dLdc);
+ Mult(grad_, *update_gate, &dLdc);
+ Mult(dLdc, dnewmdc, &dLdc);
Blob<float> reset_dLdc (batchsize_,hdim_);
- Mult(dLdc, *reset_gate, &reset_dLdc);
+ GEMM(1.0f, 0.0f, dLdc, weight_c_hh_->data(), &reset_dLdc);
Blob<float> dLdr (batchsize_, hdim_);
- Blob<float> cprev (batchsize_, hdim_);
- GEMM(1.0f, 0.0f, *context, weight_c_hh_->data().T(), &cprev);
- Mult(dLdc, cprev, &dLdr);
+ Mult(reset_dLdc, *context, &dLdr);
Mult(dLdr, drgatedr, &dLdr);
// Compute gradients for parameters of update gate
@@ -230,29 +226,25 @@ void GRULayer::ComputeGradient(int flag,
// Compute gradients for parameters of new memory
Blob<float> *dLdc_t = Transpose(dLdc);
- GEMM(1.0f, beta, *dLdc_t, src,weight_c_hx_->mutable_grad());
+ GEMM(1.0f, beta, *dLdc_t, src, weight_c_hx_->mutable_grad());
+ GEMM(1.0f, beta, *dLdc_t, *reset_context, weight_c_hh_->mutable_grad());
if (bias_c_ != nullptr)
MVSumRow(1.0f, beta, dLdc, bias_c_->mutable_grad());
delete dLdc_t;
- Blob<float> *reset_dLdc_t = Transpose(reset_dLdc);
- GEMM(1.0f, beta, *reset_dLdc_t, *context, weight_c_hh_->mutable_grad());
- delete reset_dLdc_t;
-
// Compute gradients for data input layer
if (srclayers[0]->mutable_grad(this) != nullptr) {
- GEMM(1.0f,0.0f,dLdc, weight_c_hx_->data(), ilayer->mutable_grad(this));
- GEMM(1.0f,1.0f,dLdz, weight_z_hx_->data(), ilayer->mutable_grad(this));
- GEMM(1.0f,1.0f,dLdr, weight_r_hx_->data(), ilayer->mutable_grad(this));
+ GEMM(1.0f,0.0f, dLdc, weight_c_hx_->data(), ilayer->mutable_grad(this));
+ GEMM(1.0f,1.0f, dLdz, weight_z_hx_->data(), ilayer->mutable_grad(this));
+ GEMM(1.0f,1.0f, dLdr, weight_r_hx_->data(), ilayer->mutable_grad(this));
}
if (clayer != nullptr && clayer->mutable_grad(this) != nullptr) {
// Compute gradients for context layer
- GEMM(1.0f, 0.0f, reset_dLdc, weight_c_hh_->data(),
- clayer->mutable_grad(this));
+ Mult(reset_dLdc, *reset_gate, clayer->mutable_grad(this));
GEMM(1.0f, 1.0f, dLdr, weight_r_hh_->data(), clayer->mutable_grad(this));
GEMM(1.0f, 1.0f, dLdz, weight_z_hh_->data(), clayer->mutable_grad(this));
- Add(clayer->grad(this), *update_gate, clayer->mutable_grad(this));
+ AXPY(-1.0f, *update_gate, clayer->mutable_grad(this));
// LOG(ERROR) << "grad to prev gru " << Asum(clayer->grad(this));
}
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6a4c9960/src/proto/job.proto
----------------------------------------------------------------------
diff --git a/src/proto/job.proto b/src/proto/job.proto
index e520eba..28a3a68 100644
--- a/src/proto/job.proto
+++ b/src/proto/job.proto
@@ -253,6 +253,7 @@ message LayerProto {
optional ConcateProto concate_conf = 502;
optional SliceProto slice_conf = 503;
optional SplitProto split_conf = 504;
+ optional RNNDummyProto rnn_dummy_conf = 505;
extensions 1001 to 1100;
}
@@ -456,6 +457,17 @@ message DummyProto {
repeated int32 shape = 3;
}
+message RNNDummyProto {
+ optional string dynamic_srclayer = 1;
+ // if shape is set, randomly generate the data blob
+ repeated int32 shape = 2;
+ // if integer is true, generate integer data
+ optional bool integer = 3 [default = false];
+ // range of the random generation
+ optional float low = 4 [default = 0];
+ optional float high = 5 [default = 0];
+}
+
// Message that stores parameters used by DropoutLayer
message DropoutProto {
// dropout ratio
@@ -667,6 +679,7 @@ enum LayerType {
kArgSort = 401;
kCSVOutput = 402;
kRecordOutput = 403;
+ kCharRNNOutput = 404;
/*
* Connection layers
@@ -677,6 +690,7 @@ enum LayerType {
kConcate = 502;
kSlice = 503;
kSplit = 504;
+ kRNNDummy = 505;
/*
* User defined layer
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6a4c9960/src/utils/updater.cc
----------------------------------------------------------------------
diff --git a/src/utils/updater.cc b/src/utils/updater.cc
index a9f70c0..200670a 100644
--- a/src/utils/updater.cc
+++ b/src/utils/updater.cc
@@ -174,31 +174,27 @@ void AdaGradUpdater::Update(int step, Param* param, float grad_scale) {
data -= lr * grad / (F<sqrtop>(history, proto_.delta()));
}
-/***********************RMSProp******************************
-void RMSPropUpdater::Init(const UpdaterProto& proto){
+/***********************RMSProp******************************/
+void RMSPropUpdater::Init(const UpdaterProto& proto) {
Updater::Init(proto);
- base_lr_ = proto.base_lr();
- CHECK_GT(base_lr_, 0);
- delta_ = proto.delta();
rho_ = proto.rmsprop_conf().rho();
- weight_decay_ = proto.weight_decay();
}
-void RMSPropUpdater::Update(int step, Param* param, float grad_scale){
+void RMSPropUpdater::Update(int step, Param* param, float grad_scale) {
Shape<1> s=Shape1(param->size());
Tensor<cpu, 1> data(param->mutable_cpu_data(), s);
Tensor<cpu, 1> grad(param->mutable_cpu_grad(), s);
Tensor<cpu, 1> history(param->mutable_cpu_history(), s);
- history=history*rho_+(1-rho_)*F<op::square>(grad*grad_scale);
- float lr=GetLearningRate(step)*param->lr_scale();
- float wd=weight_decay_*param->wd_scale();
- if(wd>0){ // L2 regularization
- grad+=data*wd;
- }
- data-=lr*grad/(F<op::sqrtop>(history,delta_));
+ float lr = lr_gen_->Get(step) * param->lr_scale();
+ float wd = weight_decay_ * param->wd_scale();
+ if (grad_scale != 1.f)
+ grad *= grad_scale;
+ if (wd > 0) // L2 regularization, should be done after multiplying grad by grad_scale
+ grad += data * wd;
+ history = history * rho_ + (1 - rho_) * F<square>(grad);
+ data -= lr * grad / (F<sqrtop>(history, proto_.delta()));
}
-
-***********************AdaDelta******************************
+/***********************AdaDelta******************************
void AdaDeltaUpdater::Init(const UpdaterProto& proto){
Updater::Init(proto);
delta_=proto.delta();
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6a4c9960/src/worker.cc
----------------------------------------------------------------------
diff --git a/src/worker.cc b/src/worker.cc
index 4e1dc75..abe74e7 100644
--- a/src/worker.cc
+++ b/src/worker.cc
@@ -390,13 +390,24 @@ void BPTTWorker::Forward(int step, Phase phase, NeuralNet* net) {
}
}
vector<Layer*> src = net->srclayers(layer);
+ if ((phase & kTest) && typeid(*layer) == typeid(RNNDummyLayer)) {
+ CHECK_LE(src.size(), 1);
+ auto dummy = dynamic_cast<RNNDummyLayer*>(layer);
+ Layer* srclayer = net->name2layer(dummy->srclayer(step));
+ if (step > 0)
+ CHECK(srclayer != nullptr);
+ if (srclayer != nullptr) {
+ src.clear();
+ src.push_back(srclayer);
+ }
+ }
// if full state rnn and not the starting of a new passing of the dataset,
// feed the hidden state of the last unit to the first unit.
if (layer->unroll_index() == 0 && full_state_ && !begin_) {
Layer* last = net->last_unroll_layer(layer);
- if (last != layer) {
+ CHECK(last != nullptr);
+ if (last != layer || (phase & kTest))
src.push_back(last);
- }
}
// LOG(ERROR) << layer->name() << " forward";
// int ret =
@@ -405,7 +416,6 @@ void BPTTWorker::Forward(int step, Phase phase, NeuralNet* net) {
if ((phase & Phase::kTrain) && ret == Status::kEnd)
begin_ = true;
*/
-
if (job_conf_.debug() && DisplayNow(step) && grp_id_ == 0)
label[layer->name()] = layer->ToString(true, phase | kForward);
}