Posted to commits@singa.apache.org by zh...@apache.org on 2016/06/13 13:20:21 UTC
[28/50] [abbrv] incubator-singa git commit: SINGA-174 Add Batch Normalization layer and Local Response Normalization layer.
SINGA-174 Add Batch Normalization layer and Local Response Normalization layer.
Skip buffering the input/output tensors in Layer::Forward() when the flag is not
kTrain. The buffered tensors are consumed only by Layer::Backward(), which runs
only during training (kTrain).
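
For illustration, the buffering pattern this commit applies across the layers
looks roughly like the sketch below. This is a minimal, hypothetical
ExampleLayer, not SINGA's actual Layer API; only the Tensor, kTrain, and buf_
names mirror the code in the diff.

#include <cassert>
#include <stack>

struct Tensor { /* placeholder for singa::Tensor */ };
enum Flag { kEval = 0, kTrain = 1 };

class ExampleLayer {
 public:
  Tensor Forward(int flag, const Tensor& input) {
    Tensor output = Compute(input);
    // Buffer only for training: Backward() is the sole consumer of buf_,
    // so pushing during evaluation would only grow memory without bound.
    if (flag & kTrain) buf_.push(input);
    return output;
  }

  Tensor Backward(int flag, const Tensor& grad) {
    assert(!buf_.empty());  // Forward must have been called with kTrain
    Tensor src = buf_.top();
    buf_.pop();
    return ComputeGrad(src, grad);
  }

 private:
  Tensor Compute(const Tensor& in) { return in; }                      // stand-in
  Tensor ComputeGrad(const Tensor& in, const Tensor& g) { return g; }  // stand-in
  std::stack<Tensor> buf_;
};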
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/fa2ea304
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/fa2ea304
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/fa2ea304
Branch: refs/heads/master
Commit: fa2ea304d8989818a80780c9f428e0fcc19db031
Parents: eadd3f9
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Thu Jun 2 14:01:45 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Thu Jun 2 14:02:08 2016 +0800
----------------------------------------------------------------------
src/model/layer/activation.cc | 10 +++++++---
src/model/layer/cudnn_activation.cc | 13 ++++++++-----
src/model/layer/cudnn_convolution.cc | 3 ++-
src/model/layer/cudnn_lrn.cc | 16 ++++++++++------
src/model/layer/cudnn_pooling.cc | 7 +++++--
src/model/layer/cudnn_softmax.cc | 4 +++-
src/model/layer/dense.cc | 5 +++--
src/model/layer/softmax.cc | 10 +++++++---
test/singa/test_activation.cc | 8 ++++----
test/singa/test_cudnn_activation.cc | 6 +++---
test/singa/test_cudnn_softmax.cc | 6 +++---
test/singa/test_softmax.cc | 6 +++---
12 files changed, 58 insertions(+), 36 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/fa2ea304/src/model/layer/activation.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/activation.cc b/src/model/layer/activation.cc
index 464e24d..e7c0696 100644
--- a/src/model/layer/activation.cc
+++ b/src/model/layer/activation.cc
@@ -32,13 +32,16 @@ const Tensor Activation::Forward(int flag, const Tensor& input) {
Tensor output;
if (mode_ == "SIGMOID") {
output = Sigmoid(input);
- buf_.push(output);
+ if (flag & kTrain)
+ buf_.push(output);
} else if (mode_ == "TANH") {
output = Tanh(input);
- buf_.push(output);
+ if (flag & kTrain)
+ buf_.push(output);
} else if (mode_ == "RELU") {
output = ReLU(input);
- buf_.push(input);
+ if (flag & kTrain)
+ buf_.push(input);
} else {
LOG(FATAL) << "Unknown activation: " << mode_;
}
@@ -48,6 +51,7 @@ const Tensor Activation::Forward(int flag, const Tensor& input) {
const std::pair<Tensor, vector<Tensor>> Activation::Backward(
int flag, const Tensor& grad) {
vector<Tensor> param_grad;
+ CHECK(!buf_.empty());
// inout means either input or output, but only one is valid for an
// activation.
Tensor input_grad, inout = buf_.top();
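
Note the asymmetry in the hunk above: SIGMOID and TANH buffer the output while
RELU buffers the input. The reason is that the sigmoid and tanh gradients can be
computed from the forward output alone (y * (1 - y) and 1 - y * y respectively),
whereas ReLU's gradient depends on the sign of the input. A minimal element-wise
sketch of the backward rules (plain floats standing in for Tensor entries; not
the SINGA API):

// dy is the upstream gradient for a single element.
inline float SigmoidGrad(float y, float dy) { return dy * y * (1.0f - y); }  // needs output y
inline float TanhGrad(float y, float dy) { return dy * (1.0f - y * y); }     // needs output y
inline float ReluGrad(float x, float dy) { return x > 0.0f ? dy : 0.0f; }    // needs input x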
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/fa2ea304/src/model/layer/cudnn_activation.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/cudnn_activation.cc b/src/model/layer/cudnn_activation.cc
index 73c70d7..8ecbbc7 100644
--- a/src/model/layer/cudnn_activation.cc
+++ b/src/model/layer/cudnn_activation.cc
@@ -75,11 +75,13 @@ const Tensor CudnnActivation::Forward(int flag, const Tensor& input) {
inblob->data(), &beta, this->desc_, outblob->mutable_data()));
#endif
}, {input.blob()}, {output.blob()});
- if (cudnn_mode_ == CUDNN_ACTIVATION_SIGMOID ||
- cudnn_mode_ == CUDNN_ACTIVATION_TANH) {
- buf_.push(output);
- } else if (cudnn_mode_ == CUDNN_ACTIVATION_RELU) {
- buf_.push(input);
+ if (flag & kTrain) {
+ if (cudnn_mode_ == CUDNN_ACTIVATION_SIGMOID ||
+ cudnn_mode_ == CUDNN_ACTIVATION_TANH) {
+ buf_.push(output);
+ } else if (cudnn_mode_ == CUDNN_ACTIVATION_RELU) {
+ buf_.push(input);
+ }
}
return output;
}
@@ -88,6 +90,7 @@ const std::pair<Tensor, vector<Tensor>> CudnnActivation::Backward(
int flag, const Tensor& grad) {
vector<Tensor> param_grad;
Tensor dx; // inout = buf_.top();
+ CHECK(!buf_.empty());
// inout means either used as input or output, only one is valid for one type
// of activation
Tensor inout = buf_.top();
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/fa2ea304/src/model/layer/cudnn_convolution.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/cudnn_convolution.cc b/src/model/layer/cudnn_convolution.cc
index 922b7e0..97aa256 100644
--- a/src/model/layer/cudnn_convolution.cc
+++ b/src/model/layer/cudnn_convolution.cc
@@ -138,7 +138,7 @@ void CudnnConvolution::InitCudnn(const Tensor& input) {
const Tensor CudnnConvolution::Forward(int flag, const Tensor &input) {
CHECK_EQ(input.device()->lang(), kCuda);
CHECK_EQ(input.nDim(), 4u);
- buf_.push(input);
+ if (flag & kTrain) buf_.push(input); // buffer the input for backward
size_t batchsize = input.shape()[0];
DataType dtype = input.data_type();
Device *dev = input.device();
@@ -175,6 +175,7 @@ const std::pair<Tensor, vector<Tensor>> CudnnConvolution::Backward(
CHECK(has_init_cudnn_);
CHECK_EQ(grad.device()->lang(), kCuda);
CHECK_EQ(grad.nDim(), 4u);
+ CHECK(!buf_.empty());
Tensor src_data = buf_.top();
buf_.pop();
vector<Tensor> param_grad;
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/fa2ea304/src/model/layer/cudnn_lrn.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/cudnn_lrn.cc b/src/model/layer/cudnn_lrn.cc
index ee661b6..1012813 100644
--- a/src/model/layer/cudnn_lrn.cc
+++ b/src/model/layer/cudnn_lrn.cc
@@ -33,7 +33,7 @@ void CudnnLRN::InitCudnn(const Shape& shape , DataType dtype) {
CHECK(!has_init_cudnn_);
mode_ = CUDNN_LRN_CROSS_CHANNEL_DIM1;
CUDNN_CHECK(cudnnCreateTensorDescriptor(&shape_desc_));
- CHECK_EQ(shape.size(), 4);
+ CHECK_EQ(shape.size(), 4u);
CUDNN_CHECK(cudnnSetTensor4dDescriptor(shape_desc_,
CUDNN_TENSOR_NCHW,
GetCudnnDataType(dtype),
@@ -58,9 +58,9 @@ const Tensor CudnnLRN::Forward(int flag, const Tensor& input) {
output.ResetLike(input);
output.device()->Exec(
[=](Context* ctx) {
- Blob *inblob = input.blob(), *outblob = output.blob();
- const float alpha = 1.0f, beta = 0.0f;
- CUDNN_CHECK(cudnnLRNCrossChannelForward(ctx->cudnn_handle,
+ Blob *inblob = input.blob(), *outblob = output.blob();
+ const float alpha = 1.0f, beta = 0.0f;
+ CUDNN_CHECK(cudnnLRNCrossChannelForward(ctx->cudnn_handle,
this->lrn_desc_,
this->mode_,
&alpha,
@@ -70,8 +70,11 @@ const Tensor CudnnLRN::Forward(int flag, const Tensor& input) {
this->shape_desc_,
outblob->mutable_data()));
}, {input.blob()}, {output.blob()});
- buf_.push(input);
- buf_.push(output);
+
+ if (flag & kTrain) {
+ buf_.push(input);
+ buf_.push(output);
+ }
return output;
}
@@ -79,6 +82,7 @@ const std::pair<Tensor, vector<Tensor>> CudnnLRN::Backward(
int flag, const Tensor& grad) {
vector <Tensor> param_grad;
Tensor dx;
+ CHECK(!buf_.empty());
Tensor output = buf_.top();
buf_.pop();
Tensor input = buf_.top();
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/fa2ea304/src/model/layer/cudnn_pooling.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/cudnn_pooling.cc b/src/model/layer/cudnn_pooling.cc
index afbc490..842685d 100644
--- a/src/model/layer/cudnn_pooling.cc
+++ b/src/model/layer/cudnn_pooling.cc
@@ -80,7 +80,6 @@ void CudnnPooling::InitCudnn(const Tensor& input) {
const Tensor CudnnPooling::Forward(int flag, const Tensor &input) {
CHECK_EQ(input.device()->lang(), kCuda);
CHECK_EQ(input.nDim(), 4u);
- buf_.push(input);
size_t batchsize = input.shape(0);
DataType dtype = input.data_type();
Device *dev = input.device();
@@ -97,7 +96,10 @@ const Tensor CudnnPooling::Forward(int flag, const Tensor &input) {
outblob->mutable_data());
},
{input.blob()}, {output.blob()});
- buf_.push(output);
+ if (flag & kTrain) {
+ buf_.push(input);
+ buf_.push(output);
+ }
return output;
}
@@ -106,6 +108,7 @@ const std::pair<Tensor, vector<Tensor>> CudnnPooling::Backward(
CHECK_EQ(grad.device()->lang(), kCuda);
CHECK_EQ(grad.nDim(), 4u);
vector<Tensor> param_grad;
+ CHECK(!buf_.empty());
Tensor y = buf_.top();
buf_.pop();
Tensor x = buf_.top();
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/fa2ea304/src/model/layer/cudnn_softmax.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/cudnn_softmax.cc b/src/model/layer/cudnn_softmax.cc
index bc7fe78..85b0c3d 100644
--- a/src/model/layer/cudnn_softmax.cc
+++ b/src/model/layer/cudnn_softmax.cc
@@ -53,13 +53,15 @@ const Tensor CudnnSoftmax::Forward(int flag, const Tensor& input) {
&alpha, this->desc_, inblob->data(), &beta, this->desc_,
outblob->mutable_data());
}, {input.blob()}, {output.blob()});
- buf_.push(output);
+ if (flag & kTrain)
+ buf_.push(output);
return output;
}
const std::pair<Tensor, vector<Tensor>> CudnnSoftmax::Backward(
int flag, const Tensor& grad) {
vector<Tensor> param_grad;
+ CHECK(!buf_.empty());
Tensor dx, output = buf_.top();
buf_.pop();
dx.ResetLike(grad);
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/fa2ea304/src/model/layer/dense.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/dense.cc b/src/model/layer/dense.cc
index 29ff8cb..b349787 100644
--- a/src/model/layer/dense.cc
+++ b/src/model/layer/dense.cc
@@ -45,13 +45,13 @@ void Dense::Setup(const LayerConf &conf) {
/// \copydoc Layer::Forward(int flag, const Tensor&)
const Tensor Dense::Forward(int flag, const Tensor &input) {
Tensor output;
-
if (transpose_) // use the transposed version of weight_ for computing
output = Mult(input, weight_);
else
output = Mult(input, weight_.T());
AddRow(bias_, &output);
- buf_.push(input);
+ if (flag & kTrain)
+ buf_.push(input);
return output;
}
@@ -59,6 +59,7 @@ const Tensor Dense::Forward(int flag, const Tensor &input) {
const std::pair<Tensor, vector<Tensor>> Dense::Backward(int flag,
const Tensor &grad) {
vector<Tensor> param_grad;
+ CHECK(!buf_.empty());
Tensor src_data = buf_.top();
buf_.pop();
Tensor db, dw, dx;
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/fa2ea304/src/model/layer/softmax.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/softmax.cc b/src/model/layer/softmax.cc
index 813ebf0..b379fc1 100644
--- a/src/model/layer/softmax.cc
+++ b/src/model/layer/softmax.cc
@@ -25,13 +25,16 @@ void Softmax::Setup(const LayerConf& conf) {
}
const Tensor Softmax::Forward(int flag, const Tensor& input) {
+ Tensor output;
if (input.nDim() == 1) {
Tensor tmp = Reshape(input, Shape{1, input.Size()});
- buf_.push(SoftMax(tmp, 0));
+ output = SoftMax(tmp, 0);
} else {
- buf_.push(SoftMax(input, axis_));
+ output = SoftMax(input, axis_);
}
- return buf_.top();
+ if (flag & kTrain)
+ buf_.push(output);
+ return output;
}
const std::pair<Tensor, vector<Tensor>> Softmax::Backward(int flag,
@@ -43,6 +46,7 @@ const std::pair<Tensor, vector<Tensor>> Softmax::Backward(int flag,
}
Tensor input_grad = grad.Clone();
input_grad.Reshape(Shape{nrow, ncol});
+ CHECK(!buf_.empty());
Tensor y = buf_.top();
buf_.pop();
CHECK(y.shape() == input_grad.shape());
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/fa2ea304/test/singa/test_activation.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_activation.cc b/test/singa/test_activation.cc
index 9e34282..2d88121 100644
--- a/test/singa/test_activation.cc
+++ b/test/singa/test_activation.cc
@@ -57,7 +57,7 @@ TEST(Activation, Forward) {
}
acti.Setup(conf);
- singa::Tensor out = acti.Forward(0, in);
+ singa::Tensor out = acti.Forward(singa::kTrain, in);
const float* yptr = out.data<const float*>();
EXPECT_EQ(n, out.Size());
@@ -90,7 +90,7 @@ TEST(Activation, Backward) {
in.CopyDataFromHostPtr<float>(x, n);
float neg_slope = 0.5f;
- std::string types[] = {"SIGMOID","TANH","RELU"};
+ std::string types[] = {"SIGMOID","TANH","RELU"};
for (int j = 0; j < 3; j++) {
Activation acti;
singa::LayerConf conf;
@@ -102,13 +102,13 @@ TEST(Activation, Backward) {
}
acti.Setup(conf);
- singa::Tensor out = acti.Forward(0, in);
+ singa::Tensor out = acti.Forward(singa::kTrain, in);
const float* yptr = out.data<const float*>();
const float grad[] = {2.0f, -3.0f, 1.0f, 3.0f, -1.0f, -2.0};
singa::Tensor out_diff(singa::Shape{n});
out_diff.CopyDataFromHostPtr<float>(grad, n);
- const auto in_diff = acti.Backward(0, out_diff);
+ const auto in_diff = acti.Backward(singa::kTrain, out_diff);
const float* xptr = in_diff.first.data<const float*>();
float* dx = new float[n];
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/fa2ea304/test/singa/test_cudnn_activation.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_cudnn_activation.cc b/test/singa/test_cudnn_activation.cc
index ee9f9b5..892b80b 100644
--- a/test/singa/test_cudnn_activation.cc
+++ b/test/singa/test_cudnn_activation.cc
@@ -64,7 +64,7 @@ TEST(TCudnnActivation, Forward) {
acti.Setup(conf);
// acti.InitCudnn(n, singa::kFloat32);
- singa::Tensor out = acti.Forward(0, in);
+ singa::Tensor out = acti.Forward(singa::kTrain, in);
EXPECT_EQ(n, out.Size());
singa::CppCPU host(0, 1);
out.ToDevice(&host);
@@ -103,7 +103,7 @@ TEST(TCudnnActivation, Backward) {
}
acti.Setup(conf);
acti.InitCudnn(n, singa::kFloat32);
- singa::Tensor out = acti.Forward(0, in);
+ singa::Tensor out = acti.Forward(singa::kTrain, in);
EXPECT_EQ(n, out.Size());
singa::CppCPU host(0, 1);
out.ToDevice(&host);
@@ -113,7 +113,7 @@ TEST(TCudnnActivation, Backward) {
-1.0, 1.5, 2.5, -1.5, -2.5};
singa::Tensor out_diff(singa::Shape{n}, &cuda);
out_diff.CopyDataFromHostPtr<float>(grad, n);
- const auto ret = acti.Backward(0, out_diff);
+ const auto ret = acti.Backward(singa::kTrain, out_diff);
singa::Tensor in_diff = ret.first;
in_diff.ToDevice(&host);
const float* xptr = in_diff.data<const float*>();
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/fa2ea304/test/singa/test_cudnn_softmax.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_cudnn_softmax.cc b/test/singa/test_cudnn_softmax.cc
index dcbf1ed..05783e2 100644
--- a/test/singa/test_cudnn_softmax.cc
+++ b/test/singa/test_cudnn_softmax.cc
@@ -55,7 +55,7 @@ TEST(CudnnSoftmax, Forward) {
sft.Setup(conf);
sft.InitCudnn(n, singa::kFloat32);
- singa::Tensor out = sft.Forward(0, in);
+ singa::Tensor out = sft.Forward(singa::kTrain, in);
singa::CppCPU host(0, 1);
out.ToDevice(&host);
const float* yptr = out.data<const float*>();
@@ -83,7 +83,7 @@ TEST(CudnnSoftmax, Backward) {
singa::SoftmaxConf* softmaxconf = conf.mutable_softmax_conf();
softmaxconf->set_axis(axis);
sft.Setup(conf);
- singa::Tensor out = sft.Forward(0, in);
+ singa::Tensor out = sft.Forward(singa::kTrain, in);
singa::CppCPU host(0, 1);
out.ToDevice(&host);
const float* yptr = out.data<const float*>();
@@ -91,7 +91,7 @@ TEST(CudnnSoftmax, Backward) {
const float grad[] = {2.0f, -3.0f, 1.0f, 3.0f, -1.0f, -2.0};
singa::Tensor out_diff(singa::Shape{n}, &cuda);
out_diff.CopyDataFromHostPtr<float>(grad, n);
- const auto ret = sft.Backward(0, out_diff);
+ const auto ret = sft.Backward(singa::kTrain, out_diff);
singa::Tensor in_diff = ret.first;
in_diff.ToDevice(&host);
const float* xptr = in_diff.data<const float*>();
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/fa2ea304/test/singa/test_softmax.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_softmax.cc b/test/singa/test_softmax.cc
index da2a6ef..6ee8b3f 100644
--- a/test/singa/test_softmax.cc
+++ b/test/singa/test_softmax.cc
@@ -51,7 +51,7 @@ TEST(Softmax, Forward) {
softmaxconf->set_axis(axis);
sft.Setup(conf);
- singa::Tensor out = sft.Forward(0, in);
+ singa::Tensor out = sft.Forward(singa::kTrain, in);
const float* yptr = out.data<const float*>();
EXPECT_EQ(n, out.Size());
@@ -84,13 +84,13 @@ TEST(Softmax, Backward) {
singa::SoftmaxConf* softmaxconf = conf.mutable_softmax_conf();
softmaxconf->set_axis(axis);
sft.Setup(conf);
- singa::Tensor out = sft.Forward(0, in);
+ singa::Tensor out = sft.Forward(singa::kTrain, in);
const float* yptr = out.data<const float*>();
const float grad[] = {2.0f, -3.0f, 1.0f, 3.0f, -1.0f, -2.0};
singa::Tensor out_diff(singa::Shape{row, col});
out_diff.CopyDataFromHostPtr<float>(grad, n);
- const auto in_diff = sft.Backward(0, out_diff);
+ const auto in_diff = sft.Backward(singa::kTrain, out_diff);
const float* xptr = in_diff.first.data<const float*>();
float* dx = new float[n];