You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@singa.apache.org by wa...@apache.org on 2016/08/03 09:06:29 UTC
[1/2] incubator-singa git commit: SINGA-174 Add Batch Normalization
layer and Local Response Normalization layer.
Repository: incubator-singa
Updated Branches:
refs/heads/dev f07e3545c -> db5478efa
SINGA-174 Add Batch Normalization layer and Local Response Normalization
layer.
Add Local Response Normalization layer implementation in C++.
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/db5478ef
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/db5478ef
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/db5478ef
Branch: refs/heads/dev
Commit: db5478efa051738ff3377908a8655201f9f9f18f
Parents: 96ed638
Author: WANG Ji <ij...@gmail.com>
Authored: Sat Jul 30 13:10:45 2016 +0800
Committer: WANG Ji <ij...@gmail.com>
Committed: Wed Aug 3 16:49:13 2016 +0800
----------------------------------------------------------------------
src/model/layer/lrn.cc | 123 +++++++++++++++++++++++++++++++++++++-------
test/singa/test_lrn.cc | 116 +++++++++++++++++++++++++++++++++++++++++
2 files changed, 221 insertions(+), 18 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/db5478ef/src/model/layer/lrn.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/lrn.cc b/src/model/layer/lrn.cc
index f17b743..a624147 100644
--- a/src/model/layer/lrn.cc
+++ b/src/model/layer/lrn.cc
@@ -19,8 +19,9 @@
*
************************************************************/
#include "lrn.h"
+#include <vector>
-namespace singa{
+namespace singa {
RegisterLayerClass(LRN);
void LRN::Setup(const Shape& in_sample, const LayerConf& conf) {
Layer::Setup(in_sample, conf);
@@ -33,27 +34,113 @@ void LRN::Setup(const Shape& in_sample, const LayerConf& conf) {
}
const Tensor LRN::Forward(int flag, const Tensor& input) {
- //Tensor output;
- //const float salpha = alpha_ / local_size_;
- LOG(FATAL) << "Not implemented";
- /* Tensor API may be need
- * 1. set
- * template <typename Dtype>
- * void Set(Dtype val);
- *
- * 2. axpy
- * 3. padding
- *
- *
- */
- Tensor output;
+ Tensor x = input.Clone();  // Forward pass: each channel is divided by a term built from its neighbouring channels.
+ x.Reshape(Shape{input.shape(0), input.Size() / input.shape(0)});  // 2-D view: one row per entry of dim 0
+ vector<Tensor> channels, images;
+ // for each image
+ for (size_t i = 0; i < input.shape(0); ++i) {
+ Tensor image = CopyRows(x, i, i + 1);
+ image.Reshape(Shape{input.shape(1), input.shape(2) * input.shape(3)});  // one row per channel
+ // for each channel of the image
+ channels.clear();
+ for (size_t c = 0; c < input.shape(1); ++c) {
+ Tensor window =  // rows around channel c, with the range clamped to [0, input.shape(1)]
+ CopyRows(image, std::max(0, static_cast<int>(c) - local_size_ / 2),
+ std::min(input.shape(1), c + local_size_ / 2 + 1));
+ window = Square(window);
+
+ Tensor tmp, ch;
+ tmp.Reshape(Shape{input.shape(2) * input.shape(3)});
+ SumRows(window, &tmp);  // presumably sums the squared window over its rows (channels) -- confirm against the Tensor API
+
+ tmp *= alpha_;  // next three steps: tmp = (k_ + alpha_ * tmp)^beta_; alpha_ is used as-is, not divided by local_size_
+ tmp += k_;
+ tmp = Pow(tmp, beta_);
+
+ ch = CopyRows(image, c, c + 1);
+ ch = ch / tmp;  // channel c divided by its normalization term
+ ch.Reshape(Shape{input.shape(2), input.shape(3)});
+ channels.push_back(ch);
+ }
+ Tensor normalized_image = ConcatenateRows(channels);
+ normalized_image.Reshape(
+ Shape{input.shape(1), input.shape(2) * input.shape(3)});
+ images.push_back(normalized_image);
+ }
+ Tensor output = ConcatenateRows(images);
+ output.Reshape(input.shape());  // hand the result back in the caller's shape
+ if ((flag & kTrain) == kTrain) buf_.push(input);  // buffer only for training: Backward pops solely under kTrain, so an unconditional push would grow buf_ on every evaluation call
+
return output;
}
-const std::pair<Tensor, vector<Tensor>> LRN::Backward(
- int flag, const Tensor& grad) {
- LOG(FATAL) << "Not implemented";
+const std::pair<Tensor, vector<Tensor>> LRN::Backward(int flag,  // Returns (dx, empty param-grad list); dx is only filled when flag has kTrain set.
+ const Tensor& grad) {
Tensor dx;
+ if ((flag & kTrain) == kTrain) {
+ Tensor dy = grad.Clone();
+ dy.Reshape(Shape{grad.shape(0), grad.Size() / grad.shape(0)});  // 2-D view: one row per entry of dim 0
+ Tensor x = buf_.top();  // the input that Forward pushed onto buf_
+ buf_.pop();
+ x.Reshape(dy.shape());
+ vector<Tensor> channels, images;
+ // for each image
+ for (size_t i = 0; i < grad.shape(0); ++i) {  // pass 1: rebuild norm = k_ + alpha_ * (windowed sum of x^2), as in Forward before Pow
+ Tensor image = CopyRows(x, i, i + 1);
+ image.Reshape(Shape{grad.shape(1), grad.shape(2) * grad.shape(3)});  // one row per channel
+ // for each channel of the image
+ channels.clear();
+ for (size_t c = 0; c < grad.shape(1); ++c) {
+ Tensor window =  // rows around channel c, with the range clamped to [0, grad.shape(1)]
+ CopyRows(image, std::max(0, static_cast<int>(c) - local_size_ / 2),
+ std::min(grad.shape(1), c + local_size_ / 2 + 1));
+ Tensor tmp;
+ tmp.Reshape(Shape{grad.shape(2) * grad.shape(3)});
+ window = Square(window);
+ SumRows(window, &tmp);  // presumably sums over the window's rows (channels) -- confirm against the Tensor API
+ tmp *= alpha_;
+ tmp += k_;
+ tmp.Reshape(Shape{grad.shape(2), grad.shape(3)});
+ channels.push_back(tmp);
+ }
+ Tensor norm_image = ConcatenateRows(channels);
+ norm_image.Reshape(Shape{grad.shape(1), grad.shape(2) * grad.shape(3)});
+ images.push_back(norm_image);
+ }
+ Tensor norm = ConcatenateRows(images);
+ norm.Reshape(dy.shape());
+ dx = Pow(norm, -beta_);  // first term of the gradient: dy * norm^(-beta_)
+ dx = dx * dy;
+ Tensor tmp = dx * x;  // tmp = dy * x * norm^(-beta_ - 1), the quantity summed over each window in pass 2
+ tmp = tmp / norm;
+ images.clear();
+ for (size_t i = 0; i < grad.shape(0); ++i) {  // pass 2: sum tmp over the same channel window
+ Tensor image = CopyRows(tmp, i, i + 1);
+ image.Reshape(Shape{grad.shape(1), grad.shape(2) * grad.shape(3)});
+ // for each channel of the image
+ channels.clear();
+ for (size_t c = 0; c < grad.shape(1); ++c) {
+ Tensor window =
+ CopyRows(image, std::max(0, static_cast<int>(c) - local_size_ / 2),
+ std::min(grad.shape(1), c + local_size_ / 2 + 1));
+ Tensor tmpr;
+ tmpr.Reshape(Shape{grad.shape(2) * grad.shape(3)});
+ SumRows(window, &tmpr);
+ tmpr.Reshape(Shape{grad.shape(2), grad.shape(3)});
+ channels.push_back(tmpr);
+ }
+ Tensor pooled_image = ConcatenateRows(channels);
+ pooled_image.Reshape(Shape{grad.shape(1), grad.shape(2) * grad.shape(3)});
+ images.push_back(pooled_image);
+ }
+ Tensor tmp2 = ConcatenateRows(images);  // NOTE(review): tmp2 is not reshaped to x's 2-D shape before tmp2 * x -- confirm the element-wise op only needs equal sizes
+ tmp2 *= (-2.0f * beta_ * alpha_);  // second term: -2 * beta_ * alpha_ * x * (windowed sum)
+ tmp2 = tmp2 * x;
+ dx = dx + tmp2;
+ dx.Reshape(grad.shape());  // hand the gradient back in the caller's shape
+ } else {
+ LOG(ERROR) << "Do not call backward for evaluation phase";  // dx stays default-constructed on this path
+ }
vector<Tensor> param_grad;
return std::make_pair(dx, param_grad);
}
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/db5478ef/test/singa/test_lrn.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_lrn.cc b/test/singa/test_lrn.cc
new file mode 100644
index 0000000..5de4535
--- /dev/null
+++ b/test/singa/test_lrn.cc
@@ -0,0 +1,116 @@
+/*********************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+************************************************************/
+
+#include "../src/model/layer/lrn.h"
+#include "gtest/gtest.h"
+
+using namespace singa;
+
+TEST(LRN, Setup) {  // Setup must expose the LRNConf values through the layer's getters.
+ LRN lrn;
+ EXPECT_EQ("LRN", lrn.layer_type());
+
+ LayerConf conf;
+ LRNConf *lrn_conf = conf.mutable_lrn_conf();
+ lrn_conf->set_k(1.0);
+ lrn_conf->set_local_size(3);
+ lrn_conf->set_alpha(0.1);
+ lrn_conf->set_beta(0.75);
+ lrn.Setup(Shape{1}, conf);
+
+ EXPECT_FLOAT_EQ(1.0, lrn.k());  // each getter should return the value written into conf above
+ EXPECT_EQ(3, lrn.local_size());
+ EXPECT_FLOAT_EQ(0.1, lrn.alpha());
+ EXPECT_FLOAT_EQ(0.75, lrn.beta());
+}
+
+TEST(LRN, Forward) {  // Forward on a 2x4x1x1 input must keep the shape and match hand-computed values.
+ LRN lrn;
+ const float x[] = {1, 2, 3, 4, 5, 6, 7, 8};
+ Tensor in(Shape{2, 4, 1, 1});  // 2 images, 4 channels, 1x1 spatial
+ in.CopyDataFromHostPtr(x, 8);
+
+ singa::LayerConf conf;
+ singa::LRNConf *lrn_conf = conf.mutable_lrn_conf();
+ lrn_conf->set_k(1.0);
+ lrn_conf->set_local_size(3);
+ lrn_conf->set_alpha(0.1);
+ lrn_conf->set_beta(0.75);
+ lrn.Setup(Shape{4, 1, 1}, conf);
+
+ Tensor out = lrn.Forward(kTrain, in);
+ const float *outptr = out.data<float>();
+ const auto &shape = out.shape();
+ EXPECT_EQ(4u, shape.size());  // output shape must equal the input shape
+ EXPECT_EQ(2u, shape[0]);
+ EXPECT_EQ(4u, shape[1]);
+ EXPECT_EQ(1u, shape[2]);
+ EXPECT_EQ(1u, shape[3]);
+
+ EXPECT_NEAR(0.737787, outptr[0], 1e-6f);  // e.g. 1 / (1 + 0.1 * (1^2 + 2^2))^0.75; channel 0's window only covers channels 0..1
+ EXPECT_NEAR(1.037221, outptr[1], 1e-6f);
+ EXPECT_NEAR(1.080992, outptr[2], 1e-6f);
+ EXPECT_NEAR(1.563179, outptr[3], 1e-6f);
+ EXPECT_NEAR(1.149545, outptr[4], 1e-6f);  // second image starts here: 5 / (1 + 0.1 * (5^2 + 6^2))^0.75
+ EXPECT_NEAR(0.930604, outptr[5], 1e-6f);
+ EXPECT_NEAR(0.879124, outptr[6], 1e-6f);
+ EXPECT_NEAR(1.218038, outptr[7], 1e-6f);
+}
+
+TEST(LRN, Backward) {  // Backward after a training Forward must return dx in the input shape with the expected values.
+ LRN lrn;
+ const float x[] = {1, 2, 3, 4, 5, 6, 7, 8};
+ Tensor in(Shape{2, 4, 1, 1});  // 2 images, 4 channels, 1x1 spatial
+ in.CopyDataFromHostPtr(x, 8);
+
+ singa::LayerConf conf;
+ singa::LRNConf *lrn_conf = conf.mutable_lrn_conf();
+ lrn_conf->set_k(1.0);
+ lrn_conf->set_local_size(3);
+ lrn_conf->set_alpha(0.1);
+ lrn_conf->set_beta(0.75);
+ lrn.Setup(Shape{4, 1, 1}, conf);
+
+ Tensor out = lrn.Forward(kTrain, in);  // run Forward first so the layer has buffered the input that Backward reads
+
+ const float dy_arr[] = {8, 7, 6, 5, 4, 3, 2, 1};
+ Tensor dy(Shape{2, 4, 1, 1});  // upstream gradient, same shape as the input
+ dy.CopyDataFromHostPtr(dy_arr, 8);
+
+ const auto ret = lrn.Backward(singa::kTrain, dy);
+ singa::Tensor dx = ret.first;  // ret.second (parameter gradients) is not inspected by this test
+ const float *dxptr = dx.data<float>();
+ const auto &shape = dx.shape();
+ EXPECT_EQ(4u, shape.size());  // dx must have the shape of dy
+ EXPECT_EQ(2u, shape[0]);
+ EXPECT_EQ(4u, shape[1]);
+ EXPECT_EQ(1u, shape[2]);
+ EXPECT_EQ(1u, shape[3]);
+
+ EXPECT_NEAR(4.858288752f, dxptr[0], 1e-6f);  // reference values; their derivation is not shown in this file
+ EXPECT_NEAR(1.04332631f, dxptr[1], 1e-6f);
+ EXPECT_NEAR(-0.952648779f, dxptr[2], 1e-6f);
+ EXPECT_NEAR(-0.38373312f, dxptr[3], 1e-6f);
+ EXPECT_NEAR(0.259424615f, dxptr[4], 1e-6f);
+ EXPECT_NEAR(-0.426475393f, dxptr[5], 1e-6f);
+ EXPECT_NEAR(-0.213195118f, dxptr[6], 1e-6f);
+ EXPECT_NEAR(-0.099276183f, dxptr[7], 1e-6f);
+}
[2/2] incubator-singa git commit: SINGA-174 Add Batch Normalization
layer and Local Response Normalization layer.
Posted by wa...@apache.org.
SINGA-174 Add Batch Normalization layer and Local Response Normalization
layer.
Add batch normalization layer implementation in C++.
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/96ed638b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/96ed638b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/96ed638b
Branch: refs/heads/dev
Commit: 96ed638b7b7c1fa66072aaf94f97804b634bdcb3
Parents: f07e354
Author: WANG Ji <ij...@gmail.com>
Authored: Thu Jul 28 21:53:43 2016 +0800
Committer: WANG Ji <ij...@gmail.com>
Committed: Wed Aug 3 16:49:13 2016 +0800
----------------------------------------------------------------------
src/model/layer/batchnorm.cc | 120 ++++++++++++++++++++++++++++++++--
test/singa/test_batchnorm.cc | 132 ++++++++++++++++++++++++++++++++++++++
2 files changed, 248 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/96ed638b/src/model/layer/batchnorm.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/batchnorm.cc b/src/model/layer/batchnorm.cc
index e583f8e..b6edc9e 100644
--- a/src/model/layer/batchnorm.cc
+++ b/src/model/layer/batchnorm.cc
@@ -39,7 +39,7 @@ void BatchNorm::Setup(const Shape& in_sample, const LayerConf& conf) {
dbnBias_.ResetLike(bnBias_);
// Push back params into param_values_
// Assume the order of param is: bnScale, bnBias, runningMean, runningVariance
- for (const auto &spec : conf.param()) param_specs_.push_back(spec);
+ for (const auto& spec : conf.param()) param_specs_.push_back(spec);
}
void BatchNorm::ToDevice(std::shared_ptr<Device> device) {
@@ -52,16 +52,128 @@ void BatchNorm::ToDevice(std::shared_ptr<Device> device) {
}
const Tensor BatchNorm::Forward(int flag, const Tensor& input) {
- LOG(FATAL) << "Not implemented";
- Tensor output;
+ Tensor x = input.Clone();  // Forward pass: normalize a copy of the input, then scale by bnScale_ and shift by bnBias_.
+ x.Reshape(Shape{input.shape(0), input.Size() / input.shape(0)});  // 2-D view: one row per entry of dim 0
+ Tensor output, mean, var, xnorm;
+ output.ResetLike(x);
+
+ if ((flag & kTrain) == kTrain) {  // training: use statistics of the current batch and update the running ones
+ mean = Average(x, 0);  // presumably the mean over axis 0 (across rows) -- confirm against the Tensor API
+ runningMean_ *= 1.0f - factor_;  // with the Axpy below: runningMean_ = (1 - factor_) * runningMean_ + factor_ * mean, assuming Axpy(a, x, &y) does y += a * x
+ Axpy(factor_, mean, &runningMean_);
+ xnorm = x.Clone();
+ SubRow(mean, &xnorm);
+ xnorm = Square(xnorm);  // xnorm temporarily holds (x - mean)^2 to compute the variance
+ var = Average(xnorm, 0);
+ runningVariance_ *= 1.0f - factor_;  // same update rule as runningMean_, applied to var
+ Axpy(factor_, var, &runningVariance_);
+ Tensor tmp = var.Clone();
+ tmp += 1e-6f;  // small constant added before the square root; tmp becomes sqrt(var + 1e-6)
+ tmp = Sqrt(tmp);
+ xnorm = x.Clone();  // recompute x - mean, since xnorm was overwritten by the squares above
+ SubRow(mean, &xnorm);
+ DivRow(tmp, &xnorm);
+ output = xnorm.Clone();
+ MultRow(bnScale_, &output);
+ AddRow(bnBias_, &output);
+ buf_.push(x);  // push order x, mean, var, xnorm; Backward pops them in reverse
+ buf_.push(mean);
+ buf_.push(var);
+ buf_.push(xnorm);
+ } else {  // otherwise: normalize with runningMean_ / runningVariance_; nothing is pushed to buf_
+ xnorm = x.Clone();
+ SubRow(runningMean_, &xnorm);
+ Tensor tmp = runningVariance_.Clone();
+ tmp += 1e-6f;
+ tmp = Sqrt(tmp);
+ DivRow(tmp, &xnorm);
+ output = xnorm.Clone();
+ MultRow(bnScale_, &output);
+ AddRow(bnBias_, &output);
+ }
+
+ output.Reshape(Shape{output.shape(0), channels_, height_, width_});  // back to 4-D using the layer's channels_/height_/width_ members
return output;
}
const std::pair<Tensor, vector<Tensor>> BatchNorm::Backward(
int flag, const Tensor& grad) {
- LOG(FATAL) << "Not implemented";
+ Tensor dy = grad.Clone();  // Backward pass: returns (dx, {dbnScale_, dbnBias_}) when flag has kTrain set.
+ dy.Reshape(Shape{grad.shape(0), grad.Size() / grad.shape(0)});  // 2-D view: one row per entry of dim 0
+ Tensor xnorm = buf_.top();  // pop in reverse of Forward's push order (x, mean, var, xnorm)
+ buf_.pop();
+ Tensor var = buf_.top();
+ buf_.pop();
+ Tensor mean = buf_.top();
+ buf_.pop();
+ Tensor input = buf_.top();  // NOTE(review): these pops run before the flag check, so a matching Forward(kTrain) call is assumed
+ buf_.pop();
+
Tensor dx;
vector<Tensor> param_grad;
+
+ if ((flag & kTrain) == kTrain) {
+ // gxnorm
+ Tensor gxnorm = dy.Clone();  // gradient w.r.t. the normalized input: dy scaled by bnScale_
+ MultRow(bnScale_, &gxnorm);
+ // gvar
+ Tensor tmp = var.Clone();
+ tmp += 1e-6f;
+ tmp = Pow(tmp, -1.5f);  // fix: raise (var + 1e-6) to the power, not raw var, which dropped the epsilon used in Forward
+ tmp *= -0.5f;
+
+ Tensor tmpx = input.Clone();
+ SubRow(mean, &tmpx);
+
+ tmpx = tmpx * gxnorm;
+ MultRow(tmp, &tmpx);
+ Tensor gvar;
+ gvar.ResetLike(var);
+ SumRows(tmpx, &gvar);  // gvar = sum over rows of gxnorm * (input - mean) * -0.5 * (var + 1e-6)^-1.5
+ // gmean
+ tmp = var.Clone();
+ tmp += 1e-6f;
+ tmp = Pow(tmp, -0.5f);
+ tmp *= -1.0f;
+ Tensor tmpx_r;
+ tmpx_r.ResetLike(tmp);
+ SumRows(gxnorm, &tmpx_r);
+ Tensor gmean = tmpx_r * tmp;  // first gmean term: -(var + 1e-6)^-0.5 * sum over rows of gxnorm
+
+ tmpx = input.Clone();
+ SubRow(mean, &tmpx);
+ SumRows(tmpx, &tmp);  // tmp is reused here for the row-sum of (input - mean)
+ tmp *= -2.0f / input.shape(0);
+ tmp = tmp * gvar;
+ gmean = gmean + tmp;  // second gmean term: gvar * (-2 / batch) * sum over rows of (input - mean)
+ // dx
+ tmp = var.Clone();
+ tmp += 1e-6f;
+ tmp = Pow(tmp, -0.5f);
+ dx = gxnorm.Clone();
+ MultRow(tmp, &dx);  // dx term 1: gxnorm * (var + 1e-6)^-0.5
+
+ tmpx = input.Clone();
+ SubRow(mean, &tmpx);
+ tmpx *= 2.0f / input.shape(0);
+ MultRow(gvar, &tmpx);
+ dx = dx + tmpx;  // dx term 2: gvar * 2 * (input - mean) / batch
+
+ tmp = gmean.Clone();
+ tmp *= 1.0f / input.shape(0);
+
+ AddRow(tmp, &dx);  // dx term 3: gmean / batch, added to every row
+ // dbnScale
+ tmpx = dy * xnorm;
+ SumRows(tmpx, &dbnScale_);
+ // dbnBias
+ SumRows(dy, &dbnBias_);
+ param_grad.push_back(dbnScale_);  // returned order: scale gradient first, then bias gradient
+ param_grad.push_back(dbnBias_);
+ } else {
+ LOG(ERROR) << "Do not call backward for evaluation phase";  // dx stays default-constructed on this path
+ }
+ dx.Reshape(Shape{dx.shape(0), channels_, height_, width_});  // NOTE(review): also runs on the error path, where dx is an empty Tensor
return std::make_pair(dx, param_grad);
}
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/96ed638b/test/singa/test_batchnorm.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_batchnorm.cc b/test/singa/test_batchnorm.cc
new file mode 100644
index 0000000..c72dc0f
--- /dev/null
+++ b/test/singa/test_batchnorm.cc
@@ -0,0 +1,132 @@
+/*********************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+************************************************************/
+
+#include "../src/model/layer/batchnorm.h"
+#include "gtest/gtest.h"
+#include <iostream>
+
+using namespace singa;
+
+TEST(BatchNorm, Setup) {  // Setup must record the configured factor and the sample shape's three dims.
+ BatchNorm batchnorm;
+ EXPECT_EQ("BatchNorm", batchnorm.layer_type());
+
+ singa::LayerConf conf;
+ singa::BatchNormConf *batchnorm_conf = conf.mutable_batchnorm_conf();
+ batchnorm_conf->set_factor(0.01);
+ batchnorm.Setup(Shape{2, 4, 4}, conf);
+
+ EXPECT_FLOAT_EQ(0.01, batchnorm.factor());
+ EXPECT_EQ(2u, batchnorm.channels());  // Shape{2, 4, 4} is expected to be read as (channels, height, width)
+ EXPECT_EQ(4u, batchnorm.height());
+ EXPECT_EQ(4u, batchnorm.width());
+}
+
+TEST(BatchNorm, Forward) {  // Training-mode Forward on a batch of two samples with scale 1 and bias 2.
+ BatchNorm batchnorm;
+ const float x[] = {1, 2, 3, 4};
+ Tensor in(Shape{2, 1, 2, 1});  // 2 samples, each with 1 * 2 * 1 = 2 values
+ in.CopyDataFromHostPtr(x, 2 * 1 * 2 * 1);
+ const float alpha_[] = {1, 1};
+ Tensor alpha(Shape{1, 2});  // used below as bnScale
+ alpha.CopyDataFromHostPtr(alpha_, 1 * 2);
+
+ const float beta_[] = {2, 2};
+ Tensor beta(Shape{1, 2});  // used below as bnBias and as the initial running mean/variance
+ beta.CopyDataFromHostPtr(beta_, 1 * 2);
+ singa::LayerConf conf;
+ singa::BatchNormConf *batchnorm_conf = conf.mutable_batchnorm_conf();
+ batchnorm_conf->set_factor(1);
+ batchnorm.Setup(Shape{1, 2, 1}, conf);
+ batchnorm.set_bnScale(alpha);
+ batchnorm.set_bnBias(beta);
+ batchnorm.set_runningMean(beta);
+ batchnorm.set_runningVariance(beta);
+ Tensor out = batchnorm.Forward(kTrain, in);
+ const float *outptr = out.data<float>();
+ const auto &shape = out.shape();
+ EXPECT_EQ(4u, shape.size());  // output shape must equal the input shape
+ EXPECT_EQ(2u, shape[0]);
+ EXPECT_EQ(1u, shape[1]);
+ EXPECT_EQ(2u, shape[2]);
+ EXPECT_EQ(1u, shape[3]);
+ EXPECT_NEAR(1.0f, outptr[0], 1e-6f);  // per-feature batch mean {2, 3}, variance 1: normalized values are -1/+1, and -1 * 1 + 2 = 1
+ EXPECT_NEAR(1.0f, outptr[1], 1e-6f);
+ EXPECT_NEAR(3.0f, outptr[2], 1e-6f);  // +1 * 1 + 2 = 3
+ EXPECT_NEAR(3.0f, outptr[3], 1e-6f);
+}
+
+TEST(BatchNorm, Backward) {  // Backward after a training Forward: checks dx and the scale/bias gradients.
+ BatchNorm batchnorm;
+ const float x[] = {1, 2, 3, 4};
+ Tensor in(Shape{2, 1, 2, 1});  // 2 samples, each with 2 values
+ in.CopyDataFromHostPtr(x, 2 * 1 * 2 * 1);
+ const float dy[] = {4, 3, 2, 1};
+ Tensor dy_in(Shape{2, 1, 2, 1});  // upstream gradient, same shape as the input
+ dy_in.CopyDataFromHostPtr(dy, 2 * 1 * 2 * 1);
+ const float alpha_[] = {1, 1};
+ Tensor alpha(Shape{1, 2});  // used below as bnScale
+ alpha.CopyDataFromHostPtr(alpha_, 1 * 2);
+
+ const float beta_[] = {0, 0};
+ Tensor beta(Shape{1, 2});  // used below as bnBias and as the initial running mean/variance
+ beta.CopyDataFromHostPtr(beta_, 1 * 2);
+ singa::LayerConf conf;
+ singa::BatchNormConf *batchnorm_conf = conf.mutable_batchnorm_conf();
+ batchnorm_conf->set_factor(1);
+ batchnorm.Setup(Shape{1, 2, 1}, conf);
+ batchnorm.set_bnScale(alpha);
+ batchnorm.set_bnBias(beta);
+ batchnorm.set_runningMean(beta);
+ batchnorm.set_runningVariance(beta);
+ Tensor out = batchnorm.Forward(kTrain, in);  // run Forward first so the layer has buffered what Backward pops
+ auto ret = batchnorm.Backward(kTrain, dy_in);
+ Tensor dx = ret.first;
+ const auto & shape = dx.shape();
+ EXPECT_EQ(4u, shape.size());  // dx must have the shape of the input
+ EXPECT_EQ(2u, shape[0]);
+ EXPECT_EQ(1u, shape[1]);
+ EXPECT_EQ(2u, shape[2]);
+ EXPECT_EQ(1u, shape[3]);
+ const float *dxptr = ret.first.data<float>();
+ EXPECT_NEAR(.0f, dxptr[0], 1e-6f);  // dx is expected to be ~0 for this input; note the tolerance is the same size as the 1e-6 epsilon used by the layer
+ EXPECT_NEAR(.0f, dxptr[1], 1e-6f);
+ EXPECT_NEAR(.0f, dxptr[2], 1e-6f);
+ EXPECT_NEAR(.0f, dxptr[3], 1e-6f);
+
+ Tensor dbnScale = ret.second.at(0);  // first parameter gradient: scale
+ const float *dbnScaleptr = dbnScale.data<float>();
+ const auto & dbnScaleShape = dbnScale.shape();
+ EXPECT_EQ(1u, dbnScaleShape.size());
+ EXPECT_EQ(2u, dbnScaleShape[0]);
+
+ EXPECT_NEAR(-2.0f, dbnScaleptr[0], 1e-6f);  // sum of dy * xnorm per feature: 4 * (-1) + 2 * 1 = -2
+ EXPECT_NEAR(-2.0f, dbnScaleptr[1], 1e-6f);  // 3 * (-1) + 1 * 1 = -2
+
+ Tensor dbnBias = ret.second.at(1);  // second parameter gradient: bias
+ const float *dbnBiasptr = dbnBias.data<float>();
+ const auto & dbnBiasShape = dbnBias.shape();
+ EXPECT_EQ(1u, dbnBiasShape.size());
+ EXPECT_EQ(2u, dbnBiasShape[0]);
+
+ EXPECT_NEAR(6.0f, dbnBiasptr[0], 1e-6f);  // sum of dy per feature: 4 + 2 = 6
+ EXPECT_NEAR(4.0f, dbnBiasptr[1], 1e-6f);  // 3 + 1 = 4
+}