You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@singa.apache.org by wa...@apache.org on 2016/08/03 09:06:30 UTC
[2/2] incubator-singa git commit: SINGA-174 Add Batch Normalization layer and Local Response Normalization layer.

SINGA-174 Add Batch Normalization layer and Local Response Normalization
layer.

Add the batch normalization layer implementation in C++.


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/96ed638b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/96ed638b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/96ed638b

Branch: refs/heads/dev
Commit: 96ed638b7b7c1fa66072aaf94f97804b634bdcb3
Parents: f07e354
Author: WANG Ji <ij...@gmail.com>
Authored: Thu Jul 28 21:53:43 2016 +0800
Committer: WANG Ji <ij...@gmail.com>
Committed: Wed Aug 3 16:49:13 2016 +0800

----------------------------------------------------------------------
 src/model/layer/batchnorm.cc | 120 ++++++++++++++++++++++++++++++++--
 test/singa/test_batchnorm.cc | 132 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 248 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/96ed638b/src/model/layer/batchnorm.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/batchnorm.cc b/src/model/layer/batchnorm.cc
index e583f8e..b6edc9e 100644
--- a/src/model/layer/batchnorm.cc
+++ b/src/model/layer/batchnorm.cc
@@ -39,7 +39,7 @@ void BatchNorm::Setup(const Shape& in_sample, const LayerConf& conf) {
   dbnBias_.ResetLike(bnBias_);
   // Push back params into param_values_
   // Assume the order of param is: bnScale, bnBias, runningMean, runningVariance
-  for (const auto &spec : conf.param()) param_specs_.push_back(spec);
+  for (const auto& spec : conf.param()) param_specs_.push_back(spec);
 }
 
 void BatchNorm::ToDevice(std::shared_ptr<Device> device) {
@@ -52,16 +52,128 @@ void BatchNorm::ToDevice(std::shared_ptr<Device> device) {
 }
 
 const Tensor BatchNorm::Forward(int flag, const Tensor& input) {
-  LOG(FATAL) << "Not implemented";
-  Tensor output;
+  Tensor x = input.Clone();  // reshape a copy, not the caller's tensor
+  x.Reshape(Shape{input.shape(0), input.Size() / input.shape(0)});  // 2-D, dim 0 kept
+  Tensor output, mean, var, xnorm;
+  output.ResetLike(x);
+  // Normalize x with this batch's statistics when training, else with the running ones.
+  if ((flag & kTrain) == kTrain) {
+    mean = Average(x, 0);  // average along axis 0
+    runningMean_ *= 1.0f - factor_;
+    Axpy(factor_, mean, &runningMean_);  // presumably runningMean_ += factor_ * mean
+    xnorm = x.Clone();
+    SubRow(mean, &xnorm);
+    xnorm = Square(xnorm);  // xnorm is scratch here: squared deviations from mean
+    var = Average(xnorm, 0);  // variance along axis 0
+    runningVariance_ *= 1.0f - factor_;
+    Axpy(factor_, var, &runningVariance_);  // same moving-average update as the mean
+    Tensor tmp = var.Clone();
+    tmp += 1e-6f;  // presumably guards the division below against zero variance
+    tmp = Sqrt(tmp);
+    xnorm = x.Clone();
+    SubRow(mean, &xnorm);
+    DivRow(tmp, &xnorm);  // xnorm = (x - mean) / sqrt(var + 1e-6)
+    output = xnorm.Clone();
+    MultRow(bnScale_, &output);
+    AddRow(bnBias_, &output);  // output = xnorm * bnScale_ + bnBias_
+    buf_.push(x);  // Backward() pops these four in reverse order
+    buf_.push(mean);
+    buf_.push(var);
+    buf_.push(xnorm);
+  } else {
+    xnorm = x.Clone();
+    SubRow(runningMean_, &xnorm);
+    Tensor tmp = runningVariance_.Clone();
+    tmp += 1e-6f;
+    tmp = Sqrt(tmp);
+    DivRow(tmp, &xnorm);  // xnorm = (x - runningMean_) / sqrt(runningVariance_ + 1e-6)
+    output = xnorm.Clone();
+    MultRow(bnScale_, &output);
+    AddRow(bnBias_, &output);
+  }
+  // Back to a 4-D shape built from the members channels_, height_ and width_.
+  output.Reshape(Shape{output.shape(0), channels_, height_, width_});
   return output;
 }
 
 const std::pair<Tensor, vector<Tensor>> BatchNorm::Backward(
     int flag, const Tensor& grad) {
-  LOG(FATAL) << "Not implemented";
+  Tensor dy = grad.Clone();  // reshape a copy, not the caller's gradient tensor
+  dy.Reshape(Shape{grad.shape(0), grad.Size() / grad.shape(0)});  // 2-D, dim 0 kept
+  Tensor xnorm = buf_.top();  // Forward() pushed x, mean, var, xnorm; pop in reverse
+  buf_.pop();
+  Tensor var = buf_.top();
+  buf_.pop();
+  Tensor mean = buf_.top();
+  buf_.pop();
+  Tensor input = buf_.top();
+  buf_.pop();
+  // NOTE(review): the pops above run for any flag; confirm buf_ cannot be empty here.
   Tensor dx;
   vector<Tensor> param_grad;
+  // Returns (dx, {dbnScale_, dbnBias_}); gradients are computed only when training.
+  if ((flag & kTrain) == kTrain) {
+    // gxnorm: gradient w.r.t. the normalized input, dy * bnScale_
+    Tensor gxnorm = dy.Clone();
+    MultRow(bnScale_, &gxnorm);
+    // gvar: -0.5 * (var + 1e-6)^-1.5 * sum over axis 0 of (x - mean) * gxnorm
+    Tensor tmp = var.Clone();
+    tmp += 1e-6f;
+    tmp = Pow(tmp, -1.5f);  // use var + 1e-6, the denominator Forward() divides by
+    tmp *= -0.5f;
+
+    Tensor tmpx = input.Clone();
+    SubRow(mean, &tmpx);
+
+    tmpx = tmpx * gxnorm;
+    MultRow(tmp, &tmpx);
+    Tensor gvar;
+    gvar.ResetLike(var);
+    SumRows(tmpx, &gvar);
+    // gmean: -(var + 1e-6)^-0.5 * sum(gxnorm) + gvar * (-2 / N) * sum(x - mean)
+    tmp = var.Clone();
+    tmp += 1e-6f;
+    tmp = Pow(tmp, -0.5f);
+    tmp *= -1.0f;
+    Tensor tmpx_r;
+    tmpx_r.ResetLike(tmp);
+    SumRows(gxnorm, &tmpx_r);
+    Tensor gmean = tmpx_r * tmp;
+
+    tmpx = input.Clone();
+    SubRow(mean, &tmpx);
+    SumRows(tmpx, &tmp);
+    tmp *= -2.0f / input.shape(0);  // N is input.shape(0)
+    tmp = tmp * gvar;
+    gmean = gmean + tmp;
+    // dx: gxnorm * (var + 1e-6)^-0.5 + gvar * 2 * (x - mean) / N + gmean / N
+    tmp = var.Clone();
+    tmp += 1e-6f;
+    tmp = Pow(tmp, -0.5f);
+    dx = gxnorm.Clone();
+    MultRow(tmp, &dx);
+
+    tmpx = input.Clone();
+    SubRow(mean, &tmpx);
+    tmpx *= 2.0f / input.shape(0);
+    MultRow(gvar, &tmpx);
+    dx = dx + tmpx;
+
+    tmp = gmean.Clone();
+    tmp *= 1.0f / input.shape(0);
+
+    AddRow(tmp, &dx);
+    // dbnScale: sum over axis 0 of dy * xnorm
+    tmpx = dy * xnorm;
+    SumRows(tmpx, &dbnScale_);
+    // dbnBias: sum over axis 0 of dy
+    SumRows(dy, &dbnBias_);
+    param_grad.push_back(dbnScale_);
+    param_grad.push_back(dbnBias_);
+  } else {
+    LOG(ERROR) << "Do not call backward for evaluation phase";
+  }
+  dx.Reshape(Shape{dx.shape(0), channels_, height_, width_});  // NOTE(review): dx is unset on the eval path
   return std::make_pair(dx, param_grad);
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/96ed638b/test/singa/test_batchnorm.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_batchnorm.cc b/test/singa/test_batchnorm.cc
new file mode 100644
index 0000000..c72dc0f
--- /dev/null
+++ b/test/singa/test_batchnorm.cc
@@ -0,0 +1,132 @@
+/*********************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*   http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied.  See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+************************************************************/
+
+#include "../src/model/layer/batchnorm.h"
+#include "gtest/gtest.h"
+#include <iostream>
+
+using namespace singa;
+
+TEST(BatchNorm, Setup) {  // Setup() must record the factor and the sample shape
+  BatchNorm batchnorm;
+  EXPECT_EQ("BatchNorm", batchnorm.layer_type());
+
+  singa::LayerConf conf;
+  singa::BatchNormConf *batchnorm_conf = conf.mutable_batchnorm_conf();
+  batchnorm_conf->set_factor(0.01);
+  batchnorm.Setup(Shape{2, 4, 4}, conf);  // sample shape passed as {2, 4, 4}
+
+  EXPECT_FLOAT_EQ(0.01, batchnorm.factor());
+  EXPECT_EQ(2u, batchnorm.channels());  // shape entries are read back in order
+  EXPECT_EQ(4u, batchnorm.height());
+  EXPECT_EQ(4u, batchnorm.width());
+}
+
+TEST(BatchNorm, Forward) {  // training-mode forward pass on a 2x2 flattened batch
+  BatchNorm batchnorm;
+  const float x[] = {1, 2, 3, 4};  // flattened by Forward() to rows {1, 2} and {3, 4}
+  Tensor in(Shape{2, 1, 2, 1});
+  in.CopyDataFromHostPtr(x, 2 * 1 * 2 * 1);
+  const float alpha_[] = {1, 1};  // scale of 1
+  Tensor alpha(Shape{1, 2});
+  alpha.CopyDataFromHostPtr(alpha_, 1 * 2);
+
+  const float beta_[] = {2, 2};  // bias of 2; also reused as the initial running stats
+  Tensor beta(Shape{1, 2});
+  beta.CopyDataFromHostPtr(beta_, 1 * 2);
+  singa::LayerConf conf;
+  singa::BatchNormConf *batchnorm_conf = conf.mutable_batchnorm_conf();
+  batchnorm_conf->set_factor(1);
+  batchnorm.Setup(Shape{1, 2, 1}, conf);
+  batchnorm.set_bnScale(alpha);
+  batchnorm.set_bnBias(beta);
+  batchnorm.set_runningMean(beta);
+  batchnorm.set_runningVariance(beta);
+  Tensor out = batchnorm.Forward(kTrain, in);
+  const float *outptr = out.data<float>();
+  const auto &shape = out.shape();
+  EXPECT_EQ(4u, shape.size());  // output is reshaped back to 4-D
+  EXPECT_EQ(2u, shape[0]);
+  EXPECT_EQ(1u, shape[1]);
+  EXPECT_EQ(2u, shape[2]);
+  EXPECT_EQ(1u, shape[3]);
+  EXPECT_NEAR(1.0f, outptr[0], 1e-6f);  // means {2, 3}, variances {1, 1}: -1 * 1 + 2
+  EXPECT_NEAR(1.0f, outptr[1], 1e-6f);
+  EXPECT_NEAR(3.0f, outptr[2], 1e-6f);  // +1 * 1 + 2
+  EXPECT_NEAR(3.0f, outptr[3], 1e-6f);
+}
+
+TEST(BatchNorm, Backward) {  // training-mode forward followed by backward
+  BatchNorm batchnorm;
+  const float x[] = {1, 2, 3, 4};  // flattened by Forward() to rows {1, 2} and {3, 4}
+  Tensor in(Shape{2, 1, 2, 1});
+  in.CopyDataFromHostPtr(x, 2 * 1 * 2 * 1);
+  const float dy[] = {4, 3, 2, 1};  // upstream gradient, rows {4, 3} and {2, 1}
+  Tensor dy_in(Shape{2, 1, 2, 1});
+  dy_in.CopyDataFromHostPtr(dy, 2 * 1 * 2 * 1);
+  const float alpha_[] = {1, 1};  // scale of 1
+  Tensor alpha(Shape{1, 2});
+  alpha.CopyDataFromHostPtr(alpha_, 1 * 2);
+
+  const float beta_[] = {0, 0};  // zero bias; also reused as the initial running stats
+  Tensor beta(Shape{1, 2});
+  beta.CopyDataFromHostPtr(beta_, 1 * 2);
+  singa::LayerConf conf;
+  singa::BatchNormConf *batchnorm_conf = conf.mutable_batchnorm_conf();
+  batchnorm_conf->set_factor(1);
+  batchnorm.Setup(Shape{1, 2, 1}, conf);
+  batchnorm.set_bnScale(alpha);
+  batchnorm.set_bnBias(beta);
+  batchnorm.set_runningMean(beta);
+  batchnorm.set_runningVariance(beta);
+  Tensor out = batchnorm.Forward(kTrain, in);  // fills the buffers Backward() pops
+  auto ret = batchnorm.Backward(kTrain, dy_in);
+  Tensor dx = ret.first;
+  const auto & shape = dx.shape();
+  EXPECT_EQ(4u, shape.size());  // dx is reshaped back to 4-D
+  EXPECT_EQ(2u, shape[0]);
+  EXPECT_EQ(1u, shape[1]);
+  EXPECT_EQ(2u, shape[2]);
+  EXPECT_EQ(1u, shape[3]);
+  const float *dxptr = ret.first.data<float>();
+  EXPECT_NEAR(.0f, dxptr[0], 1e-6f);  // the three dx terms cancel for this input
+  EXPECT_NEAR(.0f, dxptr[1], 1e-6f);
+  EXPECT_NEAR(.0f, dxptr[2], 1e-6f);
+  EXPECT_NEAR(.0f, dxptr[3], 1e-6f);
+  // First parameter gradient: dbnScale.
+  Tensor dbnScale = ret.second.at(0);
+  const float *dbnScaleptr = dbnScale.data<float>();
+  const auto & dbnScaleShape = dbnScale.shape();
+  EXPECT_EQ(1u, dbnScaleShape.size());
+  EXPECT_EQ(2u, dbnScaleShape[0]);
+
+  EXPECT_NEAR(-2.0f, dbnScaleptr[0], 1e-6f);  // 4 * -1 + 2 * 1
+  EXPECT_NEAR(-2.0f, dbnScaleptr[1], 1e-6f);  // 3 * -1 + 1 * 1
+  // Second parameter gradient: dbnBias.
+  Tensor dbnBias = ret.second.at(1);
+  const float *dbnBiasptr = dbnBias.data<float>();
+  const auto & dbnBiasShape = dbnBias.shape();
+  EXPECT_EQ(1u, dbnBiasShape.size());
+  EXPECT_EQ(2u, dbnBiasShape[0]);
+
+  EXPECT_NEAR(6.0f, dbnBiasptr[0], 1e-6f);  // 4 + 2
+  EXPECT_NEAR(4.0f, dbnBiasptr[1], 1e-6f);  // 3 + 1
+}