Posted to commits@singa.apache.org by zh...@apache.org on 2018/07/12 08:39:53 UTC
[5/5] incubator-singa git commit: SINGA-379 Implement batchnorm operation and its related functions for autograd
SINGA-379 Implement batchnorm operation and its related functions for autograd
Test mnist_cnn.py with batchnorm
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/f134a24e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/f134a24e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/f134a24e
Branch: refs/heads/master
Commit: f134a24e2b58baad9dc29167e323d14cdf89d2a4
Parents: ce1a733
Author: wang wei <wa...@comp.nus.edu.sg>
Authored: Thu Jul 12 12:28:41 2018 +0800
Committer: wang wei <wa...@comp.nus.edu.sg>
Committed: Thu Jul 12 12:33:04 2018 +0800
----------------------------------------------------------------------
examples/autograd/mnist_cnn.py | 4 ++++
python/singa/autograd.py | 10 +++++-----
src/api/model_layer.i | 18 +++++++++---------
src/api/model_operation.i | 9 +++++----
src/model/operation/batchnorm.cc | 31 ++++++++++++++-----------------
5 files changed, 37 insertions(+), 35 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f134a24e/examples/autograd/mnist_cnn.py
----------------------------------------------------------------------
diff --git a/examples/autograd/mnist_cnn.py b/examples/autograd/mnist_cnn.py
index f78ccc8..b1d8dbe 100755
--- a/examples/autograd/mnist_cnn.py
+++ b/examples/autograd/mnist_cnn.py
@@ -106,15 +106,19 @@ if __name__ == '__main__':
    # operations initialization
    conv1 = autograd.Conv2D(1, 32, 3, padding=1, bias=False)
+   bn1 = autograd.BatchNorm(32)
    conv2 = autograd.Conv2D(32, 32, 3, padding=1)
+   bn2 = autograd.BatchNorm(32)
    linear = autograd.Linear(32 * 28 * 28, 10)

    def forward(x, t):
        y = conv1(x)
        y = autograd.relu(y)
+       y = bn1(y)
        y = autograd.max_pool_2d(y)
        y = conv2(y)
+       y = bn2(y)
        y = autograd.relu(y)
        y = autograd.max_pool_2d(y)
        y=autograd.flatten(y)
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f134a24e/python/singa/autograd.py
----------------------------------------------------------------------
diff --git a/python/singa/autograd.py b/python/singa/autograd.py
index 3a2eddd..d272dcd 100755
--- a/python/singa/autograd.py
+++ b/python/singa/autograd.py
@@ -771,7 +771,7 @@ class Conv2D(Layer):
        return y


-class BatchNorm2d(Layer):
+class BatchNorm(Layer):

    def __init__(self, num_features, momentum=0.9):
        self.channels = num_features
@@ -810,12 +810,12 @@ class BatchNorm2d(Layer):
                self.momentum, x.data)
        self.handle.device_id = x.device.id()

-       y = batchnorm2d(x, self.scale, self.bias,
+       y = batchnorm(x, self.scale, self.bias,
                      self.running_mean, self.running_var, self.handle)
        return y


-class _BatchNorm2d(Operation):
+class _BatchNorm(Operation):

    def __init__(self, running_mean, running_var, handle):
        self.running_mean = running_mean.data
@@ -855,5 +855,5 @@ class _BatchNorm2d(Operation):
        return dx, ds, db


-def batchnorm2d(x, scale, bias, running_mean, running_var, handle):
-    return _BatchNorm2d(running_mean, running_var, handle)(x, scale, bias)[0]
+def batchnorm(x, scale, bias, running_mean, running_var, handle):
+    return _BatchNorm(running_mean, running_var, handle)(x, scale, bias)[0]
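
For context, a minimal usage sketch of the renamed layer. Only autograd.BatchNorm and its call pattern come from this patch; the device and input setup below are illustrative assumptions.

    from singa import autograd, device, tensor

    dev = device.create_cuda_gpu()             # CudnnBatchNormHandle requires a CUDA device
    x = tensor.Tensor((8, 32, 28, 28), dev)    # NCHW batch with 32 channels
    x.gaussian(0.0, 1.0)

    bn = autograd.BatchNorm(32)                # per-channel scale/bias, momentum defaults to 0.9
    y = bn(x)                                  # wraps batchnorm(x, scale, bias, running_mean, running_var, handle)

The layer keeps the learnable scale/bias tensors and the running statistics; the _BatchNorm Operation only performs the forward/backward computation and returns dx, ds, db on the backward pass.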
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f134a24e/src/api/model_layer.i
----------------------------------------------------------------------
diff --git a/src/api/model_layer.i b/src/api/model_layer.i
index d449f24..dc04be0 100644
--- a/src/api/model_layer.i
+++ b/src/api/model_layer.i
@@ -29,21 +29,21 @@
%{
-// To make the code compatible between py2 and py3, the follow
-// macro is required, which forces the
-// interface (function) to accept byte string (from python) and
-// return byte string (in python) in py3. Otherwise the strings
+// To make the code compatible between py2 and py3, the follow
+// macro is required, which forces the
+// interface (function) to accept byte string (from python) and
+// return byte string (in python) in py3. Otherwise the strings
// should be unicode strings in py3.
// Note that by default the strings in python3 are of type unicode.
-// You have to encode it with the correct encoding (default is utf-8)
+// You have to encode it with the correct encoding (default is utf-8)
// to convert it into bytes. Sometimes, the string is already byte string
// e.g. from protobuf SerializeToString, then there is no need to do
// conversion. The output byte strings should be decoded into unicode.
-// For python2, the default type of string is byte string.
+// For python2, the default type of string is byte string.
//
-// Because protobuf::SerializeToString cannot be decoded into unicode
-// string, we cannot use SWIG_PYTHON_2_UNICODE which forces the
-// interface (function) to accept unicode strings as input args
+// Because protobuf::SerializeToString cannot be decoded into unicode
+// string, we cannot use SWIG_PYTHON_2_UNICODE which forces the
+// interface (function) to accept unicode strings as input args
// and return unicode strings.
//
// TODO(wangwei) make strings compatible between py2 and py3.
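
The comment block above amounts to an encode-on-the-way-in, decode-on-the-way-out rule for Python 3 callers of these wrapped interfaces. A hedged illustration (swig_layer and to_proto_str are stand-in names, not identifiers from this file):

    # Python 3: the SWIG interface accepts and returns byte strings
    conf_bytes = conf_text.encode('utf-8')            # unicode -> bytes before the call
    out_bytes = swig_layer.to_proto_str(conf_bytes)   # hypothetical wrapped call
    out_text = out_bytes.decode('utf-8')              # bytes -> unicode after the call

    # Output of protobuf's SerializeToString() is already a byte string,
    # so it can be passed straight through without an extra encode().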
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f134a24e/src/api/model_operation.i
----------------------------------------------------------------------
diff --git a/src/api/model_operation.i b/src/api/model_operation.i
index 6f2d1fa..eb41fd0 100755
--- a/src/api/model_operation.i
+++ b/src/api/model_operation.i
@@ -7,6 +7,7 @@
#include "../src/model/operation/convolution.h"
#include "../src/model/operation/batchnorm.h"
%}
+
namespace singa {
class ConvHandle {
@@ -68,15 +69,15 @@ class CudnnBatchNormHandle: public BatchNormHandle{
size_t batchsize;
};
-const vector<Tensor> GpuBatchNormForwardTraining(const CudnnBatchNormHandle &cbnh,
+const std::vector<Tensor> GpuBatchNormForwardTraining(const CudnnBatchNormHandle &cbnh,
const Tensor& x, const Tensor& bnScale, const Tensor& bnBias, Tensor& running_mean, Tensor& running_var);
-Tensor GpuBatchNormForwardInference(const CudnnBatchNormHandle &cbnh, const Tensor& x,
+Tensor GpuBatchNormForwardInference(const CudnnBatchNormHandle &cbnh, const Tensor& x,
const Tensor& bnScale, const Tensor& bnBias, const Tensor& running_mean, const Tensor& running_var);
-const std::vector<Tensor> GpuBatchNormBackward(const CudnnBatchNormHandle &cbnh,
+const std::vector<Tensor> GpuBatchNormBackward(const CudnnBatchNormHandle &cbnh,
const Tensor& dy, const Tensor& x, const Tensor& bnScale, const Tensor& mean, const Tensor& var);
-
+
#endif // USE_CUDNN
}
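
These declarations are what python/singa/autograd.py reaches through the SWIG wrapper. A hedged sketch of how the _BatchNorm operation might dispatch between them; the wrapper module name, the training flag, and the caching of mean/var for backward follow the usual autograd.py pattern and are assumptions here, not part of this diff:

    def forward(self, x, scale, bias):
        if training:
            # GpuBatchNormForwardTraining returns {output, mean, var};
            # mean and var are cached for the backward pass
            y, self.mean, self.var = singa.GpuBatchNormForwardTraining(
                self.handle, x, scale, bias, self.running_mean, self.running_var)
            return y
        return singa.GpuBatchNormForwardInference(
            self.handle, x, scale, bias, self.running_mean, self.running_var)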
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f134a24e/src/model/operation/batchnorm.cc
----------------------------------------------------------------------
diff --git a/src/model/operation/batchnorm.cc b/src/model/operation/batchnorm.cc
index 7040895..29eaba9 100755
--- a/src/model/operation/batchnorm.cc
+++ b/src/model/operation/batchnorm.cc
@@ -19,7 +19,7 @@ BatchNormHandle::BatchNormHandle(const float momentum, const Tensor& input) {
}
};
-#if USE_CUDNN
+#ifdef USE_CUDNN
CudnnBatchNormHandle::CudnnBatchNormHandle(const float momentum,
const Tensor& input): BatchNormHandle(momentum, input) {
if (is_2d)
@@ -38,14 +38,14 @@ CudnnBatchNormHandle::CudnnBatchNormHandle(const float momentum,
1, 1));
};
-Tensor GpuBatchNormForwardTraining(const CudnnBatchNormHandle &cbnh,
+const std::vector<Tensor> GpuBatchNormForwardTraining(const CudnnBatchNormHandle &cbnh,
const Tensor& x, const Tensor& bnScale, const Tensor& bnBias,
Tensor& running_mean, Tensor& running_var) {
CHECK_EQ(x.device()->lang(), kCuda);
CHECK_EQ(bnScale.device()->lang(), kCuda);
CHECK_EQ(bnBias.device()->lang(), kCuda);
- CHECK_EQ(runningMean.device()->lang(), kCuda);
- CHECK_EQ(runningVariance.device()->lang(), kCuda);
+ CHECK_EQ(running_mean.device()->lang(), kCuda);
+ CHECK_EQ(running_var.device()->lang(), kCuda);
Tensor mean, var;
mean.ResetLike(running_mean);
@@ -78,7 +78,7 @@ Tensor GpuBatchNormForwardTraining(const CudnnBatchNormHandle &cbnh,
});
if (cbnh.is_2d) output.Reshape(Shape{shape.at(0), shape.at(1)});
return {output, mean, var};
-};
+}
Tensor GpuBatchNormForwardInference(const CudnnBatchNormHandle &cbnh,
const Tensor& x, const Tensor& bnScale,
@@ -86,8 +86,8 @@ Tensor GpuBatchNormForwardInference(const CudnnBatchNormHandle &cbnh,
CHECK_EQ(x.device()->lang(), kCuda);
CHECK_EQ(bnScale.device()->lang(), kCuda);
CHECK_EQ(bnBias.device()->lang(), kCuda);
- CHECK_EQ(cbnh.running_mean.device()->lang(), kCuda);
- CHECK_EQ(cbnh.running_variance.device()->lang(), kCuda);
+ CHECK_EQ(running_mean.device()->lang(), kCuda);
+ CHECK_EQ(running_var.device()->lang(), kCuda);
Shape shape = x.shape();
@@ -106,17 +106,13 @@ Tensor GpuBatchNormForwardInference(const CudnnBatchNormHandle &cbnh,
input.block()->data(), cbnh.shape_desc, output.block()->mutable_data(),
cbnh.param_desc, bnScale.block()->data(), bnBias.block()->data(),
running_mean.block()->data(), running_var.block()->data(), epsilon));
- }, {
- input.block(), bnScale.block(), bnBias.block(), running_mean.block(),
- running_variance.block()
- },
+ }, { input.block(), bnScale.block(), bnBias.block(), running_mean.block(), running_var.block() },
{output.block()});
- if (cbnh.is_2d) output.Reshape(Shape{shape.at(0), shape.at(1)});
return output;
-};
+}
-std::vector<Tensor> GpuBatchNormBackward(const CudnnBatchNormHandle &cbnh,
+const std::vector<Tensor> GpuBatchNormBackward(const CudnnBatchNormHandle &cbnh,
const Tensor& dy, const Tensor& x, const Tensor& bnScale, const Tensor& mean,
const Tensor& var) {
CHECK_EQ(dy.device()->lang(), kCuda);
@@ -137,7 +133,7 @@ std::vector<Tensor> GpuBatchNormBackward(const CudnnBatchNormHandle &cbnh,
dx.device()->Exec(
[&](Context * ctx) {
-
+
const float alpha = 1.0f, beta = .0f;
double epsilon = CUDNN_BN_MIN_EPSILON;
CUDNN_CHECK(cudnnBatchNormalizationBackward(
@@ -151,8 +147,9 @@ std::vector<Tensor> GpuBatchNormBackward(const CudnnBatchNormHandle &cbnh,
{dx.block(), dbnScale.block(), dbnBias.block()});
if (cbnh.is_2d) dx.Reshape(Shape{dx.shape().at(0), dx.shape().at(1)});
-
+
return {dx, dbnScale, dbnBias};
-};
+}
+#endif //USE_CUDNN
}
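
For reference, the arithmetic that GpuBatchNormForwardTraining asks cuDNN to perform, written out in plain NumPy. This is a sketch only; cuDNN's exact variance estimator, the CUDNN_BN_MIN_EPSILON floor, and the direction of the momentum convention may differ in detail.

    import numpy as np

    def batchnorm_forward_train(x, scale, bias, running_mean, running_var,
                                momentum=0.9, eps=1e-5):
        # x is NCHW; statistics are computed per channel over N, H, W
        mean = x.mean(axis=(0, 2, 3))
        var = x.var(axis=(0, 2, 3))
        x_hat = (x - mean[None, :, None, None]) / np.sqrt(var[None, :, None, None] + eps)
        y = scale[None, :, None, None] * x_hat + bias[None, :, None, None]
        # running statistics are an exponential moving average of the batch statistics
        new_running_mean = momentum * running_mean + (1 - momentum) * mean
        new_running_var = momentum * running_var + (1 - momentum) * var
        return y, mean, var, new_running_mean, new_running_var

The backward pass (GpuBatchNormBackward) correspondingly returns dx together with the gradients of scale and bias, matching the {dx, dbnScale, dbnBias} triple returned above.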