Posted to commits@singa.apache.org by zh...@apache.org on 2018/07/12 08:39:53 UTC
[5/5] incubator-singa git commit: SINGA-379 Implement batchnorm operation and its related functions for autograd
SINGA-379 Implement batchnorm operation and its related functions for autograd
Test mnist_cnn.py with batchnorm
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/f134a24e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/f134a24e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/f134a24e
Branch: refs/heads/master
Commit: f134a24e2b58baad9dc29167e323d14cdf89d2a4
Parents: ce1a733
Author: wang wei <wa...@comp.nus.edu.sg>
Authored: Thu Jul 12 12:28:41 2018 +0800
Committer: wang wei <wa...@comp.nus.edu.sg>
Committed: Thu Jul 12 12:33:04 2018 +0800
----------------------------------------------------------------------
examples/autograd/mnist_cnn.py | 4 ++++
python/singa/autograd.py | 10 +++++-----
src/api/model_layer.i | 18 +++++++++---------
src/api/model_operation.i | 9 +++++----
src/model/operation/batchnorm.cc | 31 ++++++++++++++-----------------
5 files changed, 37 insertions(+), 35 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f134a24e/examples/autograd/mnist_cnn.py
----------------------------------------------------------------------
diff --git a/examples/autograd/mnist_cnn.py b/examples/autograd/mnist_cnn.py
index f78ccc8..b1d8dbe 100755
--- a/examples/autograd/mnist_cnn.py
+++ b/examples/autograd/mnist_cnn.py
@@ -106,15 +106,19 @@ if __name__ == '__main__':
    # operations initialization
    conv1 = autograd.Conv2D(1, 32, 3, padding=1, bias=False)
+   bn1 = autograd.BatchNorm(32)
    conv2 = autograd.Conv2D(32, 32, 3, padding=1)
+   bn2 = autograd.BatchNorm(32)
    linear = autograd.Linear(32 * 28 * 28, 10)

    def forward(x, t):
        y = conv1(x)
        y = autograd.relu(y)
+       y = bn1(y)
        y = autograd.max_pool_2d(y)
        y = conv2(y)
+       y = bn2(y)
        y = autograd.relu(y)
        y = autograd.max_pool_2d(y)
        y=autograd.flatten(y)
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f134a24e/python/singa/autograd.py
----------------------------------------------------------------------
diff --git a/python/singa/autograd.py b/python/singa/autograd.py
index 3a2eddd..d272dcd 100755
--- a/python/singa/autograd.py
+++ b/python/singa/autograd.py
@@ -771,7 +771,7 @@ class Conv2D(Layer):
        return y


-class BatchNorm2d(Layer):
+class BatchNorm(Layer):

    def __init__(self, num_features, momentum=0.9):
        self.channels = num_features
@@ -810,12 +810,12 @@ class BatchNorm2d(Layer):
                self.momentum, x.data)
        self.handle.device_id = x.device.id()

-       y = batchnorm2d(x, self.scale, self.bias,
+       y = batchnorm(x, self.scale, self.bias,
                      self.running_mean, self.running_var, self.handle)
        return y


-class _BatchNorm2d(Operation):
+class _BatchNorm(Operation):

    def __init__(self, running_mean, running_var, handle):
        self.running_mean = running_mean.data
@@ -855,5 +855,5 @@ class _BatchNorm2d(Operation):
        return dx, ds, db


-def batchnorm2d(x, scale, bias, running_mean, running_var, handle):
-    return _BatchNorm2d(running_mean, running_var, handle)(x, scale, bias)[0]
+def batchnorm(x, scale, bias, running_mean, running_var, handle):
+    return _BatchNorm(running_mean, running_var, handle)(x, scale, bias)[0]
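
For context, a minimal usage sketch of the renamed layer. Only autograd.BatchNorm and its call pattern come from this patch; the device and input setup below are illustrative assumptions.

    from singa import autograd, device, tensor

    dev = device.create_cuda_gpu()             # CudnnBatchNormHandle requires a CUDA device
    x = tensor.Tensor((8, 32, 28, 28), dev)    # NCHW batch with 32 channels
    x.gaussian(0.0, 1.0)

    bn = autograd.BatchNorm(32)                # per-channel scale/bias, momentum defaults to 0.9
    y = bn(x)                                  # wraps batchnorm(x, scale, bias, running_mean, running_var, handle)

The layer keeps the learnable scale/bias tensors and the running statistics; the _BatchNorm Operation only performs the forward/backward computation and returns dx, ds, db on the backward pass.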
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f134a24e/src/api/model_layer.i
----------------------------------------------------------------------
diff --git a/src/api/model_layer.i b/src/api/model_layer.i
index d449f24..dc04be0 100644
--- a/src/api/model_layer.i
+++ b/src/api/model_layer.i
@@ -29,21 +29,21 @@
%{
-// To make the code compatible between py2 and py3, the follow
-// macro is required, which forces the
-// interface (function) to accept byte string (from python) and
-// return byte string (in python) in py3. Otherwise the strings
+// To make the code compatible between py2 and py3, the follow
+// macro is required, which forces the
+// interface (function) to accept byte string (from python) and
+// return byte string (in python) in py3. Otherwise the strings
// should be unicode strings in py3.
// Note that by default the strings in python3 are of type unicode.
-// You have to encode it with the correct encoding (default is utf-8)
+// You have to encode it with the correct encoding (default is utf-8)
// to convert it into bytes. Sometimes, the string is already byte string
// e.g. from protobuf SerializeToString, then there is no need to do
// conversion. The output byte strings should be decoded into unicode.
-// For python2, the default type of string is byte string.
+// For python2, the default type of string is byte string.
//
-// Because protobuf::SerializeToString cannot be decoded into unicode
-// string, we cannot use SWIG_PYTHON_2_UNICODE which forces the
-// interface (function) to accept unicode strings as input args
+// Because protobuf::SerializeToString cannot be decoded into unicode
+// string, we cannot use SWIG_PYTHON_2_UNICODE which forces the
+// interface (function) to accept unicode strings as input args
// and return unicode strings.
//
// TODO(wangwei) make strings compatible between py2 and py3.
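
The comment block above amounts to an encode-on-the-way-in, decode-on-the-way-out rule for Python 3 callers of these wrapped interfaces. A hedged illustration (swig_layer and to_proto_str are stand-in names, not identifiers from this file):

    # Python 3: the SWIG interface accepts and returns byte strings
    conf_bytes = conf_text.encode('utf-8')            # unicode -> bytes before the call
    out_bytes = swig_layer.to_proto_str(conf_bytes)   # hypothetical wrapped call
    out_text = out_bytes.decode('utf-8')              # bytes -> unicode after the call

    # Output of protobuf's SerializeToString() is already a byte string,
    # so it can be passed straight through without an extra encode().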
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f134a24e/src/api/model_operation.i
----------------------------------------------------------------------
diff --git a/src/api/model_operation.i b/src/api/model_operation.i
index 6f2d1fa..eb41fd0 100755
--- a/src/api/model_operation.i
+++ b/src/api/model_operation.i
@@ -7,6 +7,7 @@
#include "../src/model/operation/convolution.h"
#include "../src/model/operation/batchnorm.h"
%}
+
namespace singa {
class ConvHandle {
@@ -68,15 +69,15 @@ class CudnnBatchNormHandle: public BatchNormHandle{
size_t batchsize;
};
-const vector<Tensor> GpuBatchNormForwardTraining(const CudnnBatchNormHandle &cbnh,
+const std::vector<Tensor> GpuBatchNormForwardTraining(const CudnnBatchNormHandle &cbnh,
const Tensor& x, const Tensor& bnScale, const Tensor& bnBias, Tensor& running_mean, Tensor& running_var);
-Tensor GpuBatchNormForwardInference(const CudnnBatchNormHandle &cbnh, const Tensor& x,
+Tensor GpuBatchNormForwardInference(const CudnnBatchNormHandle &cbnh, const Tensor& x,
const Tensor& bnScale, const Tensor& bnBias, const Tensor& running_mean, const Tensor& running_var);
-const std::vector<Tensor> GpuBatchNormBackward(const CudnnBatchNormHandle &cbnh,
+const std::vector<Tensor> GpuBatchNormBackward(const CudnnBatchNormHandle &cbnh,
const Tensor& dy, const Tensor& x, const Tensor& bnScale, const Tensor& mean, const Tensor& var);
-
+
#endif // USE_CUDNN
}
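
These declarations are what python/singa/autograd.py reaches through the SWIG wrapper. A hedged sketch of how the _BatchNorm operation might dispatch between them; the wrapper module name, the training flag, and the caching of mean/var for backward follow the usual autograd.py pattern and are assumptions here, not part of this diff:

    def forward(self, x, scale, bias):
        if training:
            # GpuBatchNormForwardTraining returns {output, mean, var};
            # mean and var are cached for the backward pass
            y, self.mean, self.var = singa.GpuBatchNormForwardTraining(
                self.handle, x, scale, bias, self.running_mean, self.running_var)
            return y
        return singa.GpuBatchNormForwardInference(
            self.handle, x, scale, bias, self.running_mean, self.running_var)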
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f134a24e/src/model/operation/batchnorm.cc
----------------------------------------------------------------------
diff --git a/src/model/operation/batchnorm.cc b/src/model/operation/batchnorm.cc
index 7040895..29eaba9 100755
--- a/src/model/operation/batchnorm.cc
+++ b/src/model/operation/batchnorm.cc
@@ -19,7 +19,7 @@ BatchNormHandle::BatchNormHandle(const float momentum, const Tensor& input) {
}
};
-#if USE_CUDNN
+#ifdef USE_CUDNN
CudnnBatchNormHandle::CudnnBatchNormHandle(const float momentum,
const Tensor& input): BatchNormHandle(momentum, input) {
if (is_2d)
@@ -38,14 +38,14 @@ CudnnBatchNormHandle::CudnnBatchNormHandle(const float momentum,
1, 1));
};
-Tensor GpuBatchNormForwardTraining(const CudnnBatchNormHandle &cbnh,
+const std::vector<Tensor> GpuBatchNormForwardTraining(const CudnnBatchNormHandle &cbnh,
const Tensor& x, const Tensor& bnScale, const Tensor& bnBias,
Tensor& running_mean, Tensor& running_var) {
CHECK_EQ(x.device()->lang(), kCuda);
CHECK_EQ(bnScale.device()->lang(), kCuda);
CHECK_EQ(bnBias.device()->lang(), kCuda);
- CHECK_EQ(runningMean.device()->lang(), kCuda);
- CHECK_EQ(runningVariance.device()->lang(), kCuda);
+ CHECK_EQ(running_mean.device()->lang(), kCuda);
+ CHECK_EQ(running_var.device()->lang(), kCuda);
Tensor mean, var;
mean.ResetLike(running_mean);
@@ -78,7 +78,7 @@ Tensor GpuBatchNormForwardTraining(const CudnnBatchNormHandle &cbnh,
});
if (cbnh.is_2d) output.Reshape(Shape{shape.at(0), shape.at(1)});
return {output, mean, var};
-};
+}
Tensor GpuBatchNormForwardInference(const CudnnBatchNormHandle &cbnh,
const Tensor& x, const Tensor& bnScale,
@@ -86,8 +86,8 @@ Tensor GpuBatchNormForwardInference(const CudnnBatchNormHandle &cbnh,
CHECK_EQ(x.device()->lang(), kCuda);
CHECK_EQ(bnScale.device()->lang(), kCuda);
CHECK_EQ(bnBias.device()->lang(), kCuda);
- CHECK_EQ(cbnh.running_mean.device()->lang(), kCuda);
- CHECK_EQ(cbnh.running_variance.device()->lang(), kCuda);
+ CHECK_EQ(running_mean.device()->lang(), kCuda);
+ CHECK_EQ(running_var.device()->lang(), kCuda);
Shape shape = x.shape();
@@ -106,17 +106,13 @@ Tensor GpuBatchNormForwardInference(const CudnnBatchNormHandle &cbnh,
input.block()->data(), cbnh.shape_desc, output.block()->mutable_data(),
cbnh.param_desc, bnScale.block()->data(), bnBias.block()->data(),
running_mean.block()->data(), running_var.block()->data(), epsilon));
- }, {
- input.block(), bnScale.block(), bnBias.block(), running_mean.block(),
- running_variance.block()
- },
+ }, { input.block(), bnScale.block(), bnBias.block(), running_mean.block(), running_var.block() },
{output.block()});
- if (cbnh.is_2d) output.Reshape(Shape{shape.at(0), shape.at(1)});
return output;
-};
+}
-std::vector<Tensor> GpuBatchNormBackward(const CudnnBatchNormHandle &cbnh,
+const std::vector<Tensor> GpuBatchNormBackward(const CudnnBatchNormHandle &cbnh,
const Tensor& dy, const Tensor& x, const Tensor& bnScale, const Tensor& mean,
const Tensor& var) {
CHECK_EQ(dy.device()->lang(), kCuda);
@@ -137,7 +133,7 @@ std::vector<Tensor> GpuBatchNormBackward(const CudnnBatchNormHandle &cbnh,
dx.device()->Exec(
[&](Context * ctx) {
-
+
const float alpha = 1.0f, beta = .0f;
double epsilon = CUDNN_BN_MIN_EPSILON;
CUDNN_CHECK(cudnnBatchNormalizationBackward(
@@ -151,8 +147,9 @@ std::vector<Tensor> GpuBatchNormBackward(const CudnnBatchNormHandle &cbnh,
{dx.block(), dbnScale.block(), dbnBias.block()});
if (cbnh.is_2d) dx.Reshape(Shape{dx.shape().at(0), dx.shape().at(1)});
-
+
return {dx, dbnScale, dbnBias};
-};
+}
+#endif //USE_CUDNN
}
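
For reference, the arithmetic that GpuBatchNormForwardTraining asks cuDNN to perform, written out in plain NumPy. This is a sketch only; cuDNN's exact variance estimator, the CUDNN_BN_MIN_EPSILON floor, and the direction of the momentum convention may differ in detail.

    import numpy as np

    def batchnorm_forward_train(x, scale, bias, running_mean, running_var,
                                momentum=0.9, eps=1e-5):
        # x is NCHW; statistics are computed per channel over N, H, W
        mean = x.mean(axis=(0, 2, 3))
        var = x.var(axis=(0, 2, 3))
        x_hat = (x - mean[None, :, None, None]) / np.sqrt(var[None, :, None, None] + eps)
        y = scale[None, :, None, None] * x_hat + bias[None, :, None, None]
        # running statistics are an exponential moving average of the batch statistics
        new_running_mean = momentum * running_mean + (1 - momentum) * mean
        new_running_var = momentum * running_var + (1 - momentum) * var
        return y, mean, var, new_running_mean, new_running_var

The backward pass (GpuBatchNormBackward) correspondingly returns dx together with the gradients of scale and bias, matching the {dx, dbnScale, dbnBias} triple returned above.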