Posted to commits@singa.apache.org by wa...@apache.org on 2018/07/05 03:10:00 UTC

[05/18] incubator-singa git commit: SINGA-371 Implement functional operations in c++ for autograd

SINGA-371 Implement functional operations in c++ for autograd

- integrate the convolution functions into the conv2d autograd operation (GPU part)

- export the 'batchsize' field of CudnnConvHandle to Python, as it is needed in
  Conv2d_GPU.__call__().

- set the default 'workspace_byte_limit' to 1 GB, which is consistent
  with the default setting of the Conv2D layer.
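
A minimal usage sketch of the new operation (the device/tensor setup below is
illustrative only, not part of this commit, and assumes a CUDA-enabled build):

    from singa import autograd, device, tensor

    dev = device.create_cuda_gpu()
    x = tensor.Tensor(shape=(8, 3, 32, 32), device=dev)   # NCHW input
    x.gaussian(0.0, 0.1)

    autograd.training = True
    conv = autograd.Conv2d_GPU(3, 16, kernel_size=3, padding=1)
    y = conv(x)   # the cuDNN handle is created lazily on the first call

The cuDNN handle is re-created whenever the batch size of the input changes,
which is why 'batchsize' is exported to Python.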


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/c57b87ae
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/c57b87ae
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/c57b87ae

Branch: refs/heads/master
Commit: c57b87ae7ffd051d818b048de3c20c69643cbd25
Parents: 2cac057
Author: xuewanqi <xu...@u.nus.edu>
Authored: Wed Jun 20 08:46:25 2018 +0000
Committer: xuewanqi <xu...@u.nus.edu>
Committed: Wed Jun 20 14:47:37 2018 +0000

----------------------------------------------------------------------
 python/singa/autograd.py          | 99 ++++++++++++++++++++++++++++++++++
 src/api/model_operation.i         |  4 +-
 src/model/convolution_functions.h |  2 +-
 3 files changed, 102 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c57b87ae/python/singa/autograd.py
----------------------------------------------------------------------
diff --git a/python/singa/autograd.py b/python/singa/autograd.py
index 83362e2..c7e0adb 100644
--- a/python/singa/autograd.py
+++ b/python/singa/autograd.py
@@ -583,6 +583,105 @@ class Flatten(Operation):
 def flatten(x):
     return Flatten()(x)[0]
 
+class Conv2d_GPU(Operation):
+    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
+                 padding=0, dilation=1, groups=1, bias=True, **kwargs):
+
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+
+        if isinstance(kernel_size, int):
+            self.kernel_size = (kernel_size, kernel_size)
+        elif isinstance(kernel_size, tuple):
+            self.kernel_size = kernel_size
+        else:
+            raise TypeError('Wrong kernel_size type.')
+        
+        if isinstance(stride, int):
+            self.stride = (stride, stride)
+        elif isinstance(stride, tuple):
+            self.stride = stride
+        else:
+            raise TypeError('Wrong stride type.')
+
+        if isinstance(padding, int):
+            self.padding = (padding, padding)
+        elif isinstance(padding, tuple):
+            self.padding = padding
+        else:
+            raise TypeError('Wrong padding type.')
+
+        if dilation != 1 or groups != 1:
+            raise ValueError('Not implemented yet')
+
+        self.bias = bias
+
+        inner_params = {'cudnn_prefer': 'fastest', 'workspace_byte_limit': 1024}  # workspace limit in MB
+        # TODO valid value of inner_params check
+
+        for kwarg in kwargs:
+            if kwarg not in inner_params:
+                raise TypeError('Keyword argument not understood:', kwarg)
+            else:
+                inner_params[kwarg] = kwargs[kwarg]
+
+        self.convhandle = singa.SetupConv(
+            self.kernel_size[0], self.kernel_size[1],
+            self.padding[0], self.padding[1],
+            self.stride[0], self.stride[1],
+            self.in_channels, self.out_channels,
+            self.bias, inner_params['workspace_byte_limit'] * 1024 * 1024,
+            inner_params['cudnn_prefer'])
+        
+        w_shape = (self.out_channels, self.in_channels, self.kernel_size[0], self.kernel_size[1])
+        self.W = Tensor(shape=w_shape, requires_grad=True, stores_grad=True)
+        std = math.sqrt(
+                2.0 / (self.in_channels * self.kernel_size[0] * self.kernel_size[1] + self.out_channels))
+        self.W.gaussian(0.0, std)
+
+        if self.bias:
+            b_shape = (self.out_channels,)
+        else:
+            b_shape = (1,)  # placeholder so that forward() always receives a bias tensor
+        self.b = Tensor(shape=b_shape, requires_grad=True, stores_grad=True)
+        self.b.set_value(0.0)
+
+
+    def __call__(self, x):
+        assert x.ndim() == 4, 'The input should be a 4D tensor.'
+        assert x.shape[1] == self.in_channels, 'in_channels mismatched.'
+        # TODO: validate the padding values.
+
+        # (Re)create the cuDNN handle lazily, and whenever the batch size changes.
+        if not hasattr(self, 'cudnnconvhandle'):
+            self.cudnnconvhandle = singa.InitCudnn(x.data, self.convhandle)
+        elif x.shape[0] != self.cudnnconvhandle.batchsize:
+            self.cudnnconvhandle = singa.InitCudnn(x.data, self.convhandle)
+
+        self.dev = x.device
+
+        self.W.to_device(self.dev)
+        xs = [x, self.W]
+
+        self.b.to_device(self.dev)
+        xs.append(self.b)
+        return self._do_forward(*xs)[0]
+
+    def forward(self, *xs):
+        if training:
+            self.x = xs[0]  # cache the input for the backward pass
+        return singa.CudnnConvForward(xs[0], xs[1], xs[2], self.convhandle, self.cudnnconvhandle)
+
+    def backward(self, dy):
+        assert training is True and hasattr(self, 'x'), \
+            'Please set \'training\' to True before running backward propagation.'
+
+        # TODO: check the device of dy?
+        dy.ToDevice(self.dev)
+
+        dx = singa.CudnnConvBackwardx(dy, self.W, self.x, self.cudnnconvhandle)
+        dW = singa.CudnnConvBackwardW(dy, self.x, self.W, self.cudnnconvhandle)
+        if self.bias:
+            db = singa.CudnnConvBackwardb(dy, self.b, self.cudnnconvhandle)
+            return dx, dW, db
+        else:
+            return dx, dW
 
 def infer_dependency(op):
     '''

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c57b87ae/src/api/model_operation.i
----------------------------------------------------------------------
diff --git a/src/api/model_operation.i b/src/api/model_operation.i
index 77ef6bb..a74ec5e 100644
--- a/src/api/model_operation.i
+++ b/src/api/model_operation.i
@@ -7,14 +7,14 @@ namespace singa{
 
 struct ConvHandle{};
 
-struct CudnnConvHandle{};
+struct CudnnConvHandle{size_t batchsize;};
 
 ConvHandle SetupConv(
     const size_t kernel_h_, const size_t kernel_w_,
     const size_t pad_h_, const size_t pad_w_,
     const size_t stride_h_,const size_t stride_w_,
     const size_t channels_, const size_t num_filters_,
-    const bool bias_term_ = true, const size_t workspace_byte_limit_ =1024*1024,
+    const bool bias_term_ = true, const size_t workspace_byte_limit_ =1024*1024*1024,
     const std::string prefer_="fastest");
 
 CudnnConvHandle InitCudnn(const Tensor &input, const ConvHandle ch);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c57b87ae/src/model/convolution_functions.h
----------------------------------------------------------------------
diff --git a/src/model/convolution_functions.h b/src/model/convolution_functions.h
index 9462805..e34423f 100644
--- a/src/model/convolution_functions.h
+++ b/src/model/convolution_functions.h
@@ -48,7 +48,7 @@ ConvHandle SetupConv(
     const size_t pad_h_, const size_t pad_w_,
     const size_t stride_h_,const size_t stride_w_,
     const size_t channels_, const size_t num_filters_,
-    const bool bias_term_ = true ,const size_t workspace_byte_limit_=1024*1024,
+    const bool bias_term_ = true ,const size_t workspace_byte_limit_=1024*1024*1024,
     const std::string prefer_="fastest");
 
 void testInitCudnn(const Tensor &input, const ConvHandle ch);