You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@singa.apache.org by wa...@apache.org on 2018/07/05 03:10:00 UTC
[05/18] incubator-singa git commit: SINGA-371 Implement functional
operations in c++ for autograd
SINGA-371 Implement functional operations in c++ for autograd
- integrate convolution functions into conv2d autograd operation (GPU part)
- export the field 'batchsize' of CudnnConvHandle to python as it is needed in
Conv2d_GPU.__call__().
- set default 'workspace_byte_limit' as 1GB, which is consistent
with the default setting in Conv2D Layer.
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/c57b87ae
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/c57b87ae
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/c57b87ae
Branch: refs/heads/master
Commit: c57b87ae7ffd051d818b048de3c20c69643cbd25
Parents: 2cac057
Author: xuewanqi <xu...@u.nus.edu>
Authored: Wed Jun 20 08:46:25 2018 +0000
Committer: xuewanqi <xu...@u.nus.edu>
Committed: Wed Jun 20 14:47:37 2018 +0000
----------------------------------------------------------------------
python/singa/autograd.py | 99 ++++++++++++++++++++++++++++++++++
src/api/model_operation.i | 4 +-
src/model/convolution_functions.h | 2 +-
3 files changed, 102 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c57b87ae/python/singa/autograd.py
----------------------------------------------------------------------
diff --git a/python/singa/autograd.py b/python/singa/autograd.py
index 83362e2..c7e0adb 100644
--- a/python/singa/autograd.py
+++ b/python/singa/autograd.py
@@ -583,6 +583,105 @@ class Flatten(Operation):
def flatten(x):
return Flatten()(x)[0]
+class Conv2d_GPU(Operation):
+ def __init__(self, in_channels, out_channels, kernel_size, stride=1,
+ padding=0, dilation=1, groups=1, bias=True, **kwargs):
+
+ self.in_channels = in_channels
+ self.out_channels = out_channels
+
+ if isinstance(kernel_size, int):
+ self.kernel_size = (kernel_size, kernel_size)
+ elif isinstance(kernel_size, tuple):
+ self.kernel_size = kernel_size
+ else:
+ raise TypeError('Wrong kernel_size type.')
+
+ if isinstance(stride, int):
+ self.stride = (stride,stride)
+ elif isinstance(stride, tuple):
+ self.stride = stride
+ else:
+ raise TypeError('Wrong stride type.')
+
+ if isinstance(padding, int):
+ self.padding = (padding,padding)
+ elif isinstance(padding, tuple):
+ self.padding = padding
+ else:
+ raise TypeError('Wrong padding type.')
+
+ if dilation != 1 or groups != 1:
+ raise ValueError('Not implemented yet')
+
+ self.bias = bias
+
+ inner_params = {'cudnn_prefer': 'fastest', 'workspace_byte_limit': 1024}
+ # TODO valid value of inner_params check
+
+ for kwarg in kwargs:
+ if kwarg not in inner_params:
+ raise TypeError('Keyword argument not understood:', kwarg)
+ else:
+ inner_params[kwarg] = kwargs[kwarg]
+
+ self.convhandle = singa.SetupConv(self.kernel_size[0], self.kernel_size[1],
+ self.padding[0], self.padding[1], self.stride[0], self.stride[1],
+ self.bias, inner_params['workspace_byte_limit']*1024*1024,
+ inner_params['cudnn_prefer'])
+
+ w_shape = (self.out_channels, self.in_channels, self.kernel_size[0], self.kernel_size[1])
+ self.W = Tensor(shape=w_shape, requires_grad=True, stores_grad=True)
+ std = math.sqrt(
+ 2.0 / (self.in_channels * self.kernel_size[0] * self.kernel_size[1] + self.out_channels))
+ self.W.gaussian(0.0, std)
+
+ if self.bias:
+ b_shape = (self.out_channels,)
+ else:
+ b_shape = (1,) #to keep consistency when to do forward.
+ self.b = Tensor(shape=b_shape, requires_grad=True, stores_grad=True)
+ self.b.set_value(0.0)
+
+
+ def __call__(self, x):
+ assert x.ndim() == 4, 'The dimensions of input should be 4D.'
+ assert x.shape[1] == self.in_channels, 'in_channels dismatched.'
+ assert 0 == 0, 'invalid padding.'
+ # TODO valid padding check.
+
+ if not hasattr (self, cudnnconvhandle):
+ self.cudnnconvhandle = singa.InitCudnn(x.data, self.convhandle)
+ elif x.shape[0] != self.cudnnconvhandle.batchsize:
+ self.cudnnconvhandle = singa.InitCudnn(x.data, self.convhandle)
+
+ self.dev = x.device
+
+ self.W.to_device(self.dev)
+ xs = [x, self.W]
+
+ self.b.to_device(self.dev)
+ xs.append(self.b)
+ return self._do_forward(*xs)[0]
+
+ def forward(self, *xs):
+ if training:
+ self.x = xs[0]
+ return singa.CudnnConvForward(xs[0], xs[1], xs[2], self.convhandle, self.cudnnconvhandle)
+
+ def backward(self, dy):
+ assert training is True and hasattr(self, x), 'Please set \'trainging\' as True before do BP. '
+
+ # todo check device?
+ dy.ToDevice(self.dev)
+
+ dx = singa.CudnnConvBackwardx(dy, self.W, self.x, self.cch)
+ dW = singa.CudnnConvBackwardW(dy, self.x, self.W, self.cch)
+ if self.bias:
+ db = singa.CudnnConvBackwardb(dy, self.b, self.cch)
+ return dx, dW, db
+ else:
+ return dx, dW
def infer_dependency(op):
'''
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c57b87ae/src/api/model_operation.i
----------------------------------------------------------------------
diff --git a/src/api/model_operation.i b/src/api/model_operation.i
index 77ef6bb..a74ec5e 100644
--- a/src/api/model_operation.i
+++ b/src/api/model_operation.i
@@ -7,14 +7,14 @@ namespace singa{
struct ConvHandle{};
-struct CudnnConvHandle{};
+struct CudnnConvHandle{size_t batchsize;};
ConvHandle SetupConv(
const size_t kernel_h_, const size_t kernel_w_,
const size_t pad_h_, const size_t pad_w_,
const size_t stride_h_,const size_t stride_w_,
const size_t channels_, const size_t num_filters_,
- const bool bias_term_ = true, const size_t workspace_byte_limit_ =1024*1024,
+ const bool bias_term_ = true, const size_t workspace_byte_limit_ =1024*1024*1024,
const std::string prefer_="fastest");
CudnnConvHandle InitCudnn(const Tensor &input, const ConvHandle ch);
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c57b87ae/src/model/convolution_functions.h
----------------------------------------------------------------------
diff --git a/src/model/convolution_functions.h b/src/model/convolution_functions.h
index 9462805..e34423f 100644
--- a/src/model/convolution_functions.h
+++ b/src/model/convolution_functions.h
@@ -48,7 +48,7 @@ ConvHandle SetupConv(
const size_t pad_h_, const size_t pad_w_,
const size_t stride_h_,const size_t stride_w_,
const size_t channels_, const size_t num_filters_,
- const bool bias_term_ = true ,const size_t workspace_byte_limit_=1024*1024,
+ const bool bias_term_ = true ,const size_t workspace_byte_limit_=1024*1024*1024,
const std::string prefer_="fastest");
void testInitCudnn(const Tensor &input, const ConvHandle ch);