You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@singa.apache.org by ka...@apache.org on 2018/07/13 05:43:46 UTC
[2/3] incubator-singa git commit: SINGA-378 Implement maxpooling
operation and its related functions for autograd
SINGA-378 Implement maxpooling operation and its related functions for autograd
Update API for pooling functions
Add MaxPooling2D, AvgPooling2D, MaxPooling1D and AvgPooling1D.
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/fb5cb9ab
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/fb5cb9ab
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/fb5cb9ab
Branch: refs/heads/master
Commit: fb5cb9ab000d776eed11a5f4fd3b0e7285a109c0
Parents: 571818e
Author: Wang Wei <wa...@gmail.com>
Authored: Thu Jul 12 17:53:22 2018 +0800
Committer: Wang Wei <wa...@gmail.com>
Committed: Thu Jul 12 17:53:22 2018 +0800
----------------------------------------------------------------------
examples/autograd/mnist_cnn.py | 6 +-
python/singa/autograd.py | 227 +++++++++++++++++++-----------------
src/api/model_operation.i | 10 +-
src/model/operation/pooling.cc | 76 +++++-------
src/model/operation/pooling.h | 19 ++-
5 files changed, 169 insertions(+), 169 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/fb5cb9ab/examples/autograd/mnist_cnn.py
----------------------------------------------------------------------
diff --git a/examples/autograd/mnist_cnn.py b/examples/autograd/mnist_cnn.py
index 2cb3cae..d42dc76 100755
--- a/examples/autograd/mnist_cnn.py
+++ b/examples/autograd/mnist_cnn.py
@@ -110,8 +110,8 @@ if __name__ == '__main__':
conv2 = autograd.Conv2D(32, 32, 3, padding=1)
bn2 = autograd.BatchNorm(32)
linear = autograd.Linear(32 * 28 * 28, 10)
- pooling1 = autograd.MaxPool2D(3, 1, padding=1)
- pooling2 = autograd.MaxPool2D(3, 1, padding=1)
+ pooling1 = autograd.MaxPooling2D(3, 1, padding=1)
+ pooling2 = autograd.AvgPooling2D(3, 1, padding=1)
def forward(x, t):
y = conv1(x)
@@ -130,7 +130,7 @@ if __name__ == '__main__':
return loss, y
autograd.training = True
- for epoch in range(50):
+ for epoch in range(epochs):
for i in range(batch_number):
inputs = tensor.Tensor(device=dev, data=x_train[
i * 100:(1 + i) * 100], stores_grad=False)
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/fb5cb9ab/python/singa/autograd.py
----------------------------------------------------------------------
diff --git a/python/singa/autograd.py b/python/singa/autograd.py
index fcdc020..7b4d18d 100755
--- a/python/singa/autograd.py
+++ b/python/singa/autograd.py
@@ -483,35 +483,6 @@ def ctensor2numpy(x):
return np_array.reshape(x.shape())
-class _MaxPool2D(Operation):
-
- def __init__(self, handle):
- self.handle = handle
-
- def forward(self, x):
- if self.handle.device_id == -1:
- raise NotImplementedError
- else:
- y = singa.GpuPoolingForward(x, self.handle)
-
- if training:
- self.cache = (x, y)
-
- return y
-
- def backward(self, dy):
- if self.handle.device_id == -1:
- raise NotImplementedError
- else:
- dx = singa.GpuPoolingBackward(
- dy, self.cache[0], self.cache[1], self.handle)
- return dx
-
-
-def max_pool_2d(x, handle):
- return _MaxPool2D(handle)(x)[0]
-
-
class Flatten(Operation):
def __init__(self, start_axis=1):
@@ -534,6 +505,46 @@ def flatten(x):
return Flatten()(x)[0]
+class Layer(object):
+
+ def __init__(self):
+ pass
+
+ def device_check(self, *inputs):
+ x_device = inputs[0].device
+ for var in inputs:
+ if var.device.id() != x_device:
+ var.to_device(x_device)
+
+
+class Linear(Layer):
+
+ def __init__(self, in_features, out_features, bias=True):
+ w_shape = (in_features, out_features)
+ b_shape = (1, out_features)
+ self.bias = bias
+
+ self.W = Tensor(shape=w_shape,
+ requires_grad=True, stores_grad=True)
+ std = math.sqrt(2.0 / (in_features + out_features))
+ self.W.gaussian(0.0, std)
+
+ if self.bias:
+ self.b = Tensor(shape=b_shape,
+ requires_grad=True, stores_grad=True)
+ self.b.set_value(0.0)
+
+ def __call__(self, x):
+ if self.bias:
+ self.device_check(x, self.W, self.b)
+ else:
+ self.device_check(x, self.W)
+ y = matmul(x, self.W)
+ if self.bias:
+ y = add_bias(y, self.b, axis=0)
+ return y
+
+
class _Conv2D(Operation):
def __init__(self, handle):
@@ -583,50 +594,10 @@ class _Conv2D(Operation):
return dx, dW, None
-def conv2d(x, W, b, handle):
+def conv2d(handle, x, W, b):
return _Conv2D(handle)(x, W, b)[0]
-class Layer(object):
-
- def __init__(self):
- pass
-
- def device_check(self, *inputs):
- x_device = inputs[0].device
- for var in inputs:
- if var.device.id() != x_device:
- var.to_device(x_device)
-
-
-class Linear(Layer):
-
- def __init__(self, in_features, out_features, bias=True):
- w_shape = (in_features, out_features)
- b_shape = (1, out_features)
- self.bias = bias
-
- self.W = Tensor(shape=w_shape,
- requires_grad=True, stores_grad=True)
- std = math.sqrt(2.0 / (in_features + out_features))
- self.W.gaussian(0.0, std)
-
- if self.bias:
- self.b = Tensor(shape=b_shape,
- requires_grad=True, stores_grad=True)
- self.b.set_value(0.0)
-
- def __call__(self, x):
- if self.bias:
- self.device_check(x, self.W, self.b)
- else:
- self.device_check(x, self.W)
- y = matmul(x, self.W)
- if self.bias:
- y = add_bias(y, self.b, axis=0)
- return y
-
-
class Conv2D(Layer):
def __init__(self, in_channels, out_channels, kernel_size, stride=1,
@@ -713,13 +684,10 @@ class Conv2D(Layer):
self.padding, self.in_channels, self.out_channels, self.bias)
self.handle.device_id = x.device.id()
- y = conv2d(x, self.W, self.b, self.handle)
+ y = conv2d(self.handle, x, self.W, self.b)
return y
-<< << << < HEAD
-
-
class BatchNorm(Layer):
def __init__(self, num_features, momentum=0.9):
@@ -759,14 +727,14 @@ class BatchNorm(Layer):
self.momentum, x.data)
self.handle.device_id = x.device.id()
- y = batchnorm(x, self.scale, self.bias,
- self.running_mean, self.running_var, self.handle)
+ y = batchnorm(self.handle, x, self.scale, self.bias,
+ self.running_mean, self.running_var)
return y
class _BatchNorm(Operation):
- def __init__(self, running_mean, running_var, handle):
+ def __init__(self, handle, running_mean, running_var):
self.running_mean = running_mean.data
self.running_var = running_var.data
self.handle = handle
@@ -804,14 +772,42 @@ class _BatchNorm(Operation):
return dx, ds, db
-def batchnorm(x, scale, bias, running_mean, running_var, handle):
- return _BatchNorm(running_mean, running_var, handle)(x, scale, bias)[0]
+def batchnorm(handle, x, scale, bias, running_mean, running_var):
+ return _BatchNorm(handle, running_mean, running_var)(x, scale, bias)[0]
+
+
+class _Pooling2D(Operation):
+
+ def __init__(self, handle):
+ self.handle = handle
+
+ def forward(self, x):
+ if self.handle.device_id == -1:
+ raise NotImplementedError
+ else:
+ y = singa.GpuPoolingForward(self.handle, x)
+
+ if training:
+ self.cache = (x, y)
+ return y
+
+ def backward(self, dy):
+ if self.handle.device_id == -1:
+ raise NotImplementedError
+ else:
+ dx = singa.GpuPoolingBackward(self.handle,
+ dy, self.cache[0], self.cache[1])
+ return dx
+
+
+def pooling_2d(handle, x):
+ return _Pooling2D(handle)(x)[0]
-class MaxPool2D(Layer):
- def __init__(self, kernel_size, stride=None, padding=0, dilation=1,
- return_indices=False, ceil_mode=False):
+class Pooling2D(Layer):
+
+ def __init__(self, kernel_size, stride=None, padding=0, is_max=True):
if isinstance(kernel_size, int):
self.kernel_size = (kernel_size, kernel_size)
elif isinstance(kernel_size, tuple):
@@ -825,6 +821,8 @@ class MaxPool2D(Layer):
self.stride = (stride, stride)
elif isinstance(stride, tuple):
self.stride = stride
+ assert stride[0] > 0 or (kernel_size[0] == 1 and padding[
+ 0] == 0), 'stride[0]=0, but kernel_size[0]=%d, padding[0]=%d' % (kernel_size[0], padding[0])
else:
raise TypeError('Wrong stride type.')
@@ -835,43 +833,62 @@ class MaxPool2D(Layer):
else:
raise TypeError('Wrong padding type.')
- if dilation != 1:
- raise ValueError('Not implemented yet')
-
- if return_indices is not False:
- raise ValueError('Not implemented yet')
-
- self.ceil_mode = ceil_mode
+ self.is_max = is_max
def __call__(self, x):
- if self.ceil_mode:
- out_shape_h = int(math.ceil(
- (x.shape[2] + 2 * self.padding[0] - self.kernel_size[0]) / self.stride[0])) + 1
- out_shape_w = int(math.ceil(
- (x.shape[3] + 2 * self.padding[1] - self.kernel_size[1]) / self.stride[1])) + 1
- else:
- out_shape_h = int(
- (x.shape[2] + 2 * self.padding[0] - self.kernel_size[0]) // self.stride[0]) + 1
- out_shape_w = int(
- (x.shape[3] + 2 * self.padding[1] - self.kernel_size[1]) // self.stride[1]) + 1
+
+ out_shape_h = int(
+ (x.shape[2] + 2 * self.padding[0] - self.kernel_size[0]) // self.stride[0]) + 1
+ out_shape_w = int(
+ (x.shape[3] + 2 * self.padding[1] - self.kernel_size[1]) // self.stride[1]) + 1
if x.device.id() == -1:
if not hasattr(self, 'handle'):
- self.handle = singa.PoolingHandle(x.data, self.kernel_size, self.stride,
- self.padding, self.ceil_mode, 'MAX')
+ self.handle = singa.PoolingHandle(
+ x.data, self.kernel_size, self.stride, self.padding, self.is_max)
elif x.shape[0] != self.handle.batchsize or out_shape_h != self.handle.pooled_height or \
out_shape_w != self.handle.pooled_width:
self.handle = singa.PoolingHandle(x.data, self.kernel_size, self.stride,
- self.padding, self.ceil_mode, 'MAX')
+ self.padding, self.is_max)
else:
if not hasattr(self, 'handle'):
self.handle = singa.CudnnPoolingHandle(x.data, self.kernel_size, self.stride,
- self.padding, self.ceil_mode, 'MAX', False) # False for nan_prop
+ self.padding, self.is_max) # False for nan_prop
elif x.shape[0] != self.handle.batchsize or out_shape_h != self.handle.pooled_height or \
out_shape_w != self.handle.pooled_width:
self.handle = singa.CudnnPoolingHandle(x.data, self.kernel_size, self.stride,
- self.padding, self.ceil_mode, 'MAX', False) # False for nan_prop
+ self.padding, self.is_max) # False for nan_prop
self.handle.device_id = x.device.id()
- y = max_pool_2d(x, self.handle)
+ y = pooling_2d(self.handle, x)
return y
+
+
+class MaxPooling2D(Pooling2D):
+
+ def __init__(self, kernel_size, stride=None, padding=0):
+ super(MaxPooling2D, self).__init__(kernel_size, stride, padding, True)
+
+
+class AvgPooling2D(Pooling2D):
+
+ def __init__(self, kernel_size, stride=None, padding=0):
+ super(AvgPooling2D, self).__init__(kernel_size, stride, padding, False)
+
+
+class MaxPooling1D(Pooling2D):
+
+ def __init__(self, kernel_size, stride=None, padding=0):
+ if stride is None:
+ stride = kernel_size
+ super(MaxPooling1D, self).__init__(
+ (1, kernel_size), (0, stride), (0, padding), True)
+
+
+class AvgPooling1D(Pooling2D):
+
+ def __init__(self, kernel_size, stride=None, padding=0):
+ if stride is None:
+ stride = kernel_size
+ super(AvgPooling1D, self).__init__(
+ (1, kernel_size), (0, stride), (0, padding), False)
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/fb5cb9ab/src/api/model_operation.i
----------------------------------------------------------------------
diff --git a/src/api/model_operation.i b/src/api/model_operation.i
index 4800ff1..3d9bdbe 100755
--- a/src/api/model_operation.i
+++ b/src/api/model_operation.i
@@ -43,7 +43,7 @@ class PoolingHandle {
public:
PoolingHandle(const Tensor &input, const std::vector<size_t>& kernel_size,
const std::vector<size_t>& stride, const std::vector<size_t>& padding,
- const bool ceil_mode = false, const std::string pooling_method = "MAX");
+ const bool is_max=true);
size_t batchsize;
@@ -94,8 +94,7 @@ class CudnnPoolingHandle : public PoolingHandle {
public:
CudnnPoolingHandle(const Tensor &input, const std::vector<size_t>& kernel_size,
const std::vector<size_t>& stride, const std::vector<size_t>& padding,
- const bool ceil_mode = false, const std::string pooling_method = "MAX",
- const bool NaN_prop = false);
+ const bool is_max=true);
size_t batchsize;
@@ -103,10 +102,9 @@ class CudnnPoolingHandle : public PoolingHandle {
size_t pooled_width;
};
-Tensor GpuPoolingForward(const Tensor &x, const CudnnPoolingHandle &cph);
+Tensor GpuPoolingForward(const CudnnPoolingHandle &cph, const Tensor &x);
-Tensor GpuPoolingBackward(const Tensor &dy, const Tensor& x, const Tensor& y,
- const CudnnPoolingHandle &cph);
+Tensor GpuPoolingBackward(const CudnnPoolingHandle &cph, const Tensor &dy, const Tensor& x, const Tensor& y);
#endif // USE_CUDNN
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/fb5cb9ab/src/model/operation/pooling.cc
----------------------------------------------------------------------
diff --git a/src/model/operation/pooling.cc b/src/model/operation/pooling.cc
index 0abda35..0072671 100644
--- a/src/model/operation/pooling.cc
+++ b/src/model/operation/pooling.cc
@@ -3,9 +3,10 @@
namespace singa {
-PoolingHandle::PoolingHandle(const Tensor &input, const std::vector<size_t>& kernel_size,
+PoolingHandle::PoolingHandle(const Tensor &input,
+ const std::vector<size_t>& kernel_size,
const std::vector<size_t>& stride, const std::vector<size_t>& padding,
- const bool ceil_mode, const std::string pooling_method) {
+ const bool is_max) {
kernel_h = kernel_size[0];
kernel_w = kernel_size[1];
@@ -21,34 +22,24 @@ PoolingHandle::PoolingHandle(const Tensor &input, const std::vector<size_t>& ker
width = input.shape(3);
pooled_height = 1;
- if (ceil_mode) {
- if (stride_h > 0)
- pooled_height = static_cast<int>(ceil(static_cast<float>(height + 2 * pad_h - kernel_h) / stride_h)) + 1;
- pooled_width = static_cast<int>(ceil(static_cast<float>(width + 2 * pad_w - kernel_w) / stride_w)) + 1;
- }
- else {
- if (stride_h > 0)
- pooled_height =
- static_cast<size_t>((height + 2 * pad_h - kernel_h) / stride_h) + 1;
- pooled_width =
- static_cast<size_t>((width + 2 * pad_w - kernel_w) / stride_w) + 1;
- }
-
- method = pooling_method;
- CHECK(method == "MAX" || method == "AVERAGE")
- << "Padding implemented only for average and max pooling.";
+
+ if (stride_h > 0)
+ pooled_height =
+ static_cast<size_t>((height + 2 * pad_h - kernel_h) / stride_h) + 1;
+ pooled_width =
+ static_cast<size_t>((width + 2 * pad_w - kernel_w) / stride_w) + 1;
+ is_max_pooling = is_max;
}
#ifdef USE_CUDNN
-CudnnPoolingHandle::CudnnPoolingHandle(const Tensor &input, const std::vector<size_t>& kernel_size,
+CudnnPoolingHandle::CudnnPoolingHandle(const Tensor &input,
+ const std::vector<size_t>& kernel_size,
const std::vector<size_t>& stride, const std::vector<size_t>& padding,
- const bool ceil_mode, const std::string pooling_method, const bool NaN_prop)
- : PoolingHandle(input, kernel_size, stride, padding, ceil_mode, pooling_method) {
- if (NaN_prop)
- nan_prop = CUDNN_PROPAGATE_NAN;
- else
- nan_prop = CUDNN_NOT_PROPAGATE_NAN;
+ const bool is_max)
+ : PoolingHandle(input, kernel_size, stride, padding, is_max) {
+
+ // nan_prop = CUDNN_NOT_PROPAGATE_NAN;
DataType dtype = input.data_type();
@@ -64,12 +55,10 @@ CudnnPoolingHandle::CudnnPoolingHandle(const Tensor &input, const std::vector<si
y_desc, CUDNN_TENSOR_NCHW, GetCudnnDataType(dtype), batchsize, channels,
pooled_height, pooled_width));
auto pool_method = CUDNN_POOLING_MAX;
- if (method == "MAX")
+ if (is_max)
pool_method = CUDNN_POOLING_MAX;
- else if (method == "AVERAGE")
- pool_method = CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING;
else
- LOG(FATAL) << "Not implemented!";
+ pool_method = CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING;
CUDNN_CHECK(cudnnSetPooling2dDescriptor(pool_desc, pool_method, nan_prop,
kernel_h, kernel_w, pad_h, pad_w,
@@ -81,26 +70,24 @@ CudnnPoolingHandle::~CudnnPoolingHandle() {
CUDNN_CHECK(cudnnDestroyPoolingDescriptor(pool_desc));
if (x_desc != nullptr) CUDNN_CHECK(cudnnDestroyTensorDescriptor(x_desc));
if (y_desc != nullptr) CUDNN_CHECK(cudnnDestroyTensorDescriptor(y_desc));
-};
+}
+
Tensor GpuPoolingForward(const CudnnPoolingHandle &cph, const Tensor &x) {
CHECK_EQ(x.device()->lang(), kCuda);
CHECK_EQ(x.nDim(), 4u);
- DataType dtype = x.data_type();
- auto dev = x.device();
- Shape shape{cph.batchsize, cph.channels, cph.pooled_height, cph.pooled_width};
- Tensor output = Tensor(shape, dev, dtype);
+ Tensor output = Tensor({cph.batchsize, cph.channels, cph.pooled_height, cph.pooled_width},
+ x.device(), x.data_type());
- output.device()->Exec([&x, &output, &cph](Context * ctx) {
- Block *inblock = x.block(), *outblock = output.block();
+ output.device()->Exec([&](Context * ctx) {
float alpha = 1.0f, beta = 0.0f;
cudnnPoolingForward(ctx->cudnn_handle, cph.pool_desc, &alpha,
- cph.x_desc, inblock->data(), &beta, cph.y_desc,
- outblock->mutable_data());
+ cph.x_desc, x.block()->data(), &beta, cph.y_desc,
+ output.block()->mutable_data());
}, {x.block()}, {output.block()});
return output;
-};
+}
Tensor GpuPoolingBackward(const CudnnPoolingHandle &cph, const Tensor &dy,
const Tensor& x, const Tensor& y) {
@@ -110,14 +97,13 @@ Tensor GpuPoolingBackward(const Tensor &dy, const Tensor& x, const Tensor& y,
Tensor dx;
dx.ResetLike(x);
- dx.device()->Exec([&dx, &dy, &x, &y, &cph](Context * ctx) {
- Block *dyblock = dy.block(), *dxblock = dx.block(), *yblock = y.block(),
- *xblock = x.block();
+ dx.device()->Exec([&](Context * ctx) {
+
float alpha = 1.0f, beta = 0.0f;
cudnnPoolingBackward(ctx->cudnn_handle, cph.pool_desc, &alpha,
- cph.y_desc, yblock->data(), cph.y_desc,
- dyblock->data(), cph.x_desc, xblock->data(), &beta,
- cph.x_desc, dxblock->mutable_data());
+ cph.y_desc, y.block()->data(), cph.y_desc,
+ dy.block()->data(), cph.x_desc, x.block()->data(), &beta,
+ cph.x_desc, dx.block()->mutable_data());
}, {dy.block(), y.block(), x.block()}, {dx.block()});
return dx;
};
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/fb5cb9ab/src/model/operation/pooling.h
----------------------------------------------------------------------
diff --git a/src/model/operation/pooling.h b/src/model/operation/pooling.h
index 9ed7e33..a4d1051 100644
--- a/src/model/operation/pooling.h
+++ b/src/model/operation/pooling.h
@@ -12,10 +12,10 @@
namespace singa {
class PoolingHandle {
-public:
+ public:
PoolingHandle(const Tensor &input, const std::vector<size_t>& kernel_size,
const std::vector<size_t>& stride, const std::vector<size_t>& padding,
- const bool ceil_mode = false, const std::string pooling_method = "MAX");
+ const bool is_max = true);
size_t kernel_w;
size_t pad_w;
@@ -32,29 +32,28 @@ public:
size_t pooled_height;
size_t pooled_width;
- std::string method;
+ bool is_max_pooling;
};
#ifdef USE_CUDNN
class CudnnPoolingHandle : public PoolingHandle {
-public:
+ public:
CudnnPoolingHandle(const Tensor &input, const std::vector<size_t>& kernel_size,
const std::vector<size_t>& stride, const std::vector<size_t>& padding,
- const bool ceil_mode = false, const std::string pooling_method = "MAX",
- const bool NaN_prop = false);
+ const bool is_max = true);
~CudnnPoolingHandle();
cudnnTensorDescriptor_t x_desc = nullptr;
cudnnTensorDescriptor_t y_desc = nullptr;
cudnnPoolingDescriptor_t pool_desc = nullptr;
- cudnnNanPropagation_t nan_prop;
+ cudnnNanPropagation_t nan_prop = CUDNN_PROPAGATE_NAN;
};
-Tensor GpuPoolingForward(const Tensor &x, const CudnnPoolingHandle &cph);
+Tensor GpuPoolingForward(const CudnnPoolingHandle &cph, const Tensor &x);
-Tensor GpuPoolingBackward(const Tensor &dy, const Tensor& x, const Tensor& y,
- const CudnnPoolingHandle &cph);
+Tensor GpuPoolingBackward(const CudnnPoolingHandle &cph, const Tensor &dy,
+ const Tensor& x, const Tensor& y);
#endif //USE_CUDNN