Posted to commits@singa.apache.org by ka...@apache.org on 2018/07/13 05:43:46 UTC

[2/3] incubator-singa git commit: SINGA-378 Implement maxpooling operation and its related functions for autograd

SINGA-378 Implement maxpooling operation and its related functions for autograd

Update the API for the pooling functions: the operations now take the handle as the first argument (e.g. pooling_2d(handle, x)).

Add MaxPooling2D, AvgPooling2D, MaxPooling1D and AvgPooling1D.
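
For reference, a minimal forward-only sketch of the renamed layers. This is not part of the commit; it assumes the standard singa.device / singa.tensor helpers and a CUDA device, since the CPU pooling path still raises NotImplementedError.

    from singa import autograd, device, tensor

    dev = device.create_cuda_gpu()   # CUDA device assumed; CPU pooling is not implemented
    autograd.training = False        # forward only; no (x, y) cache is kept for backward

    # NCHW input, e.g. a batch of 2 feature maps of shape 32 x 28 x 28
    x = tensor.Tensor(shape=(2, 32, 28, 28), device=dev)
    x.gaussian(0.0, 1.0)

    pool_max = autograd.MaxPooling2D(3, 1, padding=1)  # replaces the old autograd.MaxPool2D
    pool_avg = autograd.AvgPooling2D(3, 1, padding=1)  # same handle machinery, average mode

    y = pool_max(x)
    z = pool_avg(x)
    print(y.shape, z.shape)  # kernel 3, stride 1, padding 1 keeps the 28 x 28 spatial size

MaxPooling1D and AvgPooling1D wrap the same 2D implementation with a (1, kernel_size) window.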


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/fb5cb9ab
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/fb5cb9ab
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/fb5cb9ab

Branch: refs/heads/master
Commit: fb5cb9ab000d776eed11a5f4fd3b0e7285a109c0
Parents: 571818e
Author: Wang Wei <wa...@gmail.com>
Authored: Thu Jul 12 17:53:22 2018 +0800
Committer: Wang Wei <wa...@gmail.com>
Committed: Thu Jul 12 17:53:22 2018 +0800

----------------------------------------------------------------------
 examples/autograd/mnist_cnn.py |   6 +-
 python/singa/autograd.py       | 227 +++++++++++++++++++-----------------
 src/api/model_operation.i      |  10 +-
 src/model/operation/pooling.cc |  76 +++++-------
 src/model/operation/pooling.h  |  19 ++-
 5 files changed, 169 insertions(+), 169 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/fb5cb9ab/examples/autograd/mnist_cnn.py
----------------------------------------------------------------------
diff --git a/examples/autograd/mnist_cnn.py b/examples/autograd/mnist_cnn.py
index 2cb3cae..d42dc76 100755
--- a/examples/autograd/mnist_cnn.py
+++ b/examples/autograd/mnist_cnn.py
@@ -110,8 +110,8 @@ if __name__ == '__main__':
     conv2 = autograd.Conv2D(32, 32, 3, padding=1)
     bn2 = autograd.BatchNorm(32)
     linear = autograd.Linear(32 * 28 * 28, 10)
-    pooling1 = autograd.MaxPool2D(3, 1, padding=1)
-    pooling2 = autograd.MaxPool2D(3, 1, padding=1)
+    pooling1 = autograd.MaxPooling2D(3, 1, padding=1)
+    pooling2 = autograd.AvgPooling2D(3, 1, padding=1)
 
     def forward(x, t):
         y = conv1(x)
@@ -130,7 +130,7 @@ if __name__ == '__main__':
         return loss, y
 
     autograd.training = True
-    for epoch in range(50):
+    for epoch in range(epochs):
         for i in range(batch_number):
             inputs = tensor.Tensor(device=dev, data=x_train[
                                    i * 100:(1 + i) * 100], stores_grad=False)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/fb5cb9ab/python/singa/autograd.py
----------------------------------------------------------------------
diff --git a/python/singa/autograd.py b/python/singa/autograd.py
index fcdc020..7b4d18d 100755
--- a/python/singa/autograd.py
+++ b/python/singa/autograd.py
@@ -483,35 +483,6 @@ def ctensor2numpy(x):
     return np_array.reshape(x.shape())
 
 
-class _MaxPool2D(Operation):
-
-    def __init__(self, handle):
-        self.handle = handle
-
-    def forward(self, x):
-        if self.handle.device_id == -1:
-            raise NotImplementedError
-        else:
-            y = singa.GpuPoolingForward(x, self.handle)
-
-        if training:
-            self.cache = (x, y)
-
-        return y
-
-    def backward(self, dy):
-        if self.handle.device_id == -1:
-            raise NotImplementedError
-        else:
-            dx = singa.GpuPoolingBackward(
-                dy, self.cache[0], self.cache[1], self.handle)
-        return dx
-
-
-def max_pool_2d(x, handle):
-    return _MaxPool2D(handle)(x)[0]
-
-
 class Flatten(Operation):
 
     def __init(self, start_axis=1):
@@ -534,6 +505,46 @@ def flatten(x):
     return Flatten()(x)[0]
 
 
+class Layer(object):
+
+    def __init__(self):
+        pass
+
+    def device_check(self, *inputs):
+        x_device = inputs[0].device
+        for var in inputs:
+            if var.device.id() != x_device:
+                var.to_device(x_device)
+
+
+class Linear(Layer):
+
+    def __init__(self, in_features, out_features, bias=True):
+        w_shape = (in_features, out_features)
+        b_shape = (1, out_features)
+        self.bias = bias
+
+        self.W = Tensor(shape=w_shape,
+                        requires_grad=True, stores_grad=True)
+        std = math.sqrt(2.0 / (in_features + out_features))
+        self.W.gaussian(0.0, std)
+
+        if self.bias:
+            self.b = Tensor(shape=b_shape,
+                            requires_grad=True, stores_grad=True)
+            self.b.set_value(0.0)
+
+    def __call__(self, x):
+        if self.bias:
+            self.device_check(x, self.W, self.b)
+        else:
+            self.device_check(x, self.W)
+        y = matmul(x, self.W)
+        if self.bias:
+            y = add_bias(y, self.b, axis=0)
+        return y
+
+
 class _Conv2D(Operation):
 
     def __init__(self, handle):
@@ -583,50 +594,10 @@ class _Conv2D(Operation):
                 return dx, dW, None
 
 
-def conv2d(x, W, b, handle):
+def conv2d(handle, x, W, b):
     return _Conv2D(handle)(x, W, b)[0]
 
 
-class Layer(object):
-
-    def __init__(self):
-        pass
-
-    def device_check(self, *inputs):
-        x_device = inputs[0].device
-        for var in inputs:
-            if var.device.id() != x_device:
-                var.to_device(x_device)
-
-
-class Linear(Layer):
-
-    def __init__(self, in_features, out_features, bias=True):
-        w_shape = (in_features, out_features)
-        b_shape = (1, out_features)
-        self.bias = bias
-
-        self.W = Tensor(shape=w_shape,
-                        requires_grad=True, stores_grad=True)
-        std = math.sqrt(2.0 / (in_features + out_features))
-        self.W.gaussian(0.0, std)
-
-        if self.bias:
-            self.b = Tensor(shape=b_shape,
-                            requires_grad=True, stores_grad=True)
-            self.b.set_value(0.0)
-
-    def __call__(self, x):
-        if self.bias:
-            self.device_check(x, self.W, self.b)
-        else:
-            self.device_check(x, self.W)
-        y = matmul(x, self.W)
-        if self.bias:
-            y = add_bias(y, self.b, axis=0)
-        return y
-
-
 class Conv2D(Layer):
 
     def __init__(self, in_channels, out_channels, kernel_size, stride=1,
@@ -713,13 +684,10 @@ class Conv2D(Layer):
                                                     self.padding, self.in_channels, self.out_channels, self.bias)
         self.handle.device_id = x.device.id()
 
-        y = conv2d(x, self.W, self.b, self.handle)
+        y = conv2d(self.handle, x, self.W, self.b)
         return y
 
 
-<< << << < HEAD
-
-
 class BatchNorm(Layer):
 
     def __init__(self, num_features, momentum=0.9):
@@ -759,14 +727,14 @@ class BatchNorm(Layer):
                     self.momentum, x.data)
         self.handle.device_id = x.device.id()
 
-        y = batchnorm(x, self.scale, self.bias,
-                      self.running_mean, self.running_var, self.handle)
+        y = batchnorm(self.handle, x, self.scale, self.bias,
+                      self.running_mean, self.running_var)
         return y
 
 
 class _BatchNorm(Operation):
 
-    def __init__(self, running_mean, running_var, handle):
+    def __init__(self, handle, running_mean, running_var):
         self.running_mean = running_mean.data
         self.running_var = running_var.data
         self.handle = handle
@@ -804,14 +772,42 @@ class _BatchNorm(Operation):
             return dx, ds, db
 
 
-def batchnorm(x, scale, bias, running_mean, running_var, handle):
-    return _BatchNorm(running_mean, running_var, handle)(x, scale, bias)[0]
+def batchnorm(handle, x, scale, bias, running_mean, running_var):
+    return _BatchNorm(handle, running_mean, running_var)(x, scale, bias)[0]
+
+
+class _Pooling2D(Operation):
+
+    def __init__(self, handle):
+        self.handle = handle
+
+    def forward(self, x):
+        if self.handle.device_id == -1:
+            raise NotImplementedError
+        else:
+            y = singa.GpuPoolingForward(self.handle, x)
+
+        if training:
+            self.cache = (x, y)
 
+        return y
+
+    def backward(self, dy):
+        if self.handle.device_id == -1:
+            raise NotImplementedError
+        else:
+            dx = singa.GpuPoolingBackward(self.handle,
+                                          dy, self.cache[0], self.cache[1])
+        return dx
+
+
+def pooling_2d(handle, x):
+    return _Pooling2D(handle)(x)[0]
 
-class MaxPool2D(Layer):
 
-    def __init__(self, kernel_size, stride=None, padding=0, dilation=1,
-                 return_indices=False, ceil_mode=False):
+class Pooling2D(Layer):
+
+    def __init__(self, kernel_size, stride=None, padding=0, is_max=True):
         if isinstance(kernel_size, int):
             self.kernel_size = (kernel_size, kernel_size)
         elif isinstance(kernel_size, tuple):
@@ -825,6 +821,8 @@ class MaxPool2D(Layer):
             self.stride = (stride, stride)
         elif isinstance(stride, tuple):
             self.stride = stride
+            assert stride[0] > 0 or (kernel_size[0] == 1 and padding[
+                0] == 0), 'stride[0]=0, but kernel_size[0]=%d, padding[0]=%d' % (kernel_size[0], padding[0])
         else:
             raise TypeError('Wrong stride type.')
 
@@ -835,43 +833,62 @@ class MaxPool2D(Layer):
         else:
             raise TypeError('Wrong padding type.')
 
-        if dilation != 1:
-            raise ValueError('Not implemented yet')
-
-        if return_indices is not False:
-            raise ValueError('Not implemented yet')
-
-        self.ceil_mode = ceil_mode
+        self.is_max = is_max
 
     def __call__(self, x):
-        if self.ceil_mode:
-            out_shape_h = int(math.ceil(
-                (x.shape[2] + 2 * self.padding[0] - self.kernel_size[0]) / self.stride[0])) + 1
-            out_shape_w = int(math.ceil(
-                (x.shape[3] + 2 * self.padding[1] - self.kernel_size[1]) / self.stride[1])) + 1
-        else:
-            out_shape_h = int(
-                (x.shape[2] + 2 * self.padding[0] - self.kernel_size[0]) // self.stride[0]) + 1
-            out_shape_w = int(
-                (x.shape[3] + 2 * self.padding[1] - self.kernel_size[1]) // self.stride[1]) + 1
+
+        out_shape_h = int(
+            (x.shape[2] + 2 * self.padding[0] - self.kernel_size[0]) // self.stride[0]) + 1
+        out_shape_w = int(
+            (x.shape[3] + 2 * self.padding[1] - self.kernel_size[1]) // self.stride[1]) + 1
         if x.device.id() == -1:
             if not hasattr(self, 'handle'):
-                self.handle = singa.PoolingHandle(x.data, self.kernel_size, self.stride,
-                                                  self.padding, self.ceil_mode, 'MAX')
+                self.handle = singa.PoolingHandle(
+                    x.data, self.kernel_size, self.stride, self.padding, self.is_max)
             elif x.shape[0] != self.handle.batchsize or out_shape_h != self.handle.pooled_height or \
                     out_shape_w != self.handle.pooled_width:
                 self.handle = singa.PoolingHandle(x.data, self.kernel_size, self.stride,
-                                                  self.padding, self.ceil_mode, 'MAX')
+                                                  self.padding, self.is_max)
         else:
             if not hasattr(self, 'handle'):
                 self.handle = singa.CudnnPoolingHandle(x.data, self.kernel_size, self.stride,
-                                                       self.padding, self.ceil_mode, 'MAX', False)  # False for nan_prop
+                                                       self.padding, self.is_max)
             elif x.shape[0] != self.handle.batchsize or out_shape_h != self.handle.pooled_height or \
                     out_shape_w != self.handle.pooled_width:
                 self.handle = singa.CudnnPoolingHandle(x.data, self.kernel_size, self.stride,
-                                                       self.padding, self.ceil_mode, 'MAX', False)  # False for nan_prop
+                                                       self.padding, self.is_max)
 
         self.handle.device_id = x.device.id()
 
-        y = max_pool_2d(x, self.handle)
+        y = pooling_2d(self.handle, x)
         return y
+
+
+class MaxPooling2D(Pooling2D):
+
+    def __init__(self, kernel_size, stride=None, padding=0):
+        super(MaxPooling2D, self).__init__(kernel_size, stride, padding, True)
+
+
+class AvgPooling2D(Pooling2D):
+
+    def __init__(self, kernel_size, stride=None, padding=0):
+        super(AvgPooling2D, self).__init__(kernel_size, stride, padding, False)
+
+
+class MaxPooling1D(Pooling2D):
+
+    def __init__(self, kernel_size, stride=None, padding=0):
+        if stride is None:
+            stride = kernel_size
+        super(MaxPooling1D, self).__init__(
+            (1, kernel_size), (0, stride), (0, padding), True)
+
+
+class AvgPooling1D(Pooling2D):
+
+    def __init__(self, kernel_size, stride=None, padding=0):
+        if stride is None:
+            stride = kernel_size
+        super(AvgPooling1D, self).__init__(
+            (1, kernel_size), (0, stride), (0, padding), False)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/fb5cb9ab/src/api/model_operation.i
----------------------------------------------------------------------
diff --git a/src/api/model_operation.i b/src/api/model_operation.i
index 4800ff1..3d9bdbe 100755
--- a/src/api/model_operation.i
+++ b/src/api/model_operation.i
@@ -43,7 +43,7 @@ class PoolingHandle {
  public:
   PoolingHandle(const Tensor &input, const std::vector<size_t>& kernel_size,
                 const std::vector<size_t>& stride, const std::vector<size_t>& padding,
-                const bool ceil_mode = false, const std::string pooling_method = "MAX");
+                const bool is_max=true);
 
   size_t batchsize;
 
@@ -94,8 +94,7 @@ class CudnnPoolingHandle : public PoolingHandle {
  public:
   CudnnPoolingHandle(const Tensor &input, const std::vector<size_t>& kernel_size,
                      const std::vector<size_t>& stride, const std::vector<size_t>& padding,
-                     const bool ceil_mode = false, const std::string pooling_method = "MAX",
-                     const bool NaN_prop = false);
+                     const bool is_max=true);
 
   size_t batchsize;
   
@@ -103,10 +102,9 @@ class CudnnPoolingHandle : public PoolingHandle {
   size_t pooled_width;
 };
 
-Tensor GpuPoolingForward(const Tensor &x, const CudnnPoolingHandle &cph);
+Tensor GpuPoolingForward(const CudnnPoolingHandle &cph, const Tensor &x);
 
-Tensor GpuPoolingBackward(const Tensor &dy, const Tensor& x, const Tensor& y,
-                          const CudnnPoolingHandle &cph);
+Tensor GpuPoolingBackward(const CudnnPoolingHandle &cph, const Tensor &dy, const Tensor& x, const Tensor& y);
 
 #endif  // USE_CUDNN
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/fb5cb9ab/src/model/operation/pooling.cc
----------------------------------------------------------------------
diff --git a/src/model/operation/pooling.cc b/src/model/operation/pooling.cc
index 0abda35..0072671 100644
--- a/src/model/operation/pooling.cc
+++ b/src/model/operation/pooling.cc
@@ -3,9 +3,10 @@
 
 namespace singa {
 
-PoolingHandle::PoolingHandle(const Tensor &input, const std::vector<size_t>& kernel_size,
+PoolingHandle::PoolingHandle(const Tensor &input,
+                             const std::vector<size_t>& kernel_size,
                              const std::vector<size_t>& stride, const std::vector<size_t>& padding,
-                             const bool ceil_mode, const std::string pooling_method) {
+                             const bool is_max) {
   kernel_h = kernel_size[0];
   kernel_w = kernel_size[1];
 
@@ -21,34 +22,24 @@ PoolingHandle::PoolingHandle(const Tensor &input, const std::vector<size_t>& ker
   width = input.shape(3);
 
   pooled_height = 1;
-  if (ceil_mode) {
-    if (stride_h > 0)
-      pooled_height = static_cast<int>(ceil(static_cast<float>(height + 2 * pad_h - kernel_h) / stride_h)) + 1;
-    pooled_width = static_cast<int>(ceil(static_cast<float>(width + 2 * pad_w - kernel_w) / stride_w)) + 1;
-  }
-  else {
-    if (stride_h > 0)
-      pooled_height =
-        static_cast<size_t>((height + 2 * pad_h - kernel_h) / stride_h) + 1;
-    pooled_width =
-      static_cast<size_t>((width + 2 * pad_w - kernel_w) / stride_w) + 1;
-  }
-
-  method = pooling_method;
-  CHECK(method == "MAX" || method == "AVERAGE")
-      << "Padding implemented only for average and max pooling.";
+
+  if (stride_h > 0)
+    pooled_height =
+      static_cast<size_t>((height + 2 * pad_h - kernel_h) / stride_h) + 1;
+  pooled_width =
+    static_cast<size_t>((width + 2 * pad_w - kernel_w) / stride_w) + 1;
+  is_max_pooling = is_max;
 }
 
 #ifdef USE_CUDNN
 
-CudnnPoolingHandle::CudnnPoolingHandle(const Tensor &input, const std::vector<size_t>& kernel_size,
+CudnnPoolingHandle::CudnnPoolingHandle(const Tensor &input,
+                                       const std::vector<size_t>& kernel_size,
                                        const std::vector<size_t>& stride, const std::vector<size_t>& padding,
-                                       const bool ceil_mode, const std::string pooling_method, const bool NaN_prop)
-  : PoolingHandle(input, kernel_size, stride, padding, ceil_mode, pooling_method) {
-  if (NaN_prop)
-    nan_prop = CUDNN_PROPAGATE_NAN;
-  else
-    nan_prop = CUDNN_NOT_PROPAGATE_NAN;
+                                       const bool is_max)
+  : PoolingHandle(input, kernel_size, stride, padding, is_max) {
+
+  // nan_prop = CUDNN_NOT_PROPAGATE_NAN;
 
   DataType dtype = input.data_type();
 
@@ -64,12 +55,10 @@ CudnnPoolingHandle::CudnnPoolingHandle(const Tensor &input, const std::vector<si
                 y_desc, CUDNN_TENSOR_NCHW, GetCudnnDataType(dtype), batchsize, channels,
                 pooled_height, pooled_width));
   auto pool_method = CUDNN_POOLING_MAX;
-  if (method == "MAX")
+  if (is_max)
     pool_method = CUDNN_POOLING_MAX;
-  else if (method == "AVERAGE")
-    pool_method = CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING;
   else
-    LOG(FATAL) << "Not implemented!";
+    pool_method = CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING;
 
   CUDNN_CHECK(cudnnSetPooling2dDescriptor(pool_desc, pool_method, nan_prop,
                                           kernel_h, kernel_w, pad_h, pad_w,
@@ -81,26 +70,24 @@ CudnnPoolingHandle::~CudnnPoolingHandle() {
     CUDNN_CHECK(cudnnDestroyPoolingDescriptor(pool_desc));
   if (x_desc != nullptr) CUDNN_CHECK(cudnnDestroyTensorDescriptor(x_desc));
   if (y_desc != nullptr) CUDNN_CHECK(cudnnDestroyTensorDescriptor(y_desc));
-};
+}
+
 
 Tensor GpuPoolingForward(const Tensor &x, const CudnnPoolingHandle &cph) {
   CHECK_EQ(x.device()->lang(), kCuda);
   CHECK_EQ(x.nDim(), 4u);
 
-  DataType dtype = x.data_type();
-  auto dev = x.device();
-  Shape shape{cph.batchsize, cph.channels, cph.pooled_height, cph.pooled_width};
-  Tensor output = Tensor(shape, dev, dtype);
+  Tensor output = Tensor({cph.batchsize, cph.channels, cph.pooled_height, cph.pooled_width},
+                         x.device(), x.data_type());
 
-  output.device()->Exec([&x, &output, &cph](Context * ctx) {
-    Block *inblock = x.block(), *outblock = output.block();
+  output.device()->Exec([&](Context * ctx) {
     float alpha = 1.0f, beta = 0.0f;
     cudnnPoolingForward(ctx->cudnn_handle, cph.pool_desc, &alpha,
-                        cph.x_desc, inblock->data(), &beta, cph.y_desc,
-                        outblock->mutable_data());
+                        cph.x_desc, x.block()->data(), &beta, cph.y_desc,
+                        output.block()->mutable_data());
   }, {x.block()}, {output.block()});
   return output;
-};
+}
 
 Tensor GpuPoolingBackward(const Tensor &dy, const Tensor& x, const Tensor& y,
                           const CudnnPoolingHandle &cph) {
@@ -110,14 +97,13 @@ Tensor GpuPoolingBackward(const Tensor &dy, const Tensor& x, const Tensor& y,
   Tensor dx;
   dx.ResetLike(x);
 
-  dx.device()->Exec([&dx, &dy, &x, &y, &cph](Context * ctx) {
-    Block *dyblock = dy.block(), *dxblock = dx.block(), *yblock = y.block(),
-           *xblock = x.block();
+  dx.device()->Exec([&](Context * ctx) {
+
     float alpha = 1.0f, beta = 0.0f;
     cudnnPoolingBackward(ctx->cudnn_handle, cph.pool_desc, &alpha,
-                         cph.y_desc, yblock->data(), cph.y_desc,
-                         dyblock->data(), cph.x_desc, xblock->data(), &beta,
-                         cph.x_desc, dxblock->mutable_data());
+                         cph.y_desc, y.block()->data(), cph.y_desc,
+                         dy.block()->data(), cph.x_desc, x.block()->data(), &beta,
+                         cph.x_desc, dx.block()->mutable_data());
   }, {dy.block(), y.block(), x.block()}, {dx.block()});
   return dx;
 };

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/fb5cb9ab/src/model/operation/pooling.h
----------------------------------------------------------------------
diff --git a/src/model/operation/pooling.h b/src/model/operation/pooling.h
index 9ed7e33..a4d1051 100644
--- a/src/model/operation/pooling.h
+++ b/src/model/operation/pooling.h
@@ -12,10 +12,10 @@
 namespace singa {
 
 class PoolingHandle {
-public:
+ public:
   PoolingHandle(const Tensor &input, const std::vector<size_t>& kernel_size,
                 const std::vector<size_t>& stride, const std::vector<size_t>& padding,
-                const bool ceil_mode = false, const std::string pooling_method = "MAX");
+                const bool is_max = true);
 
   size_t kernel_w;
   size_t pad_w;
@@ -32,29 +32,28 @@ public:
   size_t pooled_height;
   size_t pooled_width;
 
-  std::string method;
+  bool is_max_pooling;
 };
 
 #ifdef USE_CUDNN
 class CudnnPoolingHandle : public PoolingHandle {
-public:
+ public:
   CudnnPoolingHandle(const Tensor &input, const std::vector<size_t>& kernel_size,
                      const std::vector<size_t>& stride, const std::vector<size_t>& padding,
-                     const bool ceil_mode = false, const std::string pooling_method = "MAX",
-                     const bool NaN_prop = false);
+                     const bool is_max = true);
   ~CudnnPoolingHandle();
 
   cudnnTensorDescriptor_t x_desc = nullptr;
   cudnnTensorDescriptor_t y_desc = nullptr;
   cudnnPoolingDescriptor_t pool_desc = nullptr;
-  cudnnNanPropagation_t nan_prop;
+  cudnnNanPropagation_t nan_prop = CUDNN_PROPAGATE_NAN;
 
 };
 
-Tensor GpuPoolingForward(const Tensor &x, const CudnnPoolingHandle &cph);
+Tensor GpuPoolingForward(const CudnnPoolingHandle &cph, const Tensor &x);
 
-Tensor GpuPoolingBackward(const Tensor &dy, const Tensor& x, const Tensor& y,
-                          const CudnnPoolingHandle &cph);
+Tensor GpuPoolingBackward(const CudnnPoolingHandle &cph, const Tensor &dy,
+                          const Tensor& x, const Tensor& y);
 
 #endif  //USE_CUDNN