You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by GitBox <gi...@apache.org> on 2018/05/29 18:38:18 UTC

[GitHub] piiswrong closed pull request #10804: Use depthwise convolution(group convolution) by cuDNNv7 if available

piiswrong closed pull request #10804: Use depthwise convolution(group convolution) by cuDNNv7 if available
URL: https://github.com/apache/incubator-mxnet/pull/10804
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/src/operator/nn/cudnn/cudnn_convolution-inl.h b/src/operator/nn/cudnn/cudnn_convolution-inl.h
index ca60c99683e..0c445a9d644 100644
--- a/src/operator/nn/cudnn/cudnn_convolution-inl.h
+++ b/src/operator/nn/cudnn/cudnn_convolution-inl.h
@@ -137,6 +137,35 @@ class CuDNNConvolutionOp {
     DType *wmat_ptr = GetNdPtr(in_data[conv::kWeight], param_.kernel.ndim() + 2, s);
     DType *out_ptr = GetNdPtr(out_data[conv::kOut], param_.kernel.ndim() + 2, s);
 
+    #if CUDNN_MAJOR >= 7
+      typename DataType<DType>::ScaleType alpha = 1.0f;
+      typename DataType<DType>::ScaleType beta = 0.0f;
+      typename DataType<DType>::ScaleType beta_add = 1.0f;
+      CUDNN_CALL(cudnnConvolutionForward(s->dnn_handle_,
+                      &alpha,
+                      in_desc_,
+                      data_ptr,
+                      filter_desc_,
+                      wmat_ptr,
+                      forward_conv_desc_,
+                      forward_algo_.AlgoNumber(),
+                      workspace.dptr_,
+                      workspace_size,
+                      req[conv::kOut] == kAddTo? &beta_add : &beta,
+                      out_desc_,
+                        out_ptr));
+
+      if (!param_.no_bias) {
+        Tensor<gpu, 1, DType> bias = in_data[conv::kBias].get<gpu, 1, DType>(s);
+        CUDNN_CALL(cudnnAddTensor(s->dnn_handle_,
+                                &alpha,
+                                bias_desc_,
+                                bias.dptr_,
+                                &beta_add,
+                                out_desc_,
+                                out_ptr));
+      }
+    #else
     for (uint32_t g = 0; g < param_.num_group; ++g) {
       typename DataType<DType>::ScaleType alpha = 1.0f;
       typename DataType<DType>::ScaleType beta = 0.0f;
@@ -177,6 +206,7 @@ class CuDNNConvolutionOp {
         #endif
       }
     }
+    #endif  // CUDNN_MAJOR >= 7
   }
 
   void Backward(const OpContext &ctx,
@@ -202,83 +232,129 @@ class CuDNNConvolutionOp {
     GetTempSize(ctx);
     Tensor<gpu, 1, DType> workspace = AllocateTempWorkspace(ctx, backward_workspace_byte_);
     size_t workspace_size = TensorSizeBytes(workspace);
-    for (uint32_t g = 0; g < param_.num_group; ++g) {
-      typename DataType<DType>::ScaleType alpha = 1.0f;
-      typename DataType<DType>::ScaleType beta = 0.0f;
-      typename DataType<DType>::ScaleType beta_add = 1.0f;
-      if (!param_.no_bias && (req[conv::kBias] != kNullOp)) {
-        Tensor<gpu, 1, DType> gbias = in_grad[conv::kBias].get<gpu, 1, DType>(s);
-        CUDNN_CALL(cudnnConvolutionBackwardBias(s->dnn_handle_,
-                                              &alpha,
-                                              out_desc_,
-                                              grad_ptr + out_offset_ * g,
-                                              req[conv::kBias] == kAddTo ? &beta_add : &beta,
-                                              bias_desc_,
-                                              gbias.dptr_ + bias_offset_ * g));
-      }
-      if (req[conv::kWeight] != kNullOp) {
-        #if CUDNN_MAJOR <= 4
-          CUDNN_CALL(cudnnConvolutionBackwardFilter_v3(s->dnn_handle_,
-               &alpha,
-               in_desc_,
-               data_ptr + data_offset_ * g,
-               out_desc_,
-               grad_ptr + out_offset_ * g,
-               back_conv_desc_w_,
-               back_algo_w_.AlgoNumber(),
-               workspace.dptr_,
-               workspace_size,
-               req[conv::kWeight] == kAddTo? &beta_add : &beta,
-               filter_desc_,
-               gwmat_ptr + weight_offset_ * g));
-        #elif CUDNN_MAJOR >= 5
-          CUDNN_CALL(cudnnConvolutionBackwardFilter(s->dnn_handle_,
-               &alpha,
-               in_desc_,
-               data_ptr + data_offset_ * g,
-               out_desc_,
-               grad_ptr + out_offset_ * g,
-               back_conv_desc_w_,
-               back_algo_w_.AlgoNumber(),
-               workspace.dptr_,
-               workspace_size,
-               req[conv::kWeight] == kAddTo? &beta_add : &beta,
-               filter_desc_,
-               gwmat_ptr + weight_offset_ * g));
-        #endif
-      }
-      if (req[conv::kData] != kNullOp) {
-        #if CUDNN_MAJOR <= 4
-          CUDNN_CALL(cudnnConvolutionBackwardData_v3(s->dnn_handle_,
-               &alpha,
-               filter_desc_,
-               wmat_ptr + weight_offset_ * g,
-               out_desc_,
-               grad_ptr + out_offset_ * g,
-               back_conv_desc_,
-               back_algo_.AlgoNumber(),
-               workspace.dptr_,
-               workspace_size,
-               req[conv::kData] == kAddTo? &beta_add : &beta,
-               in_desc_,
-               gdata_ptr + data_offset_ * g));
-        #elif CUDNN_MAJOR >= 5
-          CUDNN_CALL(cudnnConvolutionBackwardData(s->dnn_handle_,
-               &alpha,
-               filter_desc_,
-               wmat_ptr + weight_offset_ * g,
-               out_desc_,
-               grad_ptr + out_offset_ * g,
-               back_conv_desc_,
-               back_algo_.AlgoNumber(),
-               workspace.dptr_,
-               workspace_size,
-               req[conv::kData] == kAddTo? &beta_add : &beta,
-               in_desc_,
-               gdata_ptr + data_offset_ * g));
-        #endif
-      }
-    }
+    #if CUDNN_MAJOR >= 7
+        typename DataType<DType>::ScaleType alpha = 1.0f;
+        typename DataType<DType>::ScaleType beta = 0.0f;
+        typename DataType<DType>::ScaleType beta_add = 1.0f;
+        if (!param_.no_bias && (req[conv::kBias] != kNullOp)) {
+            Tensor<gpu, 1, DType> gbias = in_grad[conv::kBias].get<gpu, 1, DType>(s);
+            CUDNN_CALL(cudnnConvolutionBackwardBias(s->dnn_handle_,
+                                                &alpha,
+                                                out_desc_,
+                                                grad_ptr,
+                                                req[conv::kBias] == kAddTo ? &beta_add : &beta,
+                                                bias_desc_,
+                                                gbias.dptr_));
+        }
+        if (req[conv::kWeight] != kNullOp) {
+            CUDNN_CALL(cudnnConvolutionBackwardFilter(s->dnn_handle_,
+                &alpha,
+                in_desc_,
+                data_ptr,
+                out_desc_,
+                grad_ptr,
+                back_conv_desc_w_,
+                back_algo_w_.AlgoNumber(),
+                workspace.dptr_,
+                workspace_size,
+                req[conv::kWeight] == kAddTo? &beta_add : &beta,
+                filter_desc_,
+                gwmat_ptr));
+        }
+        if (req[conv::kData] != kNullOp) {
+            CUDNN_CALL(cudnnConvolutionBackwardData(s->dnn_handle_,
+                &alpha,
+                filter_desc_,
+                wmat_ptr,
+                out_desc_,
+                grad_ptr,
+                back_conv_desc_,
+                back_algo_.AlgoNumber(),
+                workspace.dptr_,
+                workspace_size,
+                req[conv::kData] == kAddTo? &beta_add : &beta,
+                in_desc_,
+                gdata_ptr));
+        }
+    #else
+        for (uint32_t g = 0; g < param_.num_group; ++g) {
+            typename DataType<DType>::ScaleType alpha = 1.0f;
+            typename DataType<DType>::ScaleType beta = 0.0f;
+            typename DataType<DType>::ScaleType beta_add = 1.0f;
+            if (!param_.no_bias && (req[conv::kBias] != kNullOp)) {
+                Tensor<gpu, 1, DType> gbias = in_grad[conv::kBias].get<gpu, 1, DType>(s);
+                CUDNN_CALL(cudnnConvolutionBackwardBias(s->dnn_handle_,
+                                                    &alpha,
+                                                    out_desc_,
+                                                    grad_ptr + out_offset_ * g,
+                                                    req[conv::kBias] == kAddTo ? &beta_add : &beta,
+                                                    bias_desc_,
+                                                    gbias.dptr_ + bias_offset_ * g));
+            }
+            if (req[conv::kWeight] != kNullOp) {
+                #if CUDNN_MAJOR <= 4
+                CUDNN_CALL(cudnnConvolutionBackwardFilter_v3(s->dnn_handle_,
+                    &alpha,
+                    in_desc_,
+                    data_ptr + data_offset_ * g,
+                    out_desc_,
+                    grad_ptr + out_offset_ * g,
+                    back_conv_desc_w_,
+                    back_algo_w_.AlgoNumber(),
+                    workspace.dptr_,
+                    workspace_size,
+                    req[conv::kWeight] == kAddTo? &beta_add : &beta,
+                    filter_desc_,
+                    gwmat_ptr + weight_offset_ * g));
+                #elif CUDNN_MAJOR >= 5
+                CUDNN_CALL(cudnnConvolutionBackwardFilter(s->dnn_handle_,
+                    &alpha,
+                    in_desc_,
+                    data_ptr + data_offset_ * g,
+                    out_desc_,
+                    grad_ptr + out_offset_ * g,
+                    back_conv_desc_w_,
+                    back_algo_w_.AlgoNumber(),
+                    workspace.dptr_,
+                    workspace_size,
+                    req[conv::kWeight] == kAddTo? &beta_add : &beta,
+                    filter_desc_,
+                    gwmat_ptr + weight_offset_ * g));
+                #endif
+            }
+            if (req[conv::kData] != kNullOp) {
+                #if CUDNN_MAJOR <= 4
+                CUDNN_CALL(cudnnConvolutionBackwardData_v3(s->dnn_handle_,
+                    &alpha,
+                    filter_desc_,
+                    wmat_ptr + weight_offset_ * g,
+                    out_desc_,
+                    grad_ptr + out_offset_ * g,
+                    back_conv_desc_,
+                    back_algo_.AlgoNumber(),
+                    workspace.dptr_,
+                    workspace_size,
+                    req[conv::kData] == kAddTo? &beta_add : &beta,
+                    in_desc_,
+                    gdata_ptr + data_offset_ * g));
+                #elif CUDNN_MAJOR >= 5
+                CUDNN_CALL(cudnnConvolutionBackwardData(s->dnn_handle_,
+                    &alpha,
+                    filter_desc_,
+                    wmat_ptr + weight_offset_ * g,
+                    out_desc_,
+                    grad_ptr + out_offset_ * g,
+                    back_conv_desc_,
+                    back_algo_.AlgoNumber(),
+                    workspace.dptr_,
+                    workspace_size,
+                    req[conv::kData] == kAddTo? &beta_add : &beta,
+                    in_desc_,
+                    gdata_ptr + data_offset_ * g));
+                #endif
+            }
+        }
+    #endif  // CUDNN_MAJOR >= 7
   }
 
 /*!
@@ -342,7 +418,10 @@ class CuDNNConvolutionOp {
     TShape wshape = in_shape[conv::kWeight];
     TShape oshape = out_shape[conv::kOut];
     TShape dstride, ostride;
+#if CUDNN_MAJOR <= 6
     wshape[0] /= param_.num_group;
+#endif
+
 #if CUDNN_MAJOR <= 5
       // As of cuDNN_v6, the unsuffixed version of cudnnSetConvolution2dDescriptor()
       // takes an additional 'computeType' parameter to set the precision of the
@@ -464,9 +543,15 @@ class CuDNNConvolutionOp {
       CUDNN_CALL(cudnnSetConvolutionMathType(forward_conv_desc_, math_type));
       CUDNN_CALL(cudnnSetConvolutionMathType(back_conv_desc_, math_type));
       CUDNN_CALL(cudnnSetConvolutionMathType(back_conv_desc_w_, math_type));
+      CUDNN_CALL(cudnnSetConvolutionGroupCount(forward_conv_desc_, param_.num_group));
+      CUDNN_CALL(cudnnSetConvolutionGroupCount(back_conv_desc_, param_.num_group));
+      CUDNN_CALL(cudnnSetConvolutionGroupCount(back_conv_desc_w_, param_.num_group));
     #endif
+
+  #if CUDNN_MAJOR <= 6
     dshape[1] /= param_.num_group;
     oshape[1] /= param_.num_group;
+  #endif
     weight_offset_ = wshape.Size();
     data_offset_ = dstride[1] * dshape[1];
     out_offset_ = ostride[1] * oshape[1];
@@ -494,10 +579,17 @@ class CuDNNConvolutionOp {
 
     if (!param_.no_bias) {
       TShape bias = in_shape[conv::kBias];
-      bias_offset_ = bias[0] / param_.num_group;
-      std::vector<int> bias_shape = {1,
-                                     static_cast<int>(bias[0] / param_.num_group),
-                                     1, 1};
+      #if CUDNN_MAJOR >= 7
+        bias_offset_ = bias[0];
+        std::vector<int> bias_shape = {1,
+                                       static_cast<int>(bias[0]),
+                                       1, 1};
+      #else
+        bias_offset_ = bias[0] / param_.num_group;
+        std::vector<int> bias_shape = {1,
+                                       static_cast<int>(bias[0] / param_.num_group),
+                                       1, 1};
+      #endif
       std::vector<int> bias_stride = {static_cast<int>(bias_offset_), 1, 1, 1};
       if (param_.kernel.ndim() == 3) {
         bias_shape.push_back(1);


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services