You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tvm.apache.org by GitBox <gi...@apache.org> on 2019/11/27 13:03:31 UTC

[GitHub] [incubator-tvm] optima2005 commented on a change in pull request #4418: [RUNTIME] Add cudnn conv3d

optima2005 commented on a change in pull request #4418: [RUNTIME] Add cudnn conv3d
URL: https://github.com/apache/incubator-tvm/pull/4418#discussion_r351274384
 
 

 ##########
 File path: src/runtime/contrib/cudnn/conv_forward.cc
 ##########
 @@ -120,137 +200,144 @@ TVM_REGISTER_GLOBAL("tvm.contrib.cudnn.conv2d.forward")
                                      CuDNNDataType::GetConst<0>(entry_ptr->conv_entry.data_type),
                                      entry_ptr->conv_entry.output_desc,
                                      y->data));
-});
+}
 
 
-TVM_REGISTER_GLOBAL("tvm.contrib.cudnn.conv2d.output_shape")
-.set_body([](TVMArgs args, TVMRetValue *ret) {
+void OutputShape(
+  int format,
+  int dims,
+  const int pad_v[],
+  const int stride_v[],
+  const int dilation_v[],
+  const int x_dim_v[],
+  const int w_dim_v[],
+  void *out_shape,
+  const std::string& data_dtype,
+  const std::string& conv_dtype) {
+  // Dims includes N and C
+  int full_dims = dims + 2;
+
   CuDNNThreadEntry* entry_ptr = CuDNNThreadEntry::ThreadLocal();
-  int format = args[0];
-  int pad_h = args[1];
-  int pad_w = args[2];
-  int stride_h = args[3];
-  int stride_w = args[4];
-  int dilation_h = args[5];
-  int dilation_w = args[6];
-  int x_dim0 = args[7];
-  int x_dim1 = args[8];
-  int x_dim2 = args[9];
-  int x_dim3 = args[10];
-  int w_dim0 = args[11];
-  int w_dim1 = args[12];
-  int w_dim2 = args[13];
-  int w_dim3 = args[14];
-  void *out_shape = args[15];
-  std::string data_dtype = args[16];
-  std::string conv_dtype = args[17];
+
   // Set Data Type
   entry_ptr->conv_entry.data_type = CuDNNDataType::DLTypeToCuDNNType(String2TVMType(conv_dtype));
   cudnnDataType_t data_type = CuDNNDataType::DLTypeToCuDNNType(String2TVMType(data_dtype));
   // Set Format
   entry_ptr->conv_entry.tensor_format = static_cast<cudnnTensorFormat_t>(format);
+
   // conv desc
-  CUDNN_CALL(cudnnSetConvolution2dDescriptor(entry_ptr->conv_entry.conv_desc,
-                                             pad_h,
-                                             pad_w,
-                                             stride_h,
-                                             stride_w,
-                                             dilation_h,
-                                             dilation_w,
+  CUDNN_CALL(cudnnSetConvolutionNdDescriptor(entry_ptr->conv_entry.conv_desc,
+                                             dims,
+                                             pad_v,
+                                             stride_v,
+                                             dilation_v,
                                              CUDNN_CROSS_CORRELATION,
                                              entry_ptr->conv_entry.data_type));
-  // input desc
-  CUDNN_CALL(cudnnSetTensor4dDescriptor(entry_ptr->conv_entry.input_desc,
-                                        entry_ptr->conv_entry.tensor_format,
-                                        data_type,
-                                        x_dim0,
-                                        x_dim1,
-                                        x_dim2,
-                                        x_dim3));
-  // filter desc
-  CUDNN_CALL(cudnnSetFilter4dDescriptor(entry_ptr->conv_entry.filter_desc,
-                                        data_type,
-                                        entry_ptr->conv_entry.tensor_format,
-                                        w_dim0,
-                                        w_dim1,
-                                        w_dim2,
-                                        w_dim3));
-
-  CUDNN_CALL(cudnnGetConvolution2dForwardOutputDim(entry_ptr->conv_entry.conv_desc,
-                                                   entry_ptr->conv_entry.input_desc,
-                                                   entry_ptr->conv_entry.filter_desc,
-                                                   static_cast<int*>(out_shape),
-                                                   static_cast<int*>(out_shape) + 1,
-                                                   static_cast<int*>(out_shape) + 2,
-                                                   static_cast<int*>(out_shape) + 3));
-});
 
+  if (dims == 2 && entry_ptr->conv_entry.tensor_format ==  CUDNN_TENSOR_NHWC) {
 
 Review comment:
   without this special case, I always got below error:
   ```
   E             [bt] (2) /host/root/ligc/tmp/incubator-tvm/build/libtvm.so(TVMFuncCall+0x90) [0x3fff7599dfa0]
   E             [bt] (1) /host/root/ligc/tmp/incubator-tvm/build/libtvm.so(+0xeea644) [0x3fff75a5a644]
   E             [bt] (0) /host/root/ligc/tmp/incubator-tvm/build/libtvm.so(tvm::contrib::OutputShape(int, int, int const*, int const*, int const*, int const*, int const*, void*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&)+0x1114) [0x3fff75a59a94]
   E             File "/host/root/ligc/tmp/incubator-tvm/src/runtime/contrib/cudnn/conv_forward.cc", line 261
   E           cuDNN: Check failed: e == CUDNN_STATUS_SUCCESS (3 vs. 0) : CUDNN_STATUS_BAD_PARAM
   ```
   I have double checked the shape for input and weight, they should follow the SDK spec, would you please also take a look to see if any wrong in the code:
   ```
   format
   
       Input.Type of the filter layout format. If this input is set to CUDNN_TENSOR_NCHW, which is one of the enumerant values allowed by cudnnTensorFormat_t descriptor, then the layout of the filter is as follows:
   
           For N=4, a 4D filter descriptor, the filter layout is in the form of KCRS:
               K represents the number of output feature maps
               C is the number of input feature maps
               R is the number of rows per filter
               S is the number of columns per filter
           For N=3, a 3D filter descriptor, the number S (number of columns per filter) is omitted.
           For N=5 and greater, the layout of the higher dimensions immediately follow RS.
   
       On the other hand, if this input is set to CUDNN_TENSOR_NHWC, then the layout of the filter is as follows:
   
           For N=4, a 4D filter descriptor, the filter layout is in the form of KRSC.
           For N=3, a 3D filter descriptor, the number S (number of columns per filter) is omitted and the layout of C immediately follows R.
           For N=5 and greater, the layout of the higher dimensions are inserted between S and C. For more information, see cudnnTensorFormat_t.
   ```
   
   

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services