You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by GitBox <gi...@apache.org> on 2018/05/29 18:35:48 UTC
[GitHub] piiswrong closed pull request #11076: [MXNET-491] Use depthwise
convolution by cuDNNv7 if available, updated version
piiswrong closed pull request #11076: [MXNET-491] Use depthwise convolution by cuDNNv7 if available, updated version
URL: https://github.com/apache/incubator-mxnet/pull/11076
This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:
As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):
diff --git a/src/operator/nn/convolution.cu b/src/operator/nn/convolution.cu
index 045e570f9d8..65a320ded16 100644
--- a/src/operator/nn/convolution.cu
+++ b/src/operator/nn/convolution.cu
@@ -97,7 +97,9 @@ void ConvolutionCompute<gpu>(const nnvm::NodeAttrs& attrs,
op.Forward(ctx, inputs, req, outputs);
})
return;
- } else if (param.num_filter == param.num_group &&
+ }
+#if MXNET_USE_CUDNN == 0 || CUDNN_MAJOR < 7
+ if (param.num_filter == param.num_group &&
param.layout.value() == mshadow::kNCHW &&
param.num_filter == inputs[conv::kData].shape_[1] &&
param.kernel.ndim() == 2 &&
@@ -112,6 +114,7 @@ void ConvolutionCompute<gpu>(const nnvm::NodeAttrs& attrs,
op.Forward(ctx, inputs, req, outputs);
return;
}
+#endif
#if MXNET_USE_CUDNN == 1
// On fp16-I/O instances, use fp32 compute (i.e. pseudo-fp16).
@@ -167,7 +170,9 @@ void ConvolutionGradCompute<gpu>(const nnvm::NodeAttrs& attrs,
op.Backward(ctx, std::vector<TBlob>{out_grad}, in_data, req, in_grad);
})
return;
- } else if (param.num_filter == param.num_group &&
+ }
+#if MXNET_USE_CUDNN == 0 || CUDNN_MAJOR < 7
+ if (param.num_filter == param.num_group &&
param.layout.value() == mshadow::kNCHW &&
param.num_filter == in_data[conv::kData].shape_[1] &&
param.kernel.ndim() == 2 &&
@@ -183,6 +188,7 @@ void ConvolutionGradCompute<gpu>(const nnvm::NodeAttrs& attrs,
op.Backward(ctx, std::vector<TBlob>{out_grad}, in_data, req, in_grad);
return;
}
+#endif
#if MXNET_USE_CUDNN == 1
// On fp16-I/O instances, use fp32 compute (i.e. pseudo-fp16).
diff --git a/src/operator/nn/cudnn/cudnn_convolution-inl.h b/src/operator/nn/cudnn/cudnn_convolution-inl.h
index ca60c99683e..4b1cbbe7057 100644
--- a/src/operator/nn/cudnn/cudnn_convolution-inl.h
+++ b/src/operator/nn/cudnn/cudnn_convolution-inl.h
@@ -137,6 +137,35 @@ class CuDNNConvolutionOp {
DType *wmat_ptr = GetNdPtr(in_data[conv::kWeight], param_.kernel.ndim() + 2, s);
DType *out_ptr = GetNdPtr(out_data[conv::kOut], param_.kernel.ndim() + 2, s);
+ #if CUDNN_MAJOR >= 7
+ typename DataType<DType>::ScaleType alpha = 1.0f;
+ typename DataType<DType>::ScaleType beta = 0.0f;
+ typename DataType<DType>::ScaleType beta_add = 1.0f;
+ CUDNN_CALL(cudnnConvolutionForward(s->dnn_handle_,
+ &alpha,
+ in_desc_,
+ data_ptr,
+ filter_desc_,
+ wmat_ptr,
+ forward_conv_desc_,
+ forward_algo_.AlgoNumber(),
+ workspace.dptr_,
+ workspace_size,
+ req[conv::kOut] == kAddTo? &beta_add : &beta,
+ out_desc_,
+ out_ptr));
+
+ if (!param_.no_bias) {
+ Tensor<gpu, 1, DType> bias = in_data[conv::kBias].get<gpu, 1, DType>(s);
+ CUDNN_CALL(cudnnAddTensor(s->dnn_handle_,
+ &alpha,
+ bias_desc_,
+ bias.dptr_,
+ &beta_add,
+ out_desc_,
+ out_ptr));
+ }
+ #else
for (uint32_t g = 0; g < param_.num_group; ++g) {
typename DataType<DType>::ScaleType alpha = 1.0f;
typename DataType<DType>::ScaleType beta = 0.0f;
@@ -177,6 +206,7 @@ class CuDNNConvolutionOp {
#endif
}
}
+ #endif // CUDNN_MAJOR >= 7
}
void Backward(const OpContext &ctx,
@@ -202,6 +232,51 @@ class CuDNNConvolutionOp {
GetTempSize(ctx);
Tensor<gpu, 1, DType> workspace = AllocateTempWorkspace(ctx, backward_workspace_byte_);
size_t workspace_size = TensorSizeBytes(workspace);
+ #if CUDNN_MAJOR >= 7
+ typename DataType<DType>::ScaleType alpha = 1.0f;
+ typename DataType<DType>::ScaleType beta = 0.0f;
+ typename DataType<DType>::ScaleType beta_add = 1.0f;
+ if (!param_.no_bias && (req[conv::kBias] != kNullOp)) {
+ Tensor<gpu, 1, DType> gbias = in_grad[conv::kBias].get<gpu, 1, DType>(s);
+ CUDNN_CALL(cudnnConvolutionBackwardBias(s->dnn_handle_,
+ &alpha,
+ out_desc_,
+ grad_ptr,
+ req[conv::kBias] == kAddTo ? &beta_add : &beta,
+ bias_desc_,
+ gbias.dptr_));
+ }
+ if (req[conv::kWeight] != kNullOp) {
+ CUDNN_CALL(cudnnConvolutionBackwardFilter(s->dnn_handle_,
+ &alpha,
+ in_desc_,
+ data_ptr,
+ out_desc_,
+ grad_ptr,
+ back_conv_desc_w_,
+ back_algo_w_.AlgoNumber(),
+ workspace.dptr_,
+ workspace_size,
+ req[conv::kWeight] == kAddTo? &beta_add : &beta,
+ filter_desc_,
+ gwmat_ptr));
+ }
+ if (req[conv::kData] != kNullOp) {
+ CUDNN_CALL(cudnnConvolutionBackwardData(s->dnn_handle_,
+ &alpha,
+ filter_desc_,
+ wmat_ptr,
+ out_desc_,
+ grad_ptr,
+ back_conv_desc_,
+ back_algo_.AlgoNumber(),
+ workspace.dptr_,
+ workspace_size,
+ req[conv::kData] == kAddTo? &beta_add : &beta,
+ in_desc_,
+ gdata_ptr));
+ }
+ #else
for (uint32_t g = 0; g < param_.num_group; ++g) {
typename DataType<DType>::ScaleType alpha = 1.0f;
typename DataType<DType>::ScaleType beta = 0.0f;
@@ -279,6 +354,7 @@ class CuDNNConvolutionOp {
#endif
}
}
+ #endif // CUDNN_MAJOR >= 7
}
/*!
@@ -342,7 +418,10 @@ class CuDNNConvolutionOp {
TShape wshape = in_shape[conv::kWeight];
TShape oshape = out_shape[conv::kOut];
TShape dstride, ostride;
+#if CUDNN_MAJOR <= 6
wshape[0] /= param_.num_group;
+#endif
+
#if CUDNN_MAJOR <= 5
// As of cuDNN_v6, the unsuffixed version of cudnnSetConvolution2dDescriptor()
// takes an additional 'computeType' parameter to set the precision of the
@@ -464,9 +543,15 @@ class CuDNNConvolutionOp {
CUDNN_CALL(cudnnSetConvolutionMathType(forward_conv_desc_, math_type));
CUDNN_CALL(cudnnSetConvolutionMathType(back_conv_desc_, math_type));
CUDNN_CALL(cudnnSetConvolutionMathType(back_conv_desc_w_, math_type));
+ CUDNN_CALL(cudnnSetConvolutionGroupCount(forward_conv_desc_, param_.num_group));
+ CUDNN_CALL(cudnnSetConvolutionGroupCount(back_conv_desc_, param_.num_group));
+ CUDNN_CALL(cudnnSetConvolutionGroupCount(back_conv_desc_w_, param_.num_group));
#endif
+
+ #if CUDNN_MAJOR <= 6
dshape[1] /= param_.num_group;
oshape[1] /= param_.num_group;
+ #endif
weight_offset_ = wshape.Size();
data_offset_ = dstride[1] * dshape[1];
out_offset_ = ostride[1] * oshape[1];
@@ -494,10 +579,17 @@ class CuDNNConvolutionOp {
if (!param_.no_bias) {
TShape bias = in_shape[conv::kBias];
+ #if CUDNN_MAJOR >= 7
+ bias_offset_ = bias[0];
+ std::vector<int> bias_shape = {1,
+ static_cast<int>(bias[0]),
+ 1, 1};
+ #else
bias_offset_ = bias[0] / param_.num_group;
std::vector<int> bias_shape = {1,
static_cast<int>(bias[0] / param_.num_group),
1, 1};
+ #endif
std::vector<int> bias_stride = {static_cast<int>(bias_offset_), 1, 1, 1};
if (param_.kernel.ndim() == 3) {
bias_shape.push_back(1);
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
With regards,
Apache Git Services