You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by GitBox <gi...@apache.org> on 2018/03/01 19:30:21 UTC

[GitHub] piiswrong closed pull request #9904: Improve workspace in convolution/deconvolution

piiswrong closed pull request #9904: Improve workspace in convolution/deconvolution
URL: https://github.com/apache/incubator-mxnet/pull/9904
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (because GitHub does not otherwise display the original diff once a fork-based pull request has been merged):

diff --git a/src/operator/convolution_v1-inl.h b/src/operator/convolution_v1-inl.h
index bc6326aad76..d8310e6f1fc 100644
--- a/src/operator/convolution_v1-inl.h
+++ b/src/operator/convolution_v1-inl.h
@@ -76,7 +76,11 @@ struct ConvolutionV1Param : public dmlc::Parameter<ConvolutionV1Param> {
     .describe("Number of group partitions. Equivalent to slicing input into num_group\n    "
               "partitions, apply convolution on each, then concatenate the results");
     DMLC_DECLARE_FIELD(workspace).set_default(1024).set_range(0, 8192)
-    .describe("Maximum tmp workspace allowed for convolution (MB).");
+    .describe("Maximum temporary workspace allowed for convolution (MB)."
+              "This parameter determines the effective batch size of the convolution "
+              "kernel, which may be smaller than the given batch size. "
+              "Also, the workspace will be automatically enlarged to make sure that we can "
+              "run the kernel with batch_size=1");
     DMLC_DECLARE_FIELD(no_bias).set_default(false)
     .describe("Whether to disable bias parameter.");
     DMLC_DECLARE_FIELD(cudnn_tune)
@@ -344,9 +348,6 @@ class ConvolutionV1Op : public Operator {
                                              shape_dstunit_[1],
                                              shape_dstunit_[2] * nstep_);
     index_t required_size = scol.Size() + sdst.Size();
-    CHECK_GE(param_.workspace, required_size)
-      << "\nMinimum workspace size: " << required_size * sizeof(DType) << " Bytes\n"
-      << "Given: " << param_.workspace * sizeof(DType) << " Bytes";
     return required_size;
   }
 
diff --git a/src/operator/nn/convolution-inl.h b/src/operator/nn/convolution-inl.h
index 6204f75c469..d0dd7dd27a6 100644
--- a/src/operator/nn/convolution-inl.h
+++ b/src/operator/nn/convolution-inl.h
@@ -79,7 +79,11 @@ struct ConvolutionParam : public dmlc::Parameter<ConvolutionParam> {
     DMLC_DECLARE_FIELD(num_group).set_default(1)
     .describe("Number of group partitions.");
     DMLC_DECLARE_FIELD(workspace).set_default(1024).set_range(0, 8192)
-    .describe("Maximum temporary workspace allowed for convolution (MB).");
+    .describe("Maximum temporary workspace allowed (MB) in convolution."
+              "This parameter has two usages. When CUDNN is not used, it determines the "
+              "effective batch size of the convolution kernel. When CUDNN is used, it controls "
+              "the maximum temporary storage used for tuning the best CUDNN kernel when "
+              "`limited_workspace` strategy is used.");
     DMLC_DECLARE_FIELD(no_bias).set_default(false)
     .describe("Whether to disable bias parameter.");
     DMLC_DECLARE_FIELD(cudnn_tune)
diff --git a/src/operator/nn/deconvolution-inl.h b/src/operator/nn/deconvolution-inl.h
index b6d522b9e6f..badbb8b9d67 100644
--- a/src/operator/nn/deconvolution-inl.h
+++ b/src/operator/nn/deconvolution-inl.h
@@ -90,7 +90,11 @@ struct DeconvolutionParam : public dmlc::Parameter<DeconvolutionParam> {
     DMLC_DECLARE_FIELD(num_group).set_default(1)
         .describe("Number of groups partition.");
     DMLC_DECLARE_FIELD(workspace).set_default(512).set_range(0, 8192)
-      .describe("Maximum temporal workspace allowed for deconvolution (MB).");
+        .describe("Maximum temporary workspace allowed (MB) in deconvolution."
+                  "This parameter has two usages. When CUDNN is not used, it determines the "
+                  "effective batch size of the deconvolution kernel. When CUDNN is used, "
+                  "it controls the maximum temporary storage used for tuning "
+                  "the best CUDNN kernel when `limited_workspace` strategy is used.");
     DMLC_DECLARE_FIELD(no_bias).set_default(true)
         .describe("Whether to disable bias parameter.");
     DMLC_DECLARE_FIELD(cudnn_tune)
@@ -200,7 +204,7 @@ class DeconvolutionOp {
   void Init(DeconvolutionParam p) {
     this->param_ = p;
     // convert MBytes first to Bytes and then to elements.
-    param_.workspace = (param_.workspace << 20) / sizeof(real_t);
+    param_.workspace = (param_.workspace << 20) / sizeof(DType);
   }
 
   void Forward(const OpContext &ctx,
@@ -451,7 +455,7 @@ class DeconvolutionOp {
     shape_dstunit_ = mshadow::Shape3(param_.num_group,
                                      oshape[1] / param_.num_group,
                                      oshape[2] * oshape[3]);
-    // See convolution for workspace calculations
+    // See convolution for workspace calculations. nstep_ will be the effective batch size
     nstep_ = std::max(
         std::min(
             static_cast<index_t>(
@@ -465,9 +469,6 @@ class DeconvolutionOp {
                                              shape_dstunit_[1],
                                              shape_dstunit_[2] * nstep_);
     index_t required_size = scol.Size() + sdst.Size();
-    CHECK_GE(param_.workspace, required_size)
-      << "\nMinimum workspace size: " << required_size * sizeof(DType) << " Bytes\n"
-      << "Given: " << param_.workspace * sizeof(DType);
     return required_size;
   }
 


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services