Posted to commits@mxnet.apache.org by zh...@apache.org on 2019/05/23 04:52:56 UTC

[incubator-mxnet] branch master updated: Add warning for fp16 inputs with MXNET_SAFE_ACCUMULATION=0 (#15046)

This is an automated email from the ASF dual-hosted git repository.

zhasheng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
     new d2e397a  Add warning for fp16 inputs with MXNET_SAFE_ACCUMULATION=0 (#15046)
d2e397a is described below

commit d2e397a01324ddaa01f06dea0d43c82b2f29ff8b
Author: Haibin Lin <li...@gmail.com>
AuthorDate: Wed May 22 21:52:21 2019 -0700

    Add warning for fp16 inputs with MXNET_SAFE_ACCUMULATION=0 (#15046)
---
 src/operator/nn/layer_norm-inl.h          | 21 +++++++++++++++------
 src/operator/nn/layer_norm.cu             |  5 +++++
 src/operator/nn/softmax-inl.h             |  5 +++++
 src/operator/tensor/broadcast_reduce_op.h | 11 ++++++++---
 4 files changed, 33 insertions(+), 9 deletions(-)
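
For context, safe accumulation performs the reductions touched below in a type wider than the inputs: with MXNET_SAFE_ACCUMULATION=1, float16 values are summed into a float32 accumulator, avoiding the overflow and rounding loss that a float16 running sum hits quickly. A standalone C++ sketch of the failure mode, using float and double as stand-ins for float16 and float32 (the effect is the same one type-width up):

    #include <cstdio>

    // Stand-alone illustration (not MXNet code): float and double play the
    // roles of float16 and float32 one type-width up. Once the running sum
    // reaches 2^24, adding 1.0f to a float no longer changes it, so the
    // narrow accumulator silently stops counting.
    int main() {
      const long n = 1L << 25;   // twice the point where float saturates
      float narrow = 0.0f;       // stand-in for a float16 accumulator
      double wide = 0.0;         // stand-in for a float32 accumulator
      for (long i = 0; i < n; ++i) {
        narrow += 1.0f;
        wide += 1.0;
      }
      std::printf("narrow sum: %.1f\n", narrow);  // 16777216.0
      std::printf("wide sum:   %.1f\n", wide);    // 33554432.0
      return 0;
    }

With the flag unset, the float16 reductions below behave like the narrow accumulator, which is why this commit adds a one-time warning.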

diff --git a/src/operator/nn/layer_norm-inl.h b/src/operator/nn/layer_norm-inl.h
index 7636c9b..3294874 100644
--- a/src/operator/nn/layer_norm-inl.h
+++ b/src/operator/nn/layer_norm-inl.h
@@ -114,10 +114,18 @@ void LayerNormComputeGeneral(const nnvm::NodeAttrs& attrs,
     });
   });
   workspace = ctx.requested[0].get_space_typed<xpu, 1, char>(Shape1(workspace_size), s);
+
+  bool safe_acc = dmlc::GetEnv("MXNET_SAFE_ACCUMULATION", false);
+  if (!safe_acc && inputs[0].type_flag_ == mshadow::kFloat16) {
+    common::LogOnce("MXNET_SAFE_ACCUMULATION=1 is recommended for float16 inputs for LayerNorm. "
+                    "See https://mxnet.incubator.apache.org/versions/master/faq/env_var.html "
+                    "for more details.");
+  }
+
   // Calculate mean
   MSHADOW_REAL_TYPE_SWITCH(outputs[0].type_flag_, DType, {
     BROADCAST_NDIM_SWITCH(red_dst_shape.ndim(), NDim, {
-      if (dmlc::GetEnv("MXNET_SAFE_ACCUMULATION", false)) {
+      if (safe_acc) {
         broadcast::Reduce<mshadow_op::sum, NDim, DType, mshadow_op::identity, false>(
           s, mean_data, req[0], workspace, in_data);
       } else {
@@ -136,7 +144,7 @@ void LayerNormComputeGeneral(const nnvm::NodeAttrs& attrs,
   const TBlob centered_out = outputs[0].reshape(red_src_shape);
   MSHADOW_REAL_TYPE_SWITCH(outputs[0].type_flag_, DType, {
     BROADCAST_NDIM_SWITCH(red_dst_shape.ndim(), NDim, {
-      if (dmlc::GetEnv("MXNET_SAFE_ACCUMULATION", false)) {
+      if (safe_acc) {
         broadcast::Reduce<mshadow_op::sum, NDim, DType, mshadow_op::square, false>(
           s, std_data, req[0], workspace, centered_out);
       } else {
@@ -251,10 +259,11 @@ void LayerNormGradComputeGeneral(const nnvm::NodeAttrs& attrs,
                                                {normalized_data, std},
                                                {kWriteTo}, {normalized_data});
   // Calculate grad_beta
+  bool safe_acc = dmlc::GetEnv("MXNET_SAFE_ACCUMULATION", false);
   if (req[2] != kNullOp) {
     MSHADOW_REAL_TYPE_SWITCH(outputs[2].type_flag_, DType, {
       BROADCAST_NDIM_SWITCH(red_exclude_dst_shape.ndim(), NDim, {
-        if (dmlc::GetEnv("MXNET_SAFE_ACCUMULATION", false)) {
+        if (safe_acc) {
           broadcast::Reduce<mshadow_op::sum, NDim, DType, mshadow_op::identity, false>(
             s, outputs[2].reshape(red_exclude_dst_shape), req[2], workspace,
             ograd.reshape(red_exclude_src_shape));
@@ -272,7 +281,7 @@ void LayerNormGradComputeGeneral(const nnvm::NodeAttrs& attrs,
   if (req[1] != kNullOp) {
     MSHADOW_REAL_TYPE_SWITCH(outputs[1].type_flag_, DType, {
       BROADCAST_NDIM_SWITCH(red_exclude_dst_shape.ndim(), NDim, {
-        if (dmlc::GetEnv("MXNET_SAFE_ACCUMULATION", false)) {
+        if (safe_acc) {
           broadcast::Reduce<mshadow_op::sum, NDim, DType, mshadow_op::identity, false>(
             s, outputs[1].reshape(red_exclude_dst_shape), req[1], workspace,
             ograd_mult.reshape(red_exclude_src_shape));
@@ -297,7 +306,7 @@ void LayerNormGradComputeGeneral(const nnvm::NodeAttrs& attrs,
                                                     {kWriteTo}, {ograd_mult});
     MSHADOW_REAL_TYPE_SWITCH(outputs[0].type_flag_, DType, {
       BROADCAST_NDIM_SWITCH(red_dst_shape.ndim(), NDim, {
-        if (dmlc::GetEnv("MXNET_SAFE_ACCUMULATION", false)) {
+        if (safe_acc) {
           broadcast::Reduce<mshadow_op::sum, NDim, DType, mshadow_op::identity, false>(
             s, red_out.reshape(red_dst_shape), kWriteTo, workspace,
             ograd_mult.reshape(red_src_shape));
@@ -317,7 +326,7 @@ void LayerNormGradComputeGeneral(const nnvm::NodeAttrs& attrs,
                                                         {kWriteTo}, {ograd_mult});
     MSHADOW_REAL_TYPE_SWITCH(outputs[0].type_flag_, DType, {
       BROADCAST_NDIM_SWITCH(red_dst_shape.ndim(), NDim, {
-        if (dmlc::GetEnv("MXNET_SAFE_ACCUMULATION", false)) {
+        if (safe_acc) {
           broadcast::Reduce<mshadow_op::sum, NDim, DType, mshadow_op::identity, false>(
             s, red_out.reshape(red_dst_shape), kWriteTo, workspace,
             ograd_mult.reshape(red_src_shape));
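
Besides adding the warning, the hunks above hoist the repeated dmlc::GetEnv("MXNET_SAFE_ACCUMULATION", false) lookups into a single safe_acc local, so the environment variable is read once per forward/backward call instead of once per reduction. The warning itself goes through common::LogOnce; its implementation isn't shown in this diff, but the call sites rely on print-each-distinct-message-once behavior, roughly like this hedged sketch (not MXNet's actual code):

    #include <iostream>
    #include <mutex>
    #include <string>
    #include <unordered_set>

    // Hedged sketch of a LogOnce-style helper; MXNet's common::LogOnce may
    // be implemented differently. Each distinct message is emitted at most
    // once per process, so a warning placed on a hot path (here, every
    // LayerNorm forward and backward call) cannot flood the log.
    inline void LogOnce(const std::string& msg) {
      static std::mutex mu;
      static std::unordered_set<std::string> seen;
      std::lock_guard<std::mutex> lock(mu);
      if (seen.insert(msg).second) {
        std::cerr << "Warning: " << msg << '\n';
      }
    }
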
diff --git a/src/operator/nn/layer_norm.cu b/src/operator/nn/layer_norm.cu
index db09969..fead2a6 100644
--- a/src/operator/nn/layer_norm.cu
+++ b/src/operator/nn/layer_norm.cu
@@ -340,6 +340,11 @@ void LayerNormCompute<gpu>(const nnvm::NodeAttrs& attrs,
   if (axis == inputs[0].ndim() - 1) {
     // Try to use the accelerated CUDA kernels
     bool safe_acc = dmlc::GetEnv("MXNET_SAFE_ACCUMULATION", false);
+    if (!safe_acc && inputs[0].type_flag_ == mshadow::kFloat16) {
+      common::LogOnce("MXNET_SAFE_ACCUMULATION=1 is recommended for LayerNorm with float16 inputs. "
+                      "See https://mxnet.incubator.apache.org/versions/master/faq/env_var.html "
+                      "for more details.");
+    }
     if (safe_acc) {
       return LayerNormGPUContig<true>(param, ctx, inputs, req, outputs);
     } else {
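
On the GPU fast path the runtime flag selects between two template instantiations, LayerNormGPUContig<true> and LayerNormGPUContig<false>, so the accumulation type is fixed at kernel compile time and no per-element branch is paid inside the kernel. A sketch of the kind of type mapping such a switch relies on (names assumed for illustration; MXNet's actual traits differ):

    // Assumed names for illustration; not MXNet's actual type traits.
    struct half16 {};  // stand-in for mshadow::half::half_t

    template <bool safe_acc, typename DType>
    struct AccumType { using type = DType; };  // default: accumulate in DType

    template <>  // safe accumulation widens float16 sums to float32
    struct AccumType<true, half16> { using type = float; };

    // e.g. inside a kernel templated on <bool safe_acc, typename DType>:
    //   using AType = typename AccumType<safe_acc, DType>::type;
    //   AType running_sum = AType(0);
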
diff --git a/src/operator/nn/softmax-inl.h b/src/operator/nn/softmax-inl.h
index 1910ff4..d6113b0 100644
--- a/src/operator/nn/softmax-inl.h
+++ b/src/operator/nn/softmax-inl.h
@@ -411,6 +411,11 @@ void SoftmaxCompute(const nnvm::NodeAttrs& attrs,
     param.temperature.value() : 1.0;
   mxnet::TShape shape = AxisShapeCompact(inputs[0].shape_, &axis, true);
   bool safe_acc = dmlc::GetEnv("MXNET_SAFE_ACCUMULATION", false);
+  if (!safe_acc && inputs[0].type_flag_ == mshadow::kFloat16) {
+    common::LogOnce("MXNET_SAFE_ACCUMULATION=1 is recommended for softmax with float16 inputs. "
+                    "See https://mxnet.incubator.apache.org/versions/master/faq/env_var.html "
+                    "for more details.");
+  }
 
   MXNET_REAL_ACC_TYPE_SWITCH(inputs[0].type_flag_, DType, AType, {
     MSHADOW_REAL_TYPE_SWITCH(outputs[0].type_flag_, OType, {
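
Softmax needs two reductions along the axis, a running max and a sum of exponentials, and with float16 inputs over a long axis the sum is where precision is lost. A minimal scalar sketch showing where the accumulator type AType enters (an illustration, not MXNet's vectorized kernel):

    #include <algorithm>
    #include <cmath>

    // Minimal scalar softmax sketch (illustration, not MXNet's kernel).
    // AType is the accumulator type for the two reductions softmax needs:
    // the running max and the sum of exponentials. With float16 inputs and
    // MXNET_SAFE_ACCUMULATION=1, MXNet performs these at higher precision.
    template <typename DType, typename AType>
    void softmax(const DType* in, DType* out, int n) {
      AType mx = AType(in[0]);
      for (int i = 1; i < n; ++i) mx = std::max<AType>(mx, AType(in[i]));
      AType sum = AType(0);
      for (int i = 0; i < n; ++i) {
        AType e = std::exp(AType(in[i]) - mx);  // subtract max for stability
        sum += e;                               // accumulate at AType precision
        out[i] = static_cast<DType>(e);
      }
      for (int i = 0; i < n; ++i) {
        out[i] = static_cast<DType>(out[i] / sum);
      }
    }
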
diff --git a/src/operator/tensor/broadcast_reduce_op.h b/src/operator/tensor/broadcast_reduce_op.h
index c334472..1723e9a 100644
--- a/src/operator/tensor/broadcast_reduce_op.h
+++ b/src/operator/tensor/broadcast_reduce_op.h
@@ -1183,9 +1183,14 @@ void LpNormCompute(const nnvm::NodeAttrs& attrs,
   } else {
     small = ReduceAxesShapeImpl(inputs[0].shape_, param.axis, true, false);
   }
-
+  bool safe_acc = dmlc::GetEnv("MXNET_SAFE_ACCUMULATION", false);
+  if (!safe_acc && inputs[0].type_flag_ == mshadow::kFloat16) {
+    common::LogOnce("MXNET_SAFE_ACCUMULATION=1 is recommended for LpNorm with float16 inputs. "
+                    "See https://mxnet.incubator.apache.org/versions/master/faq/env_var.html "
+                    "for more details.");
+  }
   if (param.ord == 1) {
-    if (dmlc::GetEnv("MXNET_SAFE_ACCUMULATION", false)) {
+    if (safe_acc) {
       ReduceAxesComputeImpl<xpu, mshadow_op::sum, true, false, mshadow_op::abs>(
         ctx, inputs, req, outputs, small);
     } else {
@@ -1193,7 +1198,7 @@ void LpNormCompute(const nnvm::NodeAttrs& attrs,
         ctx, inputs, req, outputs, small);
     }
   } else if (param.ord == 2) {
-    if (dmlc::GetEnv("MXNET_SAFE_ACCUMULATION", false)) {
+    if (safe_acc) {
       ReduceAxesComputeImpl<xpu, mshadow_op::nrm2, true, false, mshadow_op::identity>(
         ctx, inputs, req, outputs, small);
     } else {
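
The ord == 2 case is the most overflow-prone, since squaring the inputs doubles their exponent. A small standalone illustration (not MXNet code) of why widening the accumulator helps; users opt in exactly as the new warnings recommend, by setting MXNET_SAFE_ACCUMULATION=1 in the environment:

    #include <cmath>
    #include <cstdio>

    // Illustration (not MXNet code): float16 tops out at 65504, so the
    // square of any element larger than about 256 already overflows a
    // float16 running sum. Accumulating the squares in float32 keeps the
    // intermediate finite.
    int main() {
      double acc = 0.0;  // stands in for the float32 accumulator
      for (int i = 0; i < 4096; ++i) {
        acc += 300.0 * 300.0;  // each square is 90000 > 65504 (fp16 max)
      }
      std::printf("l2 norm = %.1f\n", std::sqrt(acc));  // 300 * 64 = 19200.0
      return 0;
    }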