You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by zh...@apache.org on 2019/05/23 04:52:56 UTC
[incubator-mxnet] branch master updated: Add warning for fp16 inputs with MXNET_SAFE_ACCUMULATION=0 (#15046)
This is an automated email from the ASF dual-hosted git repository.
zhasheng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/master by this push:
new d2e397a Add warning for fp16 inputs with MXNET_SAFE_ACCUMULATION=0 (#15046)
d2e397a is described below
commit d2e397a01324ddaa01f06dea0d43c82b2f29ff8b
Author: Haibin Lin <li...@gmail.com>
AuthorDate: Wed May 22 21:52:21 2019 -0700
Add warning for fp16 inputs with MXNET_SAFE_ACCUMULATION=0 (#15046)
---
src/operator/nn/layer_norm-inl.h | 21 +++++++++++++++------
src/operator/nn/layer_norm.cu | 5 +++++
src/operator/nn/softmax-inl.h | 5 +++++
src/operator/tensor/broadcast_reduce_op.h | 11 ++++++++---
4 files changed, 33 insertions(+), 9 deletions(-)
diff --git a/src/operator/nn/layer_norm-inl.h b/src/operator/nn/layer_norm-inl.h
index 7636c9b..3294874 100644
--- a/src/operator/nn/layer_norm-inl.h
+++ b/src/operator/nn/layer_norm-inl.h
@@ -114,10 +114,18 @@ void LayerNormComputeGeneral(const nnvm::NodeAttrs& attrs,
});
});
workspace = ctx.requested[0].get_space_typed<xpu, 1, char>(Shape1(workspace_size), s);
+
+ bool safe_acc = dmlc::GetEnv("MXNET_SAFE_ACCUMULATION", false);
+ if (!safe_acc && inputs[0].type_flag_ == mshadow::kFloat16) {
+ common::LogOnce("MXNET_SAFE_ACCUMULATION=1 is recommended for float16 inputs for LayerNorm. "
+ "See https://mxnet.incubator.apache.org/versions/master/faq/env_var.html "
+ "for more details.");
+ }
+
// Calculate mean
MSHADOW_REAL_TYPE_SWITCH(outputs[0].type_flag_, DType, {
BROADCAST_NDIM_SWITCH(red_dst_shape.ndim(), NDim, {
- if (dmlc::GetEnv("MXNET_SAFE_ACCUMULATION", false)) {
+ if (safe_acc) {
broadcast::Reduce<mshadow_op::sum, NDim, DType, mshadow_op::identity, false>(
s, mean_data, req[0], workspace, in_data);
} else {
@@ -136,7 +144,7 @@ void LayerNormComputeGeneral(const nnvm::NodeAttrs& attrs,
const TBlob centered_out = outputs[0].reshape(red_src_shape);
MSHADOW_REAL_TYPE_SWITCH(outputs[0].type_flag_, DType, {
BROADCAST_NDIM_SWITCH(red_dst_shape.ndim(), NDim, {
- if (dmlc::GetEnv("MXNET_SAFE_ACCUMULATION", false)) {
+ if (safe_acc) {
broadcast::Reduce<mshadow_op::sum, NDim, DType, mshadow_op::square, false>(
s, std_data, req[0], workspace, centered_out);
} else {
@@ -251,10 +259,11 @@ void LayerNormGradComputeGeneral(const nnvm::NodeAttrs& attrs,
{normalized_data, std},
{kWriteTo}, {normalized_data});
// Calculate grad_beta
+ bool safe_acc = dmlc::GetEnv("MXNET_SAFE_ACCUMULATION", false);
if (req[2] != kNullOp) {
MSHADOW_REAL_TYPE_SWITCH(outputs[2].type_flag_, DType, {
BROADCAST_NDIM_SWITCH(red_exclude_dst_shape.ndim(), NDim, {
- if (dmlc::GetEnv("MXNET_SAFE_ACCUMULATION", false)) {
+ if (safe_acc) {
broadcast::Reduce<mshadow_op::sum, NDim, DType, mshadow_op::identity, false>(
s, outputs[2].reshape(red_exclude_dst_shape), req[2], workspace,
ograd.reshape(red_exclude_src_shape));
@@ -272,7 +281,7 @@ void LayerNormGradComputeGeneral(const nnvm::NodeAttrs& attrs,
if (req[1] != kNullOp) {
MSHADOW_REAL_TYPE_SWITCH(outputs[1].type_flag_, DType, {
BROADCAST_NDIM_SWITCH(red_exclude_dst_shape.ndim(), NDim, {
- if (dmlc::GetEnv("MXNET_SAFE_ACCUMULATION", false)) {
+ if (safe_acc) {
broadcast::Reduce<mshadow_op::sum, NDim, DType, mshadow_op::identity, false>(
s, outputs[1].reshape(red_exclude_dst_shape), req[1], workspace,
ograd_mult.reshape(red_exclude_src_shape));
@@ -297,7 +306,7 @@ void LayerNormGradComputeGeneral(const nnvm::NodeAttrs& attrs,
{kWriteTo}, {ograd_mult});
MSHADOW_REAL_TYPE_SWITCH(outputs[0].type_flag_, DType, {
BROADCAST_NDIM_SWITCH(red_dst_shape.ndim(), NDim, {
- if (dmlc::GetEnv("MXNET_SAFE_ACCUMULATION", false)) {
+ if (safe_acc) {
broadcast::Reduce<mshadow_op::sum, NDim, DType, mshadow_op::identity, false>(
s, red_out.reshape(red_dst_shape), kWriteTo, workspace,
ograd_mult.reshape(red_src_shape));
@@ -317,7 +326,7 @@ void LayerNormGradComputeGeneral(const nnvm::NodeAttrs& attrs,
{kWriteTo}, {ograd_mult});
MSHADOW_REAL_TYPE_SWITCH(outputs[0].type_flag_, DType, {
BROADCAST_NDIM_SWITCH(red_dst_shape.ndim(), NDim, {
- if (dmlc::GetEnv("MXNET_SAFE_ACCUMULATION", false)) {
+ if (safe_acc) {
broadcast::Reduce<mshadow_op::sum, NDim, DType, mshadow_op::identity, false>(
s, red_out.reshape(red_dst_shape), kWriteTo, workspace,
ograd_mult.reshape(red_src_shape));
diff --git a/src/operator/nn/layer_norm.cu b/src/operator/nn/layer_norm.cu
index db09969..fead2a6 100644
--- a/src/operator/nn/layer_norm.cu
+++ b/src/operator/nn/layer_norm.cu
@@ -340,6 +340,11 @@ void LayerNormCompute<gpu>(const nnvm::NodeAttrs& attrs,
if (axis == inputs[0].ndim() - 1) {
// Try to use the accelerated CUDA kernels
bool safe_acc = dmlc::GetEnv("MXNET_SAFE_ACCUMULATION", false);
+ if (!safe_acc && inputs[0].type_flag_ == mshadow::kFloat16) {
+ common::LogOnce("MXNET_SAFE_ACCUMULATION=1 is recommended for LayerNorm with float16 inputs. "
+ "See https://mxnet.incubator.apache.org/versions/master/faq/env_var.html "
+ "for more details.");
+ }
if (safe_acc) {
return LayerNormGPUContig<true>(param, ctx, inputs, req, outputs);
} else {
diff --git a/src/operator/nn/softmax-inl.h b/src/operator/nn/softmax-inl.h
index 1910ff4..d6113b0 100644
--- a/src/operator/nn/softmax-inl.h
+++ b/src/operator/nn/softmax-inl.h
@@ -411,6 +411,11 @@ void SoftmaxCompute(const nnvm::NodeAttrs& attrs,
param.temperature.value() : 1.0;
mxnet::TShape shape = AxisShapeCompact(inputs[0].shape_, &axis, true);
bool safe_acc = dmlc::GetEnv("MXNET_SAFE_ACCUMULATION", false);
+ if (!safe_acc && inputs[0].type_flag_ == mshadow::kFloat16) {
+ common::LogOnce("MXNET_SAFE_ACCUMULATION=1 is recommended for softmax with float16 inputs. "
+ "See https://mxnet.incubator.apache.org/versions/master/faq/env_var.html "
+ "for more details.");
+ }
MXNET_REAL_ACC_TYPE_SWITCH(inputs[0].type_flag_, DType, AType, {
MSHADOW_REAL_TYPE_SWITCH(outputs[0].type_flag_, OType, {
diff --git a/src/operator/tensor/broadcast_reduce_op.h b/src/operator/tensor/broadcast_reduce_op.h
index c334472..1723e9a 100644
--- a/src/operator/tensor/broadcast_reduce_op.h
+++ b/src/operator/tensor/broadcast_reduce_op.h
@@ -1183,9 +1183,14 @@ void LpNormCompute(const nnvm::NodeAttrs& attrs,
} else {
small = ReduceAxesShapeImpl(inputs[0].shape_, param.axis, true, false);
}
-
+ bool safe_acc = dmlc::GetEnv("MXNET_SAFE_ACCUMULATION", false);
+ if (!safe_acc && inputs[0].type_flag_ == mshadow::kFloat16) {
+ common::LogOnce("MXNET_SAFE_ACCUMULATION=1 is recommended for LpNorm with float16 inputs. "
+ "See https://mxnet.incubator.apache.org/versions/master/faq/env_var.html "
+ "for more details.");
+ }
if (param.ord == 1) {
- if (dmlc::GetEnv("MXNET_SAFE_ACCUMULATION", false)) {
+ if (safe_acc) {
ReduceAxesComputeImpl<xpu, mshadow_op::sum, true, false, mshadow_op::abs>(
ctx, inputs, req, outputs, small);
} else {
@@ -1193,7 +1198,7 @@ void LpNormCompute(const nnvm::NodeAttrs& attrs,
ctx, inputs, req, outputs, small);
}
} else if (param.ord == 2) {
- if (dmlc::GetEnv("MXNET_SAFE_ACCUMULATION", false)) {
+ if (safe_acc) {
ReduceAxesComputeImpl<xpu, mshadow_op::nrm2, true, false, mshadow_op::identity>(
ctx, inputs, req, outputs, small);
} else {