You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by la...@apache.org on 2020/08/19 17:21:12 UTC
[incubator-mxnet] branch master updated: turn on MXNET_SAFE_ACCUMULATION by default (#18961)
This is an automated email from the ASF dual-hosted git repository.
lausen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/master by this push:
new 50312af turn on MXNET_SAFE_ACCUMULATION by default (#18961)
50312af is described below
commit 50312af58b2ec3e951da0809dd0c800a62dcf1f9
Author: Sheng Zha <sz...@users.noreply.github.com>
AuthorDate: Wed Aug 19 10:20:11 2020 -0700
turn on MXNET_SAFE_ACCUMULATION by default (#18961)
---
ci/windows/test_py3_cpu.ps1 | 4 +---
ci/windows/test_py3_gpu.ps1 | 4 +---
docs/static_site/src/pages/api/faq/env_var.md | 2 +-
src/operator/nn/layer_norm-inl.h | 4 ++--
src/operator/nn/layer_norm.cu | 4 ++--
src/operator/nn/softmax-inl.h | 4 ++--
src/operator/numpy/linalg/np_norm-inl.h | 4 ++--
src/operator/tensor/broadcast_reduce_op.h | 2 +-
src/operator/tensor/indexing_op.cu | 2 +-
src/operator/tensor/indexing_op.h | 4 ++--
10 files changed, 15 insertions(+), 19 deletions(-)
diff --git a/ci/windows/test_py3_cpu.ps1 b/ci/windows/test_py3_cpu.ps1
index 5121a53..00a625c 100644
--- a/ci/windows/test_py3_cpu.ps1
+++ b/ci/windows/test_py3_cpu.ps1
@@ -33,12 +33,10 @@ if ($LastExitCode -ne 0) { Throw ("Error running parallel train tests, python ex
C:\Python37\python.exe -m pytest -v -m 'serial' --durations=50 --cov-report xml:tests_train.xml --cov-append tests\python\train
if ($LastExitCode -ne 0) { Throw ("Error running serial train tests, python exited with status code " + ('{0:X}' -f $LastExitCode)) }
# Adding this extra test since it's not possible to set env var on the fly in Windows.
-$env:MXNET_SAFE_ACCUMULATION=1
C:\Python37\python.exe -m pytest -v --durations=50 --cov-report xml:tests_unittest.xml --cov-append tests\python\unittest\test_operator.py::test_norm
if ($LastExitCode -ne 0) { Throw ("Error running unittest, python exited with status code " + ('{0:X}' -f $LastExitCode)) }
-# Similar to the MXNET_SAFE_ACCUMULATION test case above. Need to explicitly
-# set the environment variable for MXNET_MEMORY_OPT.
+# Need to explicitly set the environment variable for MXNET_MEMORY_OPT.
$env:MXNET_MEMORY_OPT=1
C:\Python37\python.exe -m pytest -v --durations=50 --cov-report xml:tests_unittest.xml --cov-append tests\python\unittest\test_memory_opt.py
if ($LastExitCode -ne 0) { Throw ("Error running unittest, python exited with status code " + ('{0:X}' -f $LastExitCode)) }
diff --git a/ci/windows/test_py3_gpu.ps1 b/ci/windows/test_py3_gpu.ps1
index 5dbc6fc..b5769eb 100644
--- a/ci/windows/test_py3_gpu.ps1
+++ b/ci/windows/test_py3_gpu.ps1
@@ -41,14 +41,12 @@ C:\Python37\python.exe -m pytest -v -m 'serial' --durations=50 --cov-report xml:
if ($LastExitCode -ne 0) { Throw ("Error running serial tests, python exited with status code " + ('{0:X}' -f $LastExitCode)) }
# Adding this extra test since it's not possible to set env var on the fly in Windows.
-$env:MXNET_SAFE_ACCUMULATION=1
C:\Python37\python.exe -m pytest -v --durations=50 --cov-report xml:tests_operator.xml --cov-append tests\python\gpu\test_operator_gpu.py::test_norm
if ($LastExitCode -ne 0) { Throw ("Error running tests, python exited with status code " + ('{0:X}' -f $LastExitCode)) }
C:\Python37\python.exe -m pytest -v --durations=50 --cov-report xml:tests_tvm_op.xml tests\python\gpu\test_tvm_op_gpu.py
if ($LastExitCode -ne 0) { Throw ("Error running TVM op tests, python exited with status code " + ('{0:X}' -f $LastExitCode)) }
-# Similar to the MXNET_SAFE_ACCUMULATION test case above. Need to explicitly
-# set the environment variable for MXNET_MEMORY_OPT.
+# Need to explicitly set the environment variable for MXNET_MEMORY_OPT.
$env:MXNET_MEMORY_OPT=1
C:\Python37\python.exe -m pytest -v --durations=50 --cov-report xml:tests_unittest.xml --cov-append tests\python\unittest\test_memory_opt.py
if ($LastExitCode -ne 0) { Throw ("Error running memory optimization tests, python exited with status code " + ('{0:X}' -f $LastExitCode)) }
diff --git a/docs/static_site/src/pages/api/faq/env_var.md b/docs/static_site/src/pages/api/faq/env_var.md
index 55e5f38..65ce0b8 100644
--- a/docs/static_site/src/pages/api/faq/env_var.md
+++ b/docs/static_site/src/pages/api/faq/env_var.md
@@ -377,7 +377,7 @@ If ctypes is used, it must be `mxnet._ctypes.ndarray.NDArrayBase`.
- Set ```MXNET_SUBGRAPH_BACKEND=NONE``` to disable subgraph backend.
* MXNET_SAFE_ACCUMULATION
- - Values: Values: 0(false) or 1(true) ```(default=0)```
+ - Values: Values: 0(false) or 1(true) ```(default=1)```
- If this variable is set, the accumulation will enter the safe mode, meaning accumulation is done in a data type of higher precision than
the input data type, leading to more accurate accumulation results with a possible performance loss and backward compatibility loss.
For example, when the variable is set to 1(true), if the input data type is float16, then the accumulation will be done
diff --git a/src/operator/nn/layer_norm-inl.h b/src/operator/nn/layer_norm-inl.h
index 238a71b..8dcaeb3 100644
--- a/src/operator/nn/layer_norm-inl.h
+++ b/src/operator/nn/layer_norm-inl.h
@@ -115,7 +115,7 @@ void LayerNormComputeGeneral(const nnvm::NodeAttrs& attrs,
});
workspace = ctx.requested[0].get_space_typed<xpu, 1, char>(Shape1(workspace_size), s);
- bool safe_acc = dmlc::GetEnv("MXNET_SAFE_ACCUMULATION", false);
+ bool safe_acc = dmlc::GetEnv("MXNET_SAFE_ACCUMULATION", true);
if (!safe_acc && inputs[0].type_flag_ == mshadow::kFloat16) {
common::LogOnce("MXNET_SAFE_ACCUMULATION=1 is recommended for float16 inputs for LayerNorm. "
"See https://mxnet.apache.org/api/faq/env_var "
@@ -259,7 +259,7 @@ void LayerNormGradComputeGeneral(const nnvm::NodeAttrs& attrs,
{normalized_data, std},
{kWriteTo}, {normalized_data});
// Calculate grad_beta
- bool safe_acc = dmlc::GetEnv("MXNET_SAFE_ACCUMULATION", false);
+ bool safe_acc = dmlc::GetEnv("MXNET_SAFE_ACCUMULATION", true);
if (req[2] != kNullOp) {
MSHADOW_REAL_TYPE_SWITCH(outputs[2].type_flag_, DType, {
BROADCAST_NDIM_SWITCH(red_exclude_dst_shape.ndim(), NDim, {
diff --git a/src/operator/nn/layer_norm.cu b/src/operator/nn/layer_norm.cu
index 4056bd2..a60df41 100644
--- a/src/operator/nn/layer_norm.cu
+++ b/src/operator/nn/layer_norm.cu
@@ -339,7 +339,7 @@ void LayerNormCompute<gpu>(const nnvm::NodeAttrs& attrs,
CHECK(axis >= 0 && axis < inputs[0].ndim()) << "Channel axis out of range: " << param.axis;
if (axis == inputs[0].ndim() - 1) {
// Try to use the accelerated CUDA kernels
- bool safe_acc = dmlc::GetEnv("MXNET_SAFE_ACCUMULATION", false);
+ bool safe_acc = dmlc::GetEnv("MXNET_SAFE_ACCUMULATION", true);
if (!safe_acc && inputs[0].type_flag_ == mshadow::kFloat16) {
common::LogOnce("MXNET_SAFE_ACCUMULATION=1 is recommended for LayerNorm with float16 inputs. "
"See https://mxnet.apache.org/api/faq/env_var "
@@ -733,7 +733,7 @@ void LayerNormGradCompute<gpu>(const nnvm::NodeAttrs& attrs,
CHECK(axis >= 0 && axis < inputs[0].ndim()) << "Channel axis out of range: " << param.axis;
if (axis == inputs[0].ndim() - 1) {
// Use the accelerated CUDA kernels
- bool safe_acc = dmlc::GetEnv("MXNET_SAFE_ACCUMULATION", false);
+ bool safe_acc = dmlc::GetEnv("MXNET_SAFE_ACCUMULATION", true);
if (safe_acc) {
return LayerNormGradGPUContig<true>(param, ctx, inputs, req, outputs);
} else {
diff --git a/src/operator/nn/softmax-inl.h b/src/operator/nn/softmax-inl.h
index ee27006..e34cc26 100644
--- a/src/operator/nn/softmax-inl.h
+++ b/src/operator/nn/softmax-inl.h
@@ -786,7 +786,7 @@ void SoftmaxCompute(const nnvm::NodeAttrs& attrs,
const double temperature = param.temperature.has_value() ?
param.temperature.value() : 1.0;
mxnet::TShape shape = AxisShapeCompact(inputs[0].shape_, &axis, true);
- bool safe_acc = dmlc::GetEnv("MXNET_SAFE_ACCUMULATION", false);
+ bool safe_acc = dmlc::GetEnv("MXNET_SAFE_ACCUMULATION", true);
if (!safe_acc && inputs[0].type_flag_ == mshadow::kFloat16) {
common::LogOnce("MXNET_SAFE_ACCUMULATION=1 is recommended for softmax with float16 inputs. "
"See https://mxnet.apache.org/api/faq/env_var "
@@ -862,7 +862,7 @@ void SoftmaxGradCompute(const nnvm::NodeAttrs& attrs,
int out_idx = softmax_has_dtype_override(attrs) ? 2 : 1;
out_idx = softmax_use_length(attrs) ? 3 : out_idx;
- bool safe_acc = dmlc::GetEnv("MXNET_SAFE_ACCUMULATION", false);
+ bool safe_acc = dmlc::GetEnv("MXNET_SAFE_ACCUMULATION", true);
MXNET_REAL_ACC_TYPE_SWITCH(inputs[0].type_flag_, OType, AType, {
MSHADOW_REAL_TYPE_SWITCH(outputs[0].type_flag_, DType, {
diff --git a/src/operator/numpy/linalg/np_norm-inl.h b/src/operator/numpy/linalg/np_norm-inl.h
index ecb4f8c..b26e680 100644
--- a/src/operator/numpy/linalg/np_norm-inl.h
+++ b/src/operator/numpy/linalg/np_norm-inl.h
@@ -273,7 +273,7 @@ void NumpyLpNormCompute(const nnvm::NodeAttrs& attrs,
small = ReduceAxesShapeImpl(inputs[0].shape_, param.axis, true, false);
const_cast<std::vector<TBlob>&>(outputs)[0] = outputs[0].reshape(small);
}
- bool safe_acc = dmlc::GetEnv("MXNET_SAFE_ACCUMULATION", false);
+ bool safe_acc = dmlc::GetEnv("MXNET_SAFE_ACCUMULATION", true);
if (!safe_acc && inputs[0].type_flag_ == mshadow::kFloat16) {
common::LogOnce("MXNET_SAFE_ACCUMULATION=1 is recommended for LpNorm with float16 inputs. "
"See https://mxnet.apache.org/api/faq/env_var "
@@ -531,7 +531,7 @@ void NumpyMatrixNormCompute(const nnvm::NodeAttrs& attrs,
if (param.flag == 2) { // nuclear norm
ReduceAxesComputeImpl<xpu, mshadow::red::sum, false, false, mshadow_op::identity>(
ctx, eigen, req, outputs, reduced_shape);
- } else if (dmlc::GetEnv("MXNET_SAFE_ACCUMULATION", false)) {
+ } else if (dmlc::GetEnv("MXNET_SAFE_ACCUMULATION", true)) {
if (ord == 2) {
ReduceAxesComputeImpl<xpu, mshadow::red::maximum, true, false, mshadow_op::abs>(
ctx, eigen, req, outputs, reduced_shape);
diff --git a/src/operator/tensor/broadcast_reduce_op.h b/src/operator/tensor/broadcast_reduce_op.h
index bd2af77..371fcee 100644
--- a/src/operator/tensor/broadcast_reduce_op.h
+++ b/src/operator/tensor/broadcast_reduce_op.h
@@ -1480,7 +1480,7 @@ void LpNormCompute(const nnvm::NodeAttrs& attrs,
} else {
small = ReduceAxesShapeImpl(inputs[0].shape_, param.axis, true, false);
}
- bool safe_acc = dmlc::GetEnv("MXNET_SAFE_ACCUMULATION", false);
+ bool safe_acc = dmlc::GetEnv("MXNET_SAFE_ACCUMULATION", true);
if (!safe_acc && inputs[0].type_flag_ == mshadow::kFloat16) {
common::LogOnce("MXNET_SAFE_ACCUMULATION=1 is recommended for LpNorm with float16 inputs. "
"See https://mxnet.apache.org/api/faq/env_var "
diff --git a/src/operator/tensor/indexing_op.cu b/src/operator/tensor/indexing_op.cu
index f9d7a19..f7fdfc2 100644
--- a/src/operator/tensor/indexing_op.cu
+++ b/src/operator/tensor/indexing_op.cu
@@ -843,7 +843,7 @@ void EmbeddingOpBackward<gpu>(const nnvm::NodeAttrs& attrs,
CHECK_NE(req[embedding::kWeight], kWriteInplace)
<< "Backward of Embedding does not support writing in place.";
- bool safe_acc = dmlc::GetEnv("MXNET_SAFE_ACCUMULATION", false);
+ bool safe_acc = dmlc::GetEnv("MXNET_SAFE_ACCUMULATION", true);
if (!safe_acc && outputs[1].type_flag_ == mshadow::kFloat16) {
common::LogOnce("MXNET_SAFE_ACCUMULATION=1 is recommended for EmbeddingOpBackward "
"with float16 inputs. "
diff --git a/src/operator/tensor/indexing_op.h b/src/operator/tensor/indexing_op.h
index 7f0f0fa..44c2365 100644
--- a/src/operator/tensor/indexing_op.h
+++ b/src/operator/tensor/indexing_op.h
@@ -401,7 +401,7 @@ void EmbeddingOpBackward(const nnvm::NodeAttrs& attrs,
Stream<xpu> *s = ctx.get_stream<xpu>();
- bool safe_acc = dmlc::GetEnv("MXNET_SAFE_ACCUMULATION", false);
+ bool safe_acc = dmlc::GetEnv("MXNET_SAFE_ACCUMULATION", true);
if (!safe_acc && outputs[1].type_flag_ == mshadow::kFloat16) {
common::LogOnce("MXNET_SAFE_ACCUMULATION=1 is recommended for EmbeddingOpBackward "
"with float16 inputs. "
@@ -985,7 +985,7 @@ void TakeOpBackward(const nnvm::NodeAttrs& attrs,
// grad_in is the gradient of the inputs in the feed-forward
Stream<xpu> *s = ctx.get_stream<xpu>();
- bool safe_acc = dmlc::GetEnv("MXNET_SAFE_ACCUMULATION", false);
+ bool safe_acc = dmlc::GetEnv("MXNET_SAFE_ACCUMULATION", true);
if (!safe_acc && outputs[0].type_flag_ == mshadow::kFloat16) {
common::LogOnce("MXNET_SAFE_ACCUMULATION=1 is recommended for TakeOpBackward "
"with float16 inputs. "