You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by la...@apache.org on 2022/03/10 16:21:46 UTC
[incubator-mxnet] branch zero_sharding updated (69e6c04 -> fc54fab)
This is an automated email from the ASF dual-hosted git repository.
lausen pushed a change to branch zero_sharding
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git.
from 69e6c04 Optimize 'take' operator for CPU (#20745)
add c93c152 [master][submodule] Upgrade oneDNN to v2.5.1 (#20662)
add 8779faf Fix issue with LogMessageFatal (#20848)
add 8cf49ed [master] Avoid dots, full path to a file. (#20751)
add 5bcf44c Refactor src/operator/subgraph/dnnl/dnnl_conv.cc file (#20849)
add e9840b8 [FEATURE] Fuses FC + elemwise_add operators for oneDNN (#20821)
add bdcf137 Make convolution operator fully work with oneDNN v2.4+ (#20847)
add fb52559 [submodule] Upgrade oneDNN to v2.5.2 (#20843)
add 8d67dbb Fix data-api links (#20867)
add 1cb4d1d Add quantization API doc and oneDNN to migration guide (#20813)
add e9becb9 [FEATURE] Add binomial sampling and fix multinomial sampling (#20734)
add c3ea258 ensure type consistent with legacy nvml api (#20499)
add 3507db2 The size of a stack needs to be greater than 4; by default is 8 (#20581)
add e7cee79 Fix data-api links (#20879)
add e65dce2 [DOC] Change of confusing Large Tensors documentation (#20831)
add f03fb23 Resolve the conflict with PR#20499 (#20887)
add fadfaee [Feature] Add bfloat to oneDNN version of binary broadcast operators. (#20846)
add ff4c14f [DOC] Large tensors documentation update (#20860)
add f4c4952 [FEATURE] Fuse dequantize with convolution (#20816)
add 4d915f7 Improve MaskedSoftmax by oneDNN (#20853)
add f35a4d7 [FEATURE] Add quantized version of reshape with DNNL reorder primitive. (#20835)
add abf35b8 [website] Add CPU quantization tutorial (#20856)
add f6266f0 Reduce after quantization memory usage (#20894)
add 9f47730 fix python docs ci (#20903)
add 188d7b6 [master] Implemented oneDNN Backward Adaptive Pooling kernel (#20825)
add 8069a18 [master] Fix issue with even number of channels in BatchNorm (#20907)
add 93aa9e3 add Bartłomiej as committer (#20896)
add 6e25c88 Add oneDNN support for "where" operator (#20862)
add 6c2b3dc Remove first_quantization_pass FC property (#20908)
add 54e122c quantized transpose operator (#20817)
add 588f541 [master] 2022.00 MKL' version, update (#20865)
add 7a49008 [BUGFIX] Type fix for large tensors (#20922)
add 13b8690 Avoid modifying loaded library map while iterating in lib_close() (#20941)
add fc54fab [FEATURE] Add g5 instance to CI (#20876)
No new revisions were added by this update.
Summary of changes:
.github/workflows/link_check.yml | 1 +
3rdparty/mshadow/mshadow/base.h | 2 +-
3rdparty/mshadow/mshadow/expr_engine-inl.h | 2 +-
3rdparty/mshadow/mshadow/logging.h | 257 ------------
3rdparty/mshadow/mshadow/stream_gpu-inl.h | 2 +-
3rdparty/onednn | 2 +-
CMakeLists.txt | 16 +-
CONTRIBUTORS.md | 2 +
ci/Jenkinsfile_utils.groovy | 1 +
ci/docker/Dockerfile.build.ubuntu | 2 +-
ci/docker/runtime_functions.sh | 9 +-
ci/jenkins/Jenkins_steps.groovy | 16 +
ci/jenkins/Jenkinsfile_unix_gpu | 3 +-
config/linux.cmake | 5 +-
docs/python_docs/python/api/contrib/index.rst | 5 +
.../quantization}/index.rst | 4 +-
.../getting-started/gluon_migration_guide.md | 61 +++
.../performance/backend/dnnl/dnnl_quantization.md | 305 ++++++++++++++
.../tutorials/performance/backend/dnnl/index.rst | 7 +-
docs/python_docs/requirements | 2 +-
.../src/pages/api/faq/large_tensor_support.md | 18 +-
include/mxnet/tuple.h | 2 +-
python/mxnet/amp/lists/symbol_fp16.py | 8 +
python/mxnet/contrib/quantization.py | 57 +--
python/mxnet/ndarray/numpy/_op.py | 10 +-
python/mxnet/ndarray/numpy/linalg.py | 2 +-
python/mxnet/ndarray/random.py | 134 +++++-
python/mxnet/numpy/linalg.py | 6 +-
python/mxnet/numpy/multiarray.py | 26 +-
python/mxnet/symbol/numpy/_symbol.py | 6 +-
python/mxnet/symbol/random.py | 82 +++-
python/mxnet/test_utils.py | 6 +-
src/initialize.cc | 22 +-
src/initialize.h | 4 +-
src/operator/contrib/adaptive_avg_pooling-inl.h | 3 +-
src/operator/contrib/adaptive_avg_pooling.cc | 150 +++++--
src/operator/contrib/batch_norm_relu.cc | 6 +-
src/operator/contrib/count_sketch.cu | 3 +
src/operator/nn/batch_norm.cc | 6 +-
src/operator/nn/dnnl/dnnl_act-inl.h | 4 +-
src/operator/nn/dnnl/dnnl_act.cc | 4 +-
src/operator/nn/dnnl/dnnl_base-inl.h | 4 +
src/operator/nn/dnnl/dnnl_base.cc | 6 +-
src/operator/nn/dnnl/dnnl_batch_dot-inl.h | 6 +-
src/operator/nn/dnnl/dnnl_batch_dot.cc | 4 +-
src/operator/nn/dnnl/dnnl_batch_norm-inl.h | 48 ++-
src/operator/nn/dnnl/dnnl_binary.cc | 2 +-
src/operator/nn/dnnl/dnnl_concat-inl.h | 6 +-
src/operator/nn/dnnl/dnnl_convolution-inl.h | 10 +-
src/operator/nn/dnnl/dnnl_convolution.cc | 26 +-
src/operator/nn/dnnl/dnnl_copy.cc | 4 +-
src/operator/nn/dnnl/dnnl_deconvolution-inl.h | 6 +-
src/operator/nn/dnnl/dnnl_deconvolution.cc | 4 +-
src/operator/nn/dnnl/dnnl_fully_connected-inl.h | 85 +++-
src/operator/nn/dnnl/dnnl_fully_connected.cc | 3 +
src/operator/nn/dnnl/dnnl_layer_norm-inl.h | 6 +-
src/operator/nn/dnnl/dnnl_layer_norm.cc | 2 +-
src/operator/nn/dnnl/dnnl_log_softmax.cc | 6 +-
src/operator/nn/dnnl/dnnl_lrn-inl.h | 4 +-
src/operator/nn/dnnl/dnnl_masked_softmax-inl.h | 84 ++++
src/operator/nn/dnnl/dnnl_masked_softmax.cc | 196 +++++++++
src/operator/nn/dnnl/dnnl_ops-inl.h | 13 +
src/operator/nn/dnnl/dnnl_pooling-inl.h | 72 ++--
src/operator/nn/dnnl/dnnl_pooling.cc | 124 ++++--
src/operator/nn/dnnl/dnnl_reshape-inl.h | 2 +-
src/operator/nn/dnnl/dnnl_reshape.cc | 9 +-
src/operator/nn/dnnl/dnnl_rnn-inl.h | 4 +-
src/operator/nn/dnnl/dnnl_rnn.cc | 3 +-
src/operator/nn/dnnl/dnnl_slice-inl.h | 6 +-
src/operator/nn/dnnl/dnnl_slice.cc | 6 +-
src/operator/nn/dnnl/dnnl_softmax-inl.h | 46 +--
src/operator/nn/dnnl/dnnl_softmax.cc | 43 +-
src/operator/nn/dnnl/dnnl_softmax_output.cc | 8 +-
src/operator/nn/dnnl/dnnl_stack.cc | 8 +-
src/operator/nn/dnnl/dnnl_sum.cc | 6 +-
src/operator/nn/dnnl/dnnl_transpose-inl.h | 6 +-
src/operator/nn/dnnl/dnnl_transpose.cc | 7 +-
src/operator/nn/dnnl/dnnl_where-inl.h | 73 ++++
src/operator/nn/dnnl/dnnl_where.cc | 224 ++++++++++
src/operator/nn/masked_softmax.cc | 141 +++++++
src/operator/nn/pooling-inl.h | 11 +-
src/operator/nn/pooling.cc | 33 +-
src/operator/nn/softmax-inl.h | 16 +
src/operator/nn/softmax.cc | 64 ---
src/operator/numpy/np_matrix_op-inl.h | 65 +++
src/operator/numpy/np_matrix_op.cc | 59 ---
src/operator/numpy/np_where_forward_op.cc | 48 ++-
.../quantization/dnnl/dnnl_dequantize-inl.h | 2 +-
src/operator/quantization/dnnl/dnnl_quantize-inl.h | 4 +-
.../quantization/dnnl/dnnl_quantize_v2-inl.h | 4 +-
.../quantization/dnnl/dnnl_quantized_act.cc | 4 +-
.../quantization/dnnl/dnnl_quantized_batch_norm.cc | 4 +-
.../quantization/dnnl/dnnl_quantized_concat.cc | 4 +-
.../quantization/dnnl/dnnl_quantized_conv.cc | 13 +-
.../dnnl/dnnl_quantized_elemwise_add.cc | 8 +-
.../quantization/dnnl/dnnl_quantized_flatten.cc | 4 +-
.../dnnl/dnnl_quantized_fully_connected.cc | 4 +-
.../quantization/dnnl/dnnl_quantized_pooling.cc | 6 +-
...ntized_flatten.cc => dnnl_quantized_reshape.cc} | 60 +--
.../quantization/dnnl/dnnl_quantized_transpose.cc | 102 +++++
.../quantization/dnnl/dnnl_requantize-inl.h | 4 +-
src/operator/quantization/quantized_reshape-inl.h | 73 ++++
src/operator/quantization/quantized_reshape.cc | 132 ++++++
src/operator/quantization/quantized_transpose.cc | 130 ++++++
src/operator/random/multisample_op.cc | 38 ++
src/operator/random/multisample_op.cu | 3 +
src/operator/random/sample_multinomial_op.cc | 80 +++-
src/operator/random/sample_multinomial_op.cu | 9 +-
src/operator/random/sample_multinomial_op.h | 194 ++++++++-
src/operator/random/sample_op.cc | 16 +
src/operator/random/sample_op.cu | 2 +
src/operator/random/sample_op.h | 73 ++++
src/operator/random/sampler.h | 195 +++++++++
src/operator/subgraph/build_subgraph.cc | 11 +
src/operator/subgraph/dnnl/dnnl_batch_dot.cc | 12 +-
.../subgraph/dnnl/dnnl_batch_dot_property.h | 6 +-
src/operator/subgraph/dnnl/dnnl_bn_relu_property.h | 8 +-
src/operator/subgraph/dnnl/dnnl_common.h | 4 +-
src/operator/subgraph/dnnl/dnnl_conv-inl.h | 9 +-
src/operator/subgraph/dnnl/dnnl_conv.cc | 122 +++---
src/operator/subgraph/dnnl/dnnl_conv_property.h | 13 +-
src/operator/subgraph/dnnl/dnnl_fc-inl.h | 3 +-
src/operator/subgraph/dnnl/dnnl_fc.cc | 450 +++++++++++++--------
src/operator/subgraph/dnnl/dnnl_fc_property.h | 5 +-
src/operator/subgraph/dnnl/dnnl_fc_sum_fuse.h | 290 +++++++++++++
.../subgraph/dnnl/dnnl_identity_property.h | 1 -
.../dnnl/dnnl_post_quantize_align_scale_property.h | 3 +-
.../subgraph/dnnl/dnnl_post_quantize_property.h | 25 +-
.../subgraph/dnnl/dnnl_subgraph_base-inl.h | 3 +-
.../subgraph/dnnl/dnnl_subgraph_property.cc | 4 +
src/operator/subgraph/dnnl/dnnl_transformer-inl.h | 4 +-
src/operator/subgraph/dnnl/dnnl_transformer.cc | 10 +-
.../subgraph/dnnl/dnnl_transformer_qk_property.h | 9 +-
.../dnnl/dnnl_transformer_valatt_property.h | 11 +-
src/profiler/storage_profiler.cc | 18 +-
tests/cpp/operator/dnnl_test.cc | 2 +-
tests/python/dnnl/subgraphs/subgraph_common.py | 157 +++++--
tests/python/dnnl/subgraphs/test_conv_subgraph.py | 130 +++++-
tests/python/dnnl/subgraphs/test_fc_subgraph.py | 192 ++++++++-
tests/python/dnnl/test_bf16_operator.py | 44 +-
tests/python/dnnl/test_dnnl.py | 20 +
tests/python/gpu/test_operator_gpu.py | 9 +-
tests/python/quantization/test_quantization.py | 67 +++
tests/python/unittest/test_gluon_rnn.py | 26 +-
tests/python/unittest/test_numpy_op.py | 38 +-
tests/python/unittest/test_operator.py | 25 +-
tests/python/unittest/test_random.py | 79 +++-
tools/dependencies/README.md | 2 +-
148 files changed, 4562 insertions(+), 1290 deletions(-)
delete mode 100644 3rdparty/mshadow/mshadow/logging.h
copy docs/python_docs/python/api/{kvstore_server => contrib/quantization}/index.rst (93%)
create mode 100644 docs/python_docs/python/tutorials/performance/backend/dnnl/dnnl_quantization.md
create mode 100644 src/operator/nn/dnnl/dnnl_masked_softmax-inl.h
create mode 100644 src/operator/nn/dnnl/dnnl_masked_softmax.cc
create mode 100644 src/operator/nn/dnnl/dnnl_where-inl.h
create mode 100644 src/operator/nn/dnnl/dnnl_where.cc
create mode 100644 src/operator/nn/masked_softmax.cc
copy src/operator/quantization/dnnl/{dnnl_quantized_flatten.cc => dnnl_quantized_reshape.cc} (50%)
create mode 100644 src/operator/quantization/dnnl/dnnl_quantized_transpose.cc
create mode 100644 src/operator/quantization/quantized_reshape-inl.h
create mode 100644 src/operator/quantization/quantized_reshape.cc
create mode 100644 src/operator/quantization/quantized_transpose.cc
create mode 100644 src/operator/subgraph/dnnl/dnnl_fc_sum_fuse.h