You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by ha...@apache.org on 2019/05/16 00:08:54 UTC
[incubator-mxnet] branch v1.5.x updated (5f19362 -> c5265fb)
This is an automated email from the ASF dual-hosted git repository.
haibin pushed a change to branch v1.5.x
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git.
from 5f19362 fix tests (#14565)
add 8c2a25f Enhance subgraph API (#14113)
add 4075212 Do not touch GPU 0 during ReleaseAll (#14550)
add b6eac1d Change CUB submodule to track Nvidia CUB project. (#13322)
add 09ba8be Fixes static build script for cub directory rename (#14578)
add 33b6543 example/ssd/evaluate/eval_metric.py (#14561)
add e2f5b47 Support SyncBatchNorm5D (#14542)
add 6392666 Disable Flaky Test test_poisson_generator (#14540)
add 9e4ee99 [MXNET-1357] Fix the cpp-examples to add exception handling (#14441)
add dde77d4 Updates gpu tests to use CUDNN_VERSION supplied by the environment but default to 7.0.3 if not set (#14595)
add 214ce63 fix build cpp examples option (#14562)
add a6fd0e9 Fix flaky test poisson generator & test_negative_binomial_generator (#14571)
add 6478691 Fixing unintentional variable overloading (#14438)
add b482a44 [MXNET-1379] update reshape operator (#14600)
add 28fe552 Updated documentation about nightly tests (#14493)
add 53743d6 set _scale in Trainer using optimizer rescale_grad (#14593)
add 5241c1b Add Gluon Transformer Crop (#14259)
add 7983ef1 fix quantize graph pass (#14605)
add b3ab101 GELU (#14449)
add 43f7c12 added note about cuda9.2 requirement (#14140)
add d5d1d7a Add BERT QA Scala/Java example (#14592)
add b68f18c Fix dockerized GPU builds in dev_menu (#14603)
add dab4ffc replace std::random_shuffle to std::shuffle (#14523)
add 3781816 Add exception handling support for waitall (#14397)
add daabe5c split_and_load can now handle num_ctx > num_data. Issue #13909 (#14607)
add 1dafa1a Comment out test_unix_python3_tensorrt_gpu step (#14642)
add 9dda62f The folder python-howto was removed in an earlier commit. The reference to that folder was not removed. Making a PR to remove the reference to this folder to keep documents consistent (#14573)
add 4530ad8 Fix aspect ratio sampling for RandomResizedCrop (#14585)
add 74e71e9 [MXNET-400] support string type for kvstore key in cpp-package (#10792)
add 733e54c Added repeats for github status updates (#14530)
add 9443ae6 Fix warning on macro expansion using defined. (#14598)
add c6516cc [MXNET-1359] Adds a multiclass-MCC metric derived from Pearson (#14461)
add d596b59 Support SSD f32/int8 evaluation on COCO dataset (#14646)
add fde4963 [WIP][Dependency Update] Upgrade the libtiff to 4.0.10 (#14623)
add 8f0878c [MXNET-1287] Miscellaneous Scala warning fixes (#14658)
add 2c5d7f7 Optimize transpose operator with MKL-DNN (#14545)
add 52e2e8e Fix scalastyle (#14669)
add e701e71 Avoid secondary deployment of package to local (#14647)
add 26b14bc [MXNET-1287] Up scala comp (#14667)
add 596ef3a Add nd.power and sym.pow (#14606)
add 6a93bda Fix scaladoc scalastyle violations in Infer package (#14671)
add 0284236 [Clojure] enhance draw bounding box (#14567)
add 800590e Add MXEnginePushAsync and MXEnginePushSync C APIs (#14615)
add c788804 Updates tolerances for test_layer_bidirectional (#14682)
add 30d479f Tweak the copy for the cudnn autotuning warning. (#14680)
add 5fc5c27 Fix profiler check (#14677)
add 1c49e40 Change RNN OP to stateful (#14476)
add c437d5b use mkl sparse matrix to improve performance (#14492)
add 273ebc7 [DEP] upgrade dmlc-core (#14510)
add a5db391 [Clojure] Add methods based on NDArrayAPI/SymbolAPI (#14195)
add c2ba51b [Clojure] Clojure BERT QA example (#14691)
add b3b952f fp16 safe norm operator (#14616)
add f90d1c0 Use ubuntu_rat container for rat check (#14678)
add 413fe97 Avoid unnecessary vector copies in imperative_utils.cc (#14665)
add 1f84682 Properly handling custom op exception by modify engine (#14693)
add 52a3553 [docstring] improve docstring and indentation in `module.clj` (#14705)
add 3f3ba92 [numpy] Support zero-dim and zero-size tensors in MXNet (#14661)
add 51d3291 Updated docs for R-package installation (#14269)
add 8e04b88 Update inception_inference.cpp (#14674)
add ff04de0 Add vim-nox to ci/docker/install/ubuntu_core.sh (#14632)
add a26ad37 Disable USE_GPERFTOOLS (#14711)
add 3e2f752 Reference engine from chunk via weak pointer (#14591)
add 42e929b Fix spelling in threaded_engine_test (#14709)
add 3b23c2d Fix documentation for bilinear upsampling and add unit test (#14035)
add a083a61 [MKLDNN]Improve quantizeV2 and dequantize latency (#14641)
add 0da4b67 [MKLDNN]Add quantized relu (#14604)
add 18d4051 Add publish test of PyPi cu100mkl (#14637)
add 93238a2 [contrib][op] fix MultiBoxPrior confusing results if first ratio is not 1.0 (#13763)
add 391a1be Set idx2name for Optimizer object (#14703)
add 818be02 [MXNET-1377] Add static-dependencies licenses (#14726)
add 5b6e25b [MXNET-1287] Feat dep (#14668)
add dc48cd2 License Googletest and Appendix (#14687)
add 153d2f4 [MXNET-1385] Improved Scala Init and Macros warning messages (#14656)
add 5331933 Reenable TensorRT step (#14654)
add dd1004b fix pi instructions (#14746)
add 100586a Change size_t to int within for loop to fix windows build error (#14740)
add 68efc15 fix custom op fork test (#14753)
add 494c29e [BUGFIX] fix ELU function will appear nan when calculating the gradient (#14673)
add da7fff7 fix min max on zero-sized ndarray (#14745)
add a1b0a3a [Clojure] Better api docstrings by replacing newlines (#14752)
add 0f63659 add a compiler flag to use int64 as tensor size (#14570)
add 3b39c56 fix shape index bug (#14518)
add 2fd4720 fix acc_type_switch macro with extra tests (#14773)
add 014ca13 [DOC] Update ubuntu install instructions from source (#14534)
add 8604c3c [Mxnet-1397] Support symbolic api for requantize and dequantize (#14749)
add 8cae72e julia/ndarray: fix flaky test cases for `clamp` (#14776)
add 587d480 Use DEFAULT macro in C APIs (#14767)
add 22377ed Fix GELU backward possible NaN (#14782)
add acf53fd Mention additional language bindings and add links (#14798)
add 3d3803e Updates python setup.py for recent license changes (#14778)
add 97e09f2 Improve CMake handling of sse2 and sse3 (#14757)
add 6aeb97e change mxnet_option behavior (#14743)
add 680bade Use correct stash name when running nightly tests (#14809)
add 5dd9fa2 [clojure][generator] ndarray/symbol api random merged (#14800)
add 369b66d Improve cached_op performance for static mode (#14785)
add 40e3d7c data preparation file moved in example (#14781)
add 6cbc273 Scala/Java Predict API fix #14756 (#14804)
add 003800c clean up submodule (#14645)
add 6c60025 [MKLDNN]Refactor requantize to speed up execution (#14608)
add c18381d [MXNET-1398] Enable zero-copy from numpy to MXNet NDArray (#14733)
add 64287dd Speed up SequenceReverse (#14627)
add 1af29e9 Fixes for wine detection tutorial (#13886)
add 3a46980 Use USE_SIGNAL_HANDLER by default set to ON in CMakeLists.txt (#14599)
add 07aef13 Add unpooled gpu memory type (#14716)
add 1238aa0 Revert "use mkl sparse matrix to improve performance (#14492)" (#14806)
add c7577e5 added extraction/generation of diagonal and triangular matrices to linalg (#14501)
add 5fda0a5 add clojure tutorials to index (#14814)
add 488fad2 Fix iterator over symbol when multiple children have the same name (#14597)
add cdd7087 Fix Clojure BERT example's context argument (#14843)
add 84c1635 [MKLDNN] add quantized sum (#14614)
add bde1b84 [int8] Add MobileNetV2_1.0 & ResNet18 Quantization (#14823)
add 1c874cf reformat trt to use subgraph API, add fp16 support (#14040)
add 5e5a59e Upgrade Pylint version to 2.3.1 (#14807)
add 977e558 Make docblocks for Gluon BatchNorm and SyncBatchNorm consistent with the code (#14840)
add 381a9da Print reproduction command on CI failure (#14815)
add e17b7e2 [MXNET-13578] Fix cmake installation failed (#14692)
add 36c3306 Update base CUDA image for CI to v10.0 cuDNN 7.3.1 (#14513)
add 1540a84 [Clojure] Remove unneeded test files (#14813)
add 372f531 [DEV] update code owner (#14862)
add 204f3f2 Revert "Improve cached_op performance for static mode (#14785)" (#14868)
add d09f68a Update lstm_crf.py (#14865)
add 5ba285b Fix sample_multinomial number of outputs bug (#14873)
add 4d7bae1 Add the Gluon Implementation of Deformable Convolution (#14810)
add 621b391 Refactor ImageRecordIter (#14824)
add 25ba1d1 Prevent crashes for opencv exception and std::exception (#14433)
add b30949f [MXNet-1211] Factor and "Like" modes in BilinearResize2D operator (#13226)
add 2113cb7 [Clojure] Add Fine Tuning Sentence Pair Classification BERT Example (#14769)
add f6ef206 [Bugfix] Fix layer norm for large input shape (#14870)
add 42ede50 rewrite test_custom_op_exc (#14878)
add a722db4 [Dependency Update] Upgrade openssl to 1.1.1b (#14837)
add 0255dd6 [Dependency Update] Upgrade cuDNN & NCCL (#14884)
add fdd45cf Add mkldnn_version.h to pip package (#14899)
add 5bda980 fix add_n bug: when input mem overlap with output mem, results is wrong (#14889)
add 08895b7 Fix the return type of sparse.clip operator (#14856)
add 527573e Add support for fast variable-length LSTM (#14208)
add 0ddef13 Revert "[Dependency Update] Upgrade cuDNN & NCCL (#14884)" (#14910)
add 2e03e9f Adds additional CUDA build environments (#14909)
add 8dddac0 [MXNET-1400] adding tests cases to verify large tensor support for depth_to_space and space_to_depth (#14797)
add da02488 upgrade the libpng to 1.6.35 (#14620)
add 13f81a0 Improve dev_menu virtualenv handling (#14788)
add 874fb89 Add API documentation for upsampling operator with examples (#14919)
add f4598e7 Pins version of scikit-learn for python2 due to drop in support (#14928)
add 4796851 Deprecate NDArrayCollector and instead use ResourceScope (#14780)
add b22ee95 [MXNET-857] Add initial NVTX profiler implementation (#12328)
add 8a4ad9f upgrade the version to 2.0.2 (#14621)
add 669ab2c Updates to cudnn package installation (#14923)
add d577b6f [MXNET-1352] Allow dynamic shape in while_loop and if conditionals (#14393)
add f67d067 Add numpy linspace (#14927)
add 13d6ee6 Fix reshape to add in-place back (#14903)
add 31225485 use mx.context.num_gpus instead of mx.test_utils.list_gpus in MF recommender example (#14926)
add 1eba37a Re-enable static cached_op optimization (#14931)
add 99f5f66 Disables TensorRT build step (#14958)
add f7b7163 Fixed and re-enables TensorRT steps (#14960)
add 8b7e374 Fixes call to build ubuntu gpu in nightly tests (#14964)
add 2ee7f4e Improve order of execution of install scripts. (#14867)
add c5265fb Add primitive cache for MKL-DNN sum (elemwise_add operator) (#14914)
No new revisions were added by this update.
Summary of changes:
.gitignore | 2 +-
.gitmodules | 6 +-
3rdparty/cub | 1 -
3rdparty/dmlc-core | 2 +-
3rdparty/googletest | 2 +-
3rdparty/mshadow | 2 +-
3rdparty/nvidia_cub | 1 +
3rdparty/onnx-tensorrt | 2 +-
CMakeLists.txt | 54 +-
CODEOWNERS | 25 +-
CONTRIBUTORS.md | 5 +
LICENSE | 46 +-
Makefile | 14 +-
R-package/src/ndarray.cc | 6 +-
R-package/src/symbol.cc | 20 +-
README.md | 2 +-
amalgamation/amalgamation.py | 27 +-
ci/Jenkinsfile_utils.groovy | 33 +-
ci/build.py | 1 +
ci/docker/Dockerfile.build.centos7_gpu | 6 +-
ci/docker/Dockerfile.build.ubuntu_base_cpu | 1 +
ci/docker/Dockerfile.build.ubuntu_base_gpu | 6 +-
ci/docker/Dockerfile.build.ubuntu_build_cuda | 7 +-
ci/docker/Dockerfile.build.ubuntu_cpu | 2 +-
...buntu_gpu => Dockerfile.build.ubuntu_gpu_cu100} | 9 +-
...ubuntu_gpu => Dockerfile.build.ubuntu_gpu_cu80} | 2 +-
...ubuntu_gpu => Dockerfile.build.ubuntu_gpu_cu90} | 7 +-
...ubuntu_gpu => Dockerfile.build.ubuntu_gpu_cu92} | 6 +-
ci/docker/Dockerfile.build.ubuntu_gpu_tensorrt | 7 +-
ci/docker/Dockerfile.build.ubuntu_nightly_gpu | 8 +-
ci/docker/install/centos7_cudnn.sh | 59 +
ci/docker/install/tensorrt.sh | 3 +-
ci/docker/install/ubuntu_core.sh | 1 +
.../install/{ubuntu_python.sh => ubuntu_cudnn.sh} | 36 +-
ci/docker/install/ubuntu_nvidia.sh | 2 +-
ci/docker/install/ubuntu_publish.sh | 4 +-
ci/docker/install/ubuntu_python.sh | 4 +-
ci/docker/install/ubuntu_tutorials.sh | 4 +-
ci/docker/runtime_functions.sh | 80 +-
ci/jenkins/Jenkins_steps.groovy | 83 +-
ci/jenkins/Jenkinsfile_unix_cpu | 3 +-
ci/jenkins/Jenkinsfile_unix_gpu | 4 +-
.../Modules/FindNVTX.cmake | 27 +-
cmake/Utils.cmake | 2 +-
contrib/clojure-package/.gitignore | 6 +
contrib/clojure-package/examples/bert/.gitignore | 18 +
contrib/clojure-package/examples/bert/README.md | 156 +++
.../examples/bert/fine-tune-bert.ipynb | 510 +++++++
.../examples/bert/fine-tune-bert.md | 371 +++++
.../clojure-package/examples/bert/get_bert_data.sh | 32 +
.../{infer/objectdetector => bert}/project.clj | 24 +-
.../examples/bert/squad-samples.edn | 39 +
.../bert/src/bert/bert_sentence_classification.clj | 160 +++
.../examples/bert/src/bert/infer.clj | 129 ++
.../examples/bert/src/bert/util.clj | 52 +
.../bert/bert_sentence_classification_test.clj | 86 ++
.../examples/bert/test/bert/infer_test.clj | 43 +
.../examples/infer/objectdetector/project.clj | 1 -
.../infer/objectdetector/src/infer/draw.clj | 44 -
.../src/infer/objectdetector_example.clj | 59 +-
.../test/infer/objectdetector_example_test.clj | 6 +-
contrib/clojure-package/integration-tests.sh | 2 +-
contrib/clojure-package/src/dev/generator.clj | 592 ++++++--
.../src/org/apache/clojure_mxnet/callback.clj | 9 +-
.../src/org/apache/clojure_mxnet/image.clj | 26 +-
.../src/org/apache/clojure_mxnet/module.clj | 544 +++++---
.../{callback.clj => ndarray_api.clj} | 29 +-
.../{callback.clj => ndarray_random_api.clj} | 25 +-
.../src/org/apache/clojure_mxnet/symbol_api.clj | 32 +
.../org/apache/clojure_mxnet/symbol_random_api.clj | 32 +
.../src/org/apache/clojure_mxnet/util.clj | 8 +-
.../clojure-package/test/dev/generator_test.clj | 183 ++-
.../clojure-package/test/good-test-ndarray-api.clj | 170 +++
.../test/good-test-ndarray-random-api.clj | 95 ++
.../clojure-package/test/good-test-symbol-api.clj | 192 +++
.../test/good-test-symbol-random-api.clj | 118 ++
.../test/org/apache/clojure_mxnet/conv_test.clj | 24 +-
.../test/org/apache/clojure_mxnet/image_test.clj | 16 +-
.../org/apache/clojure_mxnet/ndarray_api_test.clj | 415 ++++++
.../org/apache/clojure_mxnet/symbol_api_test.clj | 61 +
cpp-package/CMakeLists.txt | 2 +-
cpp-package/example/alexnet.cpp | 2 +
cpp-package/example/charRNN.cpp | 8 +-
cpp-package/example/googlenet.cpp | 2 +
cpp-package/example/inception_bn.cpp | 2 +
.../example/inference/inception_inference.cpp | 2 +-
cpp-package/example/lenet.cpp | 3 +
cpp-package/example/lenet_with_mxdataiter.cpp | 2 +
cpp-package/example/mlp.cpp | 3 +
cpp-package/example/mlp_cpu.cpp | 2 +
cpp-package/example/mlp_csv.cpp | 2 +
cpp-package/example/mlp_gpu.cpp | 2 +
cpp-package/example/resnet.cpp | 4 +-
cpp-package/example/test_kvstore.cpp | 201 +++
cpp-package/example/test_score.cpp | 4 +-
cpp-package/example/utils.h | 9 +
cpp-package/include/mxnet-cpp/kvstore.h | 13 +-
cpp-package/include/mxnet-cpp/kvstore.hpp | 78 +-
cpp-package/include/mxnet-cpp/ndarray.hpp | 14 +-
cpp-package/include/mxnet-cpp/symbol.hpp | 32 +-
cpp-package/tests/ci_test.sh | 7 +-
cpp-package/tests/travis/setup.sh | 2 +-
dev_menu.py | 33 +-
docs/api/python/gluon/contrib.md | 15 +
docs/api/python/ndarray/linalg.md | 6 +-
docs/api/python/profiler/profiler.md | 4 +-
docs/api/python/symbol/linalg.md | 6 +-
docs/architecture/exception_handling.md | 3 -
docs/architecture/note_data_loading.md | 8 +-
docs/faq/env_var.md | 9 +-
docs/install/index.md | 38 +-
docs/install/requirements.txt | 2 +-
docs/install/ubuntu_setup.md | 97 +-
docs/tutorials/embedded/wine_detector.md | 51 +-
docs/tutorials/index.md | 21 +-
docs/tutorials/python/profiler.md | 25 +-
docs/tutorials/python/profiler_nvprof.png | Bin 0 -> 235747 bytes
docs/tutorials/python/profiler_nvprof_zoomed.png | Bin 0 -> 254663 bytes
docs/tutorials/python/profiler_winograd.png | Bin 0 -> 75450 bytes
example/README.md | 1 -
example/gluon/{super_resolution => }/data.py | 0
example/gluon/lstm_crf/lstm_crf.py | 2 +-
example/quantization/README.md | 70 +-
example/quantization/imagenet_gen_qsym_mkldnn.py | 20 +-
example/quantization/imagenet_inference.py | 54 +-
example/recommenders/demo1-MF.ipynb | 2 +-
example/recommenders/matrix_fact.py | 8 +-
example/ssd/README.md | 56 +-
example/ssd/dataset/mscoco.py | 10 +-
example/ssd/dataset/names/mscoco.names | 12 +-
example/ssd/evaluate/eval_metric.py | 2 +-
example/ssd/symbol/legacy_vgg16_ssd_300.py | 3 +-
example/ssd/symbol/legacy_vgg16_ssd_512.py | 3 +-
example/ssd/symbol/symbol_builder.py | 3 +-
example/ssd/train.py | 5 +-
example/ssd/train/metric.py | 11 +
example/ssd/train/train_net.py | 8 +-
include/mxnet/c_api.h | 285 +++-
include/mxnet/c_api_error.h | 25 +-
include/mxnet/c_api_test.h | 2 +-
include/mxnet/engine.h | 6 +-
include/mxnet/imperative.h | 16 +
include/mxnet/libinfo.h | 6 +-
include/mxnet/ndarray.h | 37 +-
include/mxnet/tensor_blob.h | 16 +-
include/mxnet/tuple.h | 176 ++-
julia/test/unittest/ndarray.jl | 22 +-
make/config.mk | 12 +-
make/crosscompile.jetson.mk | 9 +-
make/osx.mk | 6 +
perl-package/AI-MXNet/lib/AI/MXNet/Executor.pm | 2 +-
perl-package/AI-MXNet/lib/AI/MXNet/NDArray.pm | 4 +-
perl-package/AI-MXNet/lib/AI/MXNet/Symbol.pm | 4 +-
perl-package/AI-MXNetCAPI/mxnet.i | 172 +--
perl-package/AI-MXNetCAPI/mxnet_typemaps.i | 22 +-
python/mxnet/__init__.py | 2 +-
python/mxnet/base.py | 153 +-
.../mxnet/contrib/onnx/mx2onnx/_export_helper.py | 2 +-
.../mxnet/contrib/onnx/mx2onnx/_op_translations.py | 2 +-
.../contrib/onnx/onnx2mx/_translation_utils.py | 2 +-
python/mxnet/contrib/quantization.py | 1 +
python/mxnet/contrib/tensorrt.py | 119 +-
python/mxnet/contrib/text/vocab.py | 2 +-
python/mxnet/executor.py | 50 +-
python/mxnet/gluon/contrib/__init__.py | 2 +
python/mxnet/gluon/contrib/{ => cnn}/__init__.py | 10 +-
python/mxnet/gluon/contrib/cnn/conv_layers.py | 221 +++
python/mxnet/gluon/contrib/nn/basic_layers.py | 8 +-
python/mxnet/gluon/data/vision/transforms.py | 61 +
python/mxnet/gluon/nn/activations.py | 24 +-
python/mxnet/gluon/nn/basic_layers.py | 8 +-
python/mxnet/gluon/parameter.py | 2 +-
python/mxnet/gluon/rnn/rnn_layer.py | 41 +-
python/mxnet/gluon/trainer.py | 8 +-
python/mxnet/gluon/utils.py | 12 +-
python/mxnet/image/detection.py | 2 +-
python/mxnet/image/image.py | 10 +-
python/mxnet/io/io.py | 2 +
python/mxnet/metric.py | 132 +-
python/mxnet/model.py | 4 +-
python/mxnet/module/base_module.py | 2 +-
python/mxnet/module/module.py | 16 +-
python/mxnet/module/python_module.py | 2 +-
python/mxnet/ndarray/_internal.py | 2 -
python/mxnet/ndarray/contrib.py | 5 +-
python/mxnet/ndarray/ndarray.py | 188 ++-
python/mxnet/ndarray/register.py | 7 +-
python/mxnet/ndarray/sparse.py | 4 +-
python/mxnet/ndarray_doc.py | 2 +-
python/mxnet/operator.py | 28 +-
python/mxnet/optimizer/optimizer.py | 2 +-
python/mxnet/recordio.py | 4 +
python/mxnet/rnn/rnn_cell.py | 4 +-
python/mxnet/symbol/_internal.py | 2 -
python/mxnet/symbol/register.py | 7 +-
python/mxnet/symbol/symbol.py | 196 ++-
python/mxnet/test_utils.py | 16 +-
python/mxnet/visualization.py | 4 +
.../assembly/src/main/assembly/assembly.xml | 13 +-
.../assembly/src/main/assembly/javadoc.xml | 2 +-
.../assembly/src/main/assembly/source.xml | 2 +-
scala-package/core/pom.xml | 5 +-
.../src/main/scala/org/apache/mxnet/Executor.scala | 143 +-
.../scala/org/apache/mxnet/ExecutorManager.scala | 27 +-
.../main/scala/org/apache/mxnet/FeedForward.scala | 27 +-
.../core/src/main/scala/org/apache/mxnet/IO.scala | 35 +-
.../src/main/scala/org/apache/mxnet/Image.scala | 8 +
.../src/main/scala/org/apache/mxnet/LibInfo.scala | 37 +-
.../scala/org/apache/mxnet/MX_PRIMITIVES.scala | 2 +
.../src/main/scala/org/apache/mxnet/NDArray.scala | 42 +-
.../scala/org/apache/mxnet/NDArrayCollector.scala | 6 +
.../main/scala/org/apache/mxnet/NumpyScope.scala | 63 +
.../scala/org/apache/mxnet/ResourceScope.scala | 46 +-
.../src/main/scala/org/apache/mxnet/Symbol.scala | 93 +-
.../scala/org/apache/mxnet/io/MXDataIter.scala | 3 +-
.../scala/org/apache/mxnet/io/NDArrayIter.scala | 5 +-
.../org/apache/mxnet/io/PrefetchingIter.scala | 87 +-
.../scala/org/apache/mxnet/javaapi/Image.scala | 25 +
.../Layout.scala} | 20 +-
.../scala/org/apache/mxnet/module/BaseModule.scala | 20 +-
.../org/apache/mxnet/module/BucketingModule.scala | 60 +-
.../scala/org/apache/mxnet/module/Module.scala | 157 ++-
.../org/apache/mxnet/module/SequentialModule.scala | 151 +-
.../org/apache/mxnet/util/OptionConversion.scala | 2 +
.../java/org/apache/mxnet/javaapi/NDArrayTest.java | 4 +-
.../src/test/scala/org/apache/mxnet/IOSuite.scala | 29 +-
.../test/scala/org/apache/mxnet/ImageSuite.scala | 10 +-
.../test/scala/org/apache/mxnet/KVStoreSuite.scala | 3 -
.../org/apache/mxnet/ModelParallelSuite.scala | 5 -
.../test/scala/org/apache/mxnet/ModuleSuite.scala | 32 +-
.../test/scala/org/apache/mxnet/NDArraySuite.scala | 8 +-
.../scala/org/apache/mxnet/NumpyScopeSuite.scala} | 26 +-
.../scala/org/apache/mxnet/OperatorSuite.scala | 6 -
.../test/scala/org/apache/mxnet/SymbolSuite.scala | 14 -
.../scala/org/apache/mxnet/train/ConvSuite.scala | 1 +
scala-package/deploy/pom.xml | 3 +
scala-package/deploy/src/main/deploy/deploy.xml | 2 +-
scala-package/examples/pom.xml | 9 +-
.../examples/scripts/infer/bert/get_bert_data.sh | 12 +-
.../scripts/infer/bert/run_bert_qa_example.sh | 10 +-
.../javaapi/infer/bert/BertDataParser.java | 126 ++
.../mxnetexamples/javaapi/infer/bert/BertQA.java | 148 ++
.../mxnetexamples/javaapi/infer/bert/README.md | 103 ++
.../benchmark/ScalaInferenceBenchmark.scala | 8 +-
.../CNNTextClassification.scala | 8 +-
.../mxnetexamples/customop/ExampleCustomOp.scala | 6 +-
.../customop/ExampleCustomOpWithRtc.scala | 2 +-
.../org/apache/mxnetexamples/gan/GanMnist.scala | 6 +-
.../datasets/SyntheticDataIter.scala | 2 +-
.../imageclassifier/ImageClassifierExample.scala | 4 +-
.../objectdetector/SSDClassifierExample.scala | 4 +-
.../org/apache/mxnetexamples/module/MnistMlp.scala | 2 +-
.../mxnetexamples/module/SequentialModuleEx.scala | 2 +-
.../mxnetexamples/multitask/ExampleMultiTask.scala | 15 +-
.../mxnetexamples/neuralstyle/NeuralStyle.scala | 6 +-
.../neuralstyle/end2end/BoostInference.scala | 6 +-
.../neuralstyle/end2end/BoostTrain.scala | 6 +-
.../neuralstyle/end2end/DataProcessing.scala | 4 +-
.../org/apache/mxnetexamples/rnn/BucketIo.scala | 12 +-
.../apache/mxnetexamples/rnn/LstmBucketing.scala | 2 +-
.../org/apache/mxnetexamples/rnn/TestCharRnn.scala | 2 +-
.../apache/mxnetexamples/rnn/TrainCharRnn.scala | 9 +-
.../javaapi/infer/predictor/BertExampleTest.java | 71 +
.../benchmark/ScalaInferenceBenchmarkSuite.scala | 1 +
.../CNNClassifierExampleSuite.scala | 3 +-
.../customop/CustomOpExampleSuite.scala | 1 +
.../apache/mxnetexamples/gan/GanExampleSuite.scala | 3 +-
.../IMClassificationExampleSuite.scala | 1 +
.../ImageClassifierExampleSuite.scala | 1 +
.../ObjectDetectorExampleSuite.scala | 1 +
.../neuralstyle/NeuralStyleSuite.scala | 3 +-
.../apache/mxnetexamples/rnn/ExampleRNNSuite.scala | 3 +-
scala-package/infer/pom.xml | 4 -
.../scala/org/apache/mxnet/infer/Classifier.scala | 17 +
.../org/apache/mxnet/infer/ImageClassifier.scala | 36 +-
.../org/apache/mxnet/infer/MXNetHandler.scala | 14 +
.../org/apache/mxnet/infer/ObjectDetector.scala | 19 +
.../scala/org/apache/mxnet/infer/Predictor.scala | 38 +-
.../mxnet/infer/javaapi/ObjectDetector.scala | 80 +-
.../org/apache/mxnet/infer/javaapi/Predictor.scala | 52 +-
.../org/apache/mxnet/infer/ClassifierSuite.scala | 4 +-
.../main/scala/org/apache/mxnet/init/Base.scala | 27 +-
.../main/scala/org/apache/mxnet/init/LibInfo.scala | 30 +
scala-package/macros/pom.xml | 4 -
.../scala/org/apache/mxnet/APIDocGenerator.scala | 101 +-
.../scala/org/apache/mxnet/GeneratorBase.scala | 81 +-
.../main/scala/org/apache/mxnet/NDArrayMacro.scala | 50 +-
.../main/scala/org/apache/mxnet/SymbolMacro.scala | 47 +-
.../apache/mxnet/javaapi/JavaNDArrayMacro.scala | 17 +-
.../org/apache/mxnet/utils/CToScalaUtils.scala | 15 +-
scala-package/mxnet-demo/java-demo/README.md | 2 +
.../src/main/java/mxnet/NDArrayCreation.java | 2 +-
.../src/main/java/mxnet/NDArrayOperation.java | 2 +-
.../main/native/org_apache_mxnet_native_c_api.cc | 232 +++-
.../main/native/org_apache_mxnet_native_c_api.h | 40 +-
scala-package/pom.xml | 50 +-
scala-package/spark/pom.xml | 5 +
.../scala/org/apache/mxnet/spark/MXNDArray.scala | 2 +-
.../scala/org/apache/mxnet/spark/MXNetModel.scala | 2 +-
.../apache/mxnet/spark/io/LabeledPointIter.scala | 4 +-
.../mxnet/spark/io/LongLivingDataBatch.scala | 3 +-
.../org/apache/mxnet/spark/io/PointIter.scala | 4 +-
.../org/apache/mxnet/spark/utils/Network.scala | 22 +-
.../org/apache/mxnet/spark/MXNetGeneralSuite.scala | 1 +
.../apache/mxnet/spark/SharedSparkContext.scala | 12 +-
src/c_api/c_api.cc | 129 +-
src/c_api/c_api_common.h | 31 +
src/c_api/c_api_executor.cc | 482 ++++++-
src/c_api/c_api_ndarray.cc | 12 +
src/c_api/c_api_symbolic.cc | 109 +-
src/c_api/c_api_test.cc | 4 +-
src/c_api/c_predict_api.cc | 1 +
src/common/exec_utils.h | 4 +-
src/common/serialization.h | 318 -----
src/common/utils.h | 60 +-
src/engine/naive_engine.cc | 3 +
src/engine/threaded_engine.cc | 25 +
src/engine/threaded_engine.h | 43 +-
src/executor/attach_op_execs_pass.cc | 8 +-
src/executor/exec_pass.h | 21 +-
src/executor/graph_executor.cc | 279 +++-
src/executor/graph_executor.h | 3 +
src/executor/infer_graph_attr_pass.cc | 20 +-
src/executor/tensorrt_pass.cc | 596 --------
src/executor/trt_graph_executor.cc | 443 ------
src/executor/trt_graph_executor.h | 111 --
src/imperative/cached_op.cc | 10 +-
src/imperative/cached_op.h | 8 +-
src/imperative/imperative.cc | 4 +-
src/imperative/imperative_utils.cc | 61 +-
src/imperative/imperative_utils.h | 56 +-
src/initialize.cc | 4 +-
src/io/image_aug_default.cc | 2 +-
src/io/image_det_aug_default.cc | 4 +-
src/io/image_io.cc | 6 +-
src/io/image_iter_common.h | 10 +
src/io/iter_batchloader.h | 2 +-
src/io/iter_image_recordio_2.cc | 119 +-
src/io/iter_sparse_batchloader.h | 2 +-
src/kvstore/gradient_compression.cc | 10 +-
src/libinfo.cc | 3 +
src/ndarray/ndarray.cc | 55 +-
src/ndarray/ndarray_function.cc | 12 +-
src/ndarray/ndarray_function.h | 2 +-
src/nnvm/plan_memory.cc | 6 +-
src/operator/batch_norm_v1-inl.h | 2 +-
src/operator/bilinear_sampler-inl.h | 4 +-
src/operator/channel_op_common.h | 4 +
src/operator/contrib/adamw-inl.h | 5 +-
src/operator/contrib/adaptive_avg_pooling-inl.h | 6 +-
src/operator/contrib/bilinear_resize-inl.h | 175 ++-
src/operator/contrib/bilinear_resize.cc | 52 +-
src/operator/contrib/bilinear_resize.cu | 37 +-
src/operator/contrib/boolean_mask.cc | 2 +-
src/operator/contrib/bounding_box-inl.h | 11 +-
src/operator/contrib/count_sketch-inl.h | 2 +-
src/operator/contrib/deformable_convolution-inl.h | 14 +-
src/operator/contrib/dgl_graph.cc | 68 +-
src/operator/contrib/fft-inl.h | 2 +-
src/operator/contrib/ifft-inl.h | 2 +-
src/operator/contrib/index_copy-inl.h | 5 +-
src/operator/contrib/multi_proposal-inl.h | 18 +-
src/operator/contrib/multibox_detection-inl.h | 4 +-
src/operator/contrib/multibox_detection.cc | 2 +-
src/operator/contrib/multibox_detection.cu | 2 +-
src/operator/contrib/multibox_prior-inl.h | 12 +-
src/operator/contrib/multibox_prior.cc | 7 +-
src/operator/contrib/multibox_prior.cu | 5 +-
src/operator/contrib/multibox_target-inl.h | 2 +-
src/operator/contrib/multibox_target.cc | 2 +-
src/operator/contrib/multibox_target.cu | 2 +-
src/operator/contrib/optimizer_op.cc | 2 +-
src/operator/contrib/proposal-inl.h | 18 +-
src/operator/contrib/quadratic_op-inl.h | 2 +-
src/operator/contrib/sync_batch_norm-inl.h | 33 +-
src/operator/contrib/tensorrt-inl.h | 79 --
src/operator/contrib/tensorrt.cc | 181 ---
src/operator/contrib/transformer-inl.h | 4 +-
src/operator/control_flow.cc | 328 ++---
src/operator/convolution_v1-inl.h | 10 +-
src/operator/cudnn_rnn-inl.h | 863 ------------
src/operator/custom/custom-inl.h | 45 +-
src/operator/custom/custom.cc | 18 +-
src/operator/image/crop-inl.h | 190 +++
src/operator/image/crop.cc | 85 ++
src/operator/image/image_random-inl.h | 18 +-
src/operator/image/resize-inl.h | 4 +-
src/operator/leaky_relu-inl.h | 27 +-
src/operator/loss_binary_op-inl.h | 2 +-
src/operator/mshadow_op.h | 78 +-
src/operator/mxnet_op.h | 89 +-
src/operator/nn/batch_norm.cc | 2 +-
src/operator/nn/batch_norm.cu | 4 +-
src/operator/nn/concat.cc | 51 +-
src/operator/nn/convolution-inl.h | 18 +-
src/operator/nn/convolution.cc | 85 +-
src/operator/nn/ctc_loss-inl.h | 2 +-
src/operator/nn/cudnn/cudnn_algoreg-inl.h | 2 +-
src/operator/nn/cudnn/cudnn_batch_norm-inl.h | 14 +-
src/operator/nn/cudnn/cudnn_batch_norm.cc | 2 +-
src/operator/nn/cudnn/cudnn_convolution-inl.h | 6 +-
src/operator/nn/cudnn/cudnn_deconvolution-inl.h | 6 +-
src/operator/nn/deconvolution-inl.h | 30 +-
src/operator/nn/deconvolution.cc | 50 +-
src/operator/nn/dropout-inl.h | 7 +-
src/operator/nn/dropout.cc | 4 +-
src/operator/nn/fully_connected.cc | 4 +-
src/operator/nn/im2col.h | 4 +-
src/operator/nn/layer_norm-inl.h | 10 +-
src/operator/nn/layer_norm.cc | 6 +-
src/operator/nn/lrn.cc | 2 +-
src/operator/nn/mkldnn/mkldnn_act-inl.h | 74 +
src/operator/nn/mkldnn/mkldnn_act.cc | 91 +-
src/operator/nn/mkldnn/mkldnn_base-inl.h | 4 +-
src/operator/nn/mkldnn/mkldnn_base.cc | 1 +
src/operator/nn/mkldnn/mkldnn_concat.cc | 12 +-
src/operator/nn/mkldnn/mkldnn_ops-inl.h | 12 +
src/operator/nn/mkldnn/mkldnn_reshape.cc | 194 +++
src/operator/nn/mkldnn/mkldnn_slice.cc | 6 +-
src/operator/nn/mkldnn/mkldnn_sum.cc | 105 +-
src/operator/nn/mkldnn/mkldnn_transpose.cc | 161 +++
src/operator/nn/pooling-inl.h | 14 +-
src/operator/nn/pooling.cc | 6 +-
src/operator/nn/upsampling-inl.h | 4 +-
src/operator/nn/upsampling.cc | 56 +-
src/operator/operator_common.h | 19 +-
src/operator/operator_tune.cc | 2 +
src/operator/operator_util.cc | 2 +-
src/operator/optimizer_op-inl.h | 8 +-
src/operator/pad-inl.h | 2 +-
src/operator/pooling_v1-inl.h | 35 +-
src/operator/quantization/dequantize-inl.h | 66 +-
src/operator/quantization/dequantize.cc | 25 +-
src/operator/quantization/dequantize.cu | 2 +-
.../quantization/mkldnn/mkldnn_dequantize-inl.h | 140 +-
.../quantization/mkldnn/mkldnn_quantize_v2-inl.h | 202 +--
.../quantization/mkldnn/mkldnn_quantized_act.cc | 55 +
.../quantization/mkldnn/mkldnn_quantized_conv.cc | 2 +-
.../mkldnn/mkldnn_quantized_elemwise_add.cc | 206 +++
.../mkldnn/mkldnn_quantized_fully_connected.cc | 4 +-
.../quantization/mkldnn/mkldnn_requantize-inl.h | 91 +-
src/operator/quantization/quantization_utils.h | 79 +-
src/operator/quantization/quantize-inl.h | 4 +-
src/operator/quantization/quantize_graph_pass.cc | 44 +-
src/operator/quantization/quantize_v2-inl.h | 216 ++-
src/operator/quantization/quantize_v2.cc | 21 +-
src/operator/quantization/quantize_v2.cu | 2 +-
src/operator/quantization/quantized_activation.cc | 138 ++
src/operator/quantization/quantized_concat.cc | 18 +-
src/operator/quantization/quantized_conv.cc | 4 +-
src/operator/quantization/quantized_conv.cu | 2 +-
.../quantization/quantized_elemwise_add-inl.h | 58 +
.../quantization/quantized_elemwise_add.cc | 141 ++
src/operator/quantization/quantized_flatten-inl.h | 6 +-
.../quantization/quantized_fully_connected.cc | 8 +-
.../quantization/quantized_fully_connected.cu | 2 +-
src/operator/quantization/quantized_pooling.cc | 4 +-
src/operator/quantization/requantize-inl.h | 25 +-
src/operator/quantization/requantize.cc | 4 +
src/operator/random/multisample_op.h | 2 +-
src/operator/random/sample_multinomial_op.h | 19 +-
src/operator/random/unique_sample_op.h | 2 +-
src/operator/regression_output-inl.h | 2 +-
src/operator/rnn-inl.h | 1466 +++++++++++++++-----
src/operator/rnn.cc | 230 ++-
src/operator/rnn.cu | 21 +-
src/operator/sequence_last-inl.h | 2 +-
src/operator/sequence_reverse-inl.h | 57 +-
src/operator/slice_channel-inl.h | 17 +-
src/operator/softmax_output-inl.h | 12 +-
src/operator/softmax_output.cc | 12 +-
src/operator/spatial_transformer-inl.h | 4 +-
.../{partition_graph.cc => build_subgraph.cc} | 399 +++---
.../subgraph/default_subgraph_property_v2.cc | 84 ++
src/operator/subgraph/mkldnn/mkldnn_conv.cc | 64 +-
src/operator/subgraph/mkldnn/mkldnn_fc.cc | 4 +-
.../mkldnn_post_quantize_align_scale_property.h | 164 +++
..._property.h => mkldnn_post_quantize_property.h} | 63 +-
.../subgraph/mkldnn/mkldnn_subgraph_property.cc | 6 +-
src/operator/subgraph/subgraph_property.h | 176 ++-
.../tensorrt}/nnvm_to_onnx-inl.h | 131 +-
.../{contrib => subgraph/tensorrt}/nnvm_to_onnx.cc | 284 ++--
.../subgraph/tensorrt}/onnx_to_tensorrt.cc | 27 +-
.../subgraph/tensorrt}/onnx_to_tensorrt.h | 18 +-
src/operator/subgraph/tensorrt/tensorrt-inl.h | 240 ++++
src/operator/subgraph/tensorrt/tensorrt.cc | 336 +++++
.../{contrib => subgraph/tensorrt}/tensorrt.cu | 30 +-
src/operator/subgraph_op_common.cc | 4 +-
src/operator/subgraph_op_common.h | 12 +-
src/operator/svm_output-inl.h | 6 +-
src/operator/swapaxis-inl.h | 10 +-
src/operator/tensor/broadcast_reduce-inl.cuh | 74 +-
src/operator/tensor/broadcast_reduce-inl.h | 51 +-
src/operator/tensor/broadcast_reduce_op.h | 346 +++--
src/operator/tensor/broadcast_reduce_op_value.cc | 6 +-
src/operator/tensor/diag_op-inl.h | 12 +-
src/operator/tensor/dot-inl.h | 12 +-
src/operator/tensor/elemwise_binary_broadcast_op.h | 53 +-
src/operator/tensor/elemwise_binary_op_basic.cc | 11 +-
src/operator/tensor/elemwise_unary_op_basic.cc | 10 +-
src/operator/tensor/histogram-inl.h | 14 +-
src/operator/tensor/indexing_op.h | 31 +-
src/operator/tensor/init_op.cc | 11 +
src/operator/tensor/init_op.cu | 3 +
src/operator/tensor/init_op.h | 87 +-
src/operator/tensor/la_op-inl.h | 94 ++
src/operator/tensor/la_op.cc | 231 +++
src/operator/tensor/la_op.cu | 24 +
src/operator/tensor/la_op.h | 70 +-
src/operator/tensor/matrix_op-inl.h | 377 ++---
src/operator/tensor/matrix_op.cc | 68 +-
src/operator/tensor/ordering_op-inl.h | 2 +-
src/operator/tensor/slice-inl.h | 6 +-
src/operator/tensor/sparse_retain-inl.h | 2 +-
src/operator/tensor/square_sum-inl.h | 9 +-
.../dequantize.cu => profiler/nvtx.cc} | 15 +-
.../nvtx.h} | 45 +-
src/profiler/profiler.h | 26 +
src/profiler/storage_profiler.h | 6 +-
src/resource.cc | 14 +-
src/storage/pooled_storage_manager.h | 16 +-
src/storage/storage.cc | 12 +-
.../cpp/engine/engine_shutdown_test.cc | 31 +-
tests/cpp/engine/threaded_engine_test.cc | 94 +-
tests/cpp/include/test_mkldnn.h | 18 +-
tests/cpp/include/test_util.h | 4 +-
tests/cpp/misc/serialization.cc | 68 -
tests/cpp/operator/batchnorm_test.cc | 4 +-
tests/cpp/operator/mkldnn_operator_test.cc | 6 +-
tests/nightly/JenkinsfileForBinaries | 38 +-
tests/nightly/README.md | 10 +-
.../nightly/apache_rat_license_check/rat-excludes | 1 +
tests/nightly/test_large_array.py | 58 +-
tests/python/gpu/test_gluon_contrib_gpu.py | 63 +
tests/python/gpu/test_gluon_gpu.py | 269 ++--
tests/python/gpu/test_gluon_transforms.py | 4 +-
tests/python/gpu/test_operator_gpu.py | 23 +-
tests/python/mkl/test_mkldnn.py | 56 +-
tests/python/mkl/test_subgraph.py | 97 +-
.../python/profiling/simple_forward.py | 35 +-
tests/python/profiling/test_nvtx.py | 52 +
tests/python/quantization/test_quantization.py | 304 +++-
tests/python/tensorrt/lenet5_train.py | 2 +
tests/python/tensorrt/test_cvnets.py | 34 +-
tests/python/tensorrt/test_cycle.py | 69 -
tests/python/tensorrt/test_resnet18.py | 42 +-
tests/python/tensorrt/test_tensorrt_lenet5.py | 91 +-
tests/python/tensorrt/test_training_warning.py | 70 -
tests/python/train/test_dtype.py | 66 +
tests/python/unittest/test_contrib_control_flow.py | 30 +-
tests/python/unittest/test_contrib_operator.py | 14 +-
tests/python/unittest/test_exc_handling.py | 134 +-
tests/python/unittest/test_gluon.py | 140 +-
tests/python/unittest/test_gluon_data_vision.py | 75 +-
tests/python/unittest/test_gluon_rnn.py | 6 +-
tests/python/unittest/test_image.py | 13 +
tests/python/unittest/test_infer_shape.py | 16 +
tests/python/unittest/test_metric.py | 82 ++
tests/python/unittest/test_module.py | 28 +
tests/python/unittest/test_ndarray.py | 54 +-
tests/python/unittest/test_operator.py | 808 +++++++++--
tests/python/unittest/test_random.py | 13 +-
tests/python/unittest/test_sparse_ndarray.py | 1 +
tests/python/unittest/test_subgraph_op.py | 89 +-
tests/python/unittest/test_symbol.py | 5 +
tools/caffe_converter/compare_layers.py | 5 -
tools/caffe_converter/test_converter.py | 2 -
tools/dependencies/LICENSE.binary.dependencies | 289 ++++
tools/dependencies/eigen.sh | 1 +
tools/dependencies/libpng.sh | 2 +-
tools/dependencies/libtiff.sh | 6 +-
tools/dependencies/libturbojpeg.sh | 2 +-
tools/dependencies/openssl.sh | 2 +-
tools/pip/setup.py | 19 +-
tools/staticbuild/build.sh | 8 +
tools/staticbuild/build_lib.sh | 10 +-
576 files changed, 20185 insertions(+), 8162 deletions(-)
delete mode 160000 3rdparty/cub
create mode 160000 3rdparty/nvidia_cub
copy ci/docker/{Dockerfile.build.ubuntu_gpu => Dockerfile.build.ubuntu_gpu_cu100} (93%)
copy ci/docker/{Dockerfile.build.ubuntu_gpu => Dockerfile.build.ubuntu_gpu_cu80} (97%)
copy ci/docker/{Dockerfile.build.ubuntu_gpu => Dockerfile.build.ubuntu_gpu_cu90} (93%)
rename ci/docker/{Dockerfile.build.ubuntu_gpu => Dockerfile.build.ubuntu_gpu_cu92} (94%)
create mode 100755 ci/docker/install/centos7_cudnn.sh
copy ci/docker/install/{ubuntu_python.sh => ubuntu_cudnn.sh} (56%)
copy ci/docker/install/ubuntu_nvidia.sh => cmake/Modules/FindNVTX.cmake (53%)
mode change 100755 => 100644
create mode 100644 contrib/clojure-package/examples/bert/.gitignore
create mode 100644 contrib/clojure-package/examples/bert/README.md
create mode 100644 contrib/clojure-package/examples/bert/fine-tune-bert.ipynb
create mode 100644 contrib/clojure-package/examples/bert/fine-tune-bert.md
create mode 100755 contrib/clojure-package/examples/bert/get_bert_data.sh
copy contrib/clojure-package/examples/{infer/objectdetector => bert}/project.clj (58%)
create mode 100644 contrib/clojure-package/examples/bert/squad-samples.edn
create mode 100644 contrib/clojure-package/examples/bert/src/bert/bert_sentence_classification.clj
create mode 100644 contrib/clojure-package/examples/bert/src/bert/infer.clj
create mode 100644 contrib/clojure-package/examples/bert/src/bert/util.clj
create mode 100644 contrib/clojure-package/examples/bert/test/bert/bert_sentence_classification_test.clj
create mode 100644 contrib/clojure-package/examples/bert/test/bert/infer_test.clj
delete mode 100644 contrib/clojure-package/examples/infer/objectdetector/src/infer/draw.clj
copy contrib/clojure-package/src/org/apache/clojure_mxnet/{callback.clj => ndarray_api.clj} (53%)
copy contrib/clojure-package/src/org/apache/clojure_mxnet/{callback.clj => ndarray_random_api.clj} (59%)
create mode 100644 contrib/clojure-package/src/org/apache/clojure_mxnet/symbol_api.clj
create mode 100644 contrib/clojure-package/src/org/apache/clojure_mxnet/symbol_random_api.clj
create mode 100644 contrib/clojure-package/test/good-test-ndarray-api.clj
create mode 100644 contrib/clojure-package/test/good-test-ndarray-random-api.clj
create mode 100644 contrib/clojure-package/test/good-test-symbol-api.clj
create mode 100644 contrib/clojure-package/test/good-test-symbol-random-api.clj
create mode 100644 contrib/clojure-package/test/org/apache/clojure_mxnet/ndarray_api_test.clj
create mode 100644 contrib/clojure-package/test/org/apache/clojure_mxnet/symbol_api_test.clj
create mode 100644 cpp-package/example/test_kvstore.cpp
create mode 100644 docs/tutorials/python/profiler_nvprof.png
create mode 100644 docs/tutorials/python/profiler_nvprof_zoomed.png
create mode 100644 docs/tutorials/python/profiler_winograd.png
rename example/gluon/{super_resolution => }/data.py (100%)
copy python/mxnet/gluon/contrib/{ => cnn}/__init__.py (82%)
create mode 100644 python/mxnet/gluon/contrib/cnn/conv_layers.py
create mode 100644 scala-package/core/src/main/scala/org/apache/mxnet/NumpyScope.scala
copy scala-package/core/src/main/scala/org/apache/mxnet/{util/OptionConversion.scala => javaapi/Layout.scala} (63%)
rename scala-package/{macros/src/main/scala/org/apache/mxnet/utils/OperatorBuildUtils.scala => core/src/test/scala/org/apache/mxnet/NumpyScopeSuite.scala} (58%)
copy cpp-package/tests/travis/setup.sh => scala-package/examples/scripts/infer/bert/get_bert_data.sh (58%)
copy cpp-package/tests/travis/setup.sh => scala-package/examples/scripts/infer/bert/run_bert_qa_example.sh (70%)
create mode 100644 scala-package/examples/src/main/java/org/apache/mxnetexamples/javaapi/infer/bert/BertDataParser.java
create mode 100644 scala-package/examples/src/main/java/org/apache/mxnetexamples/javaapi/infer/bert/BertQA.java
create mode 100644 scala-package/examples/src/main/java/org/apache/mxnetexamples/javaapi/infer/bert/README.md
create mode 100644 scala-package/examples/src/test/java/org/apache/mxnetexamples/javaapi/infer/predictor/BertExampleTest.java
delete mode 100644 src/common/serialization.h
delete mode 100644 src/executor/tensorrt_pass.cc
delete mode 100644 src/executor/trt_graph_executor.cc
delete mode 100644 src/executor/trt_graph_executor.h
delete mode 100644 src/operator/contrib/tensorrt-inl.h
delete mode 100644 src/operator/contrib/tensorrt.cc
delete mode 100644 src/operator/cudnn_rnn-inl.h
create mode 100644 src/operator/image/crop-inl.h
create mode 100644 src/operator/image/crop.cc
create mode 100644 src/operator/nn/mkldnn/mkldnn_act-inl.h
create mode 100644 src/operator/nn/mkldnn/mkldnn_reshape.cc
create mode 100644 src/operator/nn/mkldnn/mkldnn_transpose.cc
create mode 100644 src/operator/quantization/mkldnn/mkldnn_quantized_act.cc
create mode 100644 src/operator/quantization/mkldnn/mkldnn_quantized_elemwise_add.cc
create mode 100644 src/operator/quantization/quantized_activation.cc
create mode 100644 src/operator/quantization/quantized_elemwise_add-inl.h
create mode 100644 src/operator/quantization/quantized_elemwise_add.cc
rename src/operator/subgraph/{partition_graph.cc => build_subgraph.cc} (68%)
create mode 100644 src/operator/subgraph/default_subgraph_property_v2.cc
create mode 100644 src/operator/subgraph/mkldnn/mkldnn_post_quantize_align_scale_property.h
rename src/operator/subgraph/mkldnn/{mkldnn_conv_post_quantize_property.h => mkldnn_post_quantize_property.h} (68%)
rename src/operator/{contrib => subgraph/tensorrt}/nnvm_to_onnx-inl.h (61%)
rename src/operator/{contrib => subgraph/tensorrt}/nnvm_to_onnx.cc (68%)
rename src/{executor => operator/subgraph/tensorrt}/onnx_to_tensorrt.cc (89%)
rename src/{executor => operator/subgraph/tensorrt}/onnx_to_tensorrt.h (88%)
create mode 100644 src/operator/subgraph/tensorrt/tensorrt-inl.h
create mode 100644 src/operator/subgraph/tensorrt/tensorrt.cc
rename src/operator/{contrib => subgraph/tensorrt}/tensorrt.cu (69%)
copy src/{operator/quantization/dequantize.cu => profiler/nvtx.cc} (73%)
copy src/{operator/subgraph/mkldnn/mkldnn_subgraph_property.cc => profiler/nvtx.h} (58%)
copy src/operator/quantization/quantize_v2.cu => tests/cpp/engine/engine_shutdown_test.cc (54%)
delete mode 100644 tests/cpp/misc/serialization.cc
create mode 100644 tests/python/gpu/test_gluon_contrib_gpu.py
copy ci/docker/Dockerfile.build.ubuntu_base_gpu => tests/python/profiling/simple_forward.py (54%)
create mode 100644 tests/python/profiling/test_nvtx.py
delete mode 100644 tests/python/tensorrt/test_cycle.py
delete mode 100644 tests/python/tensorrt/test_training_warning.py
create mode 100644 tools/dependencies/LICENSE.binary.dependencies