You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by zh...@apache.org on 2021/09/13 13:30:44 UTC

[incubator-mxnet] branch master updated (bcdfa61 -> e359bcd)

This is an automated email from the ASF dual-hosted git repository.

zhasheng pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git.


    from bcdfa61  fix macos cmake with TVM_OP ON (#20570)
     add e359bcd  [Master] Clang-formatter: only src/ directory (#20571)

No new revisions were added by this update.

Summary of changes:
 src/api/_api_internal/_api_internal.cc             |  110 +-
 src/api/cached_op_api.cc                           |  201 +-
 src/api/operator/numpy/linalg/np_det.cc            |   17 +-
 src/api/operator/numpy/linalg/np_eig.cc            |   34 +-
 src/api/operator/numpy/linalg/np_eigvals.cc        |   56 +-
 src/api/operator/numpy/linalg/np_gesvd.cc          |   18 +-
 src/api/operator/numpy/linalg/np_inv.cc            |   17 +-
 src/api/operator/numpy/linalg/np_lstsq.cc          |   30 +-
 src/api/operator/numpy/linalg/np_matrix_rank.cc    |   50 +-
 src/api/operator/numpy/linalg/np_norm.cc           |   17 +-
 src/api/operator/numpy/linalg/np_pinv.cc           |   33 +-
 src/api/operator/numpy/linalg/np_potrf.cc          |   34 +-
 src/api/operator/numpy/linalg/np_qr.cc             |   18 +-
 src/api/operator/numpy/linalg/np_slogdet.cc        |   27 +-
 src/api/operator/numpy/linalg/np_solve.cc          |   17 +-
 src/api/operator/numpy/linalg/np_tensorinv.cc      |   34 +-
 src/api/operator/numpy/linalg/np_tensorsolve.cc    |   50 +-
 src/api/operator/numpy/np_bincount_op.cc           |   57 +-
 .../numpy/np_broadcast_reduce_op_boolean.cc        |   37 +-
 .../operator/numpy/np_broadcast_reduce_op_index.cc |  124 +-
 .../operator/numpy/np_broadcast_reduce_op_value.cc |  137 +-
 src/api/operator/numpy/np_cross.cc                 |   23 +-
 src/api/operator/numpy/np_cumsum.cc                |   68 +-
 src/api/operator/numpy/np_delete_op.cc             |  135 +-
 src/api/operator/numpy/np_diff_op.cc               |   19 +-
 src/api/operator/numpy/np_dot_op.cc                |   16 +-
 src/api/operator/numpy/np_ediff1d_op.cc            |   84 +-
 src/api/operator/numpy/np_einsum_op.cc             |   70 +-
 .../numpy/np_elemwise_broadcast_logic_op.cc        |   84 +-
 src/api/operator/numpy/np_elemwise_broadcast_op.cc |  189 +-
 .../numpy/np_elemwise_broadcast_op_extended_sec.cc |   24 +-
 .../operator/numpy/np_elemwise_unary_op_basic.cc   |   87 +-
 src/api/operator/numpy/np_fill_diagonal_op.cc      |   68 +-
 src/api/operator/numpy/np_histogram_op.cc          |   82 +-
 src/api/operator/numpy/np_init_op.cc               |  545 ++--
 src/api/operator/numpy/np_insert_op.cc             |  245 +-
 src/api/operator/numpy/np_interp_op.cc             |   96 +-
 src/api/operator/numpy/np_kron.cc                  |   19 +-
 src/api/operator/numpy/np_matmul_op.cc             |   39 +-
 src/api/operator/numpy/np_matrix_op.cc             | 1015 ++++----
 src/api/operator/numpy/np_memory_op.cc             |   26 +-
 src/api/operator/numpy/np_moments_op.cc            |  140 +-
 src/api/operator/numpy/np_nan_to_num_op.cc         |   70 +-
 src/api/operator/numpy/np_nonzero_op.cc            |   26 +-
 src/api/operator/numpy/np_ordering_op.cc           |   63 +-
 src/api/operator/numpy/np_pad_op.cc                |   31 +-
 src/api/operator/numpy/np_percentile_op.cc         |   97 +-
 src/api/operator/numpy/np_polynomial_op.cc         |   26 +-
 src/api/operator/numpy/np_repeat_op.cc             |   44 +-
 src/api/operator/numpy/np_tensordot_op.cc          |   40 +-
 src/api/operator/numpy/np_trace_op.cc              |   17 +-
 src/api/operator/numpy/np_tri_op.cc                |   20 +-
 src/api/operator/numpy/np_tril_op.cc               |   17 +-
 src/api/operator/numpy/np_triu_op.cc               |   15 +-
 src/api/operator/numpy/np_unique_op.cc             |   64 +-
 src/api/operator/numpy/np_where_op.cc              |   55 +-
 src/api/operator/numpy/np_window_op.cc             |   40 +-
 src/api/operator/numpy/random/np_choice_op.cc      |  104 +-
 src/api/operator/numpy/random/np_exponential_op.cc |   81 +-
 src/api/operator/numpy/random/np_laplace_op.cc     |  116 +-
 .../operator/numpy/random/np_location_scale_op.cc  |  226 +-
 src/api/operator/numpy/random/np_multinomial_op.cc |   78 +-
 src/api/operator/numpy/random/np_pareto_op.cc      |   81 +-
 src/api/operator/numpy/random/np_power_op.cc       |   81 +-
 src/api/operator/numpy/random/np_rayleigh_op.cc    |   81 +-
 src/api/operator/numpy/random/np_weibull_op.cc     |   81 +-
 .../operator/numpy_extension/npx_activation_op.cc  |   37 +-
 .../operator/numpy_extension/npx_arange_like_op.cc |   95 +-
 .../operator/numpy_extension/npx_batch_dot_op.cc   |   74 +-
 .../operator/numpy_extension/npx_batch_norm_op.cc  |  111 +-
 .../numpy_extension/npx_broadcast_like_op.cc       |   79 +-
 .../numpy_extension/npx_control_flow_op.cc         |  259 +-
 .../operator/numpy_extension/npx_convolution_op.cc |  238 +-
 .../numpy_extension/npx_deconvolution_op.cc        |  289 ++-
 src/api/operator/numpy_extension/npx_dropout_op.cc |   66 +-
 .../operator/numpy_extension/npx_embedding_op.cc   |   62 +-
 .../numpy_extension/npx_fully_connected_op.cc      |   65 +-
 .../operator/numpy_extension/npx_group_norm_op.cc  |   71 +-
 .../operator/numpy_extension/npx_layer_norm_op.cc  |   95 +-
 .../operator/numpy_extension/npx_leaky_relu_op.cc  |  101 +-
 src/api/operator/numpy_extension/npx_one_hot_op.cc |   66 +-
 src/api/operator/numpy_extension/npx_pick_op.cc    |    9 +-
 src/api/operator/numpy_extension/npx_pooling_op.cc |  183 +-
 src/api/operator/numpy_extension/npx_rnn_op.cc     |   18 +-
 src/api/operator/numpy_extension/npx_softmax_op.cc |  378 +--
 src/api/operator/numpy_extension/npx_topk_op.cc    |   13 +-
 src/api/operator/op_utils.cc                       |    4 +-
 src/api/operator/op_utils.h                        |    4 +-
 src/api/operator/random/np_gamma_op.cc             |   22 +-
 src/api/operator/random/np_normal_op.cc            |  130 +-
 src/api/operator/random/np_randint_op.cc           |   72 +-
 src/api/operator/random/np_uniform_op.cc           |  129 +-
 src/api/operator/random/shuffle_op.cc              |   52 +-
 .../elemwise_binary_broadcast_op_extended.cc       |   31 +-
 src/api/operator/tensor/indexing_op.cc             |   17 +-
 src/api/operator/tensor/matrix_op.cc               |   26 +-
 src/api/operator/ufunc_helper.cc                   |  109 +-
 src/api/operator/ufunc_helper.h                    |    4 +-
 src/api/operator/utils.cc                          |   28 +-
 src/api/operator/utils.h                           |   10 +-
 src/base.cc                                        |    4 +-
 src/c_api/c_api.cc                                 | 2385 ++++++++++--------
 src/c_api/c_api_common.h                           |   35 +-
 src/c_api/c_api_function.cc                        |  154 +-
 src/c_api/c_api_ndarray.cc                         |  182 +-
 src/c_api/c_api_profile.cc                         |  487 ++--
 src/c_api/c_api_symbolic.cc                        |  947 ++++---
 src/c_api/c_api_test.cc                            |   33 +-
 src/common/cuda/rtc.cc                             |  155 +-
 src/common/cuda/rtc.h                              |   10 +-
 src/common/cuda/rtc/reducer-inl.h                  |    2 -
 src/common/cuda/rtc/util-inl.h                     |    6 +-
 src/common/cuda/rtc/vectorization-inl.h            |  103 +-
 src/common/cuda/utils.cc                           |   10 +-
 src/common/cuda/utils.h                            |  389 ++-
 src/common/exec_utils.cc                           |   32 +-
 src/common/exec_utils.h                            |  192 +-
 src/common/lazy_alloc_array.h                      |   34 +-
 src/common/object_pool.h                           |    8 +-
 src/common/random_generator.cu                     |   39 +-
 src/common/rtc.cc                                  |  137 +-
 src/common/static_array.h                          |    6 +-
 src/common/tensor_inspector.h                      |  306 ++-
 src/common/utils.cc                                |   58 +-
 src/common/utils.cu                                |   18 +-
 src/common/utils.h                                 |  338 +--
 src/engine/engine.cc                               |   15 +-
 src/engine/engine_impl.h                           |   12 +-
 src/engine/naive_engine.cc                         |  154 +-
 src/engine/openmp.cc                               |   14 +-
 src/engine/openmp.h                                |   22 +-
 src/engine/stream_manager.h                        |   25 +-
 src/engine/thread_pool.h                           |    8 +-
 src/engine/threaded_engine.cc                      |  239 +-
 src/engine/threaded_engine.h                       |  145 +-
 src/engine/threaded_engine_perdevice.cc            |  136 +-
 src/engine/threaded_engine_pooled.cc               |   43 +-
 src/imperative/attach_op_execs_pass.cc             |  131 +-
 src/imperative/attach_op_resource_pass.cc          |   39 +-
 src/imperative/cached_op.cc                        |  831 +++---
 src/imperative/cached_op.h                         |  405 ++-
 src/imperative/cached_op_threadsafe.cc             |  205 +-
 src/imperative/cached_op_threadsafe.h              |   52 +-
 src/imperative/eliminate_common_expr_pass.cc       |   56 +-
 src/imperative/exec_pass.h                         |   53 +-
 src/imperative/imperative.cc                       |  428 ++--
 src/imperative/imperative_utils.cc                 |  133 +-
 src/imperative/imperative_utils.h                  |  775 +++---
 src/imperative/infer_graph_attr_pass.cc            |  283 ++-
 src/imperative/inplace_addto_detect_pass.cc        |   45 +-
 src/imperative/naive_cached_op.cc                  |   61 +-
 src/imperative/naive_cached_op.h                   |   49 +-
 src/imperative/pointwise_fusion_pass.cc            |  165 +-
 src/imperative/simple_partition_pass.cc            |   78 +-
 src/imperative/simple_partition_pass.h             |   69 +-
 src/initialize.cc                                  |  148 +-
 src/initialize.h                                   |   10 +-
 src/io/batchify.cc                                 |  406 +--
 src/io/dataloader.cc                               |   65 +-
 src/io/dataset.cc                                  |  281 ++-
 src/io/image_aug_default.cc                        |  534 ++--
 src/io/image_augmenter.h                           |   19 +-
 src/io/image_det_aug_default.cc                    |  421 ++--
 src/io/image_io.cc                                 |  286 +--
 src/io/image_iter_common.h                         |  322 +--
 src/io/image_recordio.h                            |   23 +-
 src/io/inst_vector.h                               |   51 +-
 src/io/iter_batchloader.h                          |   57 +-
 src/io/iter_csv.cc                                 |   60 +-
 src/io/iter_image_det_recordio.cc                  |  320 +--
 src/io/iter_image_recordio.cc                      |  208 +-
 src/io/iter_image_recordio_2.cc                    |  718 +++---
 src/io/iter_libsvm.cc                              |   81 +-
 src/io/iter_mnist.cc                               |   96 +-
 src/io/iter_normalize.h                            |  128 +-
 src/io/iter_prefetcher.h                           |   90 +-
 src/io/iter_sampler.cc                             |   60 +-
 src/io/iter_sparse.h                               |    2 +-
 src/io/iter_sparse_batchloader.h                   |   55 +-
 src/io/iter_sparse_prefetcher.h                    |  109 +-
 src/io/opencv_compatibility.h                      |    6 +-
 src/ir/expr.cc                                     |   21 +-
 src/kvstore/comm.h                                 |  377 +--
 src/kvstore/comm_tree.h                            |  194 +-
 src/kvstore/gpu_topology.h                         |  283 ++-
 src/kvstore/gradient_compression-inl.h             |  155 +-
 src/kvstore/gradient_compression.cc                |  180 +-
 src/kvstore/gradient_compression.cu                |   12 +-
 src/kvstore/gradient_compression.h                 |   50 +-
 src/kvstore/kvstore.cc                             |    8 +-
 src/kvstore/kvstore_dist.h                         |  432 ++--
 src/kvstore/kvstore_dist_server.h                  |  282 ++-
 src/kvstore/kvstore_local.h                        |  191 +-
 src/kvstore/kvstore_nccl.h                         |  285 ++-
 src/kvstore/kvstore_utils.cc                       |   12 +-
 src/kvstore/kvstore_utils.cu                       |   63 +-
 src/kvstore/kvstore_utils.h                        |    5 +-
 src/kvstore/p3store_dist.h                         |  238 +-
 src/lib_api.cc                                     | 1166 +++++----
 src/libinfo.cc                                     |   77 +-
 src/ndarray/ndarray_function-inl.h                 |  440 ++--
 src/ndarray/ndarray_function.cc                    |  150 +-
 src/ndarray/ndarray_function.cu                    |  204 +-
 src/ndarray/ndarray_function.h                     |   84 +-
 src/nnvm/amp_infer_unknown.cc                      |   58 +-
 src/nnvm/error.h                                   |    3 +-
 src/nnvm/gradient.cc                               |  255 +-
 src/nnvm/graph_algorithm.h                         |   28 +-
 src/nnvm/graph_editor.cc                           |   22 +-
 src/nnvm/legacy_json_util.cc                       |  218 +-
 src/nnvm/legacy_op_util.cc                         |  241 +-
 src/nnvm/low_precision_pass.cc                     |  198 +-
 src/nnvm/node_op_util.h                            |   80 +-
 src/nnvm/plan_memory.cc                            |  167 +-
 src/nnvm/tvm_bridge.cc                             |   37 +-
 src/operator/all_finite-inl.h                      |   26 +-
 src/operator/all_finite.cc                         |  167 +-
 src/operator/all_finite.cu                         |   40 +-
 src/operator/amp_graph_pass.cc                     |    8 +-
 src/operator/bilinear_sampler-inl.h                |  151 +-
 src/operator/bilinear_sampler.cc                   |  214 +-
 src/operator/bilinear_sampler.cu                   |  218 +-
 src/operator/c_lapack_api.cc                       |  244 +-
 src/operator/c_lapack_api.h                        | 1561 +++++++-----
 src/operator/channel_op_common.h                   |   53 +-
 src/operator/contrib/adabelief-inl.h               |  395 +--
 src/operator/contrib/adabelief.cc                  |  219 +-
 src/operator/contrib/adabelief.cu                  |   19 +-
 src/operator/contrib/adamw-inl.h                   |  397 +--
 src/operator/contrib/adamw.cc                      |  217 +-
 src/operator/contrib/adamw.cu                      |   32 +-
 src/operator/contrib/adaptive_avg_pooling-inl.h    |   80 +-
 src/operator/contrib/adaptive_avg_pooling.cc       |  171 +-
 src/operator/contrib/adaptive_avg_pooling.cu       |  125 +-
 src/operator/contrib/allclose_op-inl.h             |  109 +-
 src/operator/contrib/allclose_op.cc                |   54 +-
 src/operator/contrib/allclose_op.cu                |   30 +-
 src/operator/contrib/batch_norm_relu.cc            |  257 +-
 src/operator/contrib/bilinear_resize-inl.h         |  251 +-
 src/operator/contrib/bilinear_resize.cc            |  177 +-
 src/operator/contrib/bilinear_resize.cu            |  286 +--
 src/operator/contrib/boolean_mask-inl.h            |   53 +-
 src/operator/contrib/boolean_mask.cc               |  149 +-
 src/operator/contrib/boolean_mask.cu               |  147 +-
 src/operator/contrib/bounding_box-common.h         |  120 +-
 src/operator/contrib/bounding_box-inl.h            | 1055 ++++----
 src/operator/contrib/bounding_box.cc               |  226 +-
 src/operator/contrib/bounding_box.cu               |  701 +++---
 src/operator/contrib/count_sketch-inl.h            |  219 +-
 src/operator/contrib/count_sketch.cc               |   35 +-
 src/operator/contrib/count_sketch.cu               |  165 +-
 .../contrib/deformable_psroi_pooling-inl.h         |  186 +-
 src/operator/contrib/deformable_psroi_pooling.cc   |  711 +++---
 src/operator/contrib/deformable_psroi_pooling.cu   |  785 +++---
 src/operator/contrib/dgl_graph-inl.h               |    8 +-
 src/operator/contrib/dgl_graph.cc                  |  807 +++---
 src/operator/contrib/dgl_graph.cu                  |    2 +-
 src/operator/contrib/dynamic_shape_ops-inl.h       |   41 +-
 src/operator/contrib/dynamic_shape_ops.cc          |   52 +-
 src/operator/contrib/erfinv-inl.h                  |  137 +-
 src/operator/contrib/fft-inl.h                     |  200 +-
 src/operator/contrib/fft.cc                        |   17 +-
 src/operator/contrib/fft.cu                        |    8 +-
 src/operator/contrib/gradient_multiplier_op.cc     |   53 +-
 src/operator/contrib/gradient_multiplier_op.cu     |   10 +-
 src/operator/contrib/hawkes_ll-inl.h               |  375 ++-
 src/operator/contrib/hawkes_ll.cc                  |   76 +-
 src/operator/contrib/hawkes_ll.cu                  |    5 +-
 src/operator/contrib/index_array-inl.h             |   33 +-
 src/operator/contrib/index_array.cc                |  135 +-
 src/operator/contrib/index_array.cu                |   40 +-
 src/operator/contrib/index_copy-inl.h              |    8 +-
 src/operator/contrib/index_copy.cc                 |  114 +-
 src/operator/contrib/index_copy.cu                 |   72 +-
 .../contrib/intgemm/intgemm_fully_connected_op.cc  |  217 +-
 src/operator/contrib/intgemm/max_absolute_op.cc    |   55 +-
 src/operator/contrib/intgemm/prepare_data_op.cc    |   72 +-
 src/operator/contrib/intgemm/prepare_weight_op.cc  |  136 +-
 src/operator/contrib/intgemm/take_weight_op.cc     |   68 +-
 src/operator/contrib/krprod.cc                     |   68 +-
 src/operator/contrib/krprod.h                      |  150 +-
 src/operator/contrib/mrcnn_mask_target-inl.h       |   53 +-
 src/operator/contrib/mrcnn_mask_target.cu          |  203 +-
 src/operator/contrib/multi_lamb-inl.h              |  241 +-
 src/operator/contrib/multi_lamb.cc                 |  262 +-
 src/operator/contrib/multi_lamb.cu                 |  207 +-
 src/operator/contrib/multi_lans-inl.h              |  261 +-
 src/operator/contrib/multi_lans.cc                 |  282 ++-
 src/operator/contrib/multi_lans.cu                 |  241 +-
 src/operator/contrib/multi_lars-inl.h              |   71 +-
 src/operator/contrib/multi_lars.cc                 |   38 +-
 src/operator/contrib/multi_lars.cu                 |    3 +-
 src/operator/contrib/multi_proposal-inl.h          |  115 +-
 src/operator/contrib/multi_proposal.cc             |  299 ++-
 src/operator/contrib/multi_proposal.cu             |  309 +--
 src/operator/contrib/multi_sum_sq-inl.h            |   39 +-
 src/operator/contrib/multi_sum_sq.cc               |   85 +-
 src/operator/contrib/multi_sum_sq.cu               |  107 +-
 src/operator/contrib/multibox_detection-inl.h      |  148 +-
 src/operator/contrib/multibox_detection.cc         |  114 +-
 src/operator/contrib/multibox_detection.cu         |  174 +-
 src/operator/contrib/multibox_prior-inl.h          |  138 +-
 src/operator/contrib/multibox_prior.cc             |   61 +-
 src/operator/contrib/multibox_prior.cu             |  108 +-
 src/operator/contrib/multibox_target-inl.h         |  195 +-
 src/operator/contrib/multibox_target.cc            |  191 +-
 src/operator/contrib/multibox_target.cu            |  292 ++-
 src/operator/contrib/nn/deformable_im2col.h        |  276 +-
 .../contrib/nn/modulated_deformable_im2col.h       |  174 +-
 src/operator/contrib/nnz.cc                        |   67 +-
 src/operator/contrib/optimizer_op-inl.h            |  146 +-
 src/operator/contrib/optimizer_op.cc               |   41 +-
 src/operator/contrib/optimizer_op.cu               |    2 +-
 src/operator/contrib/preloaded_multi_sgd-inl.h     |  221 +-
 src/operator/contrib/preloaded_multi_sgd.cc        |  326 +--
 src/operator/contrib/preloaded_multi_sgd.cu        |   12 +-
 src/operator/contrib/proposal-inl.h                |  115 +-
 src/operator/contrib/proposal.cc                   |  235 +-
 src/operator/contrib/proposal.cu                   |  286 +--
 src/operator/contrib/psroi_pooling-inl.h           |  105 +-
 src/operator/contrib/psroi_pooling.cc              |  293 ++-
 src/operator/contrib/psroi_pooling.cu              |  304 +--
 src/operator/contrib/quadratic_op-inl.h            |  123 +-
 src/operator/contrib/quadratic_op.cc               |   50 +-
 src/operator/contrib/quadratic_op.cu               |    6 +-
 src/operator/contrib/reset_arrays-inl.h            |   34 +-
 src/operator/contrib/reset_arrays.cc               |   65 +-
 src/operator/contrib/reset_arrays.cu               |    7 +-
 src/operator/contrib/roi_align-inl.h               |   44 +-
 src/operator/contrib/roi_align.cc                  |  538 ++--
 src/operator/contrib/roi_align.cu                  |  358 ++-
 src/operator/contrib/rroi_align-inl.h              |   26 +-
 src/operator/contrib/rroi_align.cc                 |  291 ++-
 src/operator/contrib/stes_op.cc                    |   61 +-
 src/operator/contrib/stes_op.cu                    |   54 +-
 src/operator/contrib/stes_op.h                     |   46 +-
 src/operator/contrib/sync_batch_norm-inl.h         |  338 +--
 src/operator/contrib/sync_batch_norm.cc            |   57 +-
 src/operator/contrib/sync_batch_norm.cu            |    7 +-
 src/operator/contrib/transformer-inl.h             |  236 +-
 src/operator/contrib/transformer.cc                |  892 +++----
 src/operator/contrib/transformer.cu                |  497 ++--
 src/operator/contrib/tvmop/dot.cc                  |   52 +-
 src/operator/contrib/tvmop/ufunc.cc                |  104 +-
 src/operator/control_flow.cc                       |  852 +++----
 src/operator/correlation-inl.h                     |  258 +-
 src/operator/correlation.cc                        |  243 +-
 src/operator/correlation.cu                        | 1043 +++++---
 src/operator/crop-inl.h                            |  172 +-
 src/operator/crop.cc                               |   16 +-
 src/operator/crop.cu                               |    4 +-
 src/operator/cross_device_copy.cc                  |   38 +-
 src/operator/cudnn_bilinear_sampler-inl.h          |  108 +-
 src/operator/cudnn_lrn-inl.h                       |   76 +-
 src/operator/cudnn_spatial_transformer-inl.h       |  130 +-
 src/operator/custom/custom-inl.h                   |   50 +-
 src/operator/custom/custom.cc                      |  342 +--
 src/operator/custom/native_op-inl.h                |  145 +-
 src/operator/custom/native_op.cc                   |   12 +-
 src/operator/custom/native_op.cu                   |    4 +-
 src/operator/custom/ndarray_op-inl.h               |   96 +-
 src/operator/custom/ndarray_op.cc                  |   71 +-
 src/operator/deformable_convolution-inl.h          |  384 +--
 src/operator/deformable_convolution.cc             |   35 +-
 src/operator/deformable_convolution.cu             |   24 +-
 src/operator/elemwise_op_common.h                  |  166 +-
 src/operator/fusion/fused_op-inl.h                 |  355 ++-
 src/operator/fusion/fused_op.cc                    |  195 +-
 src/operator/fusion/fused_op.cu                    |  452 ++--
 src/operator/fusion/fused_op.h                     |  119 +-
 src/operator/grid_generator-inl.h                  |  246 +-
 src/operator/grid_generator.cc                     |   17 +-
 src/operator/grid_generator.cu                     |   10 +-
 src/operator/identity_attach_KL_sparse_reg-inl.h   |  116 +-
 src/operator/identity_attach_KL_sparse_reg.cc      |   31 +-
 src/operator/identity_attach_KL_sparse_reg.cu      |    6 +-
 src/operator/image/crop-inl.h                      |  466 ++--
 src/operator/image/crop.cc                         |  158 +-
 src/operator/image/crop.cu                         |   49 +-
 src/operator/image/image_random-inl.h              |  931 ++++---
 src/operator/image/image_random.cc                 |  224 +-
 src/operator/image/image_random.cu                 |  354 ++-
 src/operator/image/image_utils.h                   |   44 +-
 src/operator/image/resize-inl.h                    |  161 +-
 src/operator/image/resize.cc                       |   58 +-
 src/operator/image/resize.cu                       |   32 +-
 src/operator/instance_norm-inl.h                   |  148 +-
 src/operator/instance_norm.cc                      |   83 +-
 src/operator/instance_norm.cu                      |    7 +-
 src/operator/l2_normalization-inl.h                |  256 +-
 src/operator/l2_normalization.cc                   |   83 +-
 src/operator/l2_normalization.cu                   |    8 +-
 src/operator/leaky_relu-inl.h                      |  293 ++-
 src/operator/leaky_relu.cc                         |  195 +-
 src/operator/leaky_relu.cu                         |    8 +-
 src/operator/linalg.h                              |  191 +-
 src/operator/linalg_impl.h                         | 2637 +++++++++++---------
 src/operator/loss_binary_op-inl.h                  |   51 +-
 src/operator/loss_binary_op.cc                     |   54 +-
 src/operator/loss_binary_op.cu                     |    6 +-
 src/operator/make_loss-inl.h                       |  130 +-
 src/operator/make_loss.cc                          |   23 +-
 src/operator/make_loss.cu                          |   13 +-
 src/operator/math_functions-inl.h                  |   59 +-
 src/operator/mkl_functions-inl.h                   |   74 +-
 .../modulated_deformable_convolution-inl.h         |  494 ++--
 src/operator/modulated_deformable_convolution.cc   |   48 +-
 src/operator/modulated_deformable_convolution.cu   |   24 +-
 src/operator/mshadow_op.h                          | 1013 ++++----
 src/operator/mxnet_op.h                            | 1176 ++++-----
 src/operator/nn/activation-inl.h                   |   87 +-
 src/operator/nn/activation.cc                      |  124 +-
 src/operator/nn/activation.cu                      |   67 +-
 src/operator/nn/batch_norm.cu                      |  803 +++---
 src/operator/nn/concat-inl.h                       |  178 +-
 src/operator/nn/concat.cc                          |  256 +-
 src/operator/nn/concat.cu                          |   17 +-
 src/operator/nn/convolution-inl.h                  |  328 +--
 src/operator/nn/convolution.cc                     |  310 +--
 src/operator/nn/convolution.cu                     |   96 +-
 src/operator/nn/ctc_loss-inl.h                     |  250 +-
 src/operator/nn/ctc_loss.cc                        |   99 +-
 src/operator/nn/ctc_loss.cu                        |   38 +-
 src/operator/nn/cudnn/cudnn_activation-inl.h       |   71 +-
 src/operator/nn/cudnn/cudnn_algoreg-inl.h          |  100 +-
 src/operator/nn/cudnn/cudnn_algoreg.cc             |   10 +-
 src/operator/nn/cudnn/cudnn_batch_norm-inl.h       |  170 +-
 src/operator/nn/cudnn/cudnn_batch_norm.cc          |  118 +-
 src/operator/nn/cudnn/cudnn_convolution-inl.h      |  580 ++---
 src/operator/nn/cudnn/cudnn_deconvolution-inl.h    |  429 ++--
 src/operator/nn/cudnn/cudnn_pooling-inl.h          |  189 +-
 .../nn/cudnn/cudnn_softmax_activation-inl.h        |   66 +-
 src/operator/nn/deconvolution-inl.h                |  399 ++-
 src/operator/nn/deconvolution.cc                   |  348 +--
 src/operator/nn/deconvolution.cu                   |   84 +-
 src/operator/nn/depthwise_convolution-inl.h        |  201 +-
 src/operator/nn/dropout-inl.h                      |  377 ++-
 src/operator/nn/dropout.cc                         |  186 +-
 src/operator/nn/dropout.cu                         |    9 +-
 src/operator/nn/fully_connected-inl.h              |  408 +--
 src/operator/nn/fully_connected.cc                 |  221 +-
 src/operator/nn/fully_connected.cu                 |   25 +-
 src/operator/nn/group_norm-inl.h                   |  367 +--
 src/operator/nn/group_norm.cc                      |  117 +-
 src/operator/nn/group_norm.cu                      |    7 +-
 src/operator/nn/im2col-inl.h                       |  189 +-
 src/operator/nn/im2col.cc                          |  329 +--
 src/operator/nn/im2col.cu                          |   14 +-
 src/operator/nn/im2col.h                           |  153 +-
 src/operator/nn/layer_norm-inl.h                   |  232 +-
 src/operator/nn/layer_norm.cc                      |  339 +--
 src/operator/nn/layer_norm.cu                      |  543 ++--
 src/operator/nn/log_softmax.cc                     |  210 +-
 src/operator/nn/log_softmax.cu                     |   12 +-
 src/operator/nn/lrn-inl.h                          |  115 +-
 src/operator/nn/lrn.cc                             |  122 +-
 src/operator/nn/lrn.cu                             |   10 +-
 src/operator/nn/mkldnn/mkldnn_base-inl.h           |    2 +-
 src/operator/nn/mkldnn/mkldnn_layer_norm.cc        |    2 +-
 src/operator/nn/mkldnn/mkldnn_ops-inl.h            |   18 +-
 src/operator/nn/moments-inl.h                      |  100 +-
 src/operator/nn/moments.cc                         |   58 +-
 src/operator/nn/moments.cu                         |    8 +-
 src/operator/nn/pool.h                             | 1171 +++++----
 src/operator/nn/pool_utils.h                       |   36 +-
 src/operator/nn/pooling-inl.h                      |  389 +--
 src/operator/nn/pooling.cc                         |  392 ++-
 src/operator/nn/pooling.cu                         |   47 +-
 src/operator/nn/sequence_mask-inl.h                |   23 +-
 src/operator/nn/softmax-inl.h                      | 1121 +++++----
 src/operator/nn/softmax.cc                         |  253 +-
 src/operator/nn/softmax.cu                         |  222 +-
 src/operator/nn/softmax_activation-inl.h           |   79 +-
 src/operator/nn/softmax_activation.cc              |   46 +-
 src/operator/nn/softmax_activation.cu              |   13 +-
 src/operator/nn/softmin.cc                         |   56 +-
 src/operator/nn/softmin.cu                         |    6 +-
 src/operator/nn/upsampling-inl.h                   |  159 +-
 src/operator/nn/upsampling.cc                      |  178 +-
 src/operator/nn/upsampling.cu                      |    7 +-
 src/operator/npx_control_flow.cc                   |  854 ++++---
 src/operator/npx_control_flow.h                    |   80 +-
 .../numpy/linalg/broadcast_reduce_customized-inl.h |  193 +-
 .../numpy/linalg/broadcast_reduce_op_customized.h  |   89 +-
 src/operator/numpy/linalg/np_eig-inl.h             |  192 +-
 src/operator/numpy/linalg/np_eig.cc                |   87 +-
 src/operator/numpy/linalg/np_eig.cu                |    6 +-
 src/operator/numpy/linalg/np_eigvals-inl.h         |  298 ++-
 src/operator/numpy/linalg/np_eigvals.cc            |   67 +-
 src/operator/numpy/linalg/np_eigvals.cu            |    6 +-
 src/operator/numpy/linalg/np_gesvd-inl.h           |  141 +-
 src/operator/numpy/linalg/np_gesvd.cc              |   85 +-
 src/operator/numpy/linalg/np_gesvd.cu              |    5 +-
 src/operator/numpy/linalg/np_lstsq-inl.h           |  528 ++--
 src/operator/numpy/linalg/np_lstsq.cc              |   55 +-
 src/operator/numpy/linalg/np_lstsq.cu              |    3 +-
 src/operator/numpy/linalg/np_matrix_rank-inl.h     |  258 +-
 src/operator/numpy/linalg/np_matrix_rank.cc        |  113 +-
 src/operator/numpy/linalg/np_matrix_rank.cu        |    5 +-
 src/operator/numpy/linalg/np_norm-inl.h            |  515 ++--
 src/operator/numpy/linalg/np_norm.cc               |   76 +-
 src/operator/numpy/linalg/np_norm_backward.cc      |   18 +-
 src/operator/numpy/linalg/np_norm_backward.cu      |   44 +-
 src/operator/numpy/linalg/np_norm_forward.cc       |   30 +-
 src/operator/numpy/linalg/np_norm_forward.cu       |   45 +-
 src/operator/numpy/linalg/np_pinv-inl.h            |  606 +++--
 src/operator/numpy/linalg/np_pinv.cc               |  110 +-
 src/operator/numpy/linalg/np_pinv.cu               |    5 +-
 src/operator/numpy/linalg/np_potrf.cc              |   35 +-
 src/operator/numpy/linalg/np_potrf.cu              |    2 +-
 src/operator/numpy/linalg/np_qr-inl.h              |  576 +++--
 src/operator/numpy/linalg/np_qr.cc                 |   49 +-
 src/operator/numpy/linalg/np_qr.cu                 |    6 +-
 src/operator/numpy/linalg/np_solve-inl.h           |  514 ++--
 src/operator/numpy/linalg/np_solve.cc              |   74 +-
 src/operator/numpy/linalg/np_solve.cu              |    4 +-
 src/operator/numpy/linalg/np_tensorinv-inl.h       |   64 +-
 src/operator/numpy/linalg/np_tensorinv.cc          |   80 +-
 src/operator/numpy/linalg/np_tensorinv.cu          |    5 +-
 src/operator/numpy/linalg/np_tensorsolve-inl.h     |  303 ++-
 src/operator/numpy/linalg/np_tensorsolve.cc        |   82 +-
 src/operator/numpy/linalg/np_tensorsolve.cu        |    4 +-
 src/operator/numpy/np_bincount_op-inl.h            |   81 +-
 src/operator/numpy/np_bincount_op.cc               |  120 +-
 src/operator/numpy/np_bincount_op.cu               |  130 +-
 src/operator/numpy/np_boolean_mask_assign.cc       |  219 +-
 src/operator/numpy/np_boolean_mask_assign.cu       |  145 +-
 src/operator/numpy/np_broadcast_reduce_op.cc       |   41 +-
 src/operator/numpy/np_broadcast_reduce_op.h        |  705 +++---
 .../numpy/np_broadcast_reduce_op_boolean.cc        |   83 +-
 .../numpy/np_broadcast_reduce_op_boolean.cu        |   12 +-
 src/operator/numpy/np_broadcast_reduce_op_index.cc |   63 +-
 src/operator/numpy/np_broadcast_reduce_op_index.cu |    4 +-
 src/operator/numpy/np_broadcast_reduce_op_value.cc |  352 +--
 src/operator/numpy/np_broadcast_reduce_op_value.cu |   55 +-
 src/operator/numpy/np_constraint_check.cc          |   40 +-
 src/operator/numpy/np_constraint_check.cu          |   13 +-
 src/operator/numpy/np_constraint_check.h           |   22 +-
 src/operator/numpy/np_cross-inl.h                  | 1488 ++++++-----
 src/operator/numpy/np_cross.cc                     |   76 +-
 src/operator/numpy/np_cross.cu                     |    6 +-
 src/operator/numpy/np_cumsum-inl.h                 |   86 +-
 src/operator/numpy/np_cumsum.cc                    |   61 +-
 src/operator/numpy/np_cumsum.cu                    |    6 +-
 src/operator/numpy/np_delete_op-inl.h              |  227 +-
 src/operator/numpy/np_delete_op.cc                 |   69 +-
 src/operator/numpy/np_delete_op.cu                 |    5 +-
 src/operator/numpy/np_diff-inl.h                   |   97 +-
 src/operator/numpy/np_diff.cc                      |   68 +-
 src/operator/numpy/np_diff.cu                      |    6 +-
 src/operator/numpy/np_dot-inl.h                    |   42 +-
 src/operator/numpy/np_dot.cc                       |   62 +-
 src/operator/numpy/np_dot.cu                       |    6 +-
 src/operator/numpy/np_ediff1d_op-inl.h             |  106 +-
 src/operator/numpy/np_ediff1d_op.cc                |  151 +-
 src/operator/numpy/np_ediff1d_op.cu                |    6 +-
 src/operator/numpy/np_einsum_op-inl.h              |  619 ++---
 src/operator/numpy/np_einsum_op.cc                 |  202 +-
 src/operator/numpy/np_einsum_op.cu                 |  521 ++--
 src/operator/numpy/np_einsum_path_op-inl.h         |  280 +--
 .../numpy/np_elemwise_broadcast_logic_op.cc        |  260 +-
 .../numpy/np_elemwise_broadcast_logic_op.cu        |   13 +-
 src/operator/numpy/np_elemwise_broadcast_op.cc     |  286 +--
 src/operator/numpy/np_elemwise_broadcast_op.cu     |   39 +-
 src/operator/numpy/np_elemwise_broadcast_op.h      |  243 +-
 .../numpy/np_elemwise_broadcast_op_extended.cc     |  545 ++--
 .../numpy/np_elemwise_broadcast_op_extended.cu     |   68 +-
 .../numpy/np_elemwise_broadcast_op_extended_sec.cc |  161 +-
 .../numpy/np_elemwise_broadcast_op_extended_sec.cu |   36 +-
 src/operator/numpy/np_elemwise_unary_op_basic.cc   |  543 ++--
 src/operator/numpy/np_elemwise_unary_op_basic.cu   |   60 +-
 src/operator/numpy/np_fill_diagonal_op-inl.h       |   72 +-
 src/operator/numpy/np_fill_diagonal_op.cc          |   44 +-
 src/operator/numpy/np_fill_diagonal_op.cu          |    2 +-
 src/operator/numpy/np_indexing_op.cc               |  458 ++--
 src/operator/numpy/np_indexing_op.cu               |  522 ++--
 src/operator/numpy/np_indexing_op.h                |   79 +-
 src/operator/numpy/np_init_op.cc                   |  266 +-
 src/operator/numpy/np_init_op.cu                   |   39 +-
 src/operator/numpy/np_init_op.h                    |  202 +-
 src/operator/numpy/np_insert_op-inl.h              |  233 +-
 src/operator/numpy/np_insert_op_scalar-inl.h       |   64 +-
 src/operator/numpy/np_insert_op_scalar.cc          |   89 +-
 src/operator/numpy/np_insert_op_scalar.cu          |    4 +-
 src/operator/numpy/np_insert_op_slice-inl.h        |  111 +-
 src/operator/numpy/np_insert_op_slice.cc           |   83 +-
 src/operator/numpy/np_insert_op_slice.cu           |    4 +-
 src/operator/numpy/np_insert_op_tensor-inl.h       |  134 +-
 src/operator/numpy/np_insert_op_tensor.cc          |  104 +-
 src/operator/numpy/np_insert_op_tensor.cu          |    4 +-
 src/operator/numpy/np_interp_op-inl.h              |  199 +-
 src/operator/numpy/np_interp_op.cc                 |   73 +-
 src/operator/numpy/np_interp_op.cu                 |    4 +-
 src/operator/numpy/np_kron-inl.h                   |  165 +-
 src/operator/numpy/np_kron.cc                      |   52 +-
 src/operator/numpy/np_kron.cu                      |    6 +-
 src/operator/numpy/np_matmul_op-inl.h              |  343 +--
 src/operator/numpy/np_matmul_op.cc                 |   86 +-
 src/operator/numpy/np_matmul_op.cu                 |    6 +-
 src/operator/numpy/np_matrix_op-inl.h              |  931 +++----
 src/operator/numpy/np_matrix_op.cc                 | 1409 ++++++-----
 src/operator/numpy/np_matrix_op.cu                 |  122 +-
 src/operator/numpy/np_memory_op.cc                 |   30 +-
 src/operator/numpy/np_memory_op.cu                 |    2 +-
 src/operator/numpy/np_memory_op.h                  |   12 +-
 src/operator/numpy/np_moments_op.cc                |  211 +-
 src/operator/numpy/np_nonzero_op-inl.h             |    6 +-
 src/operator/numpy/np_nonzero_op.cc                |   64 +-
 src/operator/numpy/np_nonzero_op.cu                |   80 +-
 src/operator/numpy/np_pad_op-inl.h                 |  612 ++---
 src/operator/numpy/np_pad_op.cc                    |   56 +-
 src/operator/numpy/np_pad_op.cu                    |    6 +-
 src/operator/numpy/np_percentile_op-inl.h          |  199 +-
 src/operator/numpy/np_percentile_op.cc             |   78 +-
 src/operator/numpy/np_percentile_op.cu             |   25 +-
 src/operator/numpy/np_polynomial_op-inl.h          |   32 +-
 src/operator/numpy/np_polynomial_op.cc             |  113 +-
 src/operator/numpy/np_polynomial_op.cu             |   66 +-
 src/operator/numpy/np_repeat_op-inl.h              |  122 +-
 src/operator/numpy/np_repeat_op.cc                 |   34 +-
 src/operator/numpy/np_repeat_op.cu                 |   45 +-
 src/operator/numpy/np_tensordot_op-inl.h           |  339 +--
 src/operator/numpy/np_tensordot_op.cc              |  152 +-
 src/operator/numpy/np_tensordot_op.cu              |    9 +-
 src/operator/numpy/np_trace_op-inl.h               |   93 +-
 src/operator/numpy/np_trace_op.cc                  |   44 +-
 src/operator/numpy/np_trace_op.cu                  |    5 +-
 src/operator/numpy/np_tri_op-inl.h                 |   59 +-
 src/operator/numpy/np_tri_op.cc                    |   34 +-
 src/operator/numpy/np_tri_op.cu                    |    3 +-
 src/operator/numpy/np_tril_op-inl.h                |  113 +-
 src/operator/numpy/np_tril_op.cc                   |   53 +-
 src/operator/numpy/np_tril_op.cu                   |    6 +-
 src/operator/numpy/np_triu_op-inl.h                |  109 +-
 src/operator/numpy/np_triu_op.cc                   |   53 +-
 src/operator/numpy/np_triu_op.cu                   |    6 +-
 src/operator/numpy/np_true_divide-inl.h            |  152 +-
 src/operator/numpy/np_true_divide.cc               |  130 +-
 src/operator/numpy/np_true_divide.cu               |    8 +-
 src/operator/numpy/np_unique_op.cc                 |  234 +-
 src/operator/numpy/np_unique_op.cu                 |  236 +-
 src/operator/numpy/np_unique_op.h                  |   28 +-
 src/operator/numpy/np_where_op-inl.h               |  320 ++-
 src/operator/numpy/np_where_op.cc                  |  356 +--
 src/operator/numpy/np_where_op.cu                  |   16 +-
 src/operator/numpy/np_window_op.cc                 |   57 +-
 src/operator/numpy/np_window_op.cu                 |    9 +-
 src/operator/numpy/np_window_op.h                  |   67 +-
 src/operator/numpy/random/dist_common.cc           |    4 +-
 src/operator/numpy/random/dist_common.cu           |   10 +-
 src/operator/numpy/random/dist_common.h            |  196 +-
 src/operator/numpy/random/np_bernoulli_op.cc       |   65 +-
 src/operator/numpy/random/np_bernoulli_op.cu       |    3 +-
 src/operator/numpy/random/np_bernoulli_op.h        |   93 +-
 src/operator/numpy/random/np_choice_op.cc          |   82 +-
 src/operator/numpy/random/np_choice_op.cu          |    6 +-
 src/operator/numpy/random/np_choice_op.h           |  150 +-
 src/operator/numpy/random/np_exponential_op.cc     |  130 +-
 src/operator/numpy/random/np_exponential_op.cu     |    4 +-
 src/operator/numpy/random/np_exponential_op.h      |  123 +-
 src/operator/numpy/random/np_gamma_op.cc           |  129 +-
 src/operator/numpy/random/np_gamma_op.cu           |    6 +-
 src/operator/numpy/random/np_gamma_op.h            |  326 +--
 src/operator/numpy/random/np_laplace_op.cc         |   73 +-
 src/operator/numpy/random/np_laplace_op.cu         |    3 +-
 src/operator/numpy/random/np_laplace_op.h          |  128 +-
 src/operator/numpy/random/np_location_scale_op.cc  |   20 +-
 src/operator/numpy/random/np_location_scale_op.cu  |   20 +-
 src/operator/numpy/random/np_location_scale_op.h   |  360 +--
 src/operator/numpy/random/np_multinomial_op.cc     |   38 +-
 src/operator/numpy/random/np_multinomial_op.cu     |   11 +-
 src/operator/numpy/random/np_multinomial_op.h      |  128 +-
 src/operator/numpy/random/np_normal_op.cc          |  211 +-
 src/operator/numpy/random/np_normal_op.cu          |    8 +-
 src/operator/numpy/random/np_normal_op.h           |  160 +-
 src/operator/numpy/random/np_pareto_op.cc          |  130 +-
 src/operator/numpy/random/np_pareto_op.cu          |    5 +-
 src/operator/numpy/random/np_pareto_op.h           |  138 +-
 src/operator/numpy/random/np_power_op.cc           |   74 +-
 src/operator/numpy/random/np_power_op.cu           |    3 +-
 src/operator/numpy/random/np_power_op.h            |   85 +-
 src/operator/numpy/random/np_rayleigh_op.cc        |  130 +-
 src/operator/numpy/random/np_rayleigh_op.cu        |    5 +-
 src/operator/numpy/random/np_rayleigh_op.h         |  104 +-
 src/operator/numpy/random/np_uniform_op.cc         |  146 +-
 src/operator/numpy/random/np_uniform_op.cu         |    6 +-
 src/operator/numpy/random/np_uniform_op.h          |  132 +-
 src/operator/numpy/random/np_weibull_op.cc         |  130 +-
 src/operator/numpy/random/np_weibull_op.cu         |    5 +-
 src/operator/numpy/random/np_weibull_op.h          |  130 +-
 src/operator/operator.cc                           |    4 +-
 src/operator/operator_common.h                     |  289 ++-
 src/operator/operator_tune-inl.h                   |  169 +-
 src/operator/operator_tune.cc                      |  640 ++---
 src/operator/operator_tune.h                       |   58 +-
 src/operator/operator_util.cc                      |  542 ++--
 src/operator/optimizer_op-inl.h                    | 2622 ++++++++++---------
 src/operator/optimizer_op.cc                       | 1065 ++++----
 src/operator/optimizer_op.cu                       |  257 +-
 src/operator/pad-inl.h                             |  182 +-
 src/operator/pad.cc                                |  293 +--
 src/operator/pad.cu                                |  392 ++-
 src/operator/quantization/calibrate-inl.h          |    5 +-
 src/operator/quantization/calibrate.cc             |  110 +-
 src/operator/quantization/dequantize-inl.h         |   82 +-
 src/operator/quantization/dequantize.cc            |   64 +-
 src/operator/quantization/dequantize.cu            |    2 +-
 src/operator/quantization/quantization_utils.h     |  108 +-
 src/operator/quantization/quantize-inl.h           |   98 +-
 src/operator/quantization/quantize.cc              |   57 +-
 src/operator/quantization/quantize.cu              |    3 +-
 src/operator/quantization/quantize_graph_pass.cc   |  181 +-
 src/operator/quantization/quantize_v2-inl.h        |  211 +-
 src/operator/quantization/quantize_v2.cc           |   89 +-
 src/operator/quantization/quantize_v2.cu           |    2 +-
 src/operator/quantization/quantized_activation.cc  |   94 +-
 src/operator/quantization/quantized_batch_norm.cc  |  103 +-
 src/operator/quantization/quantized_concat.cc      |  143 +-
 src/operator/quantization/quantized_conv.cc        |  170 +-
 src/operator/quantization/quantized_conv.cu        |  178 +-
 .../quantization/quantized_elemwise_add-inl.h      |   22 +-
 .../quantization/quantized_elemwise_add.cc         |   77 +-
 .../quantization/quantized_elemwise_mul-inl.h      |   31 +-
 .../quantization/quantized_elemwise_mul.cc         |  139 +-
 src/operator/quantization/quantized_flatten-inl.h  |   56 +-
 src/operator/quantization/quantized_flatten.cc     |   80 +-
 src/operator/quantization/quantized_flatten.cu     |    2 +-
 .../quantization/quantized_fully_connected.cc      |  296 +--
 .../quantization/quantized_fully_connected.cu      |   85 +-
 src/operator/quantization/quantized_indexing_op.cc |  171 +-
 src/operator/quantization/quantized_pooling.cc     |  194 +-
 src/operator/quantization/quantized_pooling.cu     |   42 +-
 src/operator/quantization/requantize-inl.h         |  198 +-
 src/operator/quantization/requantize.cc            |   85 +-
 src/operator/quantization/requantize.cu            |    3 +-
 src/operator/random/multisample_op.cc              |  141 +-
 src/operator/random/multisample_op.cu              |   17 +-
 src/operator/random/multisample_op.h               |  101 +-
 src/operator/random/pdf_op.cc                      |  181 +-
 src/operator/random/pdf_op.cu                      |   17 +-
 src/operator/random/pdf_op.h                       |  668 +++--
 src/operator/random/sample_multinomial_op.cc       |   86 +-
 src/operator/random/sample_multinomial_op.cu       |   20 +-
 src/operator/random/sample_multinomial_op.h        |  132 +-
 src/operator/random/sample_op.cc                   |  121 +-
 src/operator/random/sample_op.cu                   |    8 +-
 src/operator/random/sample_op.h                    |  540 ++--
 src/operator/random/sampler.h                      |  327 +--
 src/operator/random/shuffle_op.cc                  |   93 +-
 src/operator/random/shuffle_op.cu                  |   50 +-
 src/operator/random/unique_sample_op.cc            |   25 +-
 src/operator/random/unique_sample_op.h             |   88 +-
 src/operator/regression_output-inl.h               |  173 +-
 src/operator/regression_output.cc                  |  101 +-
 src/operator/regression_output.cu                  |   23 +-
 src/operator/rnn-inl.h                             |  778 +++---
 src/operator/rnn.cc                                |  249 +-
 src/operator/rnn.cu                                |    8 +-
 src/operator/rnn_impl.h                            | 1365 +++++-----
 src/operator/roi_pooling-inl.h                     |  120 +-
 src/operator/roi_pooling.cc                        |  158 +-
 src/operator/roi_pooling.cu                        |  174 +-
 src/operator/sequence_last-inl.h                   |  211 +-
 src/operator/sequence_last.cc                      |   43 +-
 src/operator/sequence_last.cu                      |   13 +-
 src/operator/sequence_mask-inl.h                   |  181 +-
 src/operator/sequence_mask.cc                      |   70 +-
 src/operator/sequence_mask.cu                      |   64 +-
 src/operator/sequence_op_common.h                  |   29 +-
 src/operator/sequence_reverse-inl.h                |  187 +-
 src/operator/sequence_reverse.cc                   |   28 +-
 src/operator/sequence_reverse.cu                   |   13 +-
 src/operator/slice_channel-inl.h                   |  142 +-
 src/operator/slice_channel.cc                      |   20 +-
 src/operator/slice_channel.cu                      |    9 +-
 src/operator/softmax_output-inl.h                  |  263 +-
 src/operator/softmax_output.cc                     |  151 +-
 src/operator/softmax_output.cu                     |    7 +-
 src/operator/spatial_transformer-inl.h             |  209 +-
 src/operator/spatial_transformer.cc                |  225 +-
 src/operator/spatial_transformer.cu                |  198 +-
 src/operator/special_functions-inl.h               |  121 +-
 src/operator/subgraph/build_subgraph.cc            |  199 +-
 src/operator/subgraph/common.h                     |   62 +-
 src/operator/subgraph/default_subgraph_property.cc |   27 +-
 .../subgraph/default_subgraph_property_v2.cc       |   36 +-
 .../partitioner/custom_subgraph_property.h         |  238 +-
 .../subgraph/static_shape_subgraph_property.cc     |   61 +-
 src/operator/subgraph/subgraph_property.h          |   64 +-
 src/operator/subgraph/tensorrt/nnvm_to_onnx-inl.h  |  260 +-
 src/operator/subgraph/tensorrt/nnvm_to_onnx.cc     |  292 ++-
 src/operator/subgraph/tensorrt/onnx_to_tensorrt.cc |   76 +-
 src/operator/subgraph/tensorrt/onnx_to_tensorrt.h  |   44 +-
 src/operator/subgraph/tensorrt/tensorrt-inl.h      |  101 +-
 src/operator/subgraph/tensorrt/tensorrt.cc         |  124 +-
 src/operator/subgraph/tensorrt/tensorrt.cu         |   33 +-
 src/operator/subgraph_op_common.cc                 |  100 +-
 src/operator/subgraph_op_common.h                  |   73 +-
 src/operator/svm_output-inl.h                      |  112 +-
 src/operator/svm_output.cc                         |   86 +-
 src/operator/svm_output.cu                         |   85 +-
 src/operator/swapaxis-inl.h                        |  120 +-
 src/operator/swapaxis.cc                           |   22 +-
 src/operator/swapaxis.cu                           |   13 +-
 src/operator/tensor/amp_cast.cc                    |  285 ++-
 src/operator/tensor/amp_cast.cu                    |   11 +-
 src/operator/tensor/amp_cast.h                     |   54 +-
 src/operator/tensor/broadcast_reduce-inl.h         |  669 +++--
 .../tensor/broadcast_reduce_minmax_value.cc        |   44 +-
 .../tensor/broadcast_reduce_minmax_value.cu        |   18 +-
 src/operator/tensor/broadcast_reduce_norm_value.cc |   63 +-
 src/operator/tensor/broadcast_reduce_norm_value.cu |   15 +-
 src/operator/tensor/broadcast_reduce_op.cc         |   51 +-
 src/operator/tensor/broadcast_reduce_op.h          | 1192 +++++----
 src/operator/tensor/broadcast_reduce_op_index.cc   |  101 +-
 src/operator/tensor/broadcast_reduce_op_index.cu   |   17 +-
 src/operator/tensor/broadcast_reduce_op_value.cc   |  121 +-
 src/operator/tensor/broadcast_reduce_op_value.cu   |   13 +-
 src/operator/tensor/broadcast_reduce_prod_value.cc |   41 +-
 src/operator/tensor/broadcast_reduce_prod_value.cu |   17 +-
 src/operator/tensor/broadcast_reduce_sum_value.cc  |   71 +-
 src/operator/tensor/broadcast_reduce_sum_value.cu  |   27 +-
 src/operator/tensor/cast_storage-inl.h             |  200 +-
 src/operator/tensor/cast_storage.cc                |   34 +-
 src/operator/tensor/cast_storage.cu                |    4 +-
 src/operator/tensor/control_flow_op.cc             |   98 +-
 src/operator/tensor/control_flow_op.cu             |    8 +-
 src/operator/tensor/control_flow_op.h              |  319 ++-
 src/operator/tensor/diag_op-inl.h                  |  183 +-
 src/operator/tensor/diag_op.cc                     |   50 +-
 src/operator/tensor/diag_op.cu                     |   16 +-
 src/operator/tensor/dot-inl.h                      |  877 ++++---
 src/operator/tensor/dot.cc                         |  245 +-
 src/operator/tensor/dot.cu                         |   11 +-
 .../tensor/elemwise_binary_broadcast_op.cc         |  177 +-
 src/operator/tensor/elemwise_binary_broadcast_op.h |  475 ++--
 .../tensor/elemwise_binary_broadcast_op_basic.cc   |  189 +-
 .../tensor/elemwise_binary_broadcast_op_basic.cu   |   32 +-
 .../elemwise_binary_broadcast_op_extended.cc       |  136 +-
 .../elemwise_binary_broadcast_op_extended.cu       |   19 +-
 .../tensor/elemwise_binary_broadcast_op_logic.cc   |   63 +-
 .../tensor/elemwise_binary_broadcast_op_logic.cu   |   18 +-
 src/operator/tensor/elemwise_binary_op-inl.h       |  398 +--
 src/operator/tensor/elemwise_binary_op.cc          |  178 +-
 src/operator/tensor/elemwise_binary_op.h           |  743 +++---
 src/operator/tensor/elemwise_binary_op_basic.cc    |  248 +-
 src/operator/tensor/elemwise_binary_op_basic.cu    |  202 +-
 src/operator/tensor/elemwise_binary_op_extended.cc |   93 +-
 src/operator/tensor/elemwise_binary_op_extended.cu |   22 +-
 src/operator/tensor/elemwise_binary_op_logic.cc    |   54 +-
 src/operator/tensor/elemwise_binary_op_logic.cu    |   21 +-
 src/operator/tensor/elemwise_binary_scalar_op.cc   |   81 +-
 src/operator/tensor/elemwise_binary_scalar_op.h    |  345 ++-
 .../tensor/elemwise_binary_scalar_op_basic.cc      |  181 +-
 .../tensor/elemwise_binary_scalar_op_basic.cu      |   39 +-
 .../tensor/elemwise_binary_scalar_op_extended.cc   |  108 +-
 .../tensor/elemwise_binary_scalar_op_extended.cu   |   26 +-
 .../tensor/elemwise_binary_scalar_op_logic.cc      |   81 +-
 .../tensor/elemwise_binary_scalar_op_logic.cu      |   30 +-
 src/operator/tensor/elemwise_sum.cc                |  136 +-
 src/operator/tensor/elemwise_sum.cu                |   52 +-
 src/operator/tensor/elemwise_sum.h                 |   38 +-
 src/operator/tensor/elemwise_unary_op.cc           |   44 +-
 src/operator/tensor/elemwise_unary_op.h            |  542 ++--
 src/operator/tensor/elemwise_unary_op_basic.cc     |  953 +++----
 src/operator/tensor/elemwise_unary_op_basic.cu     |  149 +-
 src/operator/tensor/elemwise_unary_op_logexp.cc    |  289 +--
 src/operator/tensor/elemwise_unary_op_logexp.cu    |   30 +-
 src/operator/tensor/elemwise_unary_op_pow.cc       |  448 ++--
 src/operator/tensor/elemwise_unary_op_pow.cu       |   36 +-
 src/operator/tensor/elemwise_unary_op_trig.cc      |  784 +++---
 src/operator/tensor/elemwise_unary_op_trig.cu      |   80 +-
 src/operator/tensor/histogram-inl.h                |   59 +-
 src/operator/tensor/histogram.cc                   |  114 +-
 src/operator/tensor/histogram.cu                   |   60 +-
 src/operator/tensor/index_add-inl.h                |  119 +-
 src/operator/tensor/index_add_backward.cc          |   61 +-
 src/operator/tensor/index_add_backward.cu          |   50 +-
 src/operator/tensor/index_add_forward.cc           |  147 +-
 src/operator/tensor/index_add_forward.cu           |   49 +-
 src/operator/tensor/index_update-inl.h             |  143 +-
 src/operator/tensor/index_update.cc                |  278 ++-
 src/operator/tensor/index_update.cu                |  178 +-
 src/operator/tensor/indexing_op.cc                 |  848 ++++---
 src/operator/tensor/indexing_op.cu                 |  596 +++--
 src/operator/tensor/indexing_op.h                  |  888 ++++---
 src/operator/tensor/init_op.cc                     |  199 +-
 src/operator/tensor/init_op.cu                     |   36 +-
 src/operator/tensor/init_op.h                      |  509 ++--
 src/operator/tensor/la_op-inl.h                    |  815 +++---
 src/operator/tensor/la_op.cc                       |  872 ++++---
 src/operator/tensor/la_op.cu                       |   70 +-
 src/operator/tensor/la_op.h                        |  614 ++---
 src/operator/tensor/matrix_op-inl.h                | 1971 ++++++++-------
 src/operator/tensor/matrix_op.cc                   | 1009 ++++----
 src/operator/tensor/matrix_op.cu                   |  288 ++-
 src/operator/tensor/ordering_op-inl.h              |  957 +++----
 src/operator/tensor/ordering_op.cc                 |  177 +-
 src/operator/tensor/ordering_op.cu                 |   13 +-
 src/operator/tensor/ravel.cc                       |   74 +-
 src/operator/tensor/ravel.cu                       |    6 +-
 src/operator/tensor/ravel.h                        |   97 +-
 src/operator/tensor/reduce_rtc.cc                  |  237 +-
 src/operator/tensor/slice-inl.h                    |   26 +-
 src/operator/tensor/sort_op.h                      |   76 +-
 src/operator/tensor/sparse_retain-inl.h            |  243 +-
 src/operator/tensor/sparse_retain.cc               |   51 +-
 src/operator/tensor/sparse_retain.cu               |    6 +-
 src/operator/tensor/square_sum-inl.h               |  352 +--
 src/operator/tensor/square_sum.cc                  |   63 +-
 src/operator/tensor/square_sum.cu                  |   60 +-
 src/operator/tensor/util/tensor_util-inl.h         |   15 +-
 src/operator/tvmop/op_module.cc                    |   39 +-
 src/operator/tvmop/op_module.h                     |   19 +-
 src/optimizer/sgd-inl.h                            |  187 +-
 src/profiler/aggregate_stats.cc                    |  137 +-
 src/profiler/aggregate_stats.h                     |   20 +-
 src/profiler/custom_op_profiler.h                  |   59 +-
 src/profiler/nvtx.cc                               |    1 -
 src/profiler/nvtx.h                                |    8 +-
 src/profiler/profiler.cc                           |   67 +-
 src/profiler/profiler.h                            |  413 +--
 src/profiler/storage_profiler.cc                   |   44 +-
 src/profiler/storage_profiler.h                    |   56 +-
 src/profiler/vtune.h                               |  161 +-
 src/resource.cc                                    |  334 +--
 src/runtime/c_runtime_api.cc                       |   58 +-
 src/runtime/container.cc                           |   66 +-
 src/runtime/ndarray_handle.cc                      |   10 +-
 src/runtime/object.cc                              |   42 +-
 src/runtime/object_internal.h                      |    2 +-
 src/runtime/registry.cc                            |   33 +-
 src/serialization/cnpy.cc                          |  479 ++--
 src/serialization/cnpy.h                           |    6 +-
 src/storage/cpu_device_storage.h                   |    3 +-
 src/storage/cpu_shared_storage_manager.h           |   91 +-
 src/storage/pooled_storage_manager.h               |  175 +-
 src/storage/storage.cc                             |  108 +-
 src/storage/storage_manager.h                      |   12 +-
 src/storage/storage_manager_helpers.h              |   66 +-
 940 files changed, 84073 insertions(+), 75359 deletions(-)
 mode change 100755 => 100644 src/operator/contrib/hawkes_ll.cu
 mode change 100755 => 100644 src/operator/contrib/preloaded_multi_sgd.cc