Posted to commits@tvm.apache.org by lm...@apache.org on 2020/11/13 11:19:04 UTC

[incubator-tvm-site] branch asf-site updated: Docs build at Fri Nov 13 03:18:33 PST 2020

This is an automated email from the ASF dual-hosted git repository.

lmzheng pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/incubator-tvm-site.git


The following commit(s) were added to refs/heads/asf-site by this push:
     new 8bca4dd  Docs build at Fri Nov 13 03:18:33 PST 2020
8bca4dd is described below
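
For reference, the commit can be checked out and inspected locally with standard git commands. This is a minimal sketch; it assumes git is installed and uses the repository URL, branch, and commit hash given above:

    # Clone the site repository and switch to the asf-site branch
    git clone https://gitbox.apache.org/repos/asf/incubator-tvm-site.git
    cd incubator-tvm-site
    git checkout asf-site   # may already be the default branch after cloning
    # Show the commit message and the file-change summary for this push
    git show --stat 8bca4dd1c608f0aa590d86640c4d8989ab8dbc93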

commit 8bca4dd1c608f0aa590d86640c4d8989ab8dbc93
Author: Lianmin Zheng <li...@gmail.com>
AuthorDate: Fri Nov 13 03:18:33 2020 -0800

    Docs build at Fri Nov 13 03:18:33 PST 2020
---
 .gitignore                                         |    1 -
 .../tuple_inputs.ipynb                             |    2 +-
 .../from_tflite.py                                 |    2 +-
 .../tune_simple_template.py                        |    4 -
 .../from_tflite.ipynb                              |    4 +-
 .../tune_simple_template.ipynb                     |    4 +-
 .../matrix_multiply.py                             |   10 +-
 .../use_pass_infra.ipynb                           |    2 +-
 .../143c743c62f58570eabd77fd3395ca8c/scan.py       |    1 -
 .../deploy_prequantized_tflite.ipynb               |    4 +-
 .../tvmc_command_line_driver.py                    |  336 -
 .../tune_conv2d_cuda.ipynb                         |    4 +-
 .../relay_quick_start.ipynb                        |    6 +-
 .../tune_relay_cuda.py                             |   10 +-
 .../tune_relay_mobile_gpu.ipynb                    |   12 +-
 .../tune_conv2d_cuda.py                            |    4 -
 .../matrix_multiply_opt.ipynb                      |   10 +-
 .../from_keras.ipynb                               |    6 +-
 .../from_coreml.ipynb                              |    4 +-
 .../from_caffe2.ipynb                              |    4 +-
 .../tensor_expr_get_started.py                     |    5 +-
 .../deploy_model_on_android.ipynb                  |   10 +-
 .../deploy_model_on_android.py                     |   10 +-
 .../opt_gemm.ipynb                                 |    4 +-
 .../tune_relay_vta.ipynb                           |   10 +-
 .../cross_compilation_and_rpc.ipynb                |    6 +-
 .../4e9540fc014621d8d3bd14869c1ab227/scan.ipynb    |    4 +-
 .../deploy_quantized.ipynb                         |    4 +-
 .../from_tensorflow.py                             |    2 +-
 .../intro_topi.ipynb                               |    4 +-
 .../deploy_quantized.py                            |    2 +-
 .../5bd1bb9c6505ea40407fa19f01579414/reduction.py  |    1 -
 .../deploy_prequantized_tflite.py                  |    2 +-
 .../intrin_math.ipynb                              |    2 +-
 .../tune_relay_vta.py                              |    8 +-
 .../schedule_primitives.ipynb                      |    2 +-
 .../tensorize.ipynb                                |    8 +-
 .../opt_conv_cuda.ipynb                            |    8 +-
 .../tune_conv2d_layer_cuda.py                      |   33 +-
 .../matrix_multiply_opt.py                         |    8 +-
 .../696dd37904ef92773435ca321ff41bfb/from_onnx.py  |   11 +-
 .../relay_quick_start.py                           |    6 +-
 .../using_external_lib.ipynb                       |    2 +-
 .../from_pytorch.ipynb                             |    6 +-
 .../tensor_expr_get_started.ipynb                  |    6 +-
 .../70d345c5409f99cb5de9dc44f147ff6f/build_gcn.py  |    6 +-
 .../from_caffe2.py                                 |    2 +-
 .../tune_relay_cuda.ipynb                          |   10 +-
 .../deploy_prequantized.ipynb                      |    6 +-
 .../deploy_ssd_gluoncv.ipynb                       |    4 +-
 .../from_darknet.ipynb                             |    6 +-
 .../836dc3852acf09662e9eb37c4c5e1e1b/opt_gemm.py   |    2 +-
 .../deploy_model_on_rasp.py                        |    6 +-
 .../tune_relay_x86.py                              |    4 -
 .../extern_op.ipynb                                |    2 +-
 .../opt_matmul_auto_tensorcore.ipynb               |    2 +-
 .../deploy_sparse.ipynb                            |    2 +-
 .../deploy_prequantized.py                         |    4 +-
 .../tune_matmul_x86.py                             |   34 +-
 .../opt_conv_tensorcore.ipynb                      |    2 +-
 .../9a950897eeef498440fbe2f0afe2601f/tedd.py       |    6 +-
 .../9b0365fd5723f7c4d4e996637ab9a487/intro_topi.py |    1 -
 .../a2f661bf234a167b5458fa28d8fafedc/tedd.ipynb    |    8 +-
 .../from_darknet.py                                |    3 +-
 .../opt_conv_cuda.py                               |    6 +-
 .../bring_your_own_datatypes.py                    |  411 -
 .../tune_relay_x86.ipynb                           |    4 +-
 .../baa4de13ce6d932de43e0eb5c4cb8f16/tensorize.py  |   10 +-
 .../tune_relay_arm.py                              |   12 +-
 .../vta_get_started.py                             |    6 +-
 .../tune_conv2d_layer_cuda.ipynb                   |   18 +-
 .../deploy_model_on_rasp.ipynb                     |    8 +-
 .../bring_your_own_datatypes.ipynb                 |  349 -
 .../build_gcn.ipynb                                |    6 +-
 .../deploy_object_detection_pytorch.ipynb          |    4 +-
 .../deploy_classification.ipynb                    |    8 +-
 .../convolution_opt.ipynb                          |   10 +-
 .../deploy_ssd_gluoncv.py                          |    2 +-
 .../convolution_opt.py                             |   10 +-
 .../micro_tflite.ipynb                             |    4 +-
 .../cross_compilation_and_rpc.py                   |    4 +-
 .../matrix_multiply.ipynb                          |   10 +-
 .../tvmc_command_line_driver.ipynb                 |  149 -
 .../from_tensorflow.ipynb                          |    4 +-
 .../tune_relay_mobile_gpu.py                       |   12 +-
 .../from_coreml.py                                 |    2 +-
 .../low_level_custom_pass.ipynb                    |    2 +-
 .../from_onnx.ipynb                                |   11 +-
 .../ea0c81cab71096d16b825a33fd276c58/from_mxnet.py |    2 +-
 .../reduction.ipynb                                |    4 +-
 .../deploy_object_detection_pytorch.py             |    6 +-
 .../deploy_classification.py                       |    6 +-
 .../tune_matmul_x86.ipynb                          |   14 +-
 .../from_mxnet.ipynb                               |    4 +-
 .../f59fd8b968f7dcde34ed872c8527c192/from_keras.py |   28 +-
 .../vta_get_started.ipynb                          |    8 +-
 .../from_pytorch.py                                |    8 +-
 .../tune_relay_arm.ipynb                           |   12 +-
 .../micro_tflite.py                                |    2 +-
 .../sphx_glr_bring_your_own_datatypes_thumb.png    |  Bin 26786 -> 0 bytes
 .../_images/sphx_glr_deploy_classification_001.png |  Bin 245200 -> 245321 bytes
 docs/_images/sphx_glr_deploy_ssd_gluoncv_001.png   |  Bin 261898 -> 262413 bytes
 docs/_images/sphx_glr_from_caffe2_001.png          |  Bin 245200 -> 245321 bytes
 docs/_images/sphx_glr_from_darknet_001.png         |  Bin 339980 -> 340868 bytes
 docs/_images/sphx_glr_from_darknet_thumb.png       |  Bin 132099 -> 132048 bytes
 docs/_images/sphx_glr_from_keras_001.png           |  Bin 245200 -> 245321 bytes
 docs/_images/sphx_glr_from_mxnet_001.png           |  Bin 245200 -> 245321 bytes
 docs/_images/sphx_glr_from_onnx_001.png            |  Bin 164343 -> 164194 bytes
 docs/_images/sphx_glr_from_tflite_001.png          |  Bin 245200 -> 245321 bytes
 .../sphx_glr_tvmc_command_line_driver_thumb.png    |  Bin 26786 -> 0 bytes
 docs/_sources/api/python/contrib.rst.txt           |    6 +-
 docs/_sources/contribute/code_guide.rst.txt        |    2 +-
 docs/_sources/contribute/community.rst.txt         |    2 +-
 docs/_sources/contribute/document.rst.txt          |    4 +-
 docs/_sources/contribute/error_handling.rst.txt    |    4 +-
 docs/_sources/contribute/git_howto.rst.txt         |   26 +-
 docs/_sources/contribute/pull_request.rst.txt      |    8 +-
 docs/_sources/contribute/release_process.rst.txt   |   19 +-
 docs/_sources/deploy/android.rst.txt               |    4 +-
 docs/_sources/deploy/arm_compute_lib.rst.txt       |    6 -
 docs/_sources/deploy/cpp_deploy.rst.txt            |   10 +-
 docs/_sources/deploy/hls.rst.txt                   |    2 +-
 docs/_sources/deploy/index.rst.txt                 |    1 -
 docs/_sources/deploy/integrate.rst.txt             |    9 +-
 docs/_sources/deploy/tensorrt.rst.txt              |  297 -
 docs/_sources/dev/convert_layout.rst.txt           |    2 +-
 docs/_sources/dev/frontend/tensorflow.rst.txt      |    4 +-
 docs/_sources/dev/hybrid_script.rst.txt            |   14 +-
 docs/_sources/dev/index.rst.txt                    |    4 +-
 docs/_sources/dev/inferbound.rst.txt               |   30 +-
 .../introduction_to_module_serialization.rst.txt   |    4 +-
 docs/_sources/dev/pass_infra.rst.txt               |   26 +-
 docs/_sources/dev/relay_add_op.rst.txt             |    2 +-
 docs/_sources/dev/relay_add_pass.rst.txt           |    6 +-
 .../dev/relay_bring_your_own_codegen.rst.txt       |   14 +-
 docs/_sources/dev/relay_intro.rst.txt              |    6 +-
 docs/_sources/dev/runtime.rst.txt                  |   22 +-
 docs/_sources/dev/virtual_machine.rst.txt          |   18 +-
 docs/_sources/install/docker.rst.txt               |    2 +-
 docs/_sources/install/from_source.rst.txt          |    6 +-
 docs/_sources/langref/hybrid_script.rst.txt        |   13 +-
 docs/_sources/langref/relay_pattern.rst.txt        |   10 +-
 .../auto_scheduler/sg_execution_times.rst.txt      |    6 +-
 .../auto_scheduler/tune_conv2d_layer_cuda.rst.txt  | 1592 +---
 .../auto_scheduler/tune_matmul_x86.rst.txt         |  460 +-
 .../tutorials/autotvm/sg_execution_times.rst.txt   |   16 +-
 .../tutorials/autotvm/tune_conv2d_cuda.rst.txt     |   48 +-
 .../tutorials/autotvm/tune_relay_arm.rst.txt       |   12 +-
 .../tutorials/autotvm/tune_relay_cuda.rst.txt      |   10 +-
 .../autotvm/tune_relay_mobile_gpu.rst.txt          |   12 +-
 .../tutorials/autotvm/tune_relay_x86.rst.txt       |    4 -
 .../tutorials/autotvm/tune_simple_template.rst.txt |   24 +-
 .../tutorials/dev/bring_your_own_datatypes.rst.txt |  675 --
 .../tutorials/dev/low_level_custom_pass.rst.txt    |   66 +-
 .../tutorials/dev/sg_execution_times.rst.txt       |    7 +-
 docs/_sources/tutorials/dev/use_pass_infra.rst.txt | 3190 +-------
 docs/_sources/tutorials/frontend/build_gcn.rst.txt |   69 +-
 .../frontend/deploy_model_on_android.rst.txt       |   12 +-
 .../frontend/deploy_model_on_rasp.rst.txt          |    6 +-
 .../deploy_object_detection_pytorch.rst.txt        |   22 +-
 .../tutorials/frontend/deploy_prequantized.rst.txt |   14 +-
 .../frontend/deploy_prequantized_tflite.rst.txt    |    6 +-
 .../tutorials/frontend/deploy_quantized.rst.txt    |    2 +-
 .../tutorials/frontend/deploy_ssd_gluoncv.rst.txt  |    4 +-
 .../tutorials/frontend/from_caffe2.rst.txt         |    2 +-
 .../tutorials/frontend/from_coreml.rst.txt         |    2 +-
 .../tutorials/frontend/from_darknet.rst.txt        |    6 +-
 .../_sources/tutorials/frontend/from_keras.rst.txt |   30 +-
 .../_sources/tutorials/frontend/from_mxnet.rst.txt |    2 +-
 docs/_sources/tutorials/frontend/from_onnx.rst.txt |   15 +-
 .../tutorials/frontend/from_pytorch.rst.txt        |    8 +-
 .../tutorials/frontend/from_tensorflow.rst.txt     |    6 +-
 .../tutorials/frontend/from_tflite.rst.txt         |    2 +-
 .../tutorials/frontend/sg_execution_times.rst.txt  |   40 +-
 .../get_started/cross_compilation_and_rpc.rst.txt  |    6 +-
 .../get_started/relay_quick_start.rst.txt          |    8 +-
 .../get_started/sg_execution_times.rst.txt         |    9 +-
 .../get_started/tensor_expr_get_started.rst.txt    |    7 +-
 .../get_started/tvmc_command_line_driver.rst.txt   |  371 -
 docs/_sources/tutorials/index.rst.txt              |   58 +-
 docs/_sources/tutorials/language/reduction.rst.txt |  156 +-
 docs/_sources/tutorials/language/scan.rst.txt      |   94 +-
 .../tutorials/language/schedule_primitives.rst.txt |  355 +-
 .../tutorials/language/sg_execution_times.rst.txt  |   16 +-
 docs/_sources/tutorials/language/tedd.rst.txt      |    6 +-
 docs/_sources/tutorials/language/tensorize.rst.txt |  142 +-
 .../tutorials/language/tuple_inputs.rst.txt        |  111 +-
 docs/_sources/tutorials/micro/micro_tflite.rst.txt |    2 +-
 .../tutorials/micro/sg_execution_times.rst.txt     |    4 +-
 .../tutorials/optimize/opt_conv_cuda.rst.txt       |    8 +-
 .../tutorials/optimize/opt_conv_tensorcore.rst.txt |   68 +-
 docs/_sources/tutorials/optimize/opt_gemm.rst.txt  |  243 +-
 .../tutorials/optimize/sg_execution_times.rst.txt  |   10 +-
 docs/_sources/tutorials/topi/intro_topi.rst.txt    |  360 +-
 .../tutorials/topi/sg_execution_times.rst.txt      |    4 +-
 docs/_sources/vta/dev/hardware.rst.txt             |   12 +-
 docs/_sources/vta/dev/index.rst.txt                |    2 +-
 docs/_sources/vta/index.rst.txt                    |    2 +-
 docs/_sources/vta/install.rst.txt                  |    4 +-
 .../tutorials/autotvm/sg_execution_times.rst.txt   |    4 +-
 .../vta/tutorials/autotvm/tune_relay_vta.rst.txt   |   10 +-
 .../frontend/deploy_classification.rst.txt         |   10 +-
 .../tutorials/frontend/sg_execution_times.rst.txt  |    4 +-
 .../_sources/vta/tutorials/matrix_multiply.rst.txt |  107 +-
 .../vta/tutorials/optimize/convolution_opt.rst.txt |  107 +-
 .../tutorials/optimize/matrix_multiply_opt.rst.txt |  101 +-
 .../tutorials/optimize/sg_execution_times.rst.txt  |    6 +-
 .../vta/tutorials/sg_execution_times.rst.txt       |    6 +-
 .../_sources/vta/tutorials/vta_get_started.rst.txt |   72 +-
 docs/_static/basic.css                             |  141 +-
 docs/_static/css/badge_only.css                    |    2 +-
 docs/_static/css/fonts/Roboto-Slab-Bold.woff       |  Bin 87624 -> 0 bytes
 docs/_static/css/fonts/Roboto-Slab-Bold.woff2      |  Bin 67312 -> 0 bytes
 docs/_static/css/fonts/Roboto-Slab-Regular.woff    |  Bin 86288 -> 0 bytes
 docs/_static/css/fonts/Roboto-Slab-Regular.woff2   |  Bin 66444 -> 0 bytes
 docs/_static/css/fonts/fontawesome-webfont.eot     |  Bin 165742 -> 0 bytes
 docs/_static/css/fonts/fontawesome-webfont.svg     | 2671 -------
 docs/_static/css/fonts/fontawesome-webfont.ttf     |  Bin 165548 -> 0 bytes
 docs/_static/css/fonts/fontawesome-webfont.woff    |  Bin 98024 -> 0 bytes
 docs/_static/css/fonts/fontawesome-webfont.woff2   |  Bin 77160 -> 0 bytes
 docs/_static/css/fonts/lato-bold-italic.woff       |  Bin 323344 -> 0 bytes
 docs/_static/css/fonts/lato-bold-italic.woff2      |  Bin 193308 -> 0 bytes
 docs/_static/css/fonts/lato-bold.woff              |  Bin 309728 -> 0 bytes
 docs/_static/css/fonts/lato-bold.woff2             |  Bin 184912 -> 0 bytes
 docs/_static/css/fonts/lato-normal-italic.woff     |  Bin 328412 -> 0 bytes
 docs/_static/css/fonts/lato-normal-italic.woff2    |  Bin 195704 -> 0 bytes
 docs/_static/css/fonts/lato-normal.woff            |  Bin 309192 -> 0 bytes
 docs/_static/css/fonts/lato-normal.woff2           |  Bin 182708 -> 0 bytes
 docs/_static/css/gallery.css                       |  192 -
 docs/_static/css/theme.css                         |    6 +-
 docs/_static/css/tlcpack_theme.css                 | 1054 ---
 docs/_static/documentation_options.js              |    2 +-
 docs/_static/fonts/FontAwesome.otf                 |  Bin 134808 -> 0 bytes
 docs/_static/fonts/Inconsolata-Bold.ttf            |  Bin 0 -> 109948 bytes
 docs/_static/fonts/Inconsolata-Regular.ttf         |  Bin 0 -> 96964 bytes
 docs/_static/fonts/Inconsolata.ttf                 |  Bin 0 -> 63184 bytes
 docs/_static/fonts/Lato-Bold.ttf                   |  Bin 0 -> 656544 bytes
 docs/_static/fonts/Lato-Regular.ttf                |  Bin 0 -> 656568 bytes
 docs/_static/fonts/Roboto-Slab-Bold.woff           |  Bin 87624 -> 0 bytes
 docs/_static/fonts/Roboto-Slab-Bold.woff2          |  Bin 67312 -> 0 bytes
 docs/_static/fonts/Roboto-Slab-Light.woff          |  Bin 88600 -> 0 bytes
 docs/_static/fonts/Roboto-Slab-Light.woff2         |  Bin 67884 -> 0 bytes
 docs/_static/fonts/Roboto-Slab-Regular.woff        |  Bin 86288 -> 0 bytes
 docs/_static/fonts/Roboto-Slab-Regular.woff2       |  Bin 66444 -> 0 bytes
 docs/_static/fonts/Roboto-Slab-Thin.woff           |  Bin 87452 -> 0 bytes
 docs/_static/fonts/Roboto-Slab-Thin.woff2          |  Bin 66328 -> 0 bytes
 docs/_static/fonts/RobotoSlab-Bold.ttf             |  Bin 0 -> 170616 bytes
 docs/_static/fonts/RobotoSlab-Regular.ttf          |  Bin 0 -> 169064 bytes
 docs/_static/fonts/lato-bold-italic.woff           |  Bin 323344 -> 0 bytes
 docs/_static/fonts/lato-bold-italic.woff2          |  Bin 193308 -> 0 bytes
 docs/_static/fonts/lato-bold.woff                  |  Bin 309728 -> 0 bytes
 docs/_static/fonts/lato-bold.woff2                 |  Bin 184912 -> 0 bytes
 docs/_static/fonts/lato-normal-italic.woff         |  Bin 328412 -> 0 bytes
 docs/_static/fonts/lato-normal-italic.woff2        |  Bin 195704 -> 0 bytes
 docs/_static/fonts/lato-normal.woff                |  Bin 309192 -> 0 bytes
 docs/_static/fonts/lato-normal.woff2               |  Bin 182708 -> 0 bytes
 docs/_static/img/close-icon.svg                    |    3 -
 docs/_static/img/downangle.svg                     |    3 -
 docs/_static/img/dropdown-icon.svg                 |    3 -
 docs/_static/img/dwonloaddoc.svg                   |    5 -
 docs/_static/img/hide.svg                          |    3 -
 docs/_static/img/menu-icon.svg                     |    5 -
 docs/_static/img/nextarrow.svg                     |    4 -
 docs/_static/img/note.svg                          |    5 -
 docs/_static/img/pattern.svg                       |  114 -
 docs/_static/img/prevarrow.svg                     |    4 -
 docs/_static/img/right.svg                         |    3 -
 docs/_static/img/rightangle.svg                    |    3 -
 docs/_static/img/show.svg                          |    4 -
 docs/_static/img/source.svg                        |    5 -
 docs/_static/{jquery-3.5.1.js => jquery-3.4.1.js}  | 1238 ++-
 docs/_static/jquery.js                             |    4 +-
 docs/_static/js/badge_only.js                      |    1 -
 docs/_static/js/html5shiv-printshiv.min.js         |    4 -
 docs/_static/js/html5shiv.min.js                   |    4 -
 docs/_static/js/theme.js                           |    4 +-
 docs/_static/js/tlcpack_theme.js                   |   42 -
 docs/_static/searchtools.js                        |    3 +-
 docs/api/doxygen/algorithm_8h.html                 |    2 +-
 docs/api/doxygen/algorithm_8h__incl.svg            | 1684 +++--
 docs/api/doxygen/algorithm_8h_source.html          |    2 +-
 docs/api/doxygen/analyzer_8h.html                  |    4 +-
 docs/api/doxygen/analyzer_8h__dep__incl.svg        | 1307 ++--
 docs/api/doxygen/analyzer_8h__incl.svg             | 1651 ++--
 docs/api/doxygen/analyzer_8h_source.html           |    2 +-
 docs/api/doxygen/annotated.html                    |  421 +-
 docs/api/doxygen/annotation_8h.html                |    2 +-
 docs/api/doxygen/annotation_8h__incl.svg           | 1419 ++--
 docs/api/doxygen/annotation_8h_source.html         |    2 +-
 docs/api/doxygen/array__utils_8h.html              |    2 +-
 docs/api/doxygen/array__utils_8h__incl.svg         | 1517 ++--
 docs/api/doxygen/attr__registry__map_8h.html       |    2 +-
 docs/api/doxygen/attr__registry__map_8h__incl.svg  |  793 +-
 .../api/doxygen/attr__registry__map_8h_source.html |    2 +-
 docs/api/doxygen/auto__schedule_8h.html            |    2 +-
 docs/api/doxygen/auto__schedule_8h__incl.svg       | 1504 ++--
 docs/api/doxygen/auto__schedule_8h_source.html     |    2 +-
 docs/api/doxygen/auto__scheduler_2feature_8h.html  |    2 +-
 .../doxygen/auto__scheduler_2feature_8h__incl.svg  | 1213 +--
 docs/api/doxygen/autodiff_8h.html                  |    2 +-
 docs/api/doxygen/autodiff_8h__incl.svg             | 1917 ++---
 docs/api/doxygen/base_8h.html                      |    6 +-
 docs/api/doxygen/base_8h__incl.svg                 | 1597 ++--
 docs/api/doxygen/base_8h_source.html               |    4 +-
 .../{bias__add_8h.html => batch__matmul_8h.html}   |   22 +-
 docs/api/doxygen/batch__matmul_8h__incl.svg        | 1419 ++++
 docs/api/doxygen/batch__matmul_8h_source.html      |  111 +
 docs/api/doxygen/bias__add_8h.html                 |    2 +-
 docs/api/doxygen/bias__add_8h__incl.svg            | 1420 ++--
 docs/api/doxygen/bias__add_8h_source.html          |    6 +-
 docs/api/doxygen/bitserial_8h.html                 |    2 +-
 docs/api/doxygen/bitserial_8h__incl.svg            | 1707 +++--
 docs/api/doxygen/bitserial_8h_source.html          |    2 +-
 docs/api/doxygen/bound_8h.html                     |    4 +-
 docs/api/doxygen/bound_8h__dep__incl.svg           |  947 ++-
 docs/api/doxygen/bound_8h__incl.svg                | 1669 +++--
 docs/api/doxygen/broadcast_8h.html                 |    2 +-
 docs/api/doxygen/broadcast_8h__incl.svg            | 2147 +++---
 docs/api/doxygen/broadcast_8h_source.html          |   54 +-
 docs/api/doxygen/buffer_8h.html                    |    4 +-
 docs/api/doxygen/buffer_8h__dep__incl.svg          | 1012 ++-
 docs/api/doxygen/buffer_8h__incl.svg               | 1399 ++--
 docs/api/doxygen/buffer_8h_source.html             |    2 +-
 docs/api/doxygen/builtin_8h.html                   |    2 +-
 docs/api/doxygen/builtin_8h__incl.svg              | 1916 ++---
 docs/api/doxygen/bytecode_8h.html                  |    3 +-
 docs/api/doxygen/bytecode_8h__incl.svg             |  139 +-
 docs/api/doxygen/bytecode_8h_source.html           |  100 +-
 docs/api/doxygen/c__runtime__api_8h.html           |   31 +-
 docs/api/doxygen/c__runtime__api_8h_source.html    |    7 +-
 docs/api/doxygen/classes.html                      |  339 +-
 .../api/doxygen/classtvm_1_1AttrFieldInfoNode.html |    2 +-
 .../classtvm_1_1AttrFieldInfoNode__coll__graph.svg |  161 +-
 docs/api/doxygen/classtvm_1_1BaseAttrsNode.html    |   10 +-
 docs/api/doxygen/classtvm_1_1BaseExprNode.html     |    6 +-
 .../classtvm_1_1BaseExprNode__inherit__graph.svg   | 1068 +--
 .../classtvm_1_1BaseExpr__inherit__graph.svg       |  743 +-
 docs/api/doxygen/classtvm_1_1BaseFuncNode.html     |    2 +-
 docs/api/doxygen/classtvm_1_1ConstructorNode.html  |    4 +-
 .../classtvm_1_1ConstructorNode__coll__graph.svg   |  307 +-
 .../doxygen/classtvm_1_1Diagnostic-members.html    |  112 -
 docs/api/doxygen/classtvm_1_1Diagnostic.html       |  330 -
 .../classtvm_1_1DiagnosticBuilder-members.html     |  114 -
 .../api/doxygen/classtvm_1_1DiagnosticBuilder.html |  351 -
 .../classtvm_1_1DiagnosticBuilder__coll__graph.svg |   86 -
 .../classtvm_1_1DiagnosticContext-members.html     |  112 -
 .../api/doxygen/classtvm_1_1DiagnosticContext.html |  320 -
 .../classtvm_1_1DiagnosticContextNode-members.html |  112 -
 .../doxygen/classtvm_1_1DiagnosticContextNode.html |  299 -
 ...sstvm_1_1DiagnosticContextNode__coll__graph.svg |  120 -
 ...vm_1_1DiagnosticContextNode__inherit__graph.svg |   41 -
 .../classtvm_1_1DiagnosticContext__coll__graph.svg |   43 -
 ...asstvm_1_1DiagnosticContext__inherit__graph.svg |   43 -
 .../classtvm_1_1DiagnosticNode-members.html        |  112 -
 docs/api/doxygen/classtvm_1_1DiagnosticNode.html   |  304 -
 .../classtvm_1_1DiagnosticNode__coll__graph.svg    |  165 -
 .../classtvm_1_1DiagnosticNode__inherit__graph.svg |   41 -
 .../classtvm_1_1DiagnosticRenderer-members.html    |  110 -
 .../doxygen/classtvm_1_1DiagnosticRenderer.html    |  248 -
 ...classtvm_1_1DiagnosticRendererNode-members.html |  109 -
 .../classtvm_1_1DiagnosticRendererNode.html        |  233 -
 ...stvm_1_1DiagnosticRendererNode__coll__graph.svg |   56 -
 ...m_1_1DiagnosticRendererNode__inherit__graph.svg |   38 -
 ...classtvm_1_1DiagnosticRenderer__coll__graph.svg |   41 -
 ...sstvm_1_1DiagnosticRenderer__inherit__graph.svg |   41 -
 .../classtvm_1_1Diagnostic__coll__graph.svg        |   43 -
 .../classtvm_1_1Diagnostic__inherit__graph.svg     |   43 -
 docs/api/doxygen/classtvm_1_1EnvFuncNode.html      |    2 +-
 .../classtvm_1_1EnvFuncNode__coll__graph.svg       |  183 +-
 docs/api/doxygen/classtvm_1_1FloatImmNode.html     |    4 +-
 docs/api/doxygen/classtvm_1_1FuncType-members.html |    2 +-
 docs/api/doxygen/classtvm_1_1FuncType.html         |   17 +-
 .../doxygen/classtvm_1_1GlobalTypeVar-members.html |    2 +-
 docs/api/doxygen/classtvm_1_1GlobalTypeVar.html    |   17 +-
 .../api/doxygen/classtvm_1_1GlobalTypeVarNode.html |    2 +-
 .../classtvm_1_1GlobalTypeVarNode__coll__graph.svg |  243 +-
 docs/api/doxygen/classtvm_1_1GlobalVarNode.html    |    4 +-
 .../classtvm_1_1GlobalVarNode__coll__graph.svg     |  285 +-
 docs/api/doxygen/classtvm_1_1IRModule-members.html |    2 +-
 docs/api/doxygen/classtvm_1_1IRModule.html         |   17 +-
 .../doxygen/classtvm_1_1IRModuleNode-members.html  |   13 +-
 docs/api/doxygen/classtvm_1_1IRModuleNode.html     |   21 +-
 .../classtvm_1_1IRModuleNode__coll__graph.svg      |  147 +-
 .../classtvm_1_1IRModuleNode__inherit__graph.svg   |   35 +-
 .../classtvm_1_1IncompleteType-members.html        |    2 +-
 docs/api/doxygen/classtvm_1_1IncompleteType.html   |   21 +-
 docs/api/doxygen/classtvm_1_1IntImmNode.html       |    4 +-
 docs/api/doxygen/classtvm_1_1OpNode.html           |    4 +-
 .../doxygen/classtvm_1_1OpNode__coll__graph.svg    |  313 +-
 docs/api/doxygen/classtvm_1_1PrimExpr.html         |    2 +-
 docs/api/doxygen/classtvm_1_1PrimExprNode.html     |    8 +-
 .../classtvm_1_1PrimExprNode__inherit__graph.svg   |  760 +-
 .../classtvm_1_1PrimExpr__inherit__graph.svg       | 1040 ++-
 docs/api/doxygen/classtvm_1_1RelayExprNode.html    |    2 +-
 .../doxygen/classtvm_1_1RelayRefType-members.html  |    2 +-
 docs/api/doxygen/classtvm_1_1RelayRefType.html     |   18 +-
 docs/api/doxygen/classtvm_1_1SourceNameNode.html   |    2 +-
 .../classtvm_1_1SourceNameNode__coll__graph.svg    |  157 +-
 docs/api/doxygen/classtvm_1_1Span-members.html     |    2 +-
 docs/api/doxygen/classtvm_1_1Span.html             |   10 +-
 docs/api/doxygen/classtvm_1_1TargetKindNode.html   |    2 +-
 .../classtvm_1_1TargetKindNode__coll__graph.svg    |  189 +-
 docs/api/doxygen/classtvm_1_1TargetNode.html       |    2 +-
 .../classtvm_1_1TargetNode__coll__graph.svg        |  255 +-
 docs/api/doxygen/classtvm_1_1TargetTagNode.html    |    2 +-
 .../classtvm_1_1TargetTagNode__coll__graph.svg     |  207 +-
 .../api/doxygen/classtvm_1_1TupleType-members.html |    2 +-
 docs/api/doxygen/classtvm_1_1TupleType.html        |   21 +-
 .../classtvm_1_1TypeReporterNode-members.html      |    6 +-
 docs/api/doxygen/classtvm_1_1TypeReporterNode.html |   74 +-
 .../classtvm_1_1TypeReporterNode__coll__graph.svg  |   46 +-
 ...lasstvm_1_1TypeReporterNode__inherit__graph.svg |   46 +-
 docs/api/doxygen/classtvm_1_1TypeVar-members.html  |    2 +-
 docs/api/doxygen/classtvm_1_1TypeVar.html          |   17 +-
 docs/api/doxygen/classtvm_1_1TypeVarNode.html      |    2 +-
 .../classtvm_1_1TypeVarNode__coll__graph.svg       |  243 +-
 .../classtvm_1_1arith_1_1ConstraintContext.html    |    2 +-
 .../classtvm_1_1arith_1_1IterMapExpr-members.html  |  111 -
 .../doxygen/classtvm_1_1arith_1_1IterMapExpr.html  |  181 -
 ...asstvm_1_1arith_1_1IterMapExprNode-members.html |  114 -
 .../classtvm_1_1arith_1_1IterMapExprNode.html      |  263 -
 ...vm_1_1arith_1_1IterMapExprNode__coll__graph.svg |  113 -
 ...1_1arith_1_1IterMapExprNode__inherit__graph.svg |  125 -
 ...asstvm_1_1arith_1_1IterMapExpr__coll__graph.svg |   76 -
 ...tvm_1_1arith_1_1IterMapExpr__inherit__graph.svg |  120 -
 .../classtvm_1_1arith_1_1IterMark-members.html     |  107 -
 .../api/doxygen/classtvm_1_1arith_1_1IterMark.html |  206 -
 .../classtvm_1_1arith_1_1IterMarkNode-members.html |  114 -
 .../doxygen/classtvm_1_1arith_1_1IterMarkNode.html |  358 -
 ...sstvm_1_1arith_1_1IterMarkNode__coll__graph.svg |   98 -
 ...vm_1_1arith_1_1IterMarkNode__inherit__graph.svg |   45 -
 .../classtvm_1_1arith_1_1IterMark__coll__graph.svg |   38 -
 ...asstvm_1_1arith_1_1IterMark__inherit__graph.svg |   38 -
 ...classtvm_1_1arith_1_1IterSplitExpr-members.html |  116 -
 .../classtvm_1_1arith_1_1IterSplitExpr.html        |  344 -
 ...stvm_1_1arith_1_1IterSplitExprNode-members.html |  121 -
 .../classtvm_1_1arith_1_1IterSplitExprNode.html    |  382 -
 ..._1_1arith_1_1IterSplitExprNode__coll__graph.svg |  214 -
 ...1arith_1_1IterSplitExprNode__inherit__graph.svg |  103 -
 ...stvm_1_1arith_1_1IterSplitExpr__coll__graph.svg |   99 -
 ...m_1_1arith_1_1IterSplitExpr__inherit__graph.svg |   99 -
 .../classtvm_1_1arith_1_1IterSumExpr-members.html  |  114 -
 .../doxygen/classtvm_1_1arith_1_1IterSumExpr.html  |  241 -
 ...asstvm_1_1arith_1_1IterSumExprNode-members.html |  119 -
 .../classtvm_1_1arith_1_1IterSumExprNode.html      |  348 -
 ...vm_1_1arith_1_1IterSumExprNode__coll__graph.svg |  203 -
 ...1_1arith_1_1IterSumExprNode__inherit__graph.svg |  101 -
 ...asstvm_1_1arith_1_1IterSumExpr__coll__graph.svg |   97 -
 ...tvm_1_1arith_1_1IterSumExpr__inherit__graph.svg |   97 -
 ...1auto__scheduler_1_1AnnotationStep-members.html |    3 +-
 ...sstvm_1_1auto__scheduler_1_1AnnotationStep.html |   11 +-
 ...o__scheduler_1_1AnnotationStepNode-members.html |    2 +-
 ...m_1_1auto__scheduler_1_1AnnotationStepNode.html |    8 +-
 ...o__scheduler_1_1AnnotationStep__coll__graph.svg |   45 +-
 ...scheduler_1_1AnnotationStep__inherit__graph.svg |   45 +-
 ...stvm_1_1auto__scheduler_1_1BuildResultNode.html |    2 +-
 ...__scheduler_1_1BuildResultNode__coll__graph.svg |  171 +-
 ..._1auto__scheduler_1_1CacheReadStep-members.html |    3 +-
 ...asstvm_1_1auto__scheduler_1_1CacheReadStep.html |   11 +-
 ...to__scheduler_1_1CacheReadStepNode-members.html |    2 +-
 ...vm_1_1auto__scheduler_1_1CacheReadStepNode.html |   10 +-
 ...scheduler_1_1CacheReadStepNode__coll__graph.svg |  191 +-
 ...to__scheduler_1_1CacheReadStep__coll__graph.svg |   45 +-
 ..._scheduler_1_1CacheReadStep__inherit__graph.svg |   45 +-
 ...1auto__scheduler_1_1CacheWriteStep-members.html |    3 +-
 ...sstvm_1_1auto__scheduler_1_1CacheWriteStep.html |   11 +-
 ...o__scheduler_1_1CacheWriteStepNode-members.html |    2 +-
 ...m_1_1auto__scheduler_1_1CacheWriteStepNode.html |   10 +-
 ...cheduler_1_1CacheWriteStepNode__coll__graph.svg |  199 +-
 ...o__scheduler_1_1CacheWriteStep__coll__graph.svg |   45 +-
 ...scheduler_1_1CacheWriteStep__inherit__graph.svg |   45 +-
 ..._1auto__scheduler_1_1ComputeAtStep-members.html |    3 +-
 ...asstvm_1_1auto__scheduler_1_1ComputeAtStep.html |   11 +-
 ...to__scheduler_1_1ComputeAtStepNode-members.html |    2 +-
 ...vm_1_1auto__scheduler_1_1ComputeAtStepNode.html |    8 +-
 ...to__scheduler_1_1ComputeAtStep__coll__graph.svg |   45 +-
 ..._scheduler_1_1ComputeAtStep__inherit__graph.svg |   45 +-
 ...m_1_1auto__scheduler_1_1ComputeDAG-members.html |   19 +-
 .../classtvm_1_1auto__scheduler_1_1ComputeDAG.html |   83 +-
 ...1auto__scheduler_1_1ComputeDAG__coll__graph.svg |   37 +-
 ...to__scheduler_1_1ComputeDAG__inherit__graph.svg |   37 +-
 ...to__scheduler_1_1ComputeInlineStep-members.html |    3 +-
 ...vm_1_1auto__scheduler_1_1ComputeInlineStep.html |   11 +-
 ...scheduler_1_1ComputeInlineStepNode-members.html |    2 +-
 ..._1auto__scheduler_1_1ComputeInlineStepNode.html |    8 +-
 ...scheduler_1_1ComputeInlineStep__coll__graph.svg |   45 +-
 ...eduler_1_1ComputeInlineStep__inherit__graph.svg |   45 +-
 ...auto__scheduler_1_1ComputeRootStep-members.html |    3 +-
 ...stvm_1_1auto__scheduler_1_1ComputeRootStep.html |   11 +-
 ...__scheduler_1_1ComputeRootStepNode-members.html |    2 +-
 ..._1_1auto__scheduler_1_1ComputeRootStepNode.html |    8 +-
 ...__scheduler_1_1ComputeRootStep__coll__graph.svg |   45 +-
 ...cheduler_1_1ComputeRootStep__inherit__graph.svg |   45 +-
 ..._1auto__scheduler_1_1CostModelNode-members.html |    1 -
 ...asstvm_1_1auto__scheduler_1_1CostModelNode.html |   35 +-
 ...to__scheduler_1_1CostModelNode__coll__graph.svg |   43 +-
 ..._scheduler_1_1CostModelNode__inherit__graph.svg |   51 +-
 ..._scheduler_1_1FollowFusedSplitStep-members.html |    7 +-
 ...1_1auto__scheduler_1_1FollowFusedSplitStep.html |   11 +-
 ...eduler_1_1FollowFusedSplitStepNode-members.html |    2 +-
 ...uto__scheduler_1_1FollowFusedSplitStepNode.html |    8 +-
 ...eduler_1_1FollowFusedSplitStep__coll__graph.svg |   45 +-
 ...ler_1_1FollowFusedSplitStep__inherit__graph.svg |   45 +-
 ...auto__scheduler_1_1FollowSplitStep-members.html |    7 +-
 ...stvm_1_1auto__scheduler_1_1FollowSplitStep.html |   11 +-
 ...__scheduler_1_1FollowSplitStepNode-members.html |    2 +-
 ..._1_1auto__scheduler_1_1FollowSplitStepNode.html |    8 +-
 ...__scheduler_1_1FollowSplitStep__coll__graph.svg |   45 +-
 ...cheduler_1_1FollowSplitStep__inherit__graph.svg |   45 +-
 ...tvm_1_1auto__scheduler_1_1FuseStep-members.html |    7 +-
 .../classtvm_1_1auto__scheduler_1_1FuseStep.html   |   11 +-
 ...1_1auto__scheduler_1_1FuseStepNode-members.html |    2 +-
 ...lasstvm_1_1auto__scheduler_1_1FuseStepNode.html |    8 +-
 ...1_1auto__scheduler_1_1FuseStep__coll__graph.svg |   45 +-
 ...auto__scheduler_1_1FuseStep__inherit__graph.svg |   45 +-
 ...lasstvm_1_1auto__scheduler_1_1IteratorNode.html |    2 +-
 ...uto__scheduler_1_1IteratorNode__coll__graph.svg |  189 +-
 ...tvm_1_1auto__scheduler_1_1LocalBuilderNode.html |    2 +-
 ..._scheduler_1_1LocalBuilderNode__coll__graph.svg |  193 +-
 ...vm_1_1auto__scheduler_1_1MeasureResultNode.html |    2 +-
 ...scheduler_1_1MeasureResultNode__coll__graph.svg |  169 +-
 ...m_1_1auto__scheduler_1_1PragmaStep-members.html |    7 +-
 .../classtvm_1_1auto__scheduler_1_1PragmaStep.html |   11 +-
 ...1auto__scheduler_1_1PragmaStepNode-members.html |    2 +-
 ...sstvm_1_1auto__scheduler_1_1PragmaStepNode.html |   10 +-
 ...o__scheduler_1_1PragmaStepNode__coll__graph.svg |  201 +-
 ...1auto__scheduler_1_1PragmaStep__coll__graph.svg |   45 +-
 ...to__scheduler_1_1PragmaStep__inherit__graph.svg |   45 +-
 ...to__scheduler_1_1PreloadMeasuredStatesNode.html |    2 +-
 ...r_1_1PreloadMeasuredStatesNode__coll__graph.svg |  189 +-
 ...stvm_1_1auto__scheduler_1_1ProgramMeasurer.html |    8 +-
 ...__scheduler_1_1ProgramMeasurerNode-members.html |    2 +-
 ..._1_1auto__scheduler_1_1ProgramMeasurerNode.html |   28 +-
 ..._scheduler_1_1PythonBasedModelNode-members.html |    1 -
 ...1_1auto__scheduler_1_1PythonBasedModelNode.html |    7 +-
 ...eduler_1_1PythonBasedModelNode__coll__graph.svg |   69 +-
 ...ler_1_1PythonBasedModelNode__inherit__graph.svg |   47 +-
 ...asstvm_1_1auto__scheduler_1_1RPCRunnerNode.html |    2 +-
 ...to__scheduler_1_1RPCRunnerNode__coll__graph.svg |  209 +-
 ...auto__scheduler_1_1RandomModelNode-members.html |    1 -
 ...stvm_1_1auto__scheduler_1_1RandomModelNode.html |    7 +-
 ...__scheduler_1_1RandomModelNode__coll__graph.svg |   65 +-
 ...cheduler_1_1RandomModelNode__inherit__graph.svg |   47 +-
 ...tvm_1_1auto__scheduler_1_1RecordReaderNode.html |    2 +-
 ..._scheduler_1_1RecordReaderNode__coll__graph.svg |  157 +-
 ...tvm_1_1auto__scheduler_1_1RecordToFileNode.html |    2 +-
 ..._scheduler_1_1RecordToFileNode__coll__graph.svg |  189 +-
 ..._1_1auto__scheduler_1_1ReorderStep-members.html |    7 +-
 ...classtvm_1_1auto__scheduler_1_1ReorderStep.html |   11 +-
 ...auto__scheduler_1_1ReorderStepNode-members.html |    2 +-
 ...stvm_1_1auto__scheduler_1_1ReorderStepNode.html |    8 +-
 ...auto__scheduler_1_1ReorderStep__coll__graph.svg |   45 +-
 ...o__scheduler_1_1ReorderStep__inherit__graph.svg |   45 +-
 ..._1_1auto__scheduler_1_1RfactorStep-members.html |    7 +-
 ...classtvm_1_1auto__scheduler_1_1RfactorStep.html |   11 +-
 ...auto__scheduler_1_1RfactorStepNode-members.html |    2 +-
 ...stvm_1_1auto__scheduler_1_1RfactorStepNode.html |    8 +-
 ...auto__scheduler_1_1RfactorStep__coll__graph.svg |   45 +-
 ...o__scheduler_1_1RfactorStep__inherit__graph.svg |   45 +-
 ...uto__scheduler_1_1SearchPolicyNode-members.html |   21 +-
 ...tvm_1_1auto__scheduler_1_1SearchPolicyNode.html |   51 +-
 ..._scheduler_1_1SearchPolicyNode__coll__graph.svg |   89 +-
 ...heduler_1_1SearchPolicyNode__inherit__graph.svg |   51 +-
 ...sstvm_1_1auto__scheduler_1_1SearchTaskNode.html |    2 +-
 ...o__scheduler_1_1SearchTaskNode__coll__graph.svg |  270 +-
 ...vm_1_1auto__scheduler_1_1SplitStep-members.html |    7 +-
 .../classtvm_1_1auto__scheduler_1_1SplitStep.html  |   11 +-
 ..._1auto__scheduler_1_1SplitStepNode-members.html |    2 +-
 ...asstvm_1_1auto__scheduler_1_1SplitStepNode.html |    8 +-
 ..._1auto__scheduler_1_1SplitStep__coll__graph.svg |   45 +-
 ...uto__scheduler_1_1SplitStep__inherit__graph.svg |   45 +-
 ...lasstvm_1_1auto__scheduler_1_1Step-members.html |    3 +-
 .../classtvm_1_1auto__scheduler_1_1Step.html       |   33 +-
 ...tvm_1_1auto__scheduler_1_1Step__coll__graph.svg |   41 +-
 ..._1_1auto__scheduler_1_1Step__inherit__graph.svg |   97 +-
 ...uto__scheduler_1_1StorageAlignStep-members.html |    7 +-
 ...tvm_1_1auto__scheduler_1_1StorageAlignStep.html |   11 +-
 ..._scheduler_1_1StorageAlignStepNode-members.html |    2 +-
 ...1_1auto__scheduler_1_1StorageAlignStepNode.html |    8 +-
 ..._scheduler_1_1StorageAlignStep__coll__graph.svg |   45 +-
 ...heduler_1_1StorageAlignStep__inherit__graph.svg |   45 +-
 ...classtvm_1_1detail_1_1AttrDocEntry-members.html |    4 +-
 .../classtvm_1_1detail_1_1AttrDocEntry.html        |   18 +-
 docs/api/doxygen/classtvm_1_1parser_1_1Source.html |  208 -
 .../classtvm_1_1parser_1_1SourceMap-members.html   |    9 +-
 .../doxygen/classtvm_1_1parser_1_1SourceMap.html   |  103 +-
 ...lasstvm_1_1parser_1_1SourceMapNode-members.html |    2 +-
 .../classtvm_1_1parser_1_1SourceMapNode.html       |   10 +-
 ...tvm_1_1parser_1_1SourceMapNode__coll__graph.svg |    4 +-
 ...lasstvm_1_1parser_1_1SourceMap__coll__graph.svg |   45 +-
 ...stvm_1_1parser_1_1SourceMap__inherit__graph.svg |   45 +-
 .../classtvm_1_1parser_1_1SourceNode-members.html  |  111 -
 .../doxygen/classtvm_1_1parser_1_1SourceNode.html  |  263 -
 ...asstvm_1_1parser_1_1SourceNode__coll__graph.svg |  163 -
 ...tvm_1_1parser_1_1SourceNode__inherit__graph.svg |   40 -
 .../classtvm_1_1parser_1_1Source__coll__graph.svg  |   39 -
 ...lasstvm_1_1parser_1_1Source__inherit__graph.svg |   39 -
 .../api/doxygen/classtvm_1_1relay_1_1CallNode.html |    2 +-
 .../doxygen/classtvm_1_1relay_1_1ConstantNode.html |    2 +-
 .../doxygen/classtvm_1_1relay_1_1FunctionNode.html |    2 +-
 docs/api/doxygen/classtvm_1_1relay_1_1IdNode.html  |    2 +-
 .../classtvm_1_1relay_1_1IdNode__coll__graph.svg   |  157 +-
 docs/api/doxygen/classtvm_1_1relay_1_1IfNode.html  |    2 +-
 docs/api/doxygen/classtvm_1_1relay_1_1LetNode.html |    2 +-
 .../doxygen/classtvm_1_1relay_1_1MatchNode.html    |    2 +-
 .../classtvm_1_1relay_1_1OpImplementationNode.html |    2 +-
 ...1relay_1_1OpImplementationNode__coll__graph.svg |  203 +-
 .../classtvm_1_1relay_1_1RefCreateNode.html        |    2 +-
 .../doxygen/classtvm_1_1relay_1_1RefReadNode.html  |    2 +-
 .../doxygen/classtvm_1_1relay_1_1RefWriteNode.html |    2 +-
 .../doxygen/classtvm_1_1relay_1_1TempExprNode.html |    2 +-
 .../classtvm_1_1relay_1_1TupleGetItemNode.html     |    2 +-
 .../doxygen/classtvm_1_1relay_1_1TupleNode.html    |    2 +-
 docs/api/doxygen/classtvm_1_1relay_1_1VarNode.html |    2 +-
 .../classtvm_1_1relay_1_1VarPatternNode.html       |    2 +-
 ...tvm_1_1relay_1_1VarPatternNode__coll__graph.svg |  203 +-
 .../doxygen/classtvm_1_1runtime_1_1NDArray.html    |    4 +-
 .../api/doxygen/classtvm_1_1runtime_1_1Object.html |    2 +-
 .../doxygen/classtvm_1_1runtime_1_1ObjectRef.html  |    2 +-
 ...tvm_1_1runtime_1_1ObjectRef__inherit__graph.svg |   71 +-
 .../doxygen/classtvm_1_1runtime_1_1Optional.html   |    2 +-
 .../classtvm_1_1runtime_1_1String-members.html     |   81 +-
 .../api/doxygen/classtvm_1_1runtime_1_1String.html |   40 +-
 .../classtvm_1_1runtime_1_1String__coll__graph.svg |  135 +-
 ...asstvm_1_1runtime_1_1String__inherit__graph.svg |   97 +-
 .../classtvm_1_1runtime_1_1TVMArgValue.html        |    2 +-
 ...m_1_1runtime_1_1TVMArgValue__inherit__graph.svg |  103 +-
 ...1_1runtime_1_1TVMMovableArgValue__-members.html |   30 +-
 ...lasstvm_1_1runtime_1_1TVMMovableArgValue__.html |  155 +-
 ...untime_1_1TVMMovableArgValue____coll__graph.svg |  133 +-
 ...ime_1_1TVMMovableArgValue____inherit__graph.svg |  105 +-
 .../classtvm_1_1runtime_1_1TVMPODValue__.html      |    2 +-
 ...1_1runtime_1_1TVMPODValue____inherit__graph.svg |  168 +-
 ...1_1TypedPackedFunc_3_01R_07Args_8_8_8_08_4.html |    6 +-
 docs/api/doxygen/classtvm_1_1tir_1_1AddNode.html   |    4 +-
 docs/api/doxygen/classtvm_1_1tir_1_1AndNode.html   |    4 +-
 docs/api/doxygen/classtvm_1_1tir_1_1AnyNode.html   |    4 +-
 .../doxygen/classtvm_1_1tir_1_1AttrStmtNode.html   |    2 +-
 ...lasstvm_1_1tir_1_1AttrStmtNode__coll__graph.svg |  261 +-
 ..._1_1tir_1_1BijectiveLayoutNode__coll__graph.svg |   16 +-
 .../doxygen/classtvm_1_1tir_1_1BinaryOpNode.html   |    4 +-
 .../doxygen/classtvm_1_1tir_1_1BroadcastNode.html  |    4 +-
 .../doxygen/classtvm_1_1tir_1_1BufferLoadNode.html |    4 +-
 .../api/doxygen/classtvm_1_1tir_1_1BufferNode.html |    2 +-
 .../classtvm_1_1tir_1_1BufferNode__coll__graph.svg |  193 +-
 docs/api/doxygen/classtvm_1_1tir_1_1CallNode.html  |    4 +-
 docs/api/doxygen/classtvm_1_1tir_1_1CastNode.html  |    4 +-
 docs/api/doxygen/classtvm_1_1tir_1_1CmpOpNode.html |    4 +-
 docs/api/doxygen/classtvm_1_1tir_1_1DivNode.html   |    4 +-
 docs/api/doxygen/classtvm_1_1tir_1_1EQNode.html    |    4 +-
 .../doxygen/classtvm_1_1tir_1_1ExprFunctor.html    |    2 +-
 .../doxygen/classtvm_1_1tir_1_1FloorDivNode.html   |    4 +-
 .../doxygen/classtvm_1_1tir_1_1FloorModNode.html   |    4 +-
 docs/api/doxygen/classtvm_1_1tir_1_1GENode.html    |    4 +-
 docs/api/doxygen/classtvm_1_1tir_1_1GTNode.html    |    4 +-
 .../doxygen/classtvm_1_1tir_1_1IterVarNode.html    |    2 +-
 ...classtvm_1_1tir_1_1IterVarNode__coll__graph.svg |  267 +-
 docs/api/doxygen/classtvm_1_1tir_1_1LTNode.html    |    4 +-
 .../doxygen/classtvm_1_1tir_1_1Layout-members.html |   25 +-
 docs/api/doxygen/classtvm_1_1tir_1_1Layout.html    |   29 -
 .../api/doxygen/classtvm_1_1tir_1_1LayoutNode.html |    2 +-
 .../classtvm_1_1tir_1_1LayoutNode__coll__graph.svg |  171 +-
 .../classtvm_1_1tir_1_1Layout__coll__graph.svg     |   16 +-
 .../classtvm_1_1tir_1_1Layout__inherit__graph.svg  |   16 +-
 docs/api/doxygen/classtvm_1_1tir_1_1LetNode.html   |    4 +-
 docs/api/doxygen/classtvm_1_1tir_1_1LoadNode.html  |    4 +-
 docs/api/doxygen/classtvm_1_1tir_1_1MaxNode.html   |    4 +-
 docs/api/doxygen/classtvm_1_1tir_1_1MinNode.html   |    4 +-
 docs/api/doxygen/classtvm_1_1tir_1_1ModNode.html   |    4 +-
 docs/api/doxygen/classtvm_1_1tir_1_1MulNode.html   |    4 +-
 docs/api/doxygen/classtvm_1_1tir_1_1NENode.html    |    4 +-
 docs/api/doxygen/classtvm_1_1tir_1_1NotNode.html   |    4 +-
 docs/api/doxygen/classtvm_1_1tir_1_1OrNode.html    |    4 +-
 .../doxygen/classtvm_1_1tir_1_1PrimFuncNode.html   |    2 +-
 .../classtvm_1_1tir_1_1ProducerLoadNode.html       |    4 +-
 docs/api/doxygen/classtvm_1_1tir_1_1RampNode.html  |    4 +-
 .../api/doxygen/classtvm_1_1tir_1_1ReduceNode.html |    4 +-
 .../api/doxygen/classtvm_1_1tir_1_1SelectNode.html |    4 +-
 .../doxygen/classtvm_1_1tir_1_1ShuffleNode.html    |    4 +-
 .../doxygen/classtvm_1_1tir_1_1SizeVarNode.html    |    6 +-
 ...classtvm_1_1tir_1_1SizeVarNode__coll__graph.svg |  283 +-
 .../doxygen/classtvm_1_1tir_1_1StringImmNode.html  |    6 +-
 ...asstvm_1_1tir_1_1StringImmNode__coll__graph.svg |  271 +-
 docs/api/doxygen/classtvm_1_1tir_1_1SubNode.html   |    4 +-
 docs/api/doxygen/classtvm_1_1tir_1_1VarNode.html   |    6 +-
 .../classtvm_1_1tir_1_1VarNode__coll__graph.svg    |  283 +-
 ...vm_1_1transform_1_1PassContextNode-members.html |    4 +-
 .../classtvm_1_1transform_1_1PassContextNode.html  |   34 +-
 ..._1transform_1_1PassContextNode__coll__graph.svg |  147 +-
 ...ransform_1_1PassContextNode__inherit__graph.svg |   12 +-
 .../classtvm_1_1transform_1_1PassInfoNode.html     |    2 +-
 ...m_1_1transform_1_1PassInfoNode__coll__graph.svg |  171 +-
 docs/api/doxygen/codegen_8h.html                   |    2 +-
 docs/api/doxygen/codegen_8h__incl.svg              | 1918 ++---
 docs/api/doxygen/codegen_8h_source.html            |    6 +-
 docs/api/doxygen/compute__dag_8h.html              |   16 +-
 docs/api/doxygen/compute__dag_8h__dep__incl.svg    |  103 +-
 docs/api/doxygen/compute__dag_8h__incl.svg         | 1783 ++---
 docs/api/doxygen/compute__dag_8h_source.html       |    7 +-
 docs/api/doxygen/constant__utils_8h.html           |    4 +-
 docs/api/doxygen/constant__utils_8h__dep__incl.svg |  177 +-
 docs/api/doxygen/constant__utils_8h__incl.svg      | 2071 +++---
 docs/api/doxygen/constant__utils_8h_source.html    |    2 +-
 docs/api/doxygen/cost__model_8h.html               |    2 +-
 docs/api/doxygen/cost__model_8h__incl.svg          | 1517 ++--
 docs/api/doxygen/cost__model_8h_source.html        |   25 +-
 docs/api/doxygen/cublas_8h.html                    |    2 +-
 docs/api/doxygen/cublas_8h__incl.svg               | 2032 ++---
 docs/api/doxygen/cuda_2dense_8h.html               |    2 +-
 docs/api/doxygen/cuda_2dense_8h__incl.svg          | 1363 ++--
 docs/api/doxygen/cuda_2dense_8h_source.html        |    2 +-
 docs/api/doxygen/cuda_2injective_8h.html           |    2 +-
 docs/api/doxygen/cuda_2injective_8h__incl.svg      | 1369 ++--
 docs/api/doxygen/cuda_2normalization_8h.html       |    2 +-
 docs/api/doxygen/cuda_2normalization_8h__incl.svg  | 1359 ++--
 docs/api/doxygen/cuda_2pooling_8h.html             |    2 +-
 docs/api/doxygen/cuda_2pooling_8h__incl.svg        | 1397 ++--
 docs/api/doxygen/cuda_2pooling_8h_source.html      |    2 +-
 docs/api/doxygen/cuda_2reduction_8h.html           |    2 +-
 docs/api/doxygen/cuda_2reduction_8h__incl.svg      | 1369 ++--
 docs/api/doxygen/cuda_2reduction_8h_source.html    |    2 +-
 docs/api/doxygen/cuda_2softmax_8h.html             |    2 +-
 docs/api/doxygen/cuda_2softmax_8h__incl.svg        | 1369 ++--
 docs/api/doxygen/data__layout_8h.html              |    2 +-
 docs/api/doxygen/data__layout_8h__incl.svg         | 1637 ++--
 docs/api/doxygen/data__layout_8h_source.html       |   43 +-
 docs/api/doxygen/data__type_8h.html                |    8 +-
 docs/api/doxygen/data__type_8h__dep__incl.svg      |  952 ++-
 docs/api/doxygen/data__type_8h__incl.svg           |  104 +-
 docs/api/doxygen/data__type_8h_source.html         |    2 +-
 docs/api/doxygen/dataflow__matcher_8h.html         |    2 +-
 docs/api/doxygen/dataflow__matcher_8h__incl.svg    | 1912 +++--
 docs/api/doxygen/dataflow__matcher_8h_source.html  |    4 +-
 docs/api/doxygen/dataflow__pattern_8h.html         |    2 +-
 docs/api/doxygen/dataflow__pattern_8h__incl.svg    | 1990 ++---
 .../api/doxygen/dataflow__pattern__functor_8h.html |    2 +-
 .../dataflow__pattern__functor_8h__incl.svg        | 1873 +++--
 .../dataflow__pattern__functor_8h_source.html      |    2 +-
 docs/api/doxygen/debug_8h.html                     |    2 +-
 docs/api/doxygen/debug_8h__incl.svg                | 1475 ++--
 docs/api/doxygen/debug_8h_source.html              |    2 +-
 docs/api/doxygen/detail_2broadcast_8h.html         |    4 +-
 .../doxygen/detail_2broadcast_8h__dep__incl.svg    |  105 +-
 docs/api/doxygen/detail_2broadcast_8h__incl.svg    | 1495 ++--
 docs/api/doxygen/detail_2broadcast_8h_source.html  |    6 +-
 docs/api/doxygen/detail_2extern_8h.html            |    2 +-
 docs/api/doxygen/detail_2extern_8h__incl.svg       | 1465 ++--
 docs/api/doxygen/detail_2extern_8h_source.html     |    6 +-
 docs/api/doxygen/device__api_8h.html               |    2 +-
 docs/api/doxygen/device__api_8h__incl.svg          |  685 +-
 docs/api/doxygen/device__api_8h_source.html        |    2 +-
 docs/api/doxygen/device__copy_8h.html              |    2 +-
 docs/api/doxygen/device__copy_8h__incl.svg         | 1419 ++--
 docs/api/doxygen/device__copy_8h_source.html       |    2 +-
 docs/api/doxygen/diagnostic_8h.html                |  158 -
 docs/api/doxygen/diagnostic_8h__dep__incl.svg      |  952 ---
 docs/api/doxygen/diagnostic_8h__incl.svg           | 1212 ---
 docs/api/doxygen/diagnostic_8h_source.html         |  146 -
 docs/api/doxygen/dilate_8h.html                    |    8 +-
 docs/api/doxygen/dilate_8h__incl.svg               | 2056 ++---
 docs/api/doxygen/dilate_8h_source.html             |    6 +-
 docs/api/doxygen/dir_000002_000006.html            |    2 +-
 docs/api/doxygen/dir_000002_000008.html            |    2 +-
 docs/api/doxygen/dir_000004_000009.html            |    2 +-
 docs/api/doxygen/dir_000006_000014.html            |   90 -
 docs/api/doxygen/dir_000008_000006.html            |    2 +-
 docs/api/doxygen/dir_000011_000009.html            |   90 -
 docs/api/doxygen/dir_000012_000006.html            |    2 +-
 docs/api/doxygen/dir_000012_000009.html            |    2 +-
 docs/api/doxygen/dir_000012_000021.html            |    2 +-
 docs/api/doxygen/dir_000025_000021.html            |    2 +-
 .../dir_194ecda214f05a38134392ac6a69b970.html      |    3 -
 .../dir_194ecda214f05a38134392ac6a69b970_dep.svg   |   12 +-
 .../dir_1f1b12d204a071c9e67e47fcbb552b86_dep.svg   |    4 +-
 .../dir_3a038e7bfa2370c6aee2a5aecd5d3ef1.html      |    3 +
 .../dir_3a038e7bfa2370c6aee2a5aecd5d3ef1_dep.svg   |    4 +-
 .../dir_519be2d4a83a987dbf989f1de527b870.html      |    2 +-
 .../dir_519be2d4a83a987dbf989f1de527b870_dep.svg   |   60 +-
 .../dir_54983dd6d74c59f67ee9e8e5a50aafc4_dep.svg   |    4 +-
 .../dir_5da96592f3a7c442b838b075c58254c2_dep.svg   |    4 +-
 .../dir_63946bee875c6d52bce55e72a67a86ad.html      |    2 +-
 .../dir_63946bee875c6d52bce55e72a67a86ad_dep.svg   |  267 +-
 .../dir_72c2f11201cd7636dc7624de0754daa5_dep.svg   |    4 +-
 .../dir_8e4e25e66b8623d88c5b5dd2040bca97.html      |    4 +-
 .../dir_8e4e25e66b8623d88c5b5dd2040bca97_dep.svg   |   16 +-
 .../dir_ac57496531ccbad72f774fa62e6de987_dep.svg   |    4 +-
 .../dir_b4c7d8e826c599ba55146c099a14beb5.html      |    2 +-
 .../dir_b4c7d8e826c599ba55146c099a14beb5_dep.svg   |  411 +-
 .../dir_dc867ff9a37cad1764f1670dc7eba6c1.html      |    5 +-
 .../dir_dc867ff9a37cad1764f1670dc7eba6c1_dep.svg   |  101 +-
 .../dir_f97d855a3173728370e632aa77170e34_dep.svg   |    4 +-
 .../dir_fafc18f54a755f417c55c769623cbfef.html      |    2 +-
 .../dir_fafc18f54a755f417c55c769623cbfef_dep.svg   |   59 +-
 docs/api/doxygen/driver__api_8h.html               |    2 +-
 docs/api/doxygen/driver__api_8h__incl.svg          | 1852 ++---
 docs/api/doxygen/elemwise_8h.html                  |    2 +-
 docs/api/doxygen/elemwise_8h__incl.svg             | 1742 ++---
 docs/api/doxygen/elemwise_8h_source.html           |   10 +-
 docs/api/doxygen/env__func_8h.html                 |    2 +-
 docs/api/doxygen/env__func_8h__incl.svg            | 1025 ++-
 docs/api/doxygen/env__func_8h_source.html          |    6 +-
 docs/api/doxygen/error_8h.html                     |    4 +-
 docs/api/doxygen/error_8h__dep__incl.svg           |  253 +-
 docs/api/doxygen/error_8h__incl.svg                | 1734 +++--
 docs/api/doxygen/error_8h_source.html              |    2 +-
 docs/api/doxygen/error__codes_8h.html              |    5 +-
 docs/api/doxygen/error__codes_8h__dep__incl.svg    |   50 +-
 docs/api/doxygen/error__codes_8h_source.html       |    5 +-
 docs/api/doxygen/executable_8h.html                |    2 +-
 docs/api/doxygen/executable_8h__incl.svg           |  676 +-
 docs/api/doxygen/files.html                        |   49 +-
 docs/api/doxygen/flatten_8h.html                   |    2 +-
 docs/api/doxygen/flatten_8h__incl.svg              | 1481 ++--
 docs/api/doxygen/functions.html                    |   15 +-
 docs/api/doxygen/functions_0x7e.html               |    3 -
 docs/api/doxygen/functions_a.html                  |   13 +-
 docs/api/doxygen/functions_b.html                  |    8 +-
 docs/api/doxygen/functions_c.html                  |    6 +-
 docs/api/doxygen/functions_d.html                  |   29 +-
 docs/api/doxygen/functions_e.html                  |   18 +-
 docs/api/doxygen/functions_f.html                  |    4 +-
 docs/api/doxygen/functions_func_0x7e.html          |    3 -
 docs/api/doxygen/functions_func_a.html             |   14 +-
 docs/api/doxygen/functions_func_b.html             |    3 -
 docs/api/doxygen/functions_func_c.html             |   12 +-
 docs/api/doxygen/functions_func_d.html             |   17 +-
 docs/api/doxygen/functions_func_e.html             |   13 +-
 docs/api/doxygen/functions_func_f.html             |    4 +-
 docs/api/doxygen/functions_func_g.html             |   14 +-
 docs/api/doxygen/functions_func_h.html             |    3 -
 docs/api/doxygen/functions_func_i.html             |   13 +-
 docs/api/doxygen/functions_func_l.html             |    2 +-
 docs/api/doxygen/functions_func_m.html             |    4 +-
 docs/api/doxygen/functions_func_n.html             |    3 -
 docs/api/doxygen/functions_func_o.html             |   46 +-
 docs/api/doxygen/functions_func_r.html             |   15 +-
 docs/api/doxygen/functions_func_s.html             |   36 +-
 docs/api/doxygen/functions_func_t.html             |   69 +-
 docs/api/doxygen/functions_func_v.html             |   32 +-
 docs/api/doxygen/functions_func_w.html             |    3 -
 docs/api/doxygen/functions_g.html                  |   14 +-
 docs/api/doxygen/functions_h.html                  |    3 -
 docs/api/doxygen/functions_i.html                  |   17 +-
 docs/api/doxygen/functions_k.html                  |    8 +-
 docs/api/doxygen/functions_l.html                  |   13 +-
 docs/api/doxygen/functions_m.html                  |   10 +-
 docs/api/doxygen/functions_n.html                  |    3 -
 docs/api/doxygen/functions_o.html                  |   40 +-
 docs/api/doxygen/functions_r.html                  |   17 +-
 docs/api/doxygen/functions_rela.html               |    3 -
 docs/api/doxygen/functions_s.html                  |   66 +-
 docs/api/doxygen/functions_t.html                  |   63 +-
 docs/api/doxygen/functions_v.html                  |   36 +-
 docs/api/doxygen/functions_vars.html               |   15 +-
 docs/api/doxygen/functions_vars_a.html             |    3 +-
 docs/api/doxygen/functions_vars_b.html             |    3 +-
 docs/api/doxygen/functions_vars_d.html             |   11 +-
 docs/api/doxygen/functions_vars_e.html             |    7 +-
 docs/api/doxygen/functions_vars_k.html             |    8 +-
 docs/api/doxygen/functions_vars_l.html             |   11 +-
 docs/api/doxygen/functions_vars_m.html             |    6 +-
 docs/api/doxygen/functions_vars_o.html             |    2 +-
 docs/api/doxygen/functions_vars_r.html             |    4 -
 docs/api/doxygen/functions_vars_s.html             |   23 +-
 docs/api/doxygen/functions_w.html                  |    3 -
 docs/api/doxygen/functor_8h.html                   |    4 +-
 docs/api/doxygen/functor_8h__dep__incl.svg         |  821 +-
 docs/api/doxygen/functor_8h__incl.svg              |  165 +-
 docs/api/doxygen/functor_8h_source.html            |    2 +-
 docs/api/doxygen/fuse_8h.html                      |    2 +-
 docs/api/doxygen/fuse_8h__incl.svg                 | 1517 ++--
 docs/api/doxygen/generic_2default_8h.html          |    2 +-
 docs/api/doxygen/generic_2default_8h__incl.svg     | 1369 ++--
 docs/api/doxygen/generic_2extern_8h.html           |    2 +-
 docs/api/doxygen/generic_2extern_8h__incl.svg      | 1419 ++--
 docs/api/doxygen/generic_2injective_8h.html        |    2 +-
 docs/api/doxygen/generic_2injective_8h__incl.svg   | 1369 ++--
 docs/api/doxygen/generic__func_8h.html             |    2 +-
 docs/api/doxygen/generic__func_8h__incl.svg        | 1413 ++--
 docs/api/doxygen/generic__func_8h_source.html      |    4 +-
 docs/api/doxygen/globals.html                      |   19 +-
 docs/api/doxygen/{globals.html => globals_d.html}  |    3 +-
 docs/api/doxygen/globals_defs.html                 |   17 +-
 docs/api/doxygen/globals_e.html                    |    3 +-
 docs/api/doxygen/globals_eval.html                 |    3 -
 docs/api/doxygen/globals_f.html                    |    3 +-
 docs/api/doxygen/globals_func.html                 |    8 +-
 docs/api/doxygen/globals_g.html                    |    3 +-
 docs/api/doxygen/globals_i.html                    |    3 +-
 docs/api/doxygen/globals_k.html                    |    6 +-
 docs/api/doxygen/globals_p.html                    |    3 +-
 docs/api/doxygen/globals_r.html                    |    3 +-
 docs/api/doxygen/globals_s.html                    |    3 +-
 docs/api/doxygen/globals_t.html                    |   12 +-
 docs/api/doxygen/globals_u.html                    |    8 +-
 docs/api/doxygen/globals_v.html                    |    3 +-
 docs/api/doxygen/hierarchy.html                    | 1659 ++---
 docs/api/doxygen/image_8h.html                     |    2 +-
 docs/api/doxygen/image_8h__incl.svg                | 1707 +++--
 docs/api/doxygen/image_8h_source.html              |    2 +-
 docs/api/doxygen/inherit_graph_123.svg             |   52 +-
 docs/api/doxygen/inherit_graph_129.svg             |   15 +-
 docs/api/doxygen/inherit_graph_130.svg             |   12 +-
 docs/api/doxygen/inherit_graph_131.svg             |   12 +-
 docs/api/doxygen/inherit_graph_132.svg             |   12 +-
 docs/api/doxygen/inherit_graph_133.svg             |   15 +-
 docs/api/doxygen/inherit_graph_134.svg             |   15 +-
 docs/api/doxygen/inherit_graph_135.svg             |   12 +-
 docs/api/doxygen/inherit_graph_136.svg             |   15 +-
 docs/api/doxygen/inherit_graph_137.svg             |   15 +-
 docs/api/doxygen/inherit_graph_138.svg             |   15 +-
 docs/api/doxygen/inherit_graph_139.svg             |   15 +-
 docs/api/doxygen/inherit_graph_140.svg             |   12 +-
 docs/api/doxygen/inherit_graph_141.svg             |   12 +-
 docs/api/doxygen/inherit_graph_142.svg             |   12 +-
 docs/api/doxygen/inherit_graph_143.svg             |   12 +-
 docs/api/doxygen/inherit_graph_144.svg             |   15 +-
 docs/api/doxygen/inherit_graph_145.svg             |   17 +-
 docs/api/doxygen/inherit_graph_146.svg             |   16 +-
 docs/api/doxygen/inherit_graph_147.svg             |   15 +-
 docs/api/doxygen/inherit_graph_148.svg             |   14 +-
 docs/api/doxygen/inherit_graph_149.svg             |   12 +-
 docs/api/doxygen/inherit_graph_150.svg             |   69 +-
 docs/api/doxygen/inherit_graph_151.svg             |   54 +-
 docs/api/doxygen/inherit_graph_152.svg             |   72 +-
 docs/api/doxygen/inherit_graph_153.svg             |   19 +-
 docs/api/doxygen/inherit_graph_154.svg             |   15 +-
 docs/api/doxygen/inherit_graph_155.svg             |   15 +-
 docs/api/doxygen/inherit_graph_156.svg             |   27 +-
 docs/api/doxygen/inherit_graph_157.svg             |   24 +-
 docs/api/doxygen/inherit_graph_158.svg             |   28 +-
 docs/api/doxygen/inherit_graph_159.svg             |   12 +-
 docs/api/doxygen/inherit_graph_160.svg             |   12 +-
 docs/api/doxygen/inherit_graph_161.svg             |   12 +-
 docs/api/doxygen/inherit_graph_162.svg             |   12 +-
 docs/api/doxygen/inherit_graph_163.svg             |   12 +-
 docs/api/doxygen/inherit_graph_164.svg             |   12 +-
 docs/api/doxygen/inherit_graph_165.svg             |   12 +-
 docs/api/doxygen/inherit_graph_166.svg             |   12 +-
 docs/api/doxygen/inherit_graph_167.svg             |   12 +-
 docs/api/doxygen/inherit_graph_168.svg             |   12 +-
 docs/api/doxygen/inherit_graph_169.svg             |   21 -
 docs/api/doxygen/inherit_graph_57.svg              |   12 +-
 docs/api/doxygen/inherit_graph_58.svg              |   12 +-
 docs/api/doxygen/inherit_graph_59.svg              |   15 +-
 docs/api/doxygen/inherit_graph_60.svg              | 2959 +++++++-
 docs/api/doxygen/inherit_graph_61.svg              | 3062 +-------
 docs/api/doxygen/inherit_graph_62.svg              |   12 +-
 docs/api/doxygen/inherit_graph_63.svg              |   16 +-
 docs/api/doxygen/inherit_graph_64.svg              |   16 +-
 docs/api/doxygen/inherit_graph_65.svg              |   12 +-
 docs/api/doxygen/inherit_graph_86.svg              | 7866 ++++++++++----------
 docs/api/doxygen/inherits.html                     |  104 +-
 docs/api/doxygen/int__set_8h.html                  |    4 +-
 docs/api/doxygen/int__set_8h__dep__incl.svg        | 1337 ++--
 docs/api/doxygen/int__set_8h__incl.svg             | 1577 ++--
 docs/api/doxygen/int__solver_8h.html               |    2 +-
 docs/api/doxygen/int__solver_8h__incl.svg          | 1661 +++--
 docs/api/doxygen/interpreter_8h.html               |    2 +-
 docs/api/doxygen/interpreter_8h__incl.svg          | 1893 +++--
 docs/api/doxygen/interpreter_8h_source.html        |    6 +-
 docs/api/doxygen/ir_2adt_8h.html                   |    4 +-
 docs/api/doxygen/ir_2adt_8h__dep__incl.svg         | 1284 ++--
 docs/api/doxygen/ir_2adt_8h__incl.svg              | 1361 ++--
 docs/api/doxygen/ir_2adt_8h_source.html            |    4 +-
 docs/api/doxygen/ir_2attrs_8h.html                 |    4 +-
 docs/api/doxygen/ir_2attrs_8h__dep__incl.svg       | 1004 ++-
 docs/api/doxygen/ir_2attrs_8h__incl.svg            | 1399 ++--
 docs/api/doxygen/ir_2attrs_8h_source.html          |  109 +-
 docs/api/doxygen/ir_2expr_8h.html                  |    4 +-
 docs/api/doxygen/ir_2expr_8h__dep__incl.svg        | 1117 ++-
 docs/api/doxygen/ir_2expr_8h__incl.svg             | 1319 ++--
 docs/api/doxygen/ir_2expr_8h_source.html           |    8 +-
 docs/api/doxygen/ir_2function_8h.html              |    4 +-
 docs/api/doxygen/ir_2function_8h__dep__incl.svg    | 1035 ++-
 docs/api/doxygen/ir_2function_8h__incl.svg         | 1441 ++--
 docs/api/doxygen/ir_2module_8h.html                |    5 +-
 docs/api/doxygen/ir_2module_8h__dep__incl.svg      | 1314 ++--
 docs/api/doxygen/ir_2module_8h__incl.svg           | 1674 ++---
 docs/api/doxygen/ir_2module_8h_source.html         |   33 +-
 docs/api/doxygen/ir_2op_8h.html                    |    2 +-
 docs/api/doxygen/ir_2op_8h__incl.svg               | 1851 +++--
 docs/api/doxygen/ir_2op_8h_source.html             |   16 +-
 docs/api/doxygen/ir_2transform_8h.html             |    9 +-
 docs/api/doxygen/ir_2transform_8h__dep__incl.svg   |  221 +-
 docs/api/doxygen/ir_2transform_8h__incl.svg        | 1791 +++--
 docs/api/doxygen/ir_2transform_8h_source.html      |   70 +-
 docs/api/doxygen/ir_2type_8h.html                  |    4 +-
 docs/api/doxygen/ir_2type_8h__dep__incl.svg        | 1346 ++--
 docs/api/doxygen/ir_2type_8h__incl.svg             | 1221 ++-
 docs/api/doxygen/ir_2type_8h_source.html           |   85 +-
 docs/api/doxygen/iter__affine__map_8h.html         |  173 -
 docs/api/doxygen/iter__affine__map_8h__incl.svg    | 1163 ---
 docs/api/doxygen/iter__affine__map_8h_source.html  |  149 -
 docs/api/doxygen/local__response__norm_8h.html     |    2 +-
 .../api/doxygen/local__response__norm_8h__incl.svg | 2028 ++---
 .../doxygen/local__response__norm_8h_source.html   |    4 +-
 docs/api/doxygen/loop__state_8h.html               |    4 +-
 docs/api/doxygen/loop__state_8h__dep__incl.svg     |  119 +-
 docs/api/doxygen/loop__state_8h__incl.svg          | 1704 ++---
 docs/api/doxygen/loop__state_8h_source.html        |    4 +-
 docs/api/doxygen/mapping_8h.html                   |    2 +-
 docs/api/doxygen/mapping_8h__incl.svg              | 2028 ++---
 docs/api/doxygen/measure_8h.html                   |    6 +-
 docs/api/doxygen/measure_8h__dep__incl.svg         |   73 +-
 docs/api/doxygen/measure_8h__incl.svg              | 1689 ++---
 docs/api/doxygen/measure_8h_source.html            |   10 +-
 docs/api/doxygen/measure__record_8h.html           |    2 +-
 docs/api/doxygen/measure__record_8h__incl.svg      | 1629 ++--
 docs/api/doxygen/measure__record_8h_source.html    |    2 +-
 docs/api/doxygen/memory__manager_8h.html           |    2 +-
 docs/api/doxygen/memory__manager_8h__incl.svg      |  184 +-
 docs/api/doxygen/memory__manager_8h_source.html    |    2 +-
 docs/api/doxygen/namespacemembers_b.html           |    9 +-
 docs/api/doxygen/namespacemembers_d.html           |   12 +-
 docs/api/doxygen/namespacemembers_enum.html        |    3 -
 docs/api/doxygen/namespacemembers_f.html           |    2 +-
 docs/api/doxygen/namespacemembers_func_b.html      |    9 +-
 docs/api/doxygen/namespacemembers_func_d.html      |   12 +-
 docs/api/doxygen/namespacemembers_func_i.html      |    3 +-
 docs/api/doxygen/namespacemembers_func_k.html      |    2 +-
 docs/api/doxygen/namespacemembers_func_m.html      |    2 +-
 docs/api/doxygen/namespacemembers_func_p.html      |    2 +-
 docs/api/doxygen/namespacemembers_func_r.html      |    5 +-
 docs/api/doxygen/namespacemembers_func_t.html      |    3 -
 docs/api/doxygen/namespacemembers_func_w.html      |    2 +-
 docs/api/doxygen/namespacemembers_i.html           |    3 +-
 docs/api/doxygen/namespacemembers_k.html           |    2 +-
 docs/api/doxygen/namespacemembers_l.html           |   11 +-
 docs/api/doxygen/namespacemembers_m.html           |    2 +-
 docs/api/doxygen/namespacemembers_p.html           |    2 +-
 docs/api/doxygen/namespacemembers_r.html           |    5 +-
 docs/api/doxygen/namespacemembers_t.html           |    3 -
 docs/api/doxygen/namespacemembers_type.html        |    2 +-
 docs/api/doxygen/namespacemembers_w.html           |    2 +-
 docs/api/doxygen/namespacetvm.html                 |  271 +-
 docs/api/doxygen/namespacetvm_1_1arith.html        |   76 -
 .../doxygen/namespacetvm_1_1auto__scheduler.html   |   42 +-
 docs/api/doxygen/namespacetvm_1_1parser.html       |   19 +-
 docs/api/doxygen/namespacetvm_1_1relay.html        |  103 +-
 .../namespacetvm_1_1relay_1_1transform.html        |   22 -
 docs/api/doxygen/namespacetvm_1_1topi.html         |   40 +-
 .../doxygen/namespacetvm_1_1topi_1_1contrib.html   |   61 -
 docs/api/doxygen/namespacetvm_1_1topi_1_1nn.html   |   64 +-
 docs/api/doxygen/ndarray_8h.html                   |    2 +-
 docs/api/doxygen/ndarray_8h__dep__incl.svg         |   42 +-
 docs/api/doxygen/ndarray_8h__incl.svg              |  174 +-
 docs/api/doxygen/ndarray_8h_source.html            |   52 +-
 docs/api/doxygen/nn_2bnn_8h.html                   |    2 +-
 docs/api/doxygen/nn_2bnn_8h__incl.svg              | 1392 ++--
 docs/api/doxygen/nn_2bnn_8h_source.html            |    8 +-
 docs/api/doxygen/nn_2dense_8h.html                 |    2 +-
 docs/api/doxygen/nn_2dense_8h__incl.svg            | 2028 ++---
 docs/api/doxygen/nn_2dense_8h_source.html          |    4 +-
 docs/api/doxygen/nn_2pooling_8h.html               |    2 +-
 docs/api/doxygen/nn_2pooling_8h__incl.svg          | 1462 ++--
 docs/api/doxygen/nn_2pooling_8h_source.html        |   36 +-
 docs/api/doxygen/nn_2softmax_8h.html               |    2 +-
 docs/api/doxygen/nn_2softmax_8h__incl.svg          | 1436 ++--
 docs/api/doxygen/nn_2softmax_8h_source.html        |    4 +-
 docs/api/doxygen/node_2container_8h.html           |   22 +-
 docs/api/doxygen/node_2container_8h__dep__incl.svg | 1657 +++--
 docs/api/doxygen/node_2container_8h__incl.svg      |  763 +-
 docs/api/doxygen/node_2container_8h_source.html    |    6 +-
 docs/api/doxygen/node_8h.html                      |    4 +-
 docs/api/doxygen/node_8h__dep__incl.svg            | 1101 ++-
 docs/api/doxygen/node_8h__incl.svg                 | 1155 ++-
 docs/api/doxygen/object_8h.html                    |   12 +-
 docs/api/doxygen/object_8h__dep__incl.svg          | 1324 ++--
 docs/api/doxygen/object_8h__incl.svg               |  120 +-
 docs/api/doxygen/object_8h_source.html             |   10 +-
 docs/api/doxygen/op__strategy_8h.html              |    2 +-
 docs/api/doxygen/op__strategy_8h__incl.svg         | 2000 +++--
 docs/api/doxygen/operation_8h.html                 |    4 +-
 docs/api/doxygen/operation_8h__dep__incl.svg       |  481 +-
 docs/api/doxygen/operation_8h__incl.svg            | 1535 ++--
 docs/api/doxygen/operation_8h_source.html          |    2 +-
 docs/api/doxygen/packed__func_8h.html              |   12 +-
 docs/api/doxygen/packed__func_8h__dep__incl.svg    |  757 +-
 docs/api/doxygen/packed__func_8h__incl.svg         |  659 +-
 docs/api/doxygen/packed__func_8h_source.html       |  146 +-
 docs/api/doxygen/pad__utils_8h.html                |    2 +-
 docs/api/doxygen/pad__utils_8h__incl.svg           | 1531 ++--
 docs/api/doxygen/parser_8h.html                    |    6 +-
 docs/api/doxygen/parser_8h__incl.svg               |  757 +-
 docs/api/doxygen/parser_8h_source.html             |    6 +-
 docs/api/doxygen/pattern_8h.html                   |    2 +-
 docs/api/doxygen/pattern_8h__incl.svg              | 1567 ++--
 docs/api/doxygen/pattern__functor_8h.html          |    2 +-
 docs/api/doxygen/pattern__functor_8h__incl.svg     | 2137 +++---
 docs/api/doxygen/pattern__functor_8h_source.html   |    2 +-
 docs/api/doxygen/ravel__unravel_8h.html            |    2 +-
 docs/api/doxygen/ravel__unravel_8h__incl.svg       | 1523 ++--
 docs/api/doxygen/ravel__unravel_8h_source.html     |    4 +-
 docs/api/doxygen/reduce_8h.html                    |    2 +-
 docs/api/doxygen/reduce_8h__incl.svg               | 1419 ++--
 docs/api/doxygen/reduce_8h_source.html             |    2 +-
 docs/api/doxygen/reduction_8h.html                 |    2 +-
 docs/api/doxygen/reduction_8h__incl.svg            | 1206 ++-
 docs/api/doxygen/reduction_8h_source.html          |    6 +-
 docs/api/doxygen/reflection_8h.html                |    4 +-
 docs/api/doxygen/reflection_8h__dep__incl.svg      | 1181 ++-
 docs/api/doxygen/reflection_8h__incl.svg           |  995 ++-
 docs/api/doxygen/reflection_8h_source.html         |    4 +-
 docs/api/doxygen/registry_8h.html                  |    4 +-
 docs/api/doxygen/registry_8h__dep__incl.svg        | 1044 ++-
 docs/api/doxygen/registry_8h__incl.svg             |  705 +-
 docs/api/doxygen/relay_2adt_8h.html                |    2 +-
 docs/api/doxygen/relay_2adt_8h__incl.svg           | 2223 +++---
 docs/api/doxygen/relay_2adt_8h_source.html         |   42 +-
 docs/api/doxygen/relay_2analysis_8h.html           |   15 +-
 docs/api/doxygen/relay_2analysis_8h__incl.svg      | 2131 +++---
 docs/api/doxygen/relay_2analysis_8h_source.html    |    8 +-
 docs/api/doxygen/relay_2attrs_2memory_8h.html      |    2 +-
 docs/api/doxygen/relay_2attrs_2memory_8h__incl.svg | 2143 +++---
 .../doxygen/relay_2attrs_2memory_8h_source.html    |    2 +-
 docs/api/doxygen/relay_2attrs_2nn_8h.html          |    3 +-
 docs/api/doxygen/relay_2attrs_2nn_8h__incl.svg     | 1722 +++--
 docs/api/doxygen/relay_2attrs_2nn_8h_source.html   |  572 +-
 docs/api/doxygen/relay_2attrs_2transform_8h.html   |    5 +-
 .../doxygen/relay_2attrs_2transform_8h__incl.svg   | 1684 +++--
 .../doxygen/relay_2attrs_2transform_8h_source.html |   10 +-
 docs/api/doxygen/relay_2attrs_2vm_8h.html          |    2 +-
 docs/api/doxygen/relay_2attrs_2vm_8h__incl.svg     | 1407 ++--
 docs/api/doxygen/relay_2attrs_2vm_8h_source.html   |    2 +-
 docs/api/doxygen/relay_2expr_8h.html               |    2 +-
 docs/api/doxygen/relay_2expr_8h__incl.svg          | 2060 ++---
 docs/api/doxygen/relay_2expr_8h_source.html        |    2 +-
 docs/api/doxygen/relay_2expr__functor_8h.html      |    2 +-
 docs/api/doxygen/relay_2expr__functor_8h__incl.svg | 1925 ++---
 .../doxygen/relay_2expr__functor_8h_source.html    |    6 +-
 docs/api/doxygen/relay_2feature_8h.html            |    2 +-
 docs/api/doxygen/relay_2feature_8h__incl.svg       | 1891 ++---
 docs/api/doxygen/relay_2feature_8h_source.html     |    4 +-
 docs/api/doxygen/relay_2function_8h.html           |    2 +-
 docs/api/doxygen/relay_2function_8h__incl.svg      | 2010 +++--
 docs/api/doxygen/relay_2function_8h_source.html    |    4 +-
 docs/api/doxygen/relay_2op_8h.html                 |    2 +-
 docs/api/doxygen/relay_2op_8h__incl.svg            | 2004 ++---
 docs/api/doxygen/relay_2op__attr__types_8h.html    |    8 +-
 .../doxygen/relay_2op__attr__types_8h__incl.svg    | 2255 +++---
 .../doxygen/relay_2op__attr__types_8h_source.html  |    8 +-
 docs/api/doxygen/relay_2qnn_2attrs_8h.html         |    2 +-
 docs/api/doxygen/relay_2qnn_2attrs_8h__incl.svg    | 1419 ++--
 docs/api/doxygen/relay_2qnn_2attrs_8h_source.html  |    2 +-
 docs/api/doxygen/relay_2qnn_2transform_8h.html     |    2 +-
 .../api/doxygen/relay_2qnn_2transform_8h__incl.svg | 1573 ++--
 docs/api/doxygen/relay_2transform_8h.html          |    8 +-
 docs/api/doxygen/relay_2transform_8h__incl.svg     | 1699 +++--
 docs/api/doxygen/relay_2transform_8h_source.html   |   23 +-
 docs/api/doxygen/relay_2type_8h.html               |    2 +-
 docs/api/doxygen/relay_2type_8h__incl.svg          | 2071 +++---
 docs/api/doxygen/relay_2type_8h_source.html        |   40 +-
 docs/api/doxygen/reorg_8h.html                     |    2 +-
 docs/api/doxygen/reorg_8h__incl.svg                | 1244 ++--
 docs/api/doxygen/reorg_8h_source.html              |    2 +-
 docs/api/doxygen/repr__printer_8h.html             |    4 +-
 docs/api/doxygen/repr__printer_8h__dep__incl.svg   | 1217 ++-
 docs/api/doxygen/repr__printer_8h__incl.svg        |  191 +-
 docs/api/doxygen/rocblas_8h.html                   |    5 +-
 docs/api/doxygen/rocblas_8h__incl.svg              | 2032 ++---
 docs/api/doxygen/rocblas_8h_source.html            |    3 +-
 docs/api/doxygen/rocm_2dense_8h.html               |    2 +-
 docs/api/doxygen/rocm_2dense_8h__incl.svg          | 1393 ++--
 docs/api/doxygen/rocm_2dense_8h_source.html        |    2 +-
 docs/api/doxygen/rocm_2injective_8h.html           |    2 +-
 docs/api/doxygen/rocm_2injective_8h__incl.svg      | 1387 ++--
 docs/api/doxygen/rocm_2normalization_8h.html       |    2 +-
 docs/api/doxygen/rocm_2normalization_8h__incl.svg  | 1359 ++--
 docs/api/doxygen/rocm_2pooling_8h.html             |    2 +-
 docs/api/doxygen/rocm_2pooling_8h__incl.svg        | 1403 ++--
 docs/api/doxygen/rocm_2reduction_8h.html           |    2 +-
 docs/api/doxygen/rocm_2reduction_8h__incl.svg      | 1387 ++--
 docs/api/doxygen/rocm_2softmax_8h.html             |    2 +-
 docs/api/doxygen/rocm_2softmax_8h__incl.svg        | 1387 ++--
 docs/api/doxygen/runtime_2container_8h.html        |    4 +-
 .../doxygen/runtime_2container_8h__dep__incl.svg   | 1525 ++--
 docs/api/doxygen/runtime_2container_8h__incl.svg   |  267 +-
 docs/api/doxygen/runtime_2container_8h_source.html |   71 +-
 docs/api/doxygen/runtime_2memory_8h.html           |    4 +-
 docs/api/doxygen/runtime_2memory_8h__dep__incl.svg | 1315 ++--
 docs/api/doxygen/runtime_2memory_8h__incl.svg      |  136 +-
 docs/api/doxygen/runtime_2module_8h.html           |    4 +-
 docs/api/doxygen/runtime_2module_8h__dep__incl.svg |  755 +-
 docs/api/doxygen/runtime_2module_8h__incl.svg      |  589 +-
 docs/api/doxygen/runtime_2module_8h_source.html    |    2 +-
 docs/api/doxygen/runtime_2vm_2vm_8h.html           |    2 +-
 docs/api/doxygen/runtime_2vm_2vm_8h__incl.svg      | 1064 ++-
 docs/api/doxygen/runtime_2vm_2vm_8h_source.html    |   10 +-
 docs/api/doxygen/schedule_8h.html                  |    4 +-
 docs/api/doxygen/schedule_8h__dep__incl.svg        |  852 ++-
 docs/api/doxygen/schedule_8h__incl.svg             | 1818 ++---
 docs/api/doxygen/schedule__pass_8h.html            |    2 +-
 docs/api/doxygen/schedule__pass_8h__incl.svg       | 1372 ++--
 docs/api/doxygen/search/all_0.js                   |   10 +-
 docs/api/doxygen/search/all_1.js                   |   21 +-
 docs/api/doxygen/search/all_10.js                  |    2 +-
 docs/api/doxygen/search/all_12.js                  |   20 +-
 docs/api/doxygen/search/all_13.js                  |   47 +-
 docs/api/doxygen/search/all_14.js                  |   43 +-
 docs/api/doxygen/search/all_15.js                  |    5 +-
 docs/api/doxygen/search/all_16.js                  |    8 +-
 docs/api/doxygen/search/all_17.js                  |    3 +-
 docs/api/doxygen/search/all_19.js                  |    1 -
 docs/api/doxygen/search/all_2.js                   |   11 +-
 docs/api/doxygen/search/all_3.js                   |   11 +-
 docs/api/doxygen/search/all_4.js                   |   32 +-
 docs/api/doxygen/search/all_5.js                   |   17 +-
 docs/api/doxygen/search/all_6.js                   |    6 +-
 docs/api/doxygen/search/all_7.js                   |   13 +-
 docs/api/doxygen/search/all_8.js                   |    5 +-
 docs/api/doxygen/search/all_9.js                   |   35 +-
 docs/api/doxygen/search/all_b.js                   |    7 +-
 docs/api/doxygen/search/all_c.js                   |   10 +-
 docs/api/doxygen/search/all_d.js                   |   19 +-
 docs/api/doxygen/search/all_e.js                   |    2 -
 docs/api/doxygen/search/all_f.js                   |   18 +-
 docs/api/doxygen/search/classes_0.js               |    1 -
 docs/api/doxygen/search/classes_10.js              |    3 +-
 docs/api/doxygen/search/classes_11.js              |    3 +-
 docs/api/doxygen/search/classes_13.js              |    4 +-
 docs/api/doxygen/search/classes_3.js               |    7 -
 docs/api/doxygen/search/classes_4.js               |    2 +-
 docs/api/doxygen/search/classes_7.js               |    2 +-
 docs/api/doxygen/search/classes_8.js               |   10 +-
 docs/api/doxygen/search/classes_a.js               |    3 +-
 docs/api/doxygen/search/classes_f.js               |    2 +-
 docs/api/doxygen/search/defines_0.js               |    6 +-
 docs/api/doxygen/search/defines_1.js               |    6 +-
 docs/api/doxygen/search/defines_2.js               |    4 +-
 docs/api/doxygen/search/defines_3.js               |    3 +-
 docs/api/doxygen/search/defines_4.js               |    8 +-
 docs/api/doxygen/search/defines_5.js               |    8 +-
 docs/api/doxygen/search/defines_6.js               |   65 +-
 docs/api/doxygen/search/defines_7.js               |   63 +-
 .../search/{enums_c.html => defines_8.html}        |    2 +-
 .../doxygen/search/{defines_7.js => defines_8.js}  |    0
 docs/api/doxygen/search/enums_7.js                 |    3 +-
 docs/api/doxygen/search/enums_8.js                 |    4 +-
 docs/api/doxygen/search/enums_9.js                 |    3 +-
 docs/api/doxygen/search/enums_a.js                 |    3 +-
 docs/api/doxygen/search/enums_b.js                 |    9 +-
 docs/api/doxygen/search/enums_c.js                 |   10 -
 docs/api/doxygen/search/enumvalues_4.js            |    1 -
 docs/api/doxygen/search/enumvalues_5.js            |    1 -
 docs/api/doxygen/search/enumvalues_8.js            |    3 +-
 docs/api/doxygen/search/enumvalues_a.js            |    3 +-
 docs/api/doxygen/search/files_1.js                 |    3 +-
 docs/api/doxygen/search/files_10.js                |    2 +-
 docs/api/doxygen/search/files_3.js                 |    5 +-
 docs/api/doxygen/search/files_7.js                 |    5 +-
 docs/api/doxygen/search/functions_1.js             |    6 +-
 docs/api/doxygen/search/functions_10.js            |    2 +-
 docs/api/doxygen/search/functions_12.js            |    8 +-
 docs/api/doxygen/search/functions_13.js            |   20 +-
 docs/api/doxygen/search/functions_14.js            |   20 +-
 docs/api/doxygen/search/functions_15.js            |    3 +-
 docs/api/doxygen/search/functions_16.js            |    2 +-
 docs/api/doxygen/search/functions_17.js            |    3 +-
 docs/api/doxygen/search/functions_19.js            |    1 -
 docs/api/doxygen/search/functions_2.js             |    2 +-
 docs/api/doxygen/search/functions_3.js             |    5 +-
 docs/api/doxygen/search/functions_4.js             |   13 +-
 docs/api/doxygen/search/functions_5.js             |    4 +-
 docs/api/doxygen/search/functions_6.js             |    4 +-
 docs/api/doxygen/search/functions_7.js             |    7 +-
 docs/api/doxygen/search/functions_8.js             |    1 -
 docs/api/doxygen/search/functions_9.js             |   11 +-
 docs/api/doxygen/search/functions_b.js             |    2 +-
 docs/api/doxygen/search/functions_c.js             |    2 +-
 docs/api/doxygen/search/functions_d.js             |    6 +-
 docs/api/doxygen/search/functions_e.js             |    1 -
 docs/api/doxygen/search/functions_f.js             |   14 +-
 docs/api/doxygen/search/related_2.js               |    1 -
 docs/api/doxygen/search/searchdata.js              |    4 +-
 docs/api/doxygen/search/typedefs_5.js              |    2 +-
 docs/api/doxygen/search/variables_0.js             |    8 +-
 docs/api/doxygen/search/variables_1.js             |    2 +-
 docs/api/doxygen/search/variables_10.js            |    1 -
 docs/api/doxygen/search/variables_11.js            |   12 +-
 docs/api/doxygen/search/variables_2.js             |    2 +-
 docs/api/doxygen/search/variables_4.js             |    5 +-
 docs/api/doxygen/search/variables_5.js             |    3 +-
 docs/api/doxygen/search/variables_a.js             |    4 +-
 docs/api/doxygen/search/variables_b.js             |    9 +-
 docs/api/doxygen/search/variables_c.js             |    3 +-
 docs/api/doxygen/search/variables_e.js             |    2 +-
 docs/api/doxygen/search__policy_8h.html            |   14 +-
 docs/api/doxygen/search__policy_8h__incl.svg       | 1823 +++--
 docs/api/doxygen/search__policy_8h_source.html     |   35 +-
 docs/api/doxygen/search__task_8h.html              |    4 +-
 docs/api/doxygen/search__task_8h__dep__incl.svg    |   85 +-
 docs/api/doxygen/search__task_8h__incl.svg         | 1307 ++--
 docs/api/doxygen/search__task_8h_source.html       |    2 +-
 docs/api/doxygen/serialization_8h.html             |    2 +-
 docs/api/doxygen/serialization_8h__incl.svg        |   26 +-
 docs/api/doxygen/serializer_8h.html                |    2 +-
 docs/api/doxygen/serializer_8h__dep__incl.svg      |   42 +-
 docs/api/doxygen/serializer_8h__incl.svg           |  120 +-
 docs/api/doxygen/source__map_8h.html               |   13 +-
 docs/api/doxygen/source__map_8h__dep__incl.svg     |  970 ---
 docs/api/doxygen/source__map_8h__incl.svg          | 1289 ++--
 docs/api/doxygen/source__map_8h_source.html        |   35 +-
 docs/api/doxygen/span_8h.html                      |    4 +-
 docs/api/doxygen/span_8h__dep__incl.svg            | 1295 ++--
 docs/api/doxygen/span_8h__incl.svg                 | 1179 ++-
 docs/api/doxygen/span_8h_source.html               |    4 +-
 docs/api/doxygen/stmt_8h.html                      |    4 +-
 docs/api/doxygen/stmt_8h__dep__incl.svg            |  867 ++-
 docs/api/doxygen/stmt_8h__incl.svg                 | 1543 ++--
 docs/api/doxygen/stmt_8h_source.html               |    2 +-
 docs/api/doxygen/stmt__functor_8h.html             |    2 +-
 docs/api/doxygen/stmt__functor_8h__incl.svg        | 1757 +++--
 docs/api/doxygen/stmt__functor_8h_source.html      |    2 +-
 ...ructtvm_1_1detail_1_1AttrInitEntry-members.html |    6 +-
 .../structtvm_1_1detail_1_1AttrInitEntry.html      |   28 +-
 ... => structtvm_1_1parser_1_1Source-members.html} |   14 +-
 .../api/doxygen/structtvm_1_1parser_1_1Source.html |  323 +
 .../structtvm_1_1parser_1_1Source__coll__graph.svg |   60 +
 .../structtvm_1_1relay_1_1Conv2DAttrs-members.html |    6 +-
 .../doxygen/structtvm_1_1relay_1_1Conv2DAttrs.html |   26 +-
 ...ucttvm_1_1relay_1_1Conv2DAttrs__coll__graph.svg |  334 +-
 .../structtvm_1_1relay_1_1CorrelationAttrs.html    |    2 +-
 ...m_1_1relay_1_1CorrelationAttrs__coll__graph.svg |  235 +-
 .../structtvm_1_1relay_1_1DilateAttrs-members.html |   29 +-
 .../doxygen/structtvm_1_1relay_1_1DilateAttrs.html |   16 +-
 ...ucttvm_1_1relay_1_1DilateAttrs__coll__graph.svg |    2 +-
 ...tvm_1_1relay_1_1DilateAttrs__inherit__graph.svg |   97 +-
 ...ttvm_1_1relay_1_1GlobalPool2DAttrs-members.html |    2 +-
 .../structtvm_1_1relay_1_1GlobalPool2DAttrs.html   |   10 +-
 ..._1_1relay_1_1GlobalPool2DAttrs__coll__graph.svg |  193 +-
 .../structtvm_1_1relay_1_1GridSampleAttrs.html     |    2 +-
 ...vm_1_1relay_1_1GridSampleAttrs__coll__graph.svg |  235 +-
 ...tvm_1_1relay_1_1MatrixSetDiagAttrs-members.html |  125 -
 .../structtvm_1_1relay_1_1MatrixSetDiagAttrs.html  |  273 -
 ...1_1relay_1_1MatrixSetDiagAttrs__coll__graph.svg |   91 -
 ...relay_1_1MatrixSetDiagAttrs__inherit__graph.svg |   91 -
 ...ructtvm_1_1relay_1_1MaxPool2DAttrs-members.html |    2 +-
 .../structtvm_1_1relay_1_1MaxPool2DAttrs.html      |   10 +-
 ...tvm_1_1relay_1_1MaxPool2DAttrs__coll__graph.svg |  216 +-
 .../structtvm_1_1relay_1_1Resize3dAttrs.html       |    2 +-
 ...ttvm_1_1relay_1_1Resize3dAttrs__coll__graph.svg |  219 +-
 docs/api/doxygen/structtvm_1_1tir_1_1LENode.html   |    4 +-
 docs/api/doxygen/structural__equal_8h.html         |    4 +-
 .../doxygen/structural__equal_8h__dep__incl.svg    |  791 +-
 docs/api/doxygen/structural__equal_8h__incl.svg    |  865 ++-
 docs/api/doxygen/structural__hash_8h.html          |    4 +-
 .../api/doxygen/structural__hash_8h__dep__incl.svg |  791 +-
 docs/api/doxygen/structural__hash_8h__incl.svg     |  881 ++-
 docs/api/doxygen/tag_8h.html                       |    2 +-
 docs/api/doxygen/tag_8h__incl.svg                  | 1413 ++--
 docs/api/doxygen/tags_8h.html                      |    2 +-
 docs/api/doxygen/tags_8h__dep__incl.svg            |  410 +-
 docs/api/doxygen/target_8h.html                    |    4 +-
 docs/api/doxygen/target_8h__dep__incl.svg          |  473 +-
 docs/api/doxygen/target_8h__incl.svg               | 1353 ++--
 docs/api/doxygen/target__info_8h.html              |    2 +-
 docs/api/doxygen/target__info_8h__incl.svg         | 1341 ++--
 docs/api/doxygen/target__kind_8h.html              |    4 +-
 docs/api/doxygen/target__kind_8h__dep__incl.svg    |  483 +-
 docs/api/doxygen/target__kind_8h__incl.svg         | 1267 ++--
 docs/api/doxygen/target__kind_8h_source.html       |    4 +-
 docs/api/doxygen/tensor_8h.html                    |    4 +-
 docs/api/doxygen/tensor_8h__dep__incl.svg          |  953 ++-
 docs/api/doxygen/tensor_8h__incl.svg               | 1936 ++---
 docs/api/doxygen/tensor_8h_source.html             |    2 +-
 docs/api/doxygen/tensor__intrin_8h.html            |    4 +-
 docs/api/doxygen/tensor__intrin_8h__dep__incl.svg  |  924 ++-
 docs/api/doxygen/tensor__intrin_8h__incl.svg       | 1917 ++---
 docs/api/doxygen/tensor__type_8h.html              |    2 +-
 docs/api/doxygen/tensor__type_8h__incl.svg         | 1321 ++--
 docs/api/doxygen/tensor__type_8h_source.html       |    4 +-
 docs/api/doxygen/tensor__utils_8h.html             |    2 +-
 docs/api/doxygen/tensor__utils_8h__incl.svg        | 1517 ++--
 docs/api/doxygen/tir_2analysis_8h.html             |    4 +-
 docs/api/doxygen/tir_2analysis_8h__dep__incl.svg   |  185 +-
 docs/api/doxygen/tir_2analysis_8h__incl.svg        | 1898 +++--
 docs/api/doxygen/tir_2analysis_8h_source.html      |    2 +-
 docs/api/doxygen/tir_2expr_8h.html                 |    2 +-
 docs/api/doxygen/tir_2expr_8h__incl.svg            | 1517 ++--
 docs/api/doxygen/tir_2expr_8h_source.html          |   12 +-
 docs/api/doxygen/tir_2expr__functor_8h.html        |    2 +-
 docs/api/doxygen/tir_2expr__functor_8h__incl.svg   | 1679 +++--
 docs/api/doxygen/tir_2function_8h.html             |    4 +-
 docs/api/doxygen/tir_2function_8h__dep__incl.svg   |  385 +-
 docs/api/doxygen/tir_2function_8h__incl.svg        | 1757 +++--
 docs/api/doxygen/tir_2function_8h_source.html      |    6 +-
 docs/api/doxygen/tir_2op_8h.html                   |   11 +-
 docs/api/doxygen/tir_2op_8h__dep__incl.svg         |  962 ++-
 docs/api/doxygen/tir_2op_8h__incl.svg              | 2116 +++---
 docs/api/doxygen/tir_2op_8h_source.html            |  109 +-
 docs/api/doxygen/tir_2op__attr__types_8h.html      |    4 +-
 .../doxygen/tir_2op__attr__types_8h__dep__incl.svg |  195 +-
 docs/api/doxygen/tir_2op__attr__types_8h__incl.svg |  275 +-
 docs/api/doxygen/tir_2transform_8h.html            |    2 +-
 docs/api/doxygen/tir_2transform_8h__incl.svg       | 1892 ++---
 docs/api/doxygen/topi_2nn_8h.html                  |    2 +-
 docs/api/doxygen/topi_2nn_8h__incl.svg             | 2103 +++---
 docs/api/doxygen/topi_2nn_8h_source.html           |   10 +-
 docs/api/doxygen/topi_2transform_8h.html           |    9 +-
 docs/api/doxygen/topi_2transform_8h__incl.svg      | 1652 ++--
 docs/api/doxygen/topi_2transform_8h_source.html    |   69 +-
 docs/api/doxygen/transform__step_8h.html           |    4 +-
 docs/api/doxygen/transform__step_8h__dep__incl.svg |  129 +-
 docs/api/doxygen/transform__step_8h__incl.svg      | 1787 ++---
 docs/api/doxygen/transform__step_8h_source.html    |  113 +-
 docs/api/doxygen/type__functor_8h.html             |    2 +-
 docs/api/doxygen/type__functor_8h__incl.svg        | 2279 +++---
 docs/api/doxygen/type__functor_8h_source.html      |   18 +-
 docs/api/doxygen/type__relation_8h.html            |    4 +-
 docs/api/doxygen/type__relation_8h__incl.svg       | 1599 ++--
 docs/api/doxygen/type__relation_8h_source.html     |   59 +-
 docs/api/doxygen/{utils_8h.html => util_8h.html}   |   10 +-
 docs/api/doxygen/util_8h__incl.svg                 |  934 +++
 docs/api/doxygen/util_8h_source.html               |  109 +
 docs/api/doxygen/utils_8h__incl.svg                |  939 ---
 docs/api/doxygen/utils_8h_source.html              |  109 -
 docs/api/doxygen/utvm__rpc__server_8h.html         |   57 +-
 docs/api/doxygen/utvm__rpc__server_8h__incl.svg    |   44 +-
 docs/api/doxygen/utvm__rpc__server_8h_source.html  |    9 +-
 docs/api/doxygen/var_8h.html                       |    4 +-
 docs/api/doxygen/var_8h__dep__incl.svg             | 1002 +--
 docs/api/doxygen/var_8h__incl.svg                  | 1355 ++--
 docs/api/doxygen/var_8h_source.html                |    2 +-
 docs/api/doxygen/vision_8h.html                    |    2 +-
 docs/api/doxygen/vision_8h__incl.svg               | 1707 +++--
 docs/api/doxygen/vision_8h_source.html             |    2 +-
 docs/api/doxygen/with_8h.html                      |    2 +-
 docs/api/doxygen/with_8h__dep__incl.svg            | 1258 ++--
 docs/api/doxygen/x86_2bnn_8h.html                  |    2 +-
 docs/api/doxygen/x86_2bnn_8h__incl.svg             | 1365 ++--
 docs/api/doxygen/x86_2default_8h.html              |    2 +-
 docs/api/doxygen/x86_2default_8h__incl.svg         | 1369 ++--
 docs/api/doxygen/x86_2injective_8h.html            |    2 +-
 docs/api/doxygen/x86_2injective_8h__incl.svg       | 1365 ++--
 docs/api/links.html                                |  183 +-
 docs/api/python/auto_scheduler.html                |  685 +-
 docs/api/python/autotvm.html                       |  457 +-
 docs/api/python/contrib.html                       |  483 +-
 docs/api/python/driver.html                        |  193 +-
 docs/api/python/error.html                         |  191 +-
 docs/api/python/graph_runtime.html                 |  201 +-
 docs/api/python/index.html                         |  185 +-
 docs/api/python/ir.html                            |  379 +-
 docs/api/python/micro.html                         |  481 +-
 docs/api/python/ndarray.html                       |  191 +-
 docs/api/python/relay/analysis.html                |  279 +-
 docs/api/python/relay/backend.html                 |  195 +-
 docs/api/python/relay/dataflow_pattern.html        |  375 +-
 docs/api/python/relay/frontend.html                |  276 +-
 docs/api/python/relay/image.html                   |  219 +-
 docs/api/python/relay/index.html                   |  700 +-
 docs/api/python/relay/nn.html                      |  628 +-
 docs/api/python/relay/testing.html                 |  450 +-
 docs/api/python/relay/transform.html               |  394 +-
 docs/api/python/relay/vision.html                  |  231 +-
 docs/api/python/rpc.html                           |  277 +-
 docs/api/python/runtime.html                       |  275 +-
 docs/api/python/target.html                        |  243 +-
 docs/api/python/te.html                            |  297 +-
 docs/api/python/tir.html                           |  379 +-
 docs/api/python/topi.html                          | 1250 ++--
 docs/api/python/vta/index.html                     |  191 +-
 docs/api/rust/.lock                                |    0
 docs/api/rust/COPYRIGHT.txt                        |   45 -
 docs/api/rust/FiraSans-LICENSE.txt                 |   94 -
 docs/api/rust/FiraSans-Medium.woff                 |  Bin 186824 -> 0 bytes
 docs/api/rust/FiraSans-Regular.woff                |  Bin 183268 -> 0 bytes
 docs/api/rust/LICENSE-APACHE.txt                   |  201 -
 docs/api/rust/LICENSE-MIT.txt                      |   23 -
 docs/api/rust/SourceCodePro-LICENSE.txt            |   93 -
 docs/api/rust/SourceCodePro-Regular.woff           |  Bin 55472 -> 0 bytes
 docs/api/rust/SourceCodePro-Semibold.woff          |  Bin 55360 -> 0 bytes
 docs/api/rust/SourceSerifPro-Bold.ttf.woff         |  Bin 93248 -> 0 bytes
 docs/api/rust/SourceSerifPro-It.ttf.woff           |  Bin 36200 -> 0 bytes
 docs/api/rust/SourceSerifPro-LICENSE.md            |   93 -
 docs/api/rust/SourceSerifPro-Regular.ttf.woff      |  Bin 88596 -> 0 bytes
 docs/api/rust/array/all.html                       |    4 -
 docs/api/rust/array/fn.main.html                   |    2 -
 docs/api/rust/array/index.html                     |    3 -
 docs/api/rust/array/sidebar-items.js               |    1 -
 docs/api/rust/ayu.css                              |    1 -
 docs/api/rust/basics/all.html                      |    4 -
 docs/api/rust/basics/fn.main.html                  |    2 -
 docs/api/rust/basics/index.html                    |    3 -
 docs/api/rust/basics/sidebar-items.js              |    1 -
 docs/api/rust/brush.svg                            |    1 -
 docs/api/rust/compiler_ext/all.html                |    4 -
 docs/api/rust/compiler_ext/fn.tvm_export.html      |    2 -
 docs/api/rust/compiler_ext/index.html              |    3 -
 docs/api/rust/compiler_ext/sidebar-items.js        |    1 -
 docs/api/rust/dark.css                             |    1 -
 docs/api/rust/down-arrow.svg                       |    1 -
 docs/api/rust/error/all.html                       |    4 -
 docs/api/rust/error/fn.main.html                   |    2 -
 docs/api/rust/error/index.html                     |    3 -
 docs/api/rust/error/sidebar-items.js               |    1 -
 docs/api/rust/favicon.ico                          |  Bin 23229 -> 0 bytes
 docs/api/rust/float/all.html                       |    4 -
 docs/api/rust/float/fn.main.html                   |    2 -
 docs/api/rust/float/index.html                     |    3 -
 docs/api/rust/float/sidebar-items.js               |    1 -
 .../rust/implementors/core/clone/trait.Clone.js    |    5 -
 docs/api/rust/implementors/core/cmp/trait.Eq.js    |    5 -
 docs/api/rust/implementors/core/cmp/trait.Ord.js   |    4 -
 .../rust/implementors/core/cmp/trait.PartialEq.js  |    6 -
 .../rust/implementors/core/cmp/trait.PartialOrd.js |    4 -
 .../rust/implementors/core/convert/trait.AsRef.js  |    4 -
 .../rust/implementors/core/convert/trait.From.js   |    6 -
 .../implementors/core/convert/trait.TryFrom.js     |    6 -
 .../implementors/core/default/trait.Default.js     |    4 -
 docs/api/rust/implementors/core/fmt/trait.Debug.js |    7 -
 .../rust/implementors/core/fmt/trait.Display.js    |    6 -
 docs/api/rust/implementors/core/hash/trait.Hash.js |    5 -
 .../core/iter/traits/collect/trait.FromIterator.js |    3 -
 .../core/iter/traits/collect/trait.IntoIterator.js |    3 -
 .../core/iter/traits/iterator/trait.Iterator.js    |    3 -
 .../rust/implementors/core/marker/trait.Copy.js    |    3 -
 .../rust/implementors/core/marker/trait.Freeze.js  |    7 -
 .../rust/implementors/core/marker/trait.Send.js    |    7 -
 .../implementors/core/marker/trait.StructuralEq.js |    3 -
 .../core/marker/trait.StructuralPartialEq.js       |    4 -
 .../rust/implementors/core/marker/trait.Sync.js    |    7 -
 .../rust/implementors/core/marker/trait.Unpin.js   |    7 -
 .../implementors/core/ops/deref/trait.Deref.js     |    4 -
 .../rust/implementors/core/ops/drop/trait.Drop.js  |    4 -
 .../rust/implementors/core/str/trait.FromStr.js    |    3 -
 .../implementors/serde/de/trait.Deserialize.js     |    3 -
 .../rust/implementors/serde/ser/trait.Serialize.js |    3 -
 .../api/rust/implementors/std/error/trait.Error.js |    6 -
 .../implementors/std/panic/trait.RefUnwindSafe.js  |    7 -
 .../implementors/std/panic/trait.UnwindSafe.js     |    7 -
 .../rust/implementors/structopt/trait.StructOpt.js |    3 -
 .../structopt/trait.StructOptInternal.js           |    3 -
 .../tvm/runtime/function/trait.ToFunction.js       |    3 -
 .../tvm/runtime/function/trait.Typed.js            |    3 -
 .../tvm/runtime/ndarray/trait.Num32.js             |    3 -
 .../implementors/tvm/runtime/trait.IsObject.js     |    3 -
 .../implementors/tvm/runtime/trait.IsObjectRef.js  |    3 -
 .../rust/implementors/tvm_graph_rt/trait.Module.js |    3 -
 .../implementors/tvm_graph_rt/trait.PackedFunc.js  |    3 -
 .../tvm_rt/function/trait.ToFunction.js            |    3 -
 .../implementors/tvm_rt/function/trait.Typed.js    |    3 -
 .../implementors/tvm_rt/ndarray/trait.Num32.js     |    3 -
 .../implementors/tvm_rt/object/trait.IsObject.js   |    3 -
 .../tvm_rt/object/trait.IsObjectRef.js             |    3 -
 .../tvm_sys/packed_func/trait.PackedFunc.js        |    3 -
 docs/api/rust/int/all.html                         |    4 -
 docs/api/rust/int/fn.main.html                     |    2 -
 docs/api/rust/int/index.html                       |    3 -
 docs/api/rust/int/sidebar-items.js                 |    1 -
 docs/api/rust/light.css                            |    1 -
 docs/api/rust/main.js                              |    7 -
 docs/api/rust/normalize.css                        |    2 -
 docs/api/rust/noscript.css                         |    1 -
 docs/api/rust/resnet/all.html                      |    4 -
 docs/api/rust/resnet/fn.main.html                  |    2 -
 docs/api/rust/resnet/index.html                    |    3 -
 docs/api/rust/resnet/sidebar-items.js              |    1 -
 docs/api/rust/rust-logo.png                        |  Bin 5758 -> 0 bytes
 docs/api/rust/rustdoc.css                          |    1 -
 docs/api/rust/search-index.js                      |   21 -
 docs/api/rust/settings.css                         |    1 -
 docs/api/rust/settings.html                        |    3 -
 docs/api/rust/settings.js                          |    1 -
 docs/api/rust/source-files.js                      |   20 -
 docs/api/rust/source-script.js                     |    1 -
 docs/api/rust/src/array/array.rs.html              |  130 -
 docs/api/rust/src/basics/main.rs.html              |  106 -
 docs/api/rust/src/compiler_ext/lib.rs.html         |   74 -
 docs/api/rust/src/error/error.rs.html              |   94 -
 docs/api/rust/src/float/float.rs.html              |  104 -
 docs/api/rust/src/int/int.rs.html                  |   94 -
 docs/api/rust/src/resnet/main.rs.html              |  250 -
 docs/api/rust/src/string/string.rs.html            |  122 -
 docs/api/rust/src/test_rt_nn/main.rs.html          |  202 -
 docs/api/rust/src/test_rt_tvm_basic/main.rs.html   |   98 -
 docs/api/rust/src/test_rt_tvm_dso/main.rs.html     |   82 -
 docs/api/rust/src/test_rt_wasm32/main.rs.html      |  112 -
 docs/api/rust/src/tvm/ir/arith.rs.html             |   96 -
 docs/api/rust/src/tvm/ir/attrs.rs.html             |   62 -
 .../rust/src/tvm/ir/diagnostics/codespan.rs.html   |  436 --
 docs/api/rust/src/tvm/ir/diagnostics/mod.rs.html   |  494 --
 docs/api/rust/src/tvm/ir/expr.rs.html              |  204 -
 docs/api/rust/src/tvm/ir/function.rs.html          |   96 -
 docs/api/rust/src/tvm/ir/mod.rs.html               |   72 -
 docs/api/rust/src/tvm/ir/module.rs.html            |  346 -
 docs/api/rust/src/tvm/ir/op.rs.html                |   90 -
 docs/api/rust/src/tvm/ir/relay/attrs/mod.rs.html   |   46 -
 docs/api/rust/src/tvm/ir/relay/attrs/nn.rs.html    |  220 -
 .../rust/src/tvm/ir/relay/attrs/transform.rs.html  |   66 -
 docs/api/rust/src/tvm/ir/relay/mod.rs.html         | 1174 ---
 docs/api/rust/src/tvm/ir/source_map.rs.html        |  118 -
 docs/api/rust/src/tvm/ir/span.rs.html              |  146 -
 docs/api/rust/src/tvm/ir/tir.rs.html               |  174 -
 docs/api/rust/src/tvm/ir/ty.rs.html                |  488 --
 docs/api/rust/src/tvm/lib.rs.html                  |  152 -
 docs/api/rust/src/tvm/python.rs.html               |  124 -
 docs/api/rust/src/tvm/runtime/graph_rt.rs.html     |  198 -
 docs/api/rust/src/tvm/runtime/mod.rs.html          |   48 -
 docs/api/rust/src/tvm/transform.rs.html            |  230 -
 docs/api/rust/src/tvm_graph_rt/allocator.rs.html   |  150 -
 docs/api/rust/src/tvm_graph_rt/array.rs.html       |  806 --
 docs/api/rust/src/tvm_graph_rt/errors.rs.html      |  114 -
 docs/api/rust/src/tvm_graph_rt/graph.rs.html       | 1050 ---
 docs/api/rust/src/tvm_graph_rt/lib.rs.html         |  156 -
 docs/api/rust/src/tvm_graph_rt/module/dso.rs.html  |  300 -
 docs/api/rust/src/tvm_graph_rt/module/mod.rs.html  |  132 -
 .../rust/src/tvm_graph_rt/module/syslib.rs.html    |  150 -
 docs/api/rust/src/tvm_graph_rt/threading.rs.html   |  530 --
 docs/api/rust/src/tvm_graph_rt/workspace.rs.html   |  282 -
 docs/api/rust/src/tvm_macros/external.rs.html      |  334 -
 docs/api/rust/src/tvm_macros/import_module.rs.html |  270 -
 docs/api/rust/src/tvm_macros/lib.rs.html           |   86 -
 docs/api/rust/src/tvm_macros/object.rs.html        |  382 -
 docs/api/rust/src/tvm_macros/util.rs.html          |  100 -
 docs/api/rust/src/tvm_rt/array.rs.html             |  350 -
 docs/api/rust/src/tvm_rt/context.rs.html           |  198 -
 docs/api/rust/src/tvm_rt/errors.rs.html            |  198 -
 docs/api/rust/src/tvm_rt/function.rs.html          |  732 --
 docs/api/rust/src/tvm_rt/lib.rs.html               |  264 -
 docs/api/rust/src/tvm_rt/map.rs.html               |  534 --
 docs/api/rust/src/tvm_rt/module.rs.html            |  264 -
 docs/api/rust/src/tvm_rt/ndarray.rs.html           | 1012 ---
 docs/api/rust/src/tvm_rt/object/mod.rs.html        |  206 -
 docs/api/rust/src/tvm_rt/object/object_ptr.rs.html |  904 ---
 docs/api/rust/src/tvm_rt/string.rs.html            |  286 -
 docs/api/rust/src/tvm_rt/to_function.rs.html       |  586 --
 docs/api/rust/src/tvm_rt/value.rs.html             |  218 -
 docs/api/rust/src/tvm_sys/array.rs.html            |  130 -
 docs/api/rust/src/tvm_sys/byte_array.rs.html       |  262 -
 docs/api/rust/src/tvm_sys/context.rs.html          |  596 --
 docs/api/rust/src/tvm_sys/datatype.rs.html         |  424 --
 docs/api/rust/src/tvm_sys/errors.rs.html           |   96 -
 docs/api/rust/src/tvm_sys/lib.rs.html              |  146 -
 docs/api/rust/src/tvm_sys/packed_func.rs.html      |  858 ---
 docs/api/rust/src/tvm_sys/value.rs.html            |  194 -
 .../rust/tvm-sys/src/c_runtime_api.rs.html         |    9 -
 docs/api/rust/src/tyck/tyck.rs.html                |  102 -
 docs/api/rust/storage.js                           |    1 -
 docs/api/rust/string/all.html                      |    4 -
 docs/api/rust/string/fn.main.html                  |    2 -
 docs/api/rust/string/index.html                    |    3 -
 docs/api/rust/string/sidebar-items.js              |    1 -
 docs/api/rust/test_rt_nn/all.html                  |    4 -
 docs/api/rust/test_rt_nn/constant.BATCH_SIZE.html  |    2 -
 docs/api/rust/test_rt_nn/constant.IN_DIM.html      |    2 -
 docs/api/rust/test_rt_nn/fn.main.html              |    2 -
 docs/api/rust/test_rt_nn/index.html                |    4 -
 docs/api/rust/test_rt_nn/sidebar-items.js          |    1 -
 docs/api/rust/test_rt_tvm_basic/all.html           |    4 -
 docs/api/rust/test_rt_tvm_basic/fn.main.html       |    2 -
 docs/api/rust/test_rt_tvm_basic/index.html         |    4 -
 docs/api/rust/test_rt_tvm_basic/sidebar-items.js   |    1 -
 .../tvm_mod/ext/fn.__tvm_main__.html               |    2 -
 .../tvm_mod/ext/fn.__tvm_module_ctx.html           |    2 -
 .../tvm_mod/ext/fn.__tvm_module_startup.html       |    2 -
 .../tvm_mod/ext/fn.default_function.html           |    2 -
 .../rust/test_rt_tvm_basic/tvm_mod/ext/index.html  |    3 -
 .../test_rt_tvm_basic/tvm_mod/ext/sidebar-items.js |    1 -
 .../test_rt_tvm_basic/tvm_mod/fn.__tvm_main__.html |    2 -
 .../tvm_mod/fn.__tvm_module_ctx.html               |    2 -
 .../tvm_mod/fn.__tvm_module_startup.html           |    2 -
 .../tvm_mod/fn.default_function.html               |    2 -
 docs/api/rust/test_rt_tvm_basic/tvm_mod/index.html |    4 -
 .../test_rt_tvm_basic/tvm_mod/sidebar-items.js     |    1 -
 docs/api/rust/test_rt_tvm_dso/all.html             |    4 -
 docs/api/rust/test_rt_tvm_dso/fn.main.html         |    2 -
 docs/api/rust/test_rt_tvm_dso/index.html           |    3 -
 docs/api/rust/test_rt_tvm_dso/sidebar-items.js     |    1 -
 docs/api/rust/test_rt_wasm32/all.html              |    4 -
 .../test_rt_wasm32/fn.__get_tvm_module_ctx.html    |    2 -
 docs/api/rust/test_rt_wasm32/fn.main.html          |    2 -
 docs/api/rust/test_rt_wasm32/index.html            |    4 -
 docs/api/rust/test_rt_wasm32/sidebar-items.js      |    1 -
 .../test_rt_wasm32/static.__tvm_module_ctx.html    |    2 -
 docs/api/rust/theme.js                             |    1 -
 docs/api/rust/tvm/all.html                         |    4 -
 docs/api/rust/tvm/context/enum.DeviceType.html     |   49 -
 docs/api/rust/tvm/context/fn.get_device_attr.html  |    2 -
 docs/api/rust/tvm/context/index.html               |    7 -
 docs/api/rust/tvm/context/sidebar-items.js         |    1 -
 docs/api/rust/tvm/context/struct.Context.html      |   43 -
 .../tvm/context/struct.UnsupportedDeviceError.html |   21 -
 docs/api/rust/tvm/enum.DeviceType.html             |   49 -
 docs/api/rust/tvm/enum.Error.html                  |   42 -
 docs/api/rust/tvm/enum.NDArrayError.html           |   34 -
 docs/api/rust/tvm/errors/enum.Error.html           |   42 -
 docs/api/rust/tvm/errors/enum.NDArrayError.html    |   34 -
 docs/api/rust/tvm/errors/index.html                |    4 -
 docs/api/rust/tvm/errors/sidebar-items.js          |    1 -
 .../tvm/errors/struct.FunctionNotFoundError.html   |   21 -
 .../rust/tvm/errors/struct.TypeMismatchError.html  |   25 -
 docs/api/rust/tvm/fn.version.html                  |    3 -
 docs/api/rust/tvm/function/enum.ArgValue.html      |  450 --
 docs/api/rust/tvm/function/enum.RetValue.html      |  356 -
 .../ffi/constant.DLDataTypeCode_kDLBfloat.html     |    2 -
 .../ffi/constant.DLDataTypeCode_kDLFloat.html      |    2 -
 .../ffi/constant.DLDataTypeCode_kDLInt.html        |    2 -
 .../ffi/constant.DLDataTypeCode_kDLUInt.html       |    2 -
 .../function/ffi/constant.DLDeviceType_kDLCPU.html |    3 -
 .../ffi/constant.DLDeviceType_kDLCPUPinned.html    |    4 -
 .../ffi/constant.DLDeviceType_kDLExtDev.html       |    5 -
 .../function/ffi/constant.DLDeviceType_kDLGPU.html |    3 -
 .../ffi/constant.DLDeviceType_kDLMetal.html        |    3 -
 .../ffi/constant.DLDeviceType_kDLOpenCL.html       |    3 -
 .../ffi/constant.DLDeviceType_kDLROCM.html         |    3 -
 .../function/ffi/constant.DLDeviceType_kDLVPI.html |    3 -
 .../ffi/constant.DLDeviceType_kDLVulkan.html       |    3 -
 .../tvm/function/ffi/constant.DLPACK_VERSION.html  |    2 -
 .../rust/tvm/function/ffi/constant.INT16_MAX.html  |    2 -
 .../rust/tvm/function/ffi/constant.INT16_MIN.html  |    2 -
 .../rust/tvm/function/ffi/constant.INT32_MAX.html  |    2 -
 .../rust/tvm/function/ffi/constant.INT32_MIN.html  |    2 -
 .../rust/tvm/function/ffi/constant.INT8_MAX.html   |    2 -
 .../rust/tvm/function/ffi/constant.INT8_MIN.html   |    2 -
 .../rust/tvm/function/ffi/constant.INTPTR_MAX.html |    2 -
 .../rust/tvm/function/ffi/constant.INTPTR_MIN.html |    2 -
 .../tvm/function/ffi/constant.INT_FAST16_MAX.html  |    2 -
 .../tvm/function/ffi/constant.INT_FAST16_MIN.html  |    2 -
 .../tvm/function/ffi/constant.INT_FAST32_MAX.html  |    2 -
 .../tvm/function/ffi/constant.INT_FAST32_MIN.html  |    2 -
 .../tvm/function/ffi/constant.INT_FAST8_MAX.html   |    2 -
 .../tvm/function/ffi/constant.INT_FAST8_MIN.html   |    2 -
 .../tvm/function/ffi/constant.INT_LEAST16_MAX.html |    2 -
 .../tvm/function/ffi/constant.INT_LEAST16_MIN.html |    2 -
 .../tvm/function/ffi/constant.INT_LEAST32_MAX.html |    2 -
 .../tvm/function/ffi/constant.INT_LEAST32_MIN.html |    2 -
 .../tvm/function/ffi/constant.INT_LEAST8_MAX.html  |    2 -
 .../tvm/function/ffi/constant.INT_LEAST8_MIN.html  |    2 -
 .../tvm/function/ffi/constant.PTRDIFF_MAX.html     |    2 -
 .../tvm/function/ffi/constant.PTRDIFF_MIN.html     |    2 -
 .../tvm/function/ffi/constant.SIG_ATOMIC_MAX.html  |    2 -
 .../tvm/function/ffi/constant.SIG_ATOMIC_MIN.html  |    2 -
 .../rust/tvm/function/ffi/constant.SIZE_MAX.html   |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMArgFloat.html  |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMArgInt.html    |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMBytes.html     |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMContext.html   |    2 -
 ...constant.TVMArgTypeCode_kTVMDLTensorHandle.html |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMDataType.html  |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMExtBegin.html  |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMExtEnd.html    |    2 -
 .../constant.TVMArgTypeCode_kTVMExtReserveEnd.html |    2 -
 .../constant.TVMArgTypeCode_kTVMModuleHandle.html  |    2 -
 .../constant.TVMArgTypeCode_kTVMNDArrayHandle.html |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMNNVMFirst.html |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMNNVMLast.html  |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMNullptr.html   |    2 -
 .../constant.TVMArgTypeCode_kTVMObjectHandle.html  |    2 -
 ...tant.TVMArgTypeCode_kTVMObjectRValueRefArg.html |    2 -
 .../constant.TVMArgTypeCode_kTVMOpaqueHandle.html  |    2 -
 ...nstant.TVMArgTypeCode_kTVMPackedFuncHandle.html |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMStr.html       |    2 -
 .../ffi/constant.TVMDeviceExtType_kDLAOCL.html     |    2 -
 .../ffi/constant.TVMDeviceExtType_kDLHexagon.html  |    2 -
 .../ffi/constant.TVMDeviceExtType_kDLMicroDev.html |    2 -
 .../ffi/constant.TVMDeviceExtType_kDLSDAccel.html  |    2 -
 .../ffi/constant.TVMDeviceExtType_kDLWebGPU.html   |    2 -
 .../ffi/constant.TVMDeviceExtType_kOpenGL.html     |    2 -
 .../tvm/function/ffi/constant.TVM_VERSION.html     |    2 -
 .../rust/tvm/function/ffi/constant.UINT16_MAX.html |    2 -
 .../rust/tvm/function/ffi/constant.UINT32_MAX.html |    2 -
 .../rust/tvm/function/ffi/constant.UINT8_MAX.html  |    2 -
 .../tvm/function/ffi/constant.UINTPTR_MAX.html     |    2 -
 .../tvm/function/ffi/constant.UINT_FAST16_MAX.html |    2 -
 .../tvm/function/ffi/constant.UINT_FAST32_MAX.html |    2 -
 .../tvm/function/ffi/constant.UINT_FAST8_MAX.html  |    2 -
 .../function/ffi/constant.UINT_LEAST16_MAX.html    |    2 -
 .../function/ffi/constant.UINT_LEAST32_MAX.html    |    2 -
 .../tvm/function/ffi/constant.UINT_LEAST8_MAX.html |    2 -
 .../rust/tvm/function/ffi/constant.WINT_MAX.html   |    2 -
 .../rust/tvm/function/ffi/constant.WINT_MIN.html   |    2 -
 .../tvm/function/ffi/constant._ATFILE_SOURCE.html  |    2 -
 .../tvm/function/ffi/constant._BITS_WCHAR_H.html   |    2 -
 .../tvm/function/ffi/constant._DEFAULT_SOURCE.html |    2 -
 .../tvm/function/ffi/constant._FEATURES_H.html     |    2 -
 .../tvm/function/ffi/constant._POSIX_C_SOURCE.html |    2 -
 .../tvm/function/ffi/constant._POSIX_SOURCE.html   |    2 -
 .../tvm/function/ffi/constant._STDC_PREDEF_H.html  |    2 -
 .../rust/tvm/function/ffi/constant._STDINT_H.html  |    2 -
 .../tvm/function/ffi/constant._SYS_CDEFS_H.html    |    2 -
 .../tvm/function/ffi/constant.__GLIBC_MINOR__.html |    2 -
 .../rust/tvm/function/ffi/constant.__GLIBC__.html  |    2 -
 .../tvm/function/ffi/constant.__GNU_LIBRARY__.html |    2 -
 .../ffi/constant.__STDC_IEC_559_COMPLEX__.html     |    2 -
 .../function/ffi/constant.__STDC_IEC_559__.html    |    2 -
 .../function/ffi/constant.__STDC_ISO_10646__.html  |    2 -
 .../function/ffi/constant.__STDC_NO_THREADS__.html |    2 -
 .../function/ffi/constant.__SYSCALL_WORDSIZE.html  |    2 -
 .../tvm/function/ffi/constant.__USE_ATFILE.html    |    2 -
 .../function/ffi/constant.__USE_FORTIFY_LEVEL.html |    2 -
 .../tvm/function/ffi/constant.__USE_ISOC11.html    |    2 -
 .../tvm/function/ffi/constant.__USE_ISOC95.html    |    2 -
 .../tvm/function/ffi/constant.__USE_ISOC99.html    |    2 -
 .../rust/tvm/function/ffi/constant.__USE_MISC.html |    2 -
 .../tvm/function/ffi/constant.__USE_POSIX.html     |    2 -
 .../function/ffi/constant.__USE_POSIX199309.html   |    2 -
 .../function/ffi/constant.__USE_POSIX199506.html   |    2 -
 .../tvm/function/ffi/constant.__USE_POSIX2.html    |    2 -
 .../ffi/constant.__USE_POSIX_IMPLICITLY.html       |    2 -
 .../tvm/function/ffi/constant.__USE_XOPEN2K.html   |    2 -
 .../tvm/function/ffi/constant.__USE_XOPEN2K8.html  |    2 -
 .../rust/tvm/function/ffi/constant.__WORDSIZE.html |    2 -
 .../ffi/constant.__WORDSIZE_TIME64_COMPAT32.html   |    2 -
 .../tvm/function/ffi/fn.TVMAPISetLastError.html    |    5 -
 .../rust/tvm/function/ffi/fn.TVMArrayAlloc.html    |   13 -
 .../tvm/function/ffi/fn.TVMArrayCopyFromBytes.html |    7 -
 .../tvm/function/ffi/fn.TVMArrayCopyFromTo.html    |    7 -
 .../tvm/function/ffi/fn.TVMArrayCopyToBytes.html   |    7 -
 .../api/rust/tvm/function/ffi/fn.TVMArrayFree.html |    5 -
 .../tvm/function/ffi/fn.TVMArrayFromDLPack.html    |    7 -
 .../rust/tvm/function/ffi/fn.TVMArrayToDLPack.html |    7 -
 .../function/ffi/fn.TVMBackendAllocWorkspace.html  |   12 -
 .../function/ffi/fn.TVMBackendFreeWorkspace.html   |    8 -
 .../function/ffi/fn.TVMBackendGetFuncFromEnv.html  |    9 -
 .../function/ffi/fn.TVMBackendParallelBarrier.html |    6 -
 .../function/ffi/fn.TVMBackendParallelLaunch.html  |    8 -
 .../ffi/fn.TVMBackendRegisterSystemLibSymbol.html  |    6 -
 .../tvm/function/ffi/fn.TVMBackendRunOnce.html     |   10 -
 .../tvm/function/ffi/fn.TVMCFuncSetReturn.html     |    9 -
 .../rust/tvm/function/ffi/fn.TVMCbArgToReturn.html |    8 -
 .../ffi/fn.TVMDLManagedTensorCallDeleter.html      |    4 -
 .../function/ffi/fn.TVMDeviceAllocDataSpace.html   |   10 -
 .../function/ffi/fn.TVMDeviceCopyDataFromTo.html   |   14 -
 .../function/ffi/fn.TVMDeviceFreeDataSpace.html    |    6 -
 docs/api/rust/tvm/function/ffi/fn.TVMFuncCall.html |   16 -
 .../function/ffi/fn.TVMFuncCreateFromCFunc.html    |    9 -
 docs/api/rust/tvm/function/ffi/fn.TVMFuncFree.html |    5 -
 .../rust/tvm/function/ffi/fn.TVMFuncGetGlobal.html |    7 -
 .../function/ffi/fn.TVMFuncListGlobalNames.html    |    6 -
 .../tvm/function/ffi/fn.TVMFuncRegisterGlobal.html |    7 -
 .../tvm/function/ffi/fn.TVMFuncRemoveGlobal.html   |    4 -
 .../rust/tvm/function/ffi/fn.TVMGetLastError.html  |    8 -
 docs/api/rust/tvm/function/ffi/fn.TVMModFree.html  |    9 -
 .../tvm/function/ffi/fn.TVMModGetFunction.html     |    8 -
 .../api/rust/tvm/function/ffi/fn.TVMModImport.html |    7 -
 .../tvm/function/ffi/fn.TVMModLoadFromFile.html    |    9 -
 .../tvm/function/ffi/fn.TVMObjectDerivedFrom.html  |    7 -
 .../rust/tvm/function/ffi/fn.TVMObjectFree.html    |    7 -
 .../tvm/function/ffi/fn.TVMObjectGetTypeIndex.html |    6 -
 .../rust/tvm/function/ffi/fn.TVMObjectRetain.html  |    6 -
 .../function/ffi/fn.TVMObjectTypeKey2Index.html    |    6 -
 .../api/rust/tvm/function/ffi/fn.TVMSetStream.html |   10 -
 .../rust/tvm/function/ffi/fn.TVMStreamCreate.html  |    7 -
 .../rust/tvm/function/ffi/fn.TVMStreamFree.html    |    7 -
 .../ffi/fn.TVMStreamStreamSynchronize.html         |    8 -
 .../rust/tvm/function/ffi/fn.TVMSynchronize.html   |    7 -
 docs/api/rust/tvm/function/ffi/index.html          |  182 -
 docs/api/rust/tvm/function/ffi/sidebar-items.js    |    1 -
 .../rust/tvm/function/ffi/struct.DLContext.html    |   47 -
 .../rust/tvm/function/ffi/struct.DLDataType.html   |   54 -
 .../tvm/function/ffi/struct.DLManagedTensor.html   |   40 -
 .../api/rust/tvm/function/ffi/struct.DLTensor.html |   63 -
 .../rust/tvm/function/ffi/struct.TVMByteArray.html |   33 -
 .../function/ffi/struct.TVMParallelGroupEnv.html   |   30 -
 .../tvm/function/ffi/type.BackendPackedCFunc.html  |    2 -
 .../rust/tvm/function/ffi/type.DLDataTypeCode.html |    3 -
 .../rust/tvm/function/ffi/type.DLDeviceType.html   |    3 -
 .../tvm/function/ffi/type.FTVMParallelLambda.html  |    6 -
 .../rust/tvm/function/ffi/type.TVMArgTypeCode.html |   14 -
 .../rust/tvm/function/ffi/type.TVMArrayHandle.html |    3 -
 .../function/ffi/type.TVMBackendPackedCFunc.html   |   10 -
 .../api/rust/tvm/function/ffi/type.TVMContext.html |    3 -
 .../tvm/function/ffi/type.TVMDeviceExtType.html    |    3 -
 .../ffi/type.TVMExtensionFuncDeclarer.html         |    7 -
 .../tvm/function/ffi/type.TVMFunctionHandle.html   |    3 -
 .../tvm/function/ffi/type.TVMModuleHandle.html     |    3 -
 .../tvm/function/ffi/type.TVMObjectHandle.html     |    3 -
 .../rust/tvm/function/ffi/type.TVMPackedCFunc.html |   10 -
 .../function/ffi/type.TVMPackedCFuncFinalizer.html |    4 -
 .../tvm/function/ffi/type.TVMRetValueHandle.html   |    3 -
 .../tvm/function/ffi/type.TVMStreamHandle.html     |    4 -
 .../rust/tvm/function/ffi/type.int_fast16_t.html   |    2 -
 .../rust/tvm/function/ffi/type.int_fast32_t.html   |    2 -
 .../rust/tvm/function/ffi/type.int_fast64_t.html   |    2 -
 .../rust/tvm/function/ffi/type.int_fast8_t.html    |    2 -
 .../rust/tvm/function/ffi/type.int_least16_t.html  |    2 -
 .../rust/tvm/function/ffi/type.int_least32_t.html  |    2 -
 .../rust/tvm/function/ffi/type.int_least64_t.html  |    2 -
 .../rust/tvm/function/ffi/type.int_least8_t.html   |    2 -
 docs/api/rust/tvm/function/ffi/type.intmax_t.html  |    2 -
 .../rust/tvm/function/ffi/type.tvm_index_t.html    |    3 -
 .../rust/tvm/function/ffi/type.uint_fast16_t.html  |    2 -
 .../rust/tvm/function/ffi/type.uint_fast32_t.html  |    2 -
 .../rust/tvm/function/ffi/type.uint_fast64_t.html  |    2 -
 .../rust/tvm/function/ffi/type.uint_fast8_t.html   |    2 -
 .../rust/tvm/function/ffi/type.uint_least16_t.html |    2 -
 .../rust/tvm/function/ffi/type.uint_least32_t.html |    2 -
 .../rust/tvm/function/ffi/type.uint_least64_t.html |    2 -
 .../rust/tvm/function/ffi/type.uint_least8_t.html  |    2 -
 docs/api/rust/tvm/function/ffi/type.uintmax_t.html |    2 -
 docs/api/rust/tvm/function/ffi/type.wchar_t.html   |    2 -
 docs/api/rust/tvm/function/ffi/union.TVMValue.html |   47 -
 docs/api/rust/tvm/function/fn.register.html        |   20 -
 .../rust/tvm/function/fn.register_override.html    |    4 -
 .../api/rust/tvm/function/fn.register_untyped.html |    2 -
 docs/api/rust/tvm/function/index.html              |   27 -
 docs/api/rust/tvm/function/sidebar-items.js        |    1 -
 docs/api/rust/tvm/function/struct.Function.html    |   38 -
 docs/api/rust/tvm/function/trait.ToFunction.html   |   22 -
 docs/api/rust/tvm/function/trait.Typed.html        |   14 -
 docs/api/rust/tvm/function/type.Result.html        |    2 -
 docs/api/rust/tvm/index.html                       |   33 -
 docs/api/rust/tvm/ir/arith/index.html              |    3 -
 docs/api/rust/tvm/ir/arith/sidebar-items.js        |    1 -
 .../rust/tvm/ir/arith/struct.ConstIntBound.html    |   30 -
 .../tvm/ir/arith/struct.ConstIntBoundNode.html     |   21 -
 docs/api/rust/tvm/ir/attrs/index.html              |    3 -
 docs/api/rust/tvm/ir/attrs/sidebar-items.js        |    1 -
 docs/api/rust/tvm/ir/attrs/struct.Attrs.html       |   30 -
 .../rust/tvm/ir/attrs/struct.BaseAttrsNode.html    |   19 -
 .../rust/tvm/ir/diagnostics/codespan/fn.init.html  |    4 -
 .../rust/tvm/ir/diagnostics/codespan/index.html    |    8 -
 .../tvm/ir/diagnostics/codespan/sidebar-items.js   |    1 -
 .../tvm/ir/diagnostics/enum.DiagnosticLevel.html   |   22 -
 .../rust/tvm/ir/diagnostics/fn.clear_renderer.html |    2 -
 .../diagnostics/fn.diagnositc_renderer_render.html |    2 -
 .../diagnostics/fn.diagnostic_context_default.html |    2 -
 .../diagnostics/fn.diagnostic_context_render.html  |    2 -
 .../tvm/ir/diagnostics/fn.diagnostic_renderer.html |    2 -
 docs/api/rust/tvm/ir/diagnostics/fn.emit.html      |    2 -
 .../rust/tvm/ir/diagnostics/fn.get_renderer.html   |    2 -
 docs/api/rust/tvm/ir/diagnostics/index.html        |   13 -
 docs/api/rust/tvm/ir/diagnostics/sidebar-items.js  |    1 -
 .../rust/tvm/ir/diagnostics/struct.Diagnostic.html |   30 -
 .../ir/diagnostics/struct.DiagnosticBuilder.html   |   22 -
 .../ir/diagnostics/struct.DiagnosticContext.html   |   35 -
 .../diagnostics/struct.DiagnosticContextNode.html  |   26 -
 .../tvm/ir/diagnostics/struct.DiagnosticNode.html  |   26 -
 .../ir/diagnostics/struct.DiagnosticRenderer.html  |   31 -
 .../diagnostics/struct.DiagnosticRendererNode.html |   28 -
 docs/api/rust/tvm/ir/expr/fn._as_text.html         |    2 -
 docs/api/rust/tvm/ir/expr/fn.as_text.html          |    2 -
 docs/api/rust/tvm/ir/expr/index.html               |    4 -
 docs/api/rust/tvm/ir/expr/sidebar-items.js         |    1 -
 docs/api/rust/tvm/ir/expr/struct.BaseExpr.html     |   30 -
 docs/api/rust/tvm/ir/expr/struct.BaseExprNode.html |   19 -
 docs/api/rust/tvm/ir/expr/struct.GlobalVar.html    |   30 -
 .../api/rust/tvm/ir/expr/struct.GlobalVarNode.html |   20 -
 docs/api/rust/tvm/ir/expr/struct.PrimExpr.html     |   30 -
 docs/api/rust/tvm/ir/expr/struct.PrimExprNode.html |   20 -
 docs/api/rust/tvm/ir/function/index.html           |    4 -
 docs/api/rust/tvm/ir/function/sidebar-items.js     |    1 -
 docs/api/rust/tvm/ir/function/struct.BaseFunc.html |   30 -
 .../rust/tvm/ir/function/struct.BaseFuncNode.html  |   20 -
 docs/api/rust/tvm/ir/function/type.DictAttrs.html  |    2 -
 docs/api/rust/tvm/ir/index.html                    |    4 -
 docs/api/rust/tvm/ir/module/enum.Error.html        |   28 -
 docs/api/rust/tvm/ir/module/fn.module_add_def.html |    2 -
 .../tvm/ir/module/fn.module_get_global_var.html    |    2 -
 .../tvm/ir/module/fn.module_get_global_vars.html   |    2 -
 docs/api/rust/tvm/ir/module/fn.module_lookup.html  |    2 -
 .../rust/tvm/ir/module/fn.module_lookup_str.html   |    2 -
 .../rust/tvm/ir/module/fn.parse_expression.html    |    2 -
 docs/api/rust/tvm/ir/module/fn.parse_module.html   |    2 -
 docs/api/rust/tvm/ir/module/index.html             |    5 -
 docs/api/rust/tvm/ir/module/sidebar-items.js       |    1 -
 docs/api/rust/tvm/ir/module/struct.IRModule.html   |   30 -
 .../rust/tvm/ir/module/struct.IRModuleNode.html    |   22 -
 docs/api/rust/tvm/ir/op/index.html                 |    3 -
 docs/api/rust/tvm/ir/op/sidebar-items.js           |    1 -
 docs/api/rust/tvm/ir/op/struct.Op.html             |   30 -
 docs/api/rust/tvm/ir/op/struct.OpNode.html         |   27 -
 docs/api/rust/tvm/ir/relay/attrs/index.html        |    3 -
 docs/api/rust/tvm/ir/relay/attrs/nn/index.html     |    3 -
 .../rust/tvm/ir/relay/attrs/nn/sidebar-items.js    |    1 -
 .../ir/relay/attrs/nn/struct.BatchNormAttrs.html   |   30 -
 .../relay/attrs/nn/struct.BatchNormAttrsNode.html  |   23 -
 .../tvm/ir/relay/attrs/nn/struct.BiasAddAttrs.html |   30 -
 .../ir/relay/attrs/nn/struct.BiasAddAttrsNode.html |   20 -
 .../tvm/ir/relay/attrs/nn/struct.Conv2DAttrs.html  |   30 -
 .../ir/relay/attrs/nn/struct.Conv2DAttrsNode.html  |   29 -
 .../tvm/ir/relay/attrs/nn/struct.DenseAttrs.html   |   30 -
 .../ir/relay/attrs/nn/struct.DenseAttrsNode.html   |   21 -
 .../relay/attrs/nn/struct.GlobalPool2DAttrs.html   |   30 -
 .../attrs/nn/struct.GlobalPool2DAttrsNode.html     |   20 -
 .../ir/relay/attrs/nn/struct.MaxPool2DAttrs.html   |   30 -
 .../relay/attrs/nn/struct.MaxPool2DAttrsNode.html  |   24 -
 .../tvm/ir/relay/attrs/nn/struct.SoftmaxAttrs.html |   30 -
 .../ir/relay/attrs/nn/struct.SoftmaxAttrsNode.html |   20 -
 docs/api/rust/tvm/ir/relay/attrs/sidebar-items.js  |    1 -
 .../rust/tvm/ir/relay/attrs/transform/index.html   |    3 -
 .../tvm/ir/relay/attrs/transform/sidebar-items.js  |    1 -
 .../attrs/transform/struct.ExpandDimsAttrs.html    |   30 -
 .../transform/struct.ExpandDimsAttrsNode.html      |   21 -
 docs/api/rust/tvm/ir/relay/index.html              |    5 -
 docs/api/rust/tvm/ir/relay/sidebar-items.js        |    1 -
 docs/api/rust/tvm/ir/relay/struct.Call.html        |   30 -
 docs/api/rust/tvm/ir/relay/struct.CallNode.html    |   23 -
 docs/api/rust/tvm/ir/relay/struct.Clause.html      |   30 -
 docs/api/rust/tvm/ir/relay/struct.ClauseNode.html  |   21 -
 docs/api/rust/tvm/ir/relay/struct.Constant.html    |   30 -
 .../api/rust/tvm/ir/relay/struct.ConstantNode.html |   20 -
 docs/api/rust/tvm/ir/relay/struct.Constructor.html |   30 -
 .../rust/tvm/ir/relay/struct.ConstructorNode.html  |   22 -
 docs/api/rust/tvm/ir/relay/struct.Expr.html        |   34 -
 docs/api/rust/tvm/ir/relay/struct.ExprNode.html    |   21 -
 docs/api/rust/tvm/ir/relay/struct.Function.html    |   30 -
 .../api/rust/tvm/ir/relay/struct.FunctionNode.html |   23 -
 docs/api/rust/tvm/ir/relay/struct.Id.html          |   30 -
 docs/api/rust/tvm/ir/relay/struct.IdNode.html      |   20 -
 docs/api/rust/tvm/ir/relay/struct.If.html          |   30 -
 docs/api/rust/tvm/ir/relay/struct.IfNode.html      |   22 -
 docs/api/rust/tvm/ir/relay/struct.Let.html         |   30 -
 docs/api/rust/tvm/ir/relay/struct.LetNode.html     |   22 -
 docs/api/rust/tvm/ir/relay/struct.Match.html       |   30 -
 docs/api/rust/tvm/ir/relay/struct.MatchNode.html   |   22 -
 docs/api/rust/tvm/ir/relay/struct.Pattern.html     |   30 -
 .../tvm/ir/relay/struct.PatternConstructor.html    |   30 -
 .../ir/relay/struct.PatternConstructorNode.html    |   21 -
 docs/api/rust/tvm/ir/relay/struct.PatternNode.html |   20 -
 .../api/rust/tvm/ir/relay/struct.PatternTuple.html |   30 -
 .../rust/tvm/ir/relay/struct.PatternTupleNode.html |   20 -
 docs/api/rust/tvm/ir/relay/struct.PatternVar.html  |   30 -
 .../rust/tvm/ir/relay/struct.PatternVarNode.html   |   20 -
 .../rust/tvm/ir/relay/struct.PatternWildcard.html  |   30 -
 .../tvm/ir/relay/struct.PatternWildcardNode.html   |   19 -
 docs/api/rust/tvm/ir/relay/struct.RefCreate.html   |   30 -
 .../rust/tvm/ir/relay/struct.RefCreateNode.html    |   20 -
 docs/api/rust/tvm/ir/relay/struct.RefRead.html     |   30 -
 docs/api/rust/tvm/ir/relay/struct.RefReadNode.html |   20 -
 docs/api/rust/tvm/ir/relay/struct.RefWrite.html    |   30 -
 .../api/rust/tvm/ir/relay/struct.RefWriteNode.html |   21 -
 docs/api/rust/tvm/ir/relay/struct.Tuple.html       |   30 -
 .../api/rust/tvm/ir/relay/struct.TupleGetItem.html |   30 -
 .../rust/tvm/ir/relay/struct.TupleGetItemNode.html |   21 -
 docs/api/rust/tvm/ir/relay/struct.TupleNode.html   |   20 -
 docs/api/rust/tvm/ir/relay/struct.Var.html         |   30 -
 docs/api/rust/tvm/ir/relay/struct.VarNode.html     |   21 -
 docs/api/rust/tvm/ir/sidebar-items.js              |    1 -
 docs/api/rust/tvm/ir/source_map/index.html         |    5 -
 docs/api/rust/tvm/ir/source_map/sidebar-items.js   |    1 -
 docs/api/rust/tvm/ir/source_map/struct.Source.html |   30 -
 .../rust/tvm/ir/source_map/struct.SourceMap.html   |   30 -
 .../tvm/ir/source_map/struct.SourceMapNode.html    |   23 -
 .../rust/tvm/ir/source_map/struct.SourceNode.html  |   25 -
 docs/api/rust/tvm/ir/span/index.html               |    5 -
 docs/api/rust/tvm/ir/span/sidebar-items.js         |    1 -
 docs/api/rust/tvm/ir/span/struct.SourceName.html   |   30 -
 .../rust/tvm/ir/span/struct.SourceNameNode.html    |   21 -
 docs/api/rust/tvm/ir/span/struct.Span.html         |   30 -
 docs/api/rust/tvm/ir/span/struct.SpanNode.html     |   30 -
 docs/api/rust/tvm/ir/tir/index.html                |    3 -
 docs/api/rust/tvm/ir/tir/sidebar-items.js          |    1 -
 docs/api/rust/tvm/ir/tir/struct.Add.html           |   30 -
 docs/api/rust/tvm/ir/tir/struct.AddNode.html       |   21 -
 docs/api/rust/tvm/ir/tir/struct.And.html           |   30 -
 docs/api/rust/tvm/ir/tir/struct.AndNode.html       |   21 -
 docs/api/rust/tvm/ir/tir/struct.Cast.html          |   30 -
 docs/api/rust/tvm/ir/tir/struct.CastNode.html      |   20 -
 docs/api/rust/tvm/ir/tir/struct.Div.html           |   30 -
 docs/api/rust/tvm/ir/tir/struct.DivNode.html       |   21 -
 docs/api/rust/tvm/ir/tir/struct.Eq.html            |   30 -
 docs/api/rust/tvm/ir/tir/struct.EqNode.html        |   21 -
 docs/api/rust/tvm/ir/tir/struct.FloorDiv.html      |   30 -
 docs/api/rust/tvm/ir/tir/struct.FloorDivNode.html  |   21 -
 docs/api/rust/tvm/ir/tir/struct.FloorMod.html      |   30 -
 docs/api/rust/tvm/ir/tir/struct.FloorModNode.html  |   21 -
 docs/api/rust/tvm/ir/tir/struct.Ge.html            |   30 -
 docs/api/rust/tvm/ir/tir/struct.GeNode.html        |   21 -
 docs/api/rust/tvm/ir/tir/struct.Gt.html            |   30 -
 docs/api/rust/tvm/ir/tir/struct.GtNode.html        |   21 -
 docs/api/rust/tvm/ir/tir/struct.IntImm.html        |   30 -
 docs/api/rust/tvm/ir/tir/struct.IntImmNode.html    |   20 -
 docs/api/rust/tvm/ir/tir/struct.Le.html            |   30 -
 docs/api/rust/tvm/ir/tir/struct.LeNode.html        |   21 -
 docs/api/rust/tvm/ir/tir/struct.Let.html           |   30 -
 docs/api/rust/tvm/ir/tir/struct.LetNode.html       |   22 -
 docs/api/rust/tvm/ir/tir/struct.Lt.html            |   30 -
 docs/api/rust/tvm/ir/tir/struct.LtNode.html        |   21 -
 docs/api/rust/tvm/ir/tir/struct.Max.html           |   30 -
 docs/api/rust/tvm/ir/tir/struct.MaxNode.html       |   21 -
 docs/api/rust/tvm/ir/tir/struct.Min.html           |   30 -
 docs/api/rust/tvm/ir/tir/struct.MinNode.html       |   21 -
 docs/api/rust/tvm/ir/tir/struct.Mod.html           |   30 -
 docs/api/rust/tvm/ir/tir/struct.ModNode.html       |   21 -
 docs/api/rust/tvm/ir/tir/struct.Mul.html           |   30 -
 docs/api/rust/tvm/ir/tir/struct.MulNode.html       |   21 -
 docs/api/rust/tvm/ir/tir/struct.Ne.html            |   30 -
 docs/api/rust/tvm/ir/tir/struct.NeNode.html        |   21 -
 docs/api/rust/tvm/ir/tir/struct.Not.html           |   30 -
 docs/api/rust/tvm/ir/tir/struct.NotNode.html       |   20 -
 docs/api/rust/tvm/ir/tir/struct.Or.html            |   30 -
 docs/api/rust/tvm/ir/tir/struct.OrNode.html        |   21 -
 docs/api/rust/tvm/ir/tir/struct.Ramp.html          |   30 -
 docs/api/rust/tvm/ir/tir/struct.RampNode.html      |   22 -
 docs/api/rust/tvm/ir/tir/struct.Select.html        |   30 -
 docs/api/rust/tvm/ir/tir/struct.SelectNode.html    |   22 -
 docs/api/rust/tvm/ir/tir/struct.Sub.html           |   30 -
 docs/api/rust/tvm/ir/tir/struct.SubNode.html       |   21 -
 docs/api/rust/tvm/ir/tir/struct.Var.html           |   30 -
 docs/api/rust/tvm/ir/tir/struct.VarNode.html       |   20 -
 docs/api/rust/tvm/ir/ty/enum.TypeKind.html         |   23 -
 docs/api/rust/tvm/ir/ty/index.html                 |    7 -
 docs/api/rust/tvm/ir/ty/sidebar-items.js           |    1 -
 docs/api/rust/tvm/ir/ty/struct.BaseTensorType.html |   30 -
 .../rust/tvm/ir/ty/struct.BaseTensorTypeNode.html  |   19 -
 docs/api/rust/tvm/ir/ty/struct.FuncType.html       |   30 -
 docs/api/rust/tvm/ir/ty/struct.FuncTypeNode.html   |   29 -
 docs/api/rust/tvm/ir/ty/struct.GlobalTypeVar.html  |   30 -
 .../rust/tvm/ir/ty/struct.GlobalTypeVarNode.html   |   22 -
 docs/api/rust/tvm/ir/ty/struct.IncompleteType.html |   30 -
 .../rust/tvm/ir/ty/struct.IncompleteTypeNode.html  |   20 -
 docs/api/rust/tvm/ir/ty/struct.PointerType.html    |   30 -
 .../api/rust/tvm/ir/ty/struct.PointerTypeNode.html |   21 -
 docs/api/rust/tvm/ir/ty/struct.PrimType.html       |   30 -
 docs/api/rust/tvm/ir/ty/struct.PrimTypeNode.html   |   21 -
 docs/api/rust/tvm/ir/ty/struct.RefType.html        |   30 -
 .../rust/tvm/ir/ty/struct.RelayRefTypeNode.html    |   20 -
 docs/api/rust/tvm/ir/ty/struct.TensorType.html     |   30 -
 docs/api/rust/tvm/ir/ty/struct.TensorTypeNode.html |   21 -
 docs/api/rust/tvm/ir/ty/struct.TupleType.html      |   30 -
 docs/api/rust/tvm/ir/ty/struct.TupleTypeNode.html  |   20 -
 docs/api/rust/tvm/ir/ty/struct.Type.html           |   30 -
 docs/api/rust/tvm/ir/ty/struct.TypeConstraint.html |   30 -
 .../rust/tvm/ir/ty/struct.TypeConstraintNode.html  |   19 -
 docs/api/rust/tvm/ir/ty/struct.TypeNode.html       |   20 -
 docs/api/rust/tvm/ir/ty/struct.TypeVar.html        |   30 -
 docs/api/rust/tvm/ir/ty/struct.TypeVarNode.html    |   21 -
 docs/api/rust/tvm/macro.export!.html               |   10 -
 docs/api/rust/tvm/macro.export.html                |    6 -
 docs/api/rust/tvm/macro.export_mod!.html           |   10 -
 docs/api/rust/tvm/macro.export_mod.html            |    6 -
 docs/api/rust/tvm/macro.export_pass!.html          |   10 -
 docs/api/rust/tvm/macro.export_pass.html           |    6 -
 docs/api/rust/tvm/macro.initialize!.html           |   10 -
 docs/api/rust/tvm/macro.initialize.html            |   11 -
 docs/api/rust/tvm/module/fn.load_from_file.html    |    2 -
 docs/api/rust/tvm/module/fn.runtime_enabled.html   |    2 -
 docs/api/rust/tvm/module/index.html                |    7 -
 docs/api/rust/tvm/module/sidebar-items.js          |    1 -
 docs/api/rust/tvm/module/struct.Module.html        |   37 -
 docs/api/rust/tvm/ndarray/index.html               |   23 -
 docs/api/rust/tvm/ndarray/sidebar-items.js         |    1 -
 docs/api/rust/tvm/ndarray/struct.NDArray.html      |   71 -
 .../rust/tvm/ndarray/struct.NDArrayContainer.html  |   17 -
 docs/api/rust/tvm/ndarray/trait.Num32.html         |    8 -
 docs/api/rust/tvm/python/fn.load.html              |    7 -
 docs/api/rust/tvm/python/index.html                |    4 -
 docs/api/rust/tvm/python/sidebar-items.js          |    1 -
 .../rust/tvm/runtime/array/fn.array_get_item.html  |    2 -
 docs/api/rust/tvm/runtime/array/fn.array_size.html |    2 -
 docs/api/rust/tvm/runtime/array/index.html         |    4 -
 docs/api/rust/tvm/runtime/array/sidebar-items.js   |    1 -
 docs/api/rust/tvm/runtime/array/struct.Array.html  |   32 -
 .../rust/tvm/runtime/array/struct.IntoIter.html    |  138 -
 .../rust/tvm/runtime/context/enum.DeviceType.html  |   49 -
 .../tvm/runtime/context/fn.get_device_attr.html    |    2 -
 docs/api/rust/tvm/runtime/context/index.html       |    7 -
 docs/api/rust/tvm/runtime/context/sidebar-items.js |    1 -
 .../rust/tvm/runtime/context/struct.Context.html   |   43 -
 .../context/struct.UnsupportedDeviceError.html     |   21 -
 docs/api/rust/tvm/runtime/enum.ArgValue.html       |  450 --
 docs/api/rust/tvm/runtime/enum.DeviceType.html     |   49 -
 docs/api/rust/tvm/runtime/enum.Error.html          |   42 -
 docs/api/rust/tvm/runtime/enum.NDArrayError.html   |   34 -
 docs/api/rust/tvm/runtime/enum.RetValue.html       |  356 -
 docs/api/rust/tvm/runtime/errors/enum.Error.html   |   42 -
 .../rust/tvm/runtime/errors/enum.NDArrayError.html |   34 -
 docs/api/rust/tvm/runtime/errors/index.html        |    4 -
 docs/api/rust/tvm/runtime/errors/sidebar-items.js  |    1 -
 .../errors/struct.FunctionNotFoundError.html       |   21 -
 .../runtime/errors/struct.TypeMismatchError.html   |   25 -
 docs/api/rust/tvm/runtime/fn.debug_print.html      |    2 -
 docs/api/rust/tvm/runtime/fn.get_last_error.html   |    3 -
 docs/api/rust/tvm/runtime/fn.structural_equal.html |    2 -
 docs/api/rust/tvm/runtime/fn.structural_hash.html  |    2 -
 docs/api/rust/tvm/runtime/fn.version.html          |    3 -
 .../rust/tvm/runtime/function/enum.ArgValue.html   |  450 --
 .../rust/tvm/runtime/function/enum.RetValue.html   |  356 -
 .../ffi/constant.DLDataTypeCode_kDLBfloat.html     |    2 -
 .../ffi/constant.DLDataTypeCode_kDLFloat.html      |    2 -
 .../ffi/constant.DLDataTypeCode_kDLInt.html        |    2 -
 .../ffi/constant.DLDataTypeCode_kDLUInt.html       |    2 -
 .../function/ffi/constant.DLDeviceType_kDLCPU.html |    3 -
 .../ffi/constant.DLDeviceType_kDLCPUPinned.html    |    4 -
 .../ffi/constant.DLDeviceType_kDLExtDev.html       |    5 -
 .../function/ffi/constant.DLDeviceType_kDLGPU.html |    3 -
 .../ffi/constant.DLDeviceType_kDLMetal.html        |    3 -
 .../ffi/constant.DLDeviceType_kDLOpenCL.html       |    3 -
 .../ffi/constant.DLDeviceType_kDLROCM.html         |    3 -
 .../function/ffi/constant.DLDeviceType_kDLVPI.html |    3 -
 .../ffi/constant.DLDeviceType_kDLVulkan.html       |    3 -
 .../function/ffi/constant.DLPACK_VERSION.html      |    2 -
 .../runtime/function/ffi/constant.INT16_MAX.html   |    2 -
 .../runtime/function/ffi/constant.INT16_MIN.html   |    2 -
 .../runtime/function/ffi/constant.INT32_MAX.html   |    2 -
 .../runtime/function/ffi/constant.INT32_MIN.html   |    2 -
 .../runtime/function/ffi/constant.INT8_MAX.html    |    2 -
 .../runtime/function/ffi/constant.INT8_MIN.html    |    2 -
 .../runtime/function/ffi/constant.INTPTR_MAX.html  |    2 -
 .../runtime/function/ffi/constant.INTPTR_MIN.html  |    2 -
 .../function/ffi/constant.INT_FAST16_MAX.html      |    2 -
 .../function/ffi/constant.INT_FAST16_MIN.html      |    2 -
 .../function/ffi/constant.INT_FAST32_MAX.html      |    2 -
 .../function/ffi/constant.INT_FAST32_MIN.html      |    2 -
 .../function/ffi/constant.INT_FAST8_MAX.html       |    2 -
 .../function/ffi/constant.INT_FAST8_MIN.html       |    2 -
 .../function/ffi/constant.INT_LEAST16_MAX.html     |    2 -
 .../function/ffi/constant.INT_LEAST16_MIN.html     |    2 -
 .../function/ffi/constant.INT_LEAST32_MAX.html     |    2 -
 .../function/ffi/constant.INT_LEAST32_MIN.html     |    2 -
 .../function/ffi/constant.INT_LEAST8_MAX.html      |    2 -
 .../function/ffi/constant.INT_LEAST8_MIN.html      |    2 -
 .../runtime/function/ffi/constant.PTRDIFF_MAX.html |    2 -
 .../runtime/function/ffi/constant.PTRDIFF_MIN.html |    2 -
 .../function/ffi/constant.SIG_ATOMIC_MAX.html      |    2 -
 .../function/ffi/constant.SIG_ATOMIC_MIN.html      |    2 -
 .../runtime/function/ffi/constant.SIZE_MAX.html    |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMArgFloat.html  |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMArgInt.html    |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMBytes.html     |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMContext.html   |    2 -
 ...constant.TVMArgTypeCode_kTVMDLTensorHandle.html |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMDataType.html  |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMExtBegin.html  |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMExtEnd.html    |    2 -
 .../constant.TVMArgTypeCode_kTVMExtReserveEnd.html |    2 -
 .../constant.TVMArgTypeCode_kTVMModuleHandle.html  |    2 -
 .../constant.TVMArgTypeCode_kTVMNDArrayHandle.html |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMNNVMFirst.html |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMNNVMLast.html  |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMNullptr.html   |    2 -
 .../constant.TVMArgTypeCode_kTVMObjectHandle.html  |    2 -
 ...tant.TVMArgTypeCode_kTVMObjectRValueRefArg.html |    2 -
 .../constant.TVMArgTypeCode_kTVMOpaqueHandle.html  |    2 -
 ...nstant.TVMArgTypeCode_kTVMPackedFuncHandle.html |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMStr.html       |    2 -
 .../ffi/constant.TVMDeviceExtType_kDLAOCL.html     |    2 -
 .../ffi/constant.TVMDeviceExtType_kDLHexagon.html  |    2 -
 .../ffi/constant.TVMDeviceExtType_kDLMicroDev.html |    2 -
 .../ffi/constant.TVMDeviceExtType_kDLSDAccel.html  |    2 -
 .../ffi/constant.TVMDeviceExtType_kDLWebGPU.html   |    2 -
 .../ffi/constant.TVMDeviceExtType_kOpenGL.html     |    2 -
 .../runtime/function/ffi/constant.TVM_VERSION.html |    2 -
 .../runtime/function/ffi/constant.UINT16_MAX.html  |    2 -
 .../runtime/function/ffi/constant.UINT32_MAX.html  |    2 -
 .../runtime/function/ffi/constant.UINT8_MAX.html   |    2 -
 .../runtime/function/ffi/constant.UINTPTR_MAX.html |    2 -
 .../function/ffi/constant.UINT_FAST16_MAX.html     |    2 -
 .../function/ffi/constant.UINT_FAST32_MAX.html     |    2 -
 .../function/ffi/constant.UINT_FAST8_MAX.html      |    2 -
 .../function/ffi/constant.UINT_LEAST16_MAX.html    |    2 -
 .../function/ffi/constant.UINT_LEAST32_MAX.html    |    2 -
 .../function/ffi/constant.UINT_LEAST8_MAX.html     |    2 -
 .../runtime/function/ffi/constant.WINT_MAX.html    |    2 -
 .../runtime/function/ffi/constant.WINT_MIN.html    |    2 -
 .../function/ffi/constant._ATFILE_SOURCE.html      |    2 -
 .../function/ffi/constant._BITS_WCHAR_H.html       |    2 -
 .../function/ffi/constant._DEFAULT_SOURCE.html     |    2 -
 .../runtime/function/ffi/constant._FEATURES_H.html |    2 -
 .../function/ffi/constant._POSIX_C_SOURCE.html     |    2 -
 .../function/ffi/constant._POSIX_SOURCE.html       |    2 -
 .../function/ffi/constant._STDC_PREDEF_H.html      |    2 -
 .../runtime/function/ffi/constant._STDINT_H.html   |    2 -
 .../function/ffi/constant._SYS_CDEFS_H.html        |    2 -
 .../function/ffi/constant.__GLIBC_MINOR__.html     |    2 -
 .../runtime/function/ffi/constant.__GLIBC__.html   |    2 -
 .../function/ffi/constant.__GNU_LIBRARY__.html     |    2 -
 .../ffi/constant.__STDC_IEC_559_COMPLEX__.html     |    2 -
 .../function/ffi/constant.__STDC_IEC_559__.html    |    2 -
 .../function/ffi/constant.__STDC_ISO_10646__.html  |    2 -
 .../function/ffi/constant.__STDC_NO_THREADS__.html |    2 -
 .../function/ffi/constant.__SYSCALL_WORDSIZE.html  |    2 -
 .../function/ffi/constant.__USE_ATFILE.html        |    2 -
 .../function/ffi/constant.__USE_FORTIFY_LEVEL.html |    2 -
 .../function/ffi/constant.__USE_ISOC11.html        |    2 -
 .../function/ffi/constant.__USE_ISOC95.html        |    2 -
 .../function/ffi/constant.__USE_ISOC99.html        |    2 -
 .../runtime/function/ffi/constant.__USE_MISC.html  |    2 -
 .../runtime/function/ffi/constant.__USE_POSIX.html |    2 -
 .../function/ffi/constant.__USE_POSIX199309.html   |    2 -
 .../function/ffi/constant.__USE_POSIX199506.html   |    2 -
 .../function/ffi/constant.__USE_POSIX2.html        |    2 -
 .../ffi/constant.__USE_POSIX_IMPLICITLY.html       |    2 -
 .../function/ffi/constant.__USE_XOPEN2K.html       |    2 -
 .../function/ffi/constant.__USE_XOPEN2K8.html      |    2 -
 .../runtime/function/ffi/constant.__WORDSIZE.html  |    2 -
 .../ffi/constant.__WORDSIZE_TIME64_COMPAT32.html   |    2 -
 .../function/ffi/fn.TVMAPISetLastError.html        |    5 -
 .../tvm/runtime/function/ffi/fn.TVMArrayAlloc.html |   13 -
 .../function/ffi/fn.TVMArrayCopyFromBytes.html     |    7 -
 .../function/ffi/fn.TVMArrayCopyFromTo.html        |    7 -
 .../function/ffi/fn.TVMArrayCopyToBytes.html       |    7 -
 .../tvm/runtime/function/ffi/fn.TVMArrayFree.html  |    5 -
 .../function/ffi/fn.TVMArrayFromDLPack.html        |    7 -
 .../runtime/function/ffi/fn.TVMArrayToDLPack.html  |    7 -
 .../function/ffi/fn.TVMBackendAllocWorkspace.html  |   12 -
 .../function/ffi/fn.TVMBackendFreeWorkspace.html   |    8 -
 .../function/ffi/fn.TVMBackendGetFuncFromEnv.html  |    9 -
 .../function/ffi/fn.TVMBackendParallelBarrier.html |    6 -
 .../function/ffi/fn.TVMBackendParallelLaunch.html  |    8 -
 .../ffi/fn.TVMBackendRegisterSystemLibSymbol.html  |    6 -
 .../runtime/function/ffi/fn.TVMBackendRunOnce.html |   10 -
 .../runtime/function/ffi/fn.TVMCFuncSetReturn.html |    9 -
 .../runtime/function/ffi/fn.TVMCbArgToReturn.html  |    8 -
 .../ffi/fn.TVMDLManagedTensorCallDeleter.html      |    4 -
 .../function/ffi/fn.TVMDeviceAllocDataSpace.html   |   10 -
 .../function/ffi/fn.TVMDeviceCopyDataFromTo.html   |   14 -
 .../function/ffi/fn.TVMDeviceFreeDataSpace.html    |    6 -
 .../tvm/runtime/function/ffi/fn.TVMFuncCall.html   |   16 -
 .../function/ffi/fn.TVMFuncCreateFromCFunc.html    |    9 -
 .../tvm/runtime/function/ffi/fn.TVMFuncFree.html   |    5 -
 .../runtime/function/ffi/fn.TVMFuncGetGlobal.html  |    7 -
 .../function/ffi/fn.TVMFuncListGlobalNames.html    |    6 -
 .../function/ffi/fn.TVMFuncRegisterGlobal.html     |    7 -
 .../function/ffi/fn.TVMFuncRemoveGlobal.html       |    4 -
 .../runtime/function/ffi/fn.TVMGetLastError.html   |    8 -
 .../tvm/runtime/function/ffi/fn.TVMModFree.html    |    9 -
 .../runtime/function/ffi/fn.TVMModGetFunction.html |    8 -
 .../tvm/runtime/function/ffi/fn.TVMModImport.html  |    7 -
 .../function/ffi/fn.TVMModLoadFromFile.html        |    9 -
 .../function/ffi/fn.TVMObjectDerivedFrom.html      |    7 -
 .../tvm/runtime/function/ffi/fn.TVMObjectFree.html |    7 -
 .../function/ffi/fn.TVMObjectGetTypeIndex.html     |    6 -
 .../runtime/function/ffi/fn.TVMObjectRetain.html   |    6 -
 .../function/ffi/fn.TVMObjectTypeKey2Index.html    |    6 -
 .../tvm/runtime/function/ffi/fn.TVMSetStream.html  |   10 -
 .../runtime/function/ffi/fn.TVMStreamCreate.html   |    7 -
 .../tvm/runtime/function/ffi/fn.TVMStreamFree.html |    7 -
 .../ffi/fn.TVMStreamStreamSynchronize.html         |    8 -
 .../runtime/function/ffi/fn.TVMSynchronize.html    |    7 -
 docs/api/rust/tvm/runtime/function/ffi/index.html  |  182 -
 .../rust/tvm/runtime/function/ffi/sidebar-items.js |    1 -
 .../tvm/runtime/function/ffi/struct.DLContext.html |   47 -
 .../runtime/function/ffi/struct.DLDataType.html    |   54 -
 .../function/ffi/struct.DLManagedTensor.html       |   40 -
 .../tvm/runtime/function/ffi/struct.DLTensor.html  |   63 -
 .../runtime/function/ffi/struct.TVMByteArray.html  |   33 -
 .../function/ffi/struct.TVMParallelGroupEnv.html   |   30 -
 .../function/ffi/type.BackendPackedCFunc.html      |    2 -
 .../runtime/function/ffi/type.DLDataTypeCode.html  |    3 -
 .../runtime/function/ffi/type.DLDeviceType.html    |    3 -
 .../function/ffi/type.FTVMParallelLambda.html      |    6 -
 .../runtime/function/ffi/type.TVMArgTypeCode.html  |   14 -
 .../runtime/function/ffi/type.TVMArrayHandle.html  |    3 -
 .../function/ffi/type.TVMBackendPackedCFunc.html   |   10 -
 .../tvm/runtime/function/ffi/type.TVMContext.html  |    3 -
 .../function/ffi/type.TVMDeviceExtType.html        |    3 -
 .../ffi/type.TVMExtensionFuncDeclarer.html         |    7 -
 .../function/ffi/type.TVMFunctionHandle.html       |    3 -
 .../runtime/function/ffi/type.TVMModuleHandle.html |    3 -
 .../runtime/function/ffi/type.TVMObjectHandle.html |    3 -
 .../runtime/function/ffi/type.TVMPackedCFunc.html  |   10 -
 .../function/ffi/type.TVMPackedCFuncFinalizer.html |    4 -
 .../function/ffi/type.TVMRetValueHandle.html       |    3 -
 .../runtime/function/ffi/type.TVMStreamHandle.html |    4 -
 .../runtime/function/ffi/type.int_fast16_t.html    |    2 -
 .../runtime/function/ffi/type.int_fast32_t.html    |    2 -
 .../runtime/function/ffi/type.int_fast64_t.html    |    2 -
 .../tvm/runtime/function/ffi/type.int_fast8_t.html |    2 -
 .../runtime/function/ffi/type.int_least16_t.html   |    2 -
 .../runtime/function/ffi/type.int_least32_t.html   |    2 -
 .../runtime/function/ffi/type.int_least64_t.html   |    2 -
 .../runtime/function/ffi/type.int_least8_t.html    |    2 -
 .../tvm/runtime/function/ffi/type.intmax_t.html    |    2 -
 .../tvm/runtime/function/ffi/type.tvm_index_t.html |    3 -
 .../runtime/function/ffi/type.uint_fast16_t.html   |    2 -
 .../runtime/function/ffi/type.uint_fast32_t.html   |    2 -
 .../runtime/function/ffi/type.uint_fast64_t.html   |    2 -
 .../runtime/function/ffi/type.uint_fast8_t.html    |    2 -
 .../runtime/function/ffi/type.uint_least16_t.html  |    2 -
 .../runtime/function/ffi/type.uint_least32_t.html  |    2 -
 .../runtime/function/ffi/type.uint_least64_t.html  |    2 -
 .../runtime/function/ffi/type.uint_least8_t.html   |    2 -
 .../tvm/runtime/function/ffi/type.uintmax_t.html   |    2 -
 .../tvm/runtime/function/ffi/type.wchar_t.html     |    2 -
 .../tvm/runtime/function/ffi/union.TVMValue.html   |   47 -
 .../api/rust/tvm/runtime/function/fn.register.html |   20 -
 .../tvm/runtime/function/fn.register_override.html |    4 -
 .../tvm/runtime/function/fn.register_untyped.html  |    2 -
 docs/api/rust/tvm/runtime/function/index.html      |   27 -
 .../api/rust/tvm/runtime/function/sidebar-items.js |    1 -
 .../rust/tvm/runtime/function/struct.Function.html |   38 -
 .../tvm/runtime/function/trait.ToFunction.html     |   22 -
 .../api/rust/tvm/runtime/function/trait.Typed.html |   14 -
 .../api/rust/tvm/runtime/function/type.Result.html |    2 -
 docs/api/rust/tvm/runtime/graph_rt/index.html      |    4 -
 .../api/rust/tvm/runtime/graph_rt/sidebar-items.js |    1 -
 .../rust/tvm/runtime/graph_rt/struct.GraphRt.html  |   21 -
 docs/api/rust/tvm/runtime/index.html               |   39 -
 docs/api/rust/tvm/runtime/macro.check_call!.html   |   10 -
 docs/api/rust/tvm/runtime/macro.check_call.html    |    6 -
 docs/api/rust/tvm/runtime/macro.external!.html     |   10 -
 docs/api/rust/tvm/runtime/macro.external.html      |    2 -
 docs/api/rust/tvm/runtime/macro.tvm_call!.html     |   10 -
 docs/api/rust/tvm/runtime/macro.tvm_call.html      |    6 -
 .../rust/tvm/runtime/map/fn.array_get_item.html    |    2 -
 docs/api/rust/tvm/runtime/map/fn.map_count.html    |    2 -
 docs/api/rust/tvm/runtime/map/fn.map_get_item.html |    2 -
 docs/api/rust/tvm/runtime/map/fn.map_items.html    |    2 -
 docs/api/rust/tvm/runtime/map/fn.map_size.html     |    2 -
 docs/api/rust/tvm/runtime/map/index.html           |    4 -
 docs/api/rust/tvm/runtime/map/sidebar-items.js     |    1 -
 docs/api/rust/tvm/runtime/map/struct.IntoIter.html |  138 -
 docs/api/rust/tvm/runtime/map/struct.Map.html      |   34 -
 .../rust/tvm/runtime/module/fn.load_from_file.html |    2 -
 .../tvm/runtime/module/fn.runtime_enabled.html     |    2 -
 docs/api/rust/tvm/runtime/module/index.html        |    7 -
 docs/api/rust/tvm/runtime/module/sidebar-items.js  |    1 -
 .../api/rust/tvm/runtime/module/struct.Module.html |   37 -
 docs/api/rust/tvm/runtime/ndarray/index.html       |   23 -
 docs/api/rust/tvm/runtime/ndarray/sidebar-items.js |    1 -
 .../rust/tvm/runtime/ndarray/struct.NDArray.html   |   71 -
 .../runtime/ndarray/struct.NDArrayContainer.html   |   17 -
 docs/api/rust/tvm/runtime/ndarray/trait.Num32.html |    8 -
 .../rust/tvm/runtime/object/fn.debug_print.html    |    2 -
 .../tvm/runtime/object/fn.structural_equal.html    |    2 -
 .../tvm/runtime/object/fn.structural_hash.html     |    2 -
 docs/api/rust/tvm/runtime/object/index.html        |   14 -
 docs/api/rust/tvm/runtime/object/sidebar-items.js  |    1 -
 .../api/rust/tvm/runtime/object/struct.Object.html |   25 -
 .../rust/tvm/runtime/object/struct.ObjectPtr.html  |  118 -
 .../rust/tvm/runtime/object/struct.ObjectRef.html  |   30 -
 .../rust/tvm/runtime/object/trait.IsObject.html    |   14 -
 .../rust/tvm/runtime/object/trait.IsObjectRef.html |   21 -
 docs/api/rust/tvm/runtime/sidebar-items.js         |    1 -
 docs/api/rust/tvm/runtime/string/index.html        |    3 -
 docs/api/rust/tvm/runtime/string/sidebar-items.js  |    1 -
 .../api/rust/tvm/runtime/string/struct.String.html |   49 -
 .../rust/tvm/runtime/string/struct.StringObj.html  |   16 -
 docs/api/rust/tvm/runtime/struct.ByteArray.html    |   30 -
 docs/api/rust/tvm/runtime/struct.Context.html      |   43 -
 docs/api/rust/tvm/runtime/struct.DataType.html     |   47 -
 docs/api/rust/tvm/runtime/struct.Function.html     |   38 -
 .../tvm/runtime/struct.FunctionNotFoundError.html  |   21 -
 docs/api/rust/tvm/runtime/struct.Module.html       |   37 -
 docs/api/rust/tvm/runtime/struct.NDArray.html      |   71 -
 docs/api/rust/tvm/runtime/struct.Object.html       |   25 -
 docs/api/rust/tvm/runtime/struct.ObjectPtr.html    |  118 -
 docs/api/rust/tvm/runtime/struct.ObjectRef.html    |   30 -
 docs/api/rust/tvm/runtime/struct.String.html       |   49 -
 docs/api/rust/tvm/runtime/struct.StringObj.html    |   16 -
 .../rust/tvm/runtime/struct.TypeMismatchError.html |   25 -
 docs/api/rust/tvm/runtime/trait.IsObject.html      |   14 -
 docs/api/rust/tvm/runtime/trait.IsObjectRef.html   |   21 -
 docs/api/rust/tvm/runtime/value/index.html         |    5 -
 docs/api/rust/tvm/runtime/value/sidebar-items.js   |    1 -
 docs/api/rust/tvm/sidebar-items.js                 |    1 -
 docs/api/rust/tvm/struct.Context.html              |   43 -
 docs/api/rust/tvm/struct.DataType.html             |   47 -
 docs/api/rust/tvm/struct.Function.html             |   38 -
 .../api/rust/tvm/struct.FunctionNotFoundError.html |   21 -
 docs/api/rust/tvm/struct.Module.html               |   37 -
 docs/api/rust/tvm/struct.NDArray.html              |   71 -
 docs/api/rust/tvm/struct.TypeMismatchError.html    |   25 -
 .../rust/tvm/transform/fn.create_func_pass.html    |    2 -
 docs/api/rust/tvm/transform/fn.function_pass.html  |    2 -
 docs/api/rust/tvm/transform/index.html             |    5 -
 docs/api/rust/tvm/transform/sidebar-items.js       |    1 -
 docs/api/rust/tvm/transform/struct.PassInfo.html   |   30 -
 .../rust/tvm/transform/struct.PassInfoNode.html    |   22 -
 docs/api/rust/tvm/transform/type.IRModule.html     |    2 -
 docs/api/rust/tvm/transform/type.Pass.html         |    2 -
 docs/api/rust/tvm/transform/type.PassContext.html  |    2 -
 docs/api/rust/tvm/value/index.html                 |    5 -
 docs/api/rust/tvm/value/sidebar-items.js           |    1 -
 docs/api/rust/tvm_graph_rt/all.html                |    4 -
 .../tvm_graph_rt/array/constant.DTYPE_FLOAT32.html |   10 -
 .../tvm_graph_rt/array/constant.DTYPE_FLOAT64.html |   10 -
 .../tvm_graph_rt/array/constant.DTYPE_INT32.html   |   10 -
 .../tvm_graph_rt/array/constant.DTYPE_UINT32.html  |   10 -
 docs/api/rust/tvm_graph_rt/array/enum.Storage.html |   10 -
 .../api/rust/tvm_graph_rt/array/struct.Tensor.html |   10 -
 .../rust/tvm_graph_rt/constant.DTYPE_FLOAT32.html  |    2 -
 .../rust/tvm_graph_rt/constant.DTYPE_FLOAT64.html  |    2 -
 .../rust/tvm_graph_rt/constant.DTYPE_INT32.html    |    2 -
 .../rust/tvm_graph_rt/constant.DTYPE_UINT32.html   |    2 -
 docs/api/rust/tvm_graph_rt/enum.ArgValue.html      |   90 -
 docs/api/rust/tvm_graph_rt/enum.RetValue.html      |   79 -
 docs/api/rust/tvm_graph_rt/enum.Storage.html       |   26 -
 .../rust/tvm_graph_rt/errors/enum.ArrayError.html  |   24 -
 .../tvm_graph_rt/errors/enum.GraphFormatError.html |   29 -
 docs/api/rust/tvm_graph_rt/errors/index.html       |    4 -
 docs/api/rust/tvm_graph_rt/errors/sidebar-items.js |    1 -
 .../errors/struct.FunctionNotFound.html            |   19 -
 .../tvm_graph_rt/errors/struct.InvalidPointer.html |   19 -
 .../ffi/constant.DLDataTypeCode_kDLBfloat.html     |    2 -
 .../ffi/constant.DLDataTypeCode_kDLFloat.html      |    2 -
 .../ffi/constant.DLDataTypeCode_kDLInt.html        |    2 -
 .../ffi/constant.DLDataTypeCode_kDLUInt.html       |    2 -
 .../ffi/constant.DLDeviceType_kDLCPU.html          |    3 -
 .../ffi/constant.DLDeviceType_kDLCPUPinned.html    |    4 -
 .../ffi/constant.DLDeviceType_kDLExtDev.html       |    5 -
 .../ffi/constant.DLDeviceType_kDLGPU.html          |    3 -
 .../ffi/constant.DLDeviceType_kDLMetal.html        |    3 -
 .../ffi/constant.DLDeviceType_kDLOpenCL.html       |    3 -
 .../ffi/constant.DLDeviceType_kDLROCM.html         |    3 -
 .../ffi/constant.DLDeviceType_kDLVPI.html          |    3 -
 .../ffi/constant.DLDeviceType_kDLVulkan.html       |    3 -
 .../tvm_graph_rt/ffi/constant.DLPACK_VERSION.html  |    2 -
 .../rust/tvm_graph_rt/ffi/constant.INT16_MAX.html  |    2 -
 .../rust/tvm_graph_rt/ffi/constant.INT16_MIN.html  |    2 -
 .../rust/tvm_graph_rt/ffi/constant.INT32_MAX.html  |    2 -
 .../rust/tvm_graph_rt/ffi/constant.INT32_MIN.html  |    2 -
 .../rust/tvm_graph_rt/ffi/constant.INT8_MAX.html   |    2 -
 .../rust/tvm_graph_rt/ffi/constant.INT8_MIN.html   |    2 -
 .../rust/tvm_graph_rt/ffi/constant.INTPTR_MAX.html |    2 -
 .../rust/tvm_graph_rt/ffi/constant.INTPTR_MIN.html |    2 -
 .../tvm_graph_rt/ffi/constant.INT_FAST16_MAX.html  |    2 -
 .../tvm_graph_rt/ffi/constant.INT_FAST16_MIN.html  |    2 -
 .../tvm_graph_rt/ffi/constant.INT_FAST32_MAX.html  |    2 -
 .../tvm_graph_rt/ffi/constant.INT_FAST32_MIN.html  |    2 -
 .../tvm_graph_rt/ffi/constant.INT_FAST8_MAX.html   |    2 -
 .../tvm_graph_rt/ffi/constant.INT_FAST8_MIN.html   |    2 -
 .../tvm_graph_rt/ffi/constant.INT_LEAST16_MAX.html |    2 -
 .../tvm_graph_rt/ffi/constant.INT_LEAST16_MIN.html |    2 -
 .../tvm_graph_rt/ffi/constant.INT_LEAST32_MAX.html |    2 -
 .../tvm_graph_rt/ffi/constant.INT_LEAST32_MIN.html |    2 -
 .../tvm_graph_rt/ffi/constant.INT_LEAST8_MAX.html  |    2 -
 .../tvm_graph_rt/ffi/constant.INT_LEAST8_MIN.html  |    2 -
 .../tvm_graph_rt/ffi/constant.PTRDIFF_MAX.html     |    2 -
 .../tvm_graph_rt/ffi/constant.PTRDIFF_MIN.html     |    2 -
 .../tvm_graph_rt/ffi/constant.SIG_ATOMIC_MAX.html  |    2 -
 .../tvm_graph_rt/ffi/constant.SIG_ATOMIC_MIN.html  |    2 -
 .../rust/tvm_graph_rt/ffi/constant.SIZE_MAX.html   |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMArgFloat.html  |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMArgInt.html    |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMBytes.html     |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMContext.html   |    2 -
 ...constant.TVMArgTypeCode_kTVMDLTensorHandle.html |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMDataType.html  |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMExtBegin.html  |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMExtEnd.html    |    2 -
 .../constant.TVMArgTypeCode_kTVMExtReserveEnd.html |    2 -
 .../constant.TVMArgTypeCode_kTVMModuleHandle.html  |    2 -
 .../constant.TVMArgTypeCode_kTVMNDArrayHandle.html |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMNNVMFirst.html |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMNNVMLast.html  |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMNullptr.html   |    2 -
 .../constant.TVMArgTypeCode_kTVMObjectHandle.html  |    2 -
 ...tant.TVMArgTypeCode_kTVMObjectRValueRefArg.html |    2 -
 .../constant.TVMArgTypeCode_kTVMOpaqueHandle.html  |    2 -
 ...nstant.TVMArgTypeCode_kTVMPackedFuncHandle.html |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMStr.html       |    2 -
 .../ffi/constant.TVMDeviceExtType_kDLAOCL.html     |    2 -
 .../ffi/constant.TVMDeviceExtType_kDLHexagon.html  |    2 -
 .../ffi/constant.TVMDeviceExtType_kDLMicroDev.html |    2 -
 .../ffi/constant.TVMDeviceExtType_kDLSDAccel.html  |    2 -
 .../ffi/constant.TVMDeviceExtType_kDLWebGPU.html   |    2 -
 .../ffi/constant.TVMDeviceExtType_kOpenGL.html     |    2 -
 .../tvm_graph_rt/ffi/constant.TVM_VERSION.html     |    2 -
 .../rust/tvm_graph_rt/ffi/constant.UINT16_MAX.html |    2 -
 .../rust/tvm_graph_rt/ffi/constant.UINT32_MAX.html |    2 -
 .../rust/tvm_graph_rt/ffi/constant.UINT8_MAX.html  |    2 -
 .../tvm_graph_rt/ffi/constant.UINTPTR_MAX.html     |    2 -
 .../tvm_graph_rt/ffi/constant.UINT_FAST16_MAX.html |    2 -
 .../tvm_graph_rt/ffi/constant.UINT_FAST32_MAX.html |    2 -
 .../tvm_graph_rt/ffi/constant.UINT_FAST8_MAX.html  |    2 -
 .../ffi/constant.UINT_LEAST16_MAX.html             |    2 -
 .../ffi/constant.UINT_LEAST32_MAX.html             |    2 -
 .../tvm_graph_rt/ffi/constant.UINT_LEAST8_MAX.html |    2 -
 .../rust/tvm_graph_rt/ffi/constant.WINT_MAX.html   |    2 -
 .../rust/tvm_graph_rt/ffi/constant.WINT_MIN.html   |    2 -
 .../tvm_graph_rt/ffi/constant._ATFILE_SOURCE.html  |    2 -
 .../tvm_graph_rt/ffi/constant._BITS_WCHAR_H.html   |    2 -
 .../tvm_graph_rt/ffi/constant._DEFAULT_SOURCE.html |    2 -
 .../tvm_graph_rt/ffi/constant._FEATURES_H.html     |    2 -
 .../tvm_graph_rt/ffi/constant._POSIX_C_SOURCE.html |    2 -
 .../tvm_graph_rt/ffi/constant._POSIX_SOURCE.html   |    2 -
 .../tvm_graph_rt/ffi/constant._STDC_PREDEF_H.html  |    2 -
 .../rust/tvm_graph_rt/ffi/constant._STDINT_H.html  |    2 -
 .../tvm_graph_rt/ffi/constant._SYS_CDEFS_H.html    |    2 -
 .../tvm_graph_rt/ffi/constant.__GLIBC_MINOR__.html |    2 -
 .../rust/tvm_graph_rt/ffi/constant.__GLIBC__.html  |    2 -
 .../tvm_graph_rt/ffi/constant.__GNU_LIBRARY__.html |    2 -
 .../ffi/constant.__STDC_IEC_559_COMPLEX__.html     |    2 -
 .../ffi/constant.__STDC_IEC_559__.html             |    2 -
 .../ffi/constant.__STDC_ISO_10646__.html           |    2 -
 .../ffi/constant.__STDC_NO_THREADS__.html          |    2 -
 .../ffi/constant.__SYSCALL_WORDSIZE.html           |    2 -
 .../tvm_graph_rt/ffi/constant.__USE_ATFILE.html    |    2 -
 .../ffi/constant.__USE_FORTIFY_LEVEL.html          |    2 -
 .../tvm_graph_rt/ffi/constant.__USE_ISOC11.html    |    2 -
 .../tvm_graph_rt/ffi/constant.__USE_ISOC95.html    |    2 -
 .../tvm_graph_rt/ffi/constant.__USE_ISOC99.html    |    2 -
 .../rust/tvm_graph_rt/ffi/constant.__USE_MISC.html |    2 -
 .../tvm_graph_rt/ffi/constant.__USE_POSIX.html     |    2 -
 .../ffi/constant.__USE_POSIX199309.html            |    2 -
 .../ffi/constant.__USE_POSIX199506.html            |    2 -
 .../tvm_graph_rt/ffi/constant.__USE_POSIX2.html    |    2 -
 .../ffi/constant.__USE_POSIX_IMPLICITLY.html       |    2 -
 .../tvm_graph_rt/ffi/constant.__USE_XOPEN2K.html   |    2 -
 .../tvm_graph_rt/ffi/constant.__USE_XOPEN2K8.html  |    2 -
 .../rust/tvm_graph_rt/ffi/constant.__WORDSIZE.html |    2 -
 .../ffi/constant.__WORDSIZE_TIME64_COMPAT32.html   |    2 -
 .../tvm_graph_rt/ffi/fn.TVMAPISetLastError.html    |    5 -
 .../rust/tvm_graph_rt/ffi/fn.TVMArrayAlloc.html    |   13 -
 .../tvm_graph_rt/ffi/fn.TVMArrayCopyFromBytes.html |    7 -
 .../tvm_graph_rt/ffi/fn.TVMArrayCopyFromTo.html    |    7 -
 .../tvm_graph_rt/ffi/fn.TVMArrayCopyToBytes.html   |    7 -
 .../api/rust/tvm_graph_rt/ffi/fn.TVMArrayFree.html |    5 -
 .../tvm_graph_rt/ffi/fn.TVMArrayFromDLPack.html    |    7 -
 .../rust/tvm_graph_rt/ffi/fn.TVMArrayToDLPack.html |    7 -
 .../ffi/fn.TVMBackendAllocWorkspace.html           |   12 -
 .../ffi/fn.TVMBackendFreeWorkspace.html            |    8 -
 .../ffi/fn.TVMBackendGetFuncFromEnv.html           |    9 -
 .../ffi/fn.TVMBackendParallelBarrier.html          |    6 -
 .../ffi/fn.TVMBackendParallelLaunch.html           |    8 -
 .../ffi/fn.TVMBackendRegisterSystemLibSymbol.html  |    6 -
 .../tvm_graph_rt/ffi/fn.TVMBackendRunOnce.html     |   10 -
 .../tvm_graph_rt/ffi/fn.TVMCFuncSetReturn.html     |    9 -
 .../rust/tvm_graph_rt/ffi/fn.TVMCbArgToReturn.html |    8 -
 .../ffi/fn.TVMDLManagedTensorCallDeleter.html      |    4 -
 .../ffi/fn.TVMDeviceAllocDataSpace.html            |   10 -
 .../ffi/fn.TVMDeviceCopyDataFromTo.html            |   14 -
 .../ffi/fn.TVMDeviceFreeDataSpace.html             |    6 -
 docs/api/rust/tvm_graph_rt/ffi/fn.TVMFuncCall.html |   16 -
 .../ffi/fn.TVMFuncCreateFromCFunc.html             |    9 -
 docs/api/rust/tvm_graph_rt/ffi/fn.TVMFuncFree.html |    5 -
 .../rust/tvm_graph_rt/ffi/fn.TVMFuncGetGlobal.html |    7 -
 .../ffi/fn.TVMFuncListGlobalNames.html             |    6 -
 .../tvm_graph_rt/ffi/fn.TVMFuncRegisterGlobal.html |    7 -
 .../tvm_graph_rt/ffi/fn.TVMFuncRemoveGlobal.html   |    4 -
 .../rust/tvm_graph_rt/ffi/fn.TVMGetLastError.html  |    8 -
 docs/api/rust/tvm_graph_rt/ffi/fn.TVMModFree.html  |    9 -
 .../tvm_graph_rt/ffi/fn.TVMModGetFunction.html     |    8 -
 .../api/rust/tvm_graph_rt/ffi/fn.TVMModImport.html |    7 -
 .../tvm_graph_rt/ffi/fn.TVMModLoadFromFile.html    |    9 -
 .../tvm_graph_rt/ffi/fn.TVMObjectDerivedFrom.html  |    7 -
 .../rust/tvm_graph_rt/ffi/fn.TVMObjectFree.html    |    7 -
 .../tvm_graph_rt/ffi/fn.TVMObjectGetTypeIndex.html |    6 -
 .../rust/tvm_graph_rt/ffi/fn.TVMObjectRetain.html  |    6 -
 .../ffi/fn.TVMObjectTypeKey2Index.html             |    6 -
 .../api/rust/tvm_graph_rt/ffi/fn.TVMSetStream.html |   10 -
 .../rust/tvm_graph_rt/ffi/fn.TVMStreamCreate.html  |    7 -
 .../rust/tvm_graph_rt/ffi/fn.TVMStreamFree.html    |    7 -
 .../ffi/fn.TVMStreamStreamSynchronize.html         |    8 -
 .../rust/tvm_graph_rt/ffi/fn.TVMSynchronize.html   |    7 -
 docs/api/rust/tvm_graph_rt/ffi/index.html          |  182 -
 docs/api/rust/tvm_graph_rt/ffi/sidebar-items.js    |    1 -
 .../rust/tvm_graph_rt/ffi/struct.DLContext.html    |   44 -
 .../rust/tvm_graph_rt/ffi/struct.DLDataType.html   |   51 -
 .../tvm_graph_rt/ffi/struct.DLManagedTensor.html   |   38 -
 .../api/rust/tvm_graph_rt/ffi/struct.DLTensor.html |   64 -
 .../rust/tvm_graph_rt/ffi/struct.TVMByteArray.html |   31 -
 .../ffi/struct.TVMParallelGroupEnv.html            |   28 -
 .../tvm_graph_rt/ffi/type.BackendPackedCFunc.html  |    2 -
 .../rust/tvm_graph_rt/ffi/type.DLDataTypeCode.html |    3 -
 .../rust/tvm_graph_rt/ffi/type.DLDeviceType.html   |    3 -
 .../tvm_graph_rt/ffi/type.FTVMParallelLambda.html  |    6 -
 .../rust/tvm_graph_rt/ffi/type.TVMArgTypeCode.html |   14 -
 .../rust/tvm_graph_rt/ffi/type.TVMArrayHandle.html |    3 -
 .../ffi/type.TVMBackendPackedCFunc.html            |   10 -
 .../api/rust/tvm_graph_rt/ffi/type.TVMContext.html |    3 -
 .../tvm_graph_rt/ffi/type.TVMDeviceExtType.html    |    3 -
 .../ffi/type.TVMExtensionFuncDeclarer.html         |    7 -
 .../tvm_graph_rt/ffi/type.TVMFunctionHandle.html   |    3 -
 .../tvm_graph_rt/ffi/type.TVMModuleHandle.html     |    3 -
 .../tvm_graph_rt/ffi/type.TVMObjectHandle.html     |    3 -
 .../rust/tvm_graph_rt/ffi/type.TVMPackedCFunc.html |   10 -
 .../ffi/type.TVMPackedCFuncFinalizer.html          |    4 -
 .../tvm_graph_rt/ffi/type.TVMRetValueHandle.html   |    3 -
 .../tvm_graph_rt/ffi/type.TVMStreamHandle.html     |    4 -
 .../rust/tvm_graph_rt/ffi/type.int_fast16_t.html   |    2 -
 .../rust/tvm_graph_rt/ffi/type.int_fast32_t.html   |    2 -
 .../rust/tvm_graph_rt/ffi/type.int_fast64_t.html   |    2 -
 .../rust/tvm_graph_rt/ffi/type.int_fast8_t.html    |    2 -
 .../rust/tvm_graph_rt/ffi/type.int_least16_t.html  |    2 -
 .../rust/tvm_graph_rt/ffi/type.int_least32_t.html  |    2 -
 .../rust/tvm_graph_rt/ffi/type.int_least64_t.html  |    2 -
 .../rust/tvm_graph_rt/ffi/type.int_least8_t.html   |    2 -
 docs/api/rust/tvm_graph_rt/ffi/type.intmax_t.html  |    2 -
 .../rust/tvm_graph_rt/ffi/type.tvm_index_t.html    |    3 -
 .../rust/tvm_graph_rt/ffi/type.uint_fast16_t.html  |    2 -
 .../rust/tvm_graph_rt/ffi/type.uint_fast32_t.html  |    2 -
 .../rust/tvm_graph_rt/ffi/type.uint_fast64_t.html  |    2 -
 .../rust/tvm_graph_rt/ffi/type.uint_fast8_t.html   |    2 -
 .../rust/tvm_graph_rt/ffi/type.uint_least16_t.html |    2 -
 .../rust/tvm_graph_rt/ffi/type.uint_least32_t.html |    2 -
 .../rust/tvm_graph_rt/ffi/type.uint_least64_t.html |    2 -
 .../rust/tvm_graph_rt/ffi/type.uint_least8_t.html  |    2 -
 docs/api/rust/tvm_graph_rt/ffi/type.uintmax_t.html |    2 -
 docs/api/rust/tvm_graph_rt/ffi/type.wchar_t.html   |    2 -
 docs/api/rust/tvm_graph_rt/ffi/union.TVMValue.html |   45 -
 .../rust/tvm_graph_rt/fn.TVMAPISetLastError.html   |    2 -
 .../tvm_graph_rt/fn.TVMBackendAllocWorkspace.html  |    2 -
 .../tvm_graph_rt/fn.TVMBackendFreeWorkspace.html   |    2 -
 .../tvm_graph_rt/fn.TVMBackendParallelBarrier.html |    2 -
 .../tvm_graph_rt/fn.TVMBackendParallelLaunch.html  |    2 -
 docs/api/rust/tvm_graph_rt/fn.TVMGetLastError.html |    2 -
 docs/api/rust/tvm_graph_rt/fn.load_param_dict.html |    3 -
 docs/api/rust/tvm_graph_rt/fn.remove_item.html     |    2 -
 .../tvm_graph_rt/graph/fn.load_param_dict.html     |   10 -
 docs/api/rust/tvm_graph_rt/graph/struct.Entry.html |   10 -
 docs/api/rust/tvm_graph_rt/graph/struct.Graph.html |   10 -
 .../tvm_graph_rt/graph/struct.GraphExecutor.html   |   10 -
 docs/api/rust/tvm_graph_rt/graph/struct.Node.html  |   10 -
 docs/api/rust/tvm_graph_rt/index.html              |   36 -
 docs/api/rust/tvm_graph_rt/macro.call_packed!.html |   10 -
 docs/api/rust/tvm_graph_rt/macro.call_packed.html  |   10 -
 .../rust/tvm_graph_rt/macro.import_module!.html    |   10 -
 .../api/rust/tvm_graph_rt/macro.import_module.html |    2 -
 .../tvm_graph_rt/module/dso/struct.DsoModule.html  |   10 -
 .../module/syslib/struct.SystemLibModule.html      |   10 -
 .../api/rust/tvm_graph_rt/module/trait.Module.html |   10 -
 .../tvm_graph_rt/packed_func/enum.ArgValue.html    |   90 -
 .../tvm_graph_rt/packed_func/enum.RetValue.html    |   79 -
 docs/api/rust/tvm_graph_rt/packed_func/index.html  |   11 -
 .../rust/tvm_graph_rt/packed_func/sidebar-items.js |    1 -
 .../tvm_graph_rt/packed_func/trait.PackedFunc.html |    3 -
 .../tvm_graph_rt/packed_func/union.TVMValue.html   |   45 -
 docs/api/rust/tvm_graph_rt/sidebar-items.js        |    1 -
 docs/api/rust/tvm_graph_rt/struct.DLTensor.html    |   64 -
 docs/api/rust/tvm_graph_rt/struct.DsoModule.html   |   13 -
 docs/api/rust/tvm_graph_rt/struct.Entry.html       |   19 -
 .../rust/tvm_graph_rt/struct.FuncCallError.html    |   19 -
 docs/api/rust/tvm_graph_rt/struct.Graph.html       |   32 -
 .../rust/tvm_graph_rt/struct.GraphExecutor.html    |   37 -
 docs/api/rust/tvm_graph_rt/struct.Node.html        |   21 -
 .../rust/tvm_graph_rt/struct.SystemLibModule.html  |   12 -
 docs/api/rust/tvm_graph_rt/struct.Tensor.html      |   62 -
 .../tvm_graph_rt/struct.ValueDowncastError.html    |   23 -
 .../threading/fn.TVMBackendParallelBarrier.html    |   10 -
 .../threading/fn.TVMBackendParallelLaunch.html     |   10 -
 docs/api/rust/tvm_graph_rt/trait.Module.html       |    6 -
 docs/api/rust/tvm_graph_rt/trait.PackedFunc.html   |    3 -
 docs/api/rust/tvm_graph_rt/union.TVMValue.html     |   45 -
 .../workspace/fn.TVMBackendAllocWorkspace.html     |   10 -
 .../workspace/fn.TVMBackendFreeWorkspace.html      |   10 -
 .../tvm_graph_rt/workspace/fn.remove_item.html     |   10 -
 docs/api/rust/tvm_macros/all.html                  |    4 -
 docs/api/rust/tvm_macros/derive.Object.html        |    8 -
 docs/api/rust/tvm_macros/index.html                |    4 -
 docs/api/rust/tvm_macros/macro.external!.html      |   10 -
 docs/api/rust/tvm_macros/macro.external.html       |    2 -
 docs/api/rust/tvm_macros/macro.import_module!.html |   10 -
 docs/api/rust/tvm_macros/macro.import_module.html  |    2 -
 docs/api/rust/tvm_macros/sidebar-items.js          |    1 -
 docs/api/rust/tvm_rt/all.html                      |    4 -
 docs/api/rust/tvm_rt/array/fn.array_get_item.html  |    2 -
 docs/api/rust/tvm_rt/array/fn.array_size.html      |    2 -
 docs/api/rust/tvm_rt/array/index.html              |    4 -
 docs/api/rust/tvm_rt/array/sidebar-items.js        |    1 -
 docs/api/rust/tvm_rt/array/struct.Array.html       |   29 -
 docs/api/rust/tvm_rt/array/struct.IntoIter.html    |  135 -
 docs/api/rust/tvm_rt/context/enum.DeviceType.html  |   47 -
 .../rust/tvm_rt/context/fn.get_device_attr.html    |    2 -
 docs/api/rust/tvm_rt/context/index.html            |    7 -
 docs/api/rust/tvm_rt/context/sidebar-items.js      |    1 -
 docs/api/rust/tvm_rt/context/struct.Context.html   |   41 -
 .../context/struct.UnsupportedDeviceError.html     |   19 -
 docs/api/rust/tvm_rt/enum.ArgValue.html            |  124 -
 docs/api/rust/tvm_rt/enum.DeviceType.html          |   47 -
 docs/api/rust/tvm_rt/enum.RetValue.html            |  111 -
 docs/api/rust/tvm_rt/errors/enum.Error.html        |   39 -
 docs/api/rust/tvm_rt/errors/enum.NDArrayError.html |   32 -
 docs/api/rust/tvm_rt/errors/index.html             |    4 -
 docs/api/rust/tvm_rt/errors/sidebar-items.js       |    1 -
 .../errors/struct.FunctionNotFoundError.html       |   19 -
 .../tvm_rt/errors/struct.TypeMismatchError.html    |   23 -
 docs/api/rust/tvm_rt/fn.get_last_error.html        |    3 -
 docs/api/rust/tvm_rt/fn.version.html               |    3 -
 docs/api/rust/tvm_rt/function/enum.ArgValue.html   |  124 -
 docs/api/rust/tvm_rt/function/enum.RetValue.html   |  111 -
 .../ffi/constant.DLDataTypeCode_kDLBfloat.html     |    2 -
 .../ffi/constant.DLDataTypeCode_kDLFloat.html      |    2 -
 .../ffi/constant.DLDataTypeCode_kDLInt.html        |    2 -
 .../ffi/constant.DLDataTypeCode_kDLUInt.html       |    2 -
 .../function/ffi/constant.DLDeviceType_kDLCPU.html |    3 -
 .../ffi/constant.DLDeviceType_kDLCPUPinned.html    |    4 -
 .../ffi/constant.DLDeviceType_kDLExtDev.html       |    5 -
 .../function/ffi/constant.DLDeviceType_kDLGPU.html |    3 -
 .../ffi/constant.DLDeviceType_kDLMetal.html        |    3 -
 .../ffi/constant.DLDeviceType_kDLOpenCL.html       |    3 -
 .../ffi/constant.DLDeviceType_kDLROCM.html         |    3 -
 .../function/ffi/constant.DLDeviceType_kDLVPI.html |    3 -
 .../ffi/constant.DLDeviceType_kDLVulkan.html       |    3 -
 .../function/ffi/constant.DLPACK_VERSION.html      |    2 -
 .../tvm_rt/function/ffi/constant.INT16_MAX.html    |    2 -
 .../tvm_rt/function/ffi/constant.INT16_MIN.html    |    2 -
 .../tvm_rt/function/ffi/constant.INT32_MAX.html    |    2 -
 .../tvm_rt/function/ffi/constant.INT32_MIN.html    |    2 -
 .../tvm_rt/function/ffi/constant.INT8_MAX.html     |    2 -
 .../tvm_rt/function/ffi/constant.INT8_MIN.html     |    2 -
 .../tvm_rt/function/ffi/constant.INTPTR_MAX.html   |    2 -
 .../tvm_rt/function/ffi/constant.INTPTR_MIN.html   |    2 -
 .../function/ffi/constant.INT_FAST16_MAX.html      |    2 -
 .../function/ffi/constant.INT_FAST16_MIN.html      |    2 -
 .../function/ffi/constant.INT_FAST32_MAX.html      |    2 -
 .../function/ffi/constant.INT_FAST32_MIN.html      |    2 -
 .../function/ffi/constant.INT_FAST8_MAX.html       |    2 -
 .../function/ffi/constant.INT_FAST8_MIN.html       |    2 -
 .../function/ffi/constant.INT_LEAST16_MAX.html     |    2 -
 .../function/ffi/constant.INT_LEAST16_MIN.html     |    2 -
 .../function/ffi/constant.INT_LEAST32_MAX.html     |    2 -
 .../function/ffi/constant.INT_LEAST32_MIN.html     |    2 -
 .../function/ffi/constant.INT_LEAST8_MAX.html      |    2 -
 .../function/ffi/constant.INT_LEAST8_MIN.html      |    2 -
 .../tvm_rt/function/ffi/constant.PTRDIFF_MAX.html  |    2 -
 .../tvm_rt/function/ffi/constant.PTRDIFF_MIN.html  |    2 -
 .../function/ffi/constant.SIG_ATOMIC_MAX.html      |    2 -
 .../function/ffi/constant.SIG_ATOMIC_MIN.html      |    2 -
 .../tvm_rt/function/ffi/constant.SIZE_MAX.html     |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMArgFloat.html  |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMArgInt.html    |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMBytes.html     |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMContext.html   |    2 -
 ...constant.TVMArgTypeCode_kTVMDLTensorHandle.html |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMDataType.html  |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMExtBegin.html  |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMExtEnd.html    |    2 -
 .../constant.TVMArgTypeCode_kTVMExtReserveEnd.html |    2 -
 .../constant.TVMArgTypeCode_kTVMModuleHandle.html  |    2 -
 .../constant.TVMArgTypeCode_kTVMNDArrayHandle.html |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMNNVMFirst.html |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMNNVMLast.html  |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMNullptr.html   |    2 -
 .../constant.TVMArgTypeCode_kTVMObjectHandle.html  |    2 -
 ...tant.TVMArgTypeCode_kTVMObjectRValueRefArg.html |    2 -
 .../constant.TVMArgTypeCode_kTVMOpaqueHandle.html  |    2 -
 ...nstant.TVMArgTypeCode_kTVMPackedFuncHandle.html |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMStr.html       |    2 -
 .../ffi/constant.TVMDeviceExtType_kDLAOCL.html     |    2 -
 .../ffi/constant.TVMDeviceExtType_kDLHexagon.html  |    2 -
 .../ffi/constant.TVMDeviceExtType_kDLMicroDev.html |    2 -
 .../ffi/constant.TVMDeviceExtType_kDLSDAccel.html  |    2 -
 .../ffi/constant.TVMDeviceExtType_kDLWebGPU.html   |    2 -
 .../ffi/constant.TVMDeviceExtType_kOpenGL.html     |    2 -
 .../tvm_rt/function/ffi/constant.TVM_VERSION.html  |    2 -
 .../tvm_rt/function/ffi/constant.UINT16_MAX.html   |    2 -
 .../tvm_rt/function/ffi/constant.UINT32_MAX.html   |    2 -
 .../tvm_rt/function/ffi/constant.UINT8_MAX.html    |    2 -
 .../tvm_rt/function/ffi/constant.UINTPTR_MAX.html  |    2 -
 .../function/ffi/constant.UINT_FAST16_MAX.html     |    2 -
 .../function/ffi/constant.UINT_FAST32_MAX.html     |    2 -
 .../function/ffi/constant.UINT_FAST8_MAX.html      |    2 -
 .../function/ffi/constant.UINT_LEAST16_MAX.html    |    2 -
 .../function/ffi/constant.UINT_LEAST32_MAX.html    |    2 -
 .../function/ffi/constant.UINT_LEAST8_MAX.html     |    2 -
 .../tvm_rt/function/ffi/constant.WINT_MAX.html     |    2 -
 .../tvm_rt/function/ffi/constant.WINT_MIN.html     |    2 -
 .../function/ffi/constant._ATFILE_SOURCE.html      |    2 -
 .../function/ffi/constant._BITS_WCHAR_H.html       |    2 -
 .../function/ffi/constant._DEFAULT_SOURCE.html     |    2 -
 .../tvm_rt/function/ffi/constant._FEATURES_H.html  |    2 -
 .../function/ffi/constant._POSIX_C_SOURCE.html     |    2 -
 .../function/ffi/constant._POSIX_SOURCE.html       |    2 -
 .../function/ffi/constant._STDC_PREDEF_H.html      |    2 -
 .../tvm_rt/function/ffi/constant._STDINT_H.html    |    2 -
 .../tvm_rt/function/ffi/constant._SYS_CDEFS_H.html |    2 -
 .../function/ffi/constant.__GLIBC_MINOR__.html     |    2 -
 .../tvm_rt/function/ffi/constant.__GLIBC__.html    |    2 -
 .../function/ffi/constant.__GNU_LIBRARY__.html     |    2 -
 .../ffi/constant.__STDC_IEC_559_COMPLEX__.html     |    2 -
 .../function/ffi/constant.__STDC_IEC_559__.html    |    2 -
 .../function/ffi/constant.__STDC_ISO_10646__.html  |    2 -
 .../function/ffi/constant.__STDC_NO_THREADS__.html |    2 -
 .../function/ffi/constant.__SYSCALL_WORDSIZE.html  |    2 -
 .../tvm_rt/function/ffi/constant.__USE_ATFILE.html |    2 -
 .../function/ffi/constant.__USE_FORTIFY_LEVEL.html |    2 -
 .../tvm_rt/function/ffi/constant.__USE_ISOC11.html |    2 -
 .../tvm_rt/function/ffi/constant.__USE_ISOC95.html |    2 -
 .../tvm_rt/function/ffi/constant.__USE_ISOC99.html |    2 -
 .../tvm_rt/function/ffi/constant.__USE_MISC.html   |    2 -
 .../tvm_rt/function/ffi/constant.__USE_POSIX.html  |    2 -
 .../function/ffi/constant.__USE_POSIX199309.html   |    2 -
 .../function/ffi/constant.__USE_POSIX199506.html   |    2 -
 .../tvm_rt/function/ffi/constant.__USE_POSIX2.html |    2 -
 .../ffi/constant.__USE_POSIX_IMPLICITLY.html       |    2 -
 .../function/ffi/constant.__USE_XOPEN2K.html       |    2 -
 .../function/ffi/constant.__USE_XOPEN2K8.html      |    2 -
 .../tvm_rt/function/ffi/constant.__WORDSIZE.html   |    2 -
 .../ffi/constant.__WORDSIZE_TIME64_COMPAT32.html   |    2 -
 .../tvm_rt/function/ffi/fn.TVMAPISetLastError.html |    5 -
 .../rust/tvm_rt/function/ffi/fn.TVMArrayAlloc.html |   13 -
 .../function/ffi/fn.TVMArrayCopyFromBytes.html     |    7 -
 .../tvm_rt/function/ffi/fn.TVMArrayCopyFromTo.html |    7 -
 .../function/ffi/fn.TVMArrayCopyToBytes.html       |    7 -
 .../rust/tvm_rt/function/ffi/fn.TVMArrayFree.html  |    5 -
 .../tvm_rt/function/ffi/fn.TVMArrayFromDLPack.html |    7 -
 .../tvm_rt/function/ffi/fn.TVMArrayToDLPack.html   |    7 -
 .../function/ffi/fn.TVMBackendAllocWorkspace.html  |   12 -
 .../function/ffi/fn.TVMBackendFreeWorkspace.html   |    8 -
 .../function/ffi/fn.TVMBackendGetFuncFromEnv.html  |    9 -
 .../function/ffi/fn.TVMBackendParallelBarrier.html |    6 -
 .../function/ffi/fn.TVMBackendParallelLaunch.html  |    8 -
 .../ffi/fn.TVMBackendRegisterSystemLibSymbol.html  |    6 -
 .../tvm_rt/function/ffi/fn.TVMBackendRunOnce.html  |   10 -
 .../tvm_rt/function/ffi/fn.TVMCFuncSetReturn.html  |    9 -
 .../tvm_rt/function/ffi/fn.TVMCbArgToReturn.html   |    8 -
 .../ffi/fn.TVMDLManagedTensorCallDeleter.html      |    4 -
 .../function/ffi/fn.TVMDeviceAllocDataSpace.html   |   10 -
 .../function/ffi/fn.TVMDeviceCopyDataFromTo.html   |   14 -
 .../function/ffi/fn.TVMDeviceFreeDataSpace.html    |    6 -
 .../rust/tvm_rt/function/ffi/fn.TVMFuncCall.html   |   16 -
 .../function/ffi/fn.TVMFuncCreateFromCFunc.html    |    9 -
 .../rust/tvm_rt/function/ffi/fn.TVMFuncFree.html   |    5 -
 .../tvm_rt/function/ffi/fn.TVMFuncGetGlobal.html   |    7 -
 .../function/ffi/fn.TVMFuncListGlobalNames.html    |    6 -
 .../function/ffi/fn.TVMFuncRegisterGlobal.html     |    7 -
 .../function/ffi/fn.TVMFuncRemoveGlobal.html       |    4 -
 .../tvm_rt/function/ffi/fn.TVMGetLastError.html    |    8 -
 .../rust/tvm_rt/function/ffi/fn.TVMModFree.html    |    9 -
 .../tvm_rt/function/ffi/fn.TVMModGetFunction.html  |    8 -
 .../rust/tvm_rt/function/ffi/fn.TVMModImport.html  |    7 -
 .../tvm_rt/function/ffi/fn.TVMModLoadFromFile.html |    9 -
 .../function/ffi/fn.TVMObjectDerivedFrom.html      |    7 -
 .../rust/tvm_rt/function/ffi/fn.TVMObjectFree.html |    7 -
 .../function/ffi/fn.TVMObjectGetTypeIndex.html     |    6 -
 .../tvm_rt/function/ffi/fn.TVMObjectRetain.html    |    6 -
 .../function/ffi/fn.TVMObjectTypeKey2Index.html    |    6 -
 .../rust/tvm_rt/function/ffi/fn.TVMSetStream.html  |   10 -
 .../tvm_rt/function/ffi/fn.TVMStreamCreate.html    |    7 -
 .../rust/tvm_rt/function/ffi/fn.TVMStreamFree.html |    7 -
 .../ffi/fn.TVMStreamStreamSynchronize.html         |    8 -
 .../tvm_rt/function/ffi/fn.TVMSynchronize.html     |    7 -
 docs/api/rust/tvm_rt/function/ffi/index.html       |  182 -
 docs/api/rust/tvm_rt/function/ffi/sidebar-items.js |    1 -
 .../rust/tvm_rt/function/ffi/struct.DLContext.html |   45 -
 .../tvm_rt/function/ffi/struct.DLDataType.html     |   52 -
 .../function/ffi/struct.DLManagedTensor.html       |   38 -
 .../rust/tvm_rt/function/ffi/struct.DLTensor.html  |   61 -
 .../tvm_rt/function/ffi/struct.TVMByteArray.html   |   31 -
 .../function/ffi/struct.TVMParallelGroupEnv.html   |   28 -
 .../function/ffi/type.BackendPackedCFunc.html      |    2 -
 .../tvm_rt/function/ffi/type.DLDataTypeCode.html   |    3 -
 .../tvm_rt/function/ffi/type.DLDeviceType.html     |    3 -
 .../function/ffi/type.FTVMParallelLambda.html      |    6 -
 .../tvm_rt/function/ffi/type.TVMArgTypeCode.html   |   14 -
 .../tvm_rt/function/ffi/type.TVMArrayHandle.html   |    3 -
 .../function/ffi/type.TVMBackendPackedCFunc.html   |   10 -
 .../rust/tvm_rt/function/ffi/type.TVMContext.html  |    3 -
 .../tvm_rt/function/ffi/type.TVMDeviceExtType.html |    3 -
 .../ffi/type.TVMExtensionFuncDeclarer.html         |    7 -
 .../function/ffi/type.TVMFunctionHandle.html       |    3 -
 .../tvm_rt/function/ffi/type.TVMModuleHandle.html  |    3 -
 .../tvm_rt/function/ffi/type.TVMObjectHandle.html  |    3 -
 .../tvm_rt/function/ffi/type.TVMPackedCFunc.html   |   10 -
 .../function/ffi/type.TVMPackedCFuncFinalizer.html |    4 -
 .../function/ffi/type.TVMRetValueHandle.html       |    3 -
 .../tvm_rt/function/ffi/type.TVMStreamHandle.html  |    4 -
 .../tvm_rt/function/ffi/type.int_fast16_t.html     |    2 -
 .../tvm_rt/function/ffi/type.int_fast32_t.html     |    2 -
 .../tvm_rt/function/ffi/type.int_fast64_t.html     |    2 -
 .../rust/tvm_rt/function/ffi/type.int_fast8_t.html |    2 -
 .../tvm_rt/function/ffi/type.int_least16_t.html    |    2 -
 .../tvm_rt/function/ffi/type.int_least32_t.html    |    2 -
 .../tvm_rt/function/ffi/type.int_least64_t.html    |    2 -
 .../tvm_rt/function/ffi/type.int_least8_t.html     |    2 -
 .../rust/tvm_rt/function/ffi/type.intmax_t.html    |    2 -
 .../rust/tvm_rt/function/ffi/type.tvm_index_t.html |    3 -
 .../tvm_rt/function/ffi/type.uint_fast16_t.html    |    2 -
 .../tvm_rt/function/ffi/type.uint_fast32_t.html    |    2 -
 .../tvm_rt/function/ffi/type.uint_fast64_t.html    |    2 -
 .../tvm_rt/function/ffi/type.uint_fast8_t.html     |    2 -
 .../tvm_rt/function/ffi/type.uint_least16_t.html   |    2 -
 .../tvm_rt/function/ffi/type.uint_least32_t.html   |    2 -
 .../tvm_rt/function/ffi/type.uint_least64_t.html   |    2 -
 .../tvm_rt/function/ffi/type.uint_least8_t.html    |    2 -
 .../rust/tvm_rt/function/ffi/type.uintmax_t.html   |    2 -
 .../api/rust/tvm_rt/function/ffi/type.wchar_t.html |    2 -
 .../rust/tvm_rt/function/ffi/union.TVMValue.html   |   45 -
 docs/api/rust/tvm_rt/function/fn.register.html     |   20 -
 .../rust/tvm_rt/function/fn.register_override.html |    4 -
 .../rust/tvm_rt/function/fn.register_untyped.html  |    2 -
 docs/api/rust/tvm_rt/function/index.html           |   27 -
 docs/api/rust/tvm_rt/function/sidebar-items.js     |    1 -
 docs/api/rust/tvm_rt/function/struct.Function.html |   43 -
 .../api/rust/tvm_rt/function/trait.ToFunction.html |   22 -
 docs/api/rust/tvm_rt/function/trait.Typed.html     |   14 -
 docs/api/rust/tvm_rt/function/type.Result.html     |    2 -
 docs/api/rust/tvm_rt/index.html                    |   30 -
 docs/api/rust/tvm_rt/macro.check_call!.html        |   10 -
 docs/api/rust/tvm_rt/macro.check_call.html         |    6 -
 docs/api/rust/tvm_rt/macro.external!.html          |   10 -
 docs/api/rust/tvm_rt/macro.external.html           |    2 -
 docs/api/rust/tvm_rt/macro.tvm_call!.html          |   10 -
 docs/api/rust/tvm_rt/macro.tvm_call.html           |    6 -
 docs/api/rust/tvm_rt/map/fn.array_get_item.html    |    2 -
 docs/api/rust/tvm_rt/map/fn.map_count.html         |    2 -
 docs/api/rust/tvm_rt/map/fn.map_get_item.html      |    2 -
 docs/api/rust/tvm_rt/map/fn.map_items.html         |    2 -
 docs/api/rust/tvm_rt/map/fn.map_size.html          |    2 -
 docs/api/rust/tvm_rt/map/index.html                |    4 -
 docs/api/rust/tvm_rt/map/sidebar-items.js          |    1 -
 docs/api/rust/tvm_rt/map/struct.IntoIter.html      |  135 -
 docs/api/rust/tvm_rt/map/struct.Map.html           |   32 -
 docs/api/rust/tvm_rt/module/fn.load_from_file.html |    2 -
 .../api/rust/tvm_rt/module/fn.runtime_enabled.html |    2 -
 docs/api/rust/tvm_rt/module/index.html             |    7 -
 docs/api/rust/tvm_rt/module/sidebar-items.js       |    1 -
 docs/api/rust/tvm_rt/module/struct.Module.html     |   35 -
 docs/api/rust/tvm_rt/ndarray/index.html            |   23 -
 docs/api/rust/tvm_rt/ndarray/sidebar-items.js      |    1 -
 docs/api/rust/tvm_rt/ndarray/struct.NDArray.html   |   81 -
 .../tvm_rt/ndarray/struct.NDArrayContainer.html    |   15 -
 docs/api/rust/tvm_rt/ndarray/trait.Num32.html      |    8 -
 docs/api/rust/tvm_rt/object/fn.debug_print.html    |    2 -
 .../rust/tvm_rt/object/fn.structural_equal.html    |    2 -
 .../api/rust/tvm_rt/object/fn.structural_hash.html |    2 -
 docs/api/rust/tvm_rt/object/index.html             |   14 -
 .../tvm_rt/object/object_ptr/struct.Object.html    |   10 -
 .../tvm_rt/object/object_ptr/struct.ObjectPtr.html |   10 -
 .../tvm_rt/object/object_ptr/struct.ObjectRef.html |   10 -
 .../tvm_rt/object/object_ptr/trait.IsObject.html   |   10 -
 docs/api/rust/tvm_rt/object/sidebar-items.js       |    1 -
 docs/api/rust/tvm_rt/object/struct.Object.html     |   23 -
 docs/api/rust/tvm_rt/object/struct.ObjectPtr.html  |   35 -
 docs/api/rust/tvm_rt/object/struct.ObjectRef.html  |   28 -
 docs/api/rust/tvm_rt/object/trait.IsObject.html    |   14 -
 docs/api/rust/tvm_rt/object/trait.IsObjectRef.html |   21 -
 docs/api/rust/tvm_rt/sidebar-items.js              |    1 -
 docs/api/rust/tvm_rt/string/index.html             |    3 -
 docs/api/rust/tvm_rt/string/sidebar-items.js       |    1 -
 docs/api/rust/tvm_rt/string/struct.String.html     |   47 -
 docs/api/rust/tvm_rt/string/struct.StringObj.html  |   14 -
 docs/api/rust/tvm_rt/struct.ByteArray.html         |   28 -
 docs/api/rust/tvm_rt/struct.Context.html           |   41 -
 docs/api/rust/tvm_rt/struct.DataType.html          |   45 -
 .../api/rust/tvm_rt/to_function/enum.ArgValue.html |   10 -
 .../api/rust/tvm_rt/to_function/enum.RetValue.html |   10 -
 .../ffi/constant.DLDataTypeCode_kDLBfloat.html     |   10 -
 .../ffi/constant.DLDataTypeCode_kDLFloat.html      |   10 -
 .../ffi/constant.DLDataTypeCode_kDLInt.html        |   10 -
 .../ffi/constant.DLDataTypeCode_kDLUInt.html       |   10 -
 .../ffi/constant.DLDeviceType_kDLCPU.html          |   10 -
 .../ffi/constant.DLDeviceType_kDLCPUPinned.html    |   10 -
 .../ffi/constant.DLDeviceType_kDLExtDev.html       |   10 -
 .../ffi/constant.DLDeviceType_kDLGPU.html          |   10 -
 .../ffi/constant.DLDeviceType_kDLMetal.html        |   10 -
 .../ffi/constant.DLDeviceType_kDLOpenCL.html       |   10 -
 .../ffi/constant.DLDeviceType_kDLROCM.html         |   10 -
 .../ffi/constant.DLDeviceType_kDLVPI.html          |   10 -
 .../ffi/constant.DLDeviceType_kDLVulkan.html       |   10 -
 .../to_function/ffi/constant.DLPACK_VERSION.html   |   10 -
 .../tvm_rt/to_function/ffi/constant.INT16_MAX.html |   10 -
 .../tvm_rt/to_function/ffi/constant.INT16_MIN.html |   10 -
 .../tvm_rt/to_function/ffi/constant.INT32_MAX.html |   10 -
 .../tvm_rt/to_function/ffi/constant.INT32_MIN.html |   10 -
 .../tvm_rt/to_function/ffi/constant.INT8_MAX.html  |   10 -
 .../tvm_rt/to_function/ffi/constant.INT8_MIN.html  |   10 -
 .../to_function/ffi/constant.INTPTR_MAX.html       |   10 -
 .../to_function/ffi/constant.INTPTR_MIN.html       |   10 -
 .../to_function/ffi/constant.INT_FAST16_MAX.html   |   10 -
 .../to_function/ffi/constant.INT_FAST16_MIN.html   |   10 -
 .../to_function/ffi/constant.INT_FAST32_MAX.html   |   10 -
 .../to_function/ffi/constant.INT_FAST32_MIN.html   |   10 -
 .../to_function/ffi/constant.INT_FAST8_MAX.html    |   10 -
 .../to_function/ffi/constant.INT_FAST8_MIN.html    |   10 -
 .../to_function/ffi/constant.INT_LEAST16_MAX.html  |   10 -
 .../to_function/ffi/constant.INT_LEAST16_MIN.html  |   10 -
 .../to_function/ffi/constant.INT_LEAST32_MAX.html  |   10 -
 .../to_function/ffi/constant.INT_LEAST32_MIN.html  |   10 -
 .../to_function/ffi/constant.INT_LEAST8_MAX.html   |   10 -
 .../to_function/ffi/constant.INT_LEAST8_MIN.html   |   10 -
 .../to_function/ffi/constant.PTRDIFF_MAX.html      |   10 -
 .../to_function/ffi/constant.PTRDIFF_MIN.html      |   10 -
 .../to_function/ffi/constant.SIG_ATOMIC_MAX.html   |   10 -
 .../to_function/ffi/constant.SIG_ATOMIC_MIN.html   |   10 -
 .../tvm_rt/to_function/ffi/constant.SIZE_MAX.html  |   10 -
 .../ffi/constant.TVMArgTypeCode_kTVMArgFloat.html  |   10 -
 .../ffi/constant.TVMArgTypeCode_kTVMArgInt.html    |   10 -
 .../ffi/constant.TVMArgTypeCode_kTVMBytes.html     |   10 -
 .../ffi/constant.TVMArgTypeCode_kTVMContext.html   |   10 -
 ...constant.TVMArgTypeCode_kTVMDLTensorHandle.html |   10 -
 .../ffi/constant.TVMArgTypeCode_kTVMDataType.html  |   10 -
 .../ffi/constant.TVMArgTypeCode_kTVMExtBegin.html  |   10 -
 .../ffi/constant.TVMArgTypeCode_kTVMExtEnd.html    |   10 -
 .../constant.TVMArgTypeCode_kTVMExtReserveEnd.html |   10 -
 .../constant.TVMArgTypeCode_kTVMModuleHandle.html  |   10 -
 .../constant.TVMArgTypeCode_kTVMNDArrayHandle.html |   10 -
 .../ffi/constant.TVMArgTypeCode_kTVMNNVMFirst.html |   10 -
 .../ffi/constant.TVMArgTypeCode_kTVMNNVMLast.html  |   10 -
 .../ffi/constant.TVMArgTypeCode_kTVMNullptr.html   |   10 -
 .../constant.TVMArgTypeCode_kTVMObjectHandle.html  |   10 -
 ...tant.TVMArgTypeCode_kTVMObjectRValueRefArg.html |   10 -
 .../constant.TVMArgTypeCode_kTVMOpaqueHandle.html  |   10 -
 ...nstant.TVMArgTypeCode_kTVMPackedFuncHandle.html |   10 -
 .../ffi/constant.TVMArgTypeCode_kTVMStr.html       |   10 -
 .../ffi/constant.TVMDeviceExtType_kDLAOCL.html     |   10 -
 .../ffi/constant.TVMDeviceExtType_kDLHexagon.html  |   10 -
 .../ffi/constant.TVMDeviceExtType_kDLMicroDev.html |   10 -
 .../ffi/constant.TVMDeviceExtType_kDLSDAccel.html  |   10 -
 .../ffi/constant.TVMDeviceExtType_kDLWebGPU.html   |   10 -
 .../ffi/constant.TVMDeviceExtType_kOpenGL.html     |   10 -
 .../to_function/ffi/constant.TVM_VERSION.html      |   10 -
 .../to_function/ffi/constant.UINT16_MAX.html       |   10 -
 .../to_function/ffi/constant.UINT32_MAX.html       |   10 -
 .../tvm_rt/to_function/ffi/constant.UINT8_MAX.html |   10 -
 .../to_function/ffi/constant.UINTPTR_MAX.html      |   10 -
 .../to_function/ffi/constant.UINT_FAST16_MAX.html  |   10 -
 .../to_function/ffi/constant.UINT_FAST32_MAX.html  |   10 -
 .../to_function/ffi/constant.UINT_FAST8_MAX.html   |   10 -
 .../to_function/ffi/constant.UINT_LEAST16_MAX.html |   10 -
 .../to_function/ffi/constant.UINT_LEAST32_MAX.html |   10 -
 .../to_function/ffi/constant.UINT_LEAST8_MAX.html  |   10 -
 .../tvm_rt/to_function/ffi/constant.WINT_MAX.html  |   10 -
 .../tvm_rt/to_function/ffi/constant.WINT_MIN.html  |   10 -
 .../to_function/ffi/constant._ATFILE_SOURCE.html   |   10 -
 .../to_function/ffi/constant._BITS_WCHAR_H.html    |   10 -
 .../to_function/ffi/constant._DEFAULT_SOURCE.html  |   10 -
 .../to_function/ffi/constant._FEATURES_H.html      |   10 -
 .../to_function/ffi/constant._POSIX_C_SOURCE.html  |   10 -
 .../to_function/ffi/constant._POSIX_SOURCE.html    |   10 -
 .../to_function/ffi/constant._STDC_PREDEF_H.html   |   10 -
 .../tvm_rt/to_function/ffi/constant._STDINT_H.html |   10 -
 .../to_function/ffi/constant._SYS_CDEFS_H.html     |   10 -
 .../to_function/ffi/constant.__GLIBC_MINOR__.html  |   10 -
 .../tvm_rt/to_function/ffi/constant.__GLIBC__.html |   10 -
 .../to_function/ffi/constant.__GNU_LIBRARY__.html  |   10 -
 .../ffi/constant.__STDC_IEC_559_COMPLEX__.html     |   10 -
 .../to_function/ffi/constant.__STDC_IEC_559__.html |   10 -
 .../ffi/constant.__STDC_ISO_10646__.html           |   10 -
 .../ffi/constant.__STDC_NO_THREADS__.html          |   10 -
 .../ffi/constant.__SYSCALL_WORDSIZE.html           |   10 -
 .../to_function/ffi/constant.__USE_ATFILE.html     |   10 -
 .../ffi/constant.__USE_FORTIFY_LEVEL.html          |   10 -
 .../to_function/ffi/constant.__USE_ISOC11.html     |   10 -
 .../to_function/ffi/constant.__USE_ISOC95.html     |   10 -
 .../to_function/ffi/constant.__USE_ISOC99.html     |   10 -
 .../to_function/ffi/constant.__USE_MISC.html       |   10 -
 .../to_function/ffi/constant.__USE_POSIX.html      |   10 -
 .../ffi/constant.__USE_POSIX199309.html            |   10 -
 .../ffi/constant.__USE_POSIX199506.html            |   10 -
 .../to_function/ffi/constant.__USE_POSIX2.html     |   10 -
 .../ffi/constant.__USE_POSIX_IMPLICITLY.html       |   10 -
 .../to_function/ffi/constant.__USE_XOPEN2K.html    |   10 -
 .../to_function/ffi/constant.__USE_XOPEN2K8.html   |   10 -
 .../to_function/ffi/constant.__WORDSIZE.html       |   10 -
 .../ffi/constant.__WORDSIZE_TIME64_COMPAT32.html   |   10 -
 .../to_function/ffi/fn.TVMAPISetLastError.html     |   10 -
 .../tvm_rt/to_function/ffi/fn.TVMArrayAlloc.html   |   10 -
 .../to_function/ffi/fn.TVMArrayCopyFromBytes.html  |   10 -
 .../to_function/ffi/fn.TVMArrayCopyFromTo.html     |   10 -
 .../to_function/ffi/fn.TVMArrayCopyToBytes.html    |   10 -
 .../tvm_rt/to_function/ffi/fn.TVMArrayFree.html    |   10 -
 .../to_function/ffi/fn.TVMArrayFromDLPack.html     |   10 -
 .../to_function/ffi/fn.TVMArrayToDLPack.html       |   10 -
 .../ffi/fn.TVMBackendAllocWorkspace.html           |   10 -
 .../ffi/fn.TVMBackendFreeWorkspace.html            |   10 -
 .../ffi/fn.TVMBackendGetFuncFromEnv.html           |   10 -
 .../ffi/fn.TVMBackendParallelBarrier.html          |   10 -
 .../ffi/fn.TVMBackendParallelLaunch.html           |   10 -
 .../ffi/fn.TVMBackendRegisterSystemLibSymbol.html  |   10 -
 .../to_function/ffi/fn.TVMBackendRunOnce.html      |   10 -
 .../to_function/ffi/fn.TVMCFuncSetReturn.html      |   10 -
 .../to_function/ffi/fn.TVMCbArgToReturn.html       |   10 -
 .../ffi/fn.TVMDLManagedTensorCallDeleter.html      |   10 -
 .../ffi/fn.TVMDeviceAllocDataSpace.html            |   10 -
 .../ffi/fn.TVMDeviceCopyDataFromTo.html            |   10 -
 .../to_function/ffi/fn.TVMDeviceFreeDataSpace.html |   10 -
 .../tvm_rt/to_function/ffi/fn.TVMFuncCall.html     |   10 -
 .../to_function/ffi/fn.TVMFuncCreateFromCFunc.html |   10 -
 .../tvm_rt/to_function/ffi/fn.TVMFuncFree.html     |   10 -
 .../to_function/ffi/fn.TVMFuncGetGlobal.html       |   10 -
 .../to_function/ffi/fn.TVMFuncListGlobalNames.html |   10 -
 .../to_function/ffi/fn.TVMFuncRegisterGlobal.html  |   10 -
 .../to_function/ffi/fn.TVMFuncRemoveGlobal.html    |   10 -
 .../tvm_rt/to_function/ffi/fn.TVMGetLastError.html |   10 -
 .../rust/tvm_rt/to_function/ffi/fn.TVMModFree.html |   10 -
 .../to_function/ffi/fn.TVMModGetFunction.html      |   10 -
 .../tvm_rt/to_function/ffi/fn.TVMModImport.html    |   10 -
 .../to_function/ffi/fn.TVMModLoadFromFile.html     |   10 -
 .../to_function/ffi/fn.TVMObjectDerivedFrom.html   |   10 -
 .../tvm_rt/to_function/ffi/fn.TVMObjectFree.html   |   10 -
 .../to_function/ffi/fn.TVMObjectGetTypeIndex.html  |   10 -
 .../tvm_rt/to_function/ffi/fn.TVMObjectRetain.html |   10 -
 .../to_function/ffi/fn.TVMObjectTypeKey2Index.html |   10 -
 .../tvm_rt/to_function/ffi/fn.TVMSetStream.html    |   10 -
 .../tvm_rt/to_function/ffi/fn.TVMStreamCreate.html |   10 -
 .../tvm_rt/to_function/ffi/fn.TVMStreamFree.html   |   10 -
 .../ffi/fn.TVMStreamStreamSynchronize.html         |   10 -
 .../tvm_rt/to_function/ffi/fn.TVMSynchronize.html  |   10 -
 docs/api/rust/tvm_rt/to_function/ffi/index.html    |   10 -
 .../tvm_rt/to_function/ffi/struct.DLContext.html   |   10 -
 .../tvm_rt/to_function/ffi/struct.DLDataType.html  |   10 -
 .../to_function/ffi/struct.DLManagedTensor.html    |   10 -
 .../tvm_rt/to_function/ffi/struct.DLTensor.html    |   10 -
 .../to_function/ffi/struct.TVMByteArray.html       |   10 -
 .../ffi/struct.TVMParallelGroupEnv.html            |   10 -
 .../to_function/ffi/type.BackendPackedCFunc.html   |   10 -
 .../to_function/ffi/type.DLDataTypeCode.html       |   10 -
 .../tvm_rt/to_function/ffi/type.DLDeviceType.html  |   10 -
 .../to_function/ffi/type.FTVMParallelLambda.html   |   10 -
 .../to_function/ffi/type.TVMArgTypeCode.html       |   10 -
 .../to_function/ffi/type.TVMArrayHandle.html       |   10 -
 .../ffi/type.TVMBackendPackedCFunc.html            |   10 -
 .../tvm_rt/to_function/ffi/type.TVMContext.html    |   10 -
 .../to_function/ffi/type.TVMDeviceExtType.html     |   10 -
 .../ffi/type.TVMExtensionFuncDeclarer.html         |   10 -
 .../to_function/ffi/type.TVMFunctionHandle.html    |   10 -
 .../to_function/ffi/type.TVMModuleHandle.html      |   10 -
 .../to_function/ffi/type.TVMObjectHandle.html      |   10 -
 .../to_function/ffi/type.TVMPackedCFunc.html       |   10 -
 .../ffi/type.TVMPackedCFuncFinalizer.html          |   10 -
 .../to_function/ffi/type.TVMRetValueHandle.html    |   10 -
 .../to_function/ffi/type.TVMStreamHandle.html      |   10 -
 .../tvm_rt/to_function/ffi/type.int_fast16_t.html  |   10 -
 .../tvm_rt/to_function/ffi/type.int_fast32_t.html  |   10 -
 .../tvm_rt/to_function/ffi/type.int_fast64_t.html  |   10 -
 .../tvm_rt/to_function/ffi/type.int_fast8_t.html   |   10 -
 .../tvm_rt/to_function/ffi/type.int_least16_t.html |   10 -
 .../tvm_rt/to_function/ffi/type.int_least32_t.html |   10 -
 .../tvm_rt/to_function/ffi/type.int_least64_t.html |   10 -
 .../tvm_rt/to_function/ffi/type.int_least8_t.html  |   10 -
 .../rust/tvm_rt/to_function/ffi/type.intmax_t.html |   10 -
 .../tvm_rt/to_function/ffi/type.tvm_index_t.html   |   10 -
 .../tvm_rt/to_function/ffi/type.uint_fast16_t.html |   10 -
 .../tvm_rt/to_function/ffi/type.uint_fast32_t.html |   10 -
 .../tvm_rt/to_function/ffi/type.uint_fast64_t.html |   10 -
 .../tvm_rt/to_function/ffi/type.uint_fast8_t.html  |   10 -
 .../to_function/ffi/type.uint_least16_t.html       |   10 -
 .../to_function/ffi/type.uint_least32_t.html       |   10 -
 .../to_function/ffi/type.uint_least64_t.html       |   10 -
 .../tvm_rt/to_function/ffi/type.uint_least8_t.html |   10 -
 .../tvm_rt/to_function/ffi/type.uintmax_t.html     |   10 -
 .../rust/tvm_rt/to_function/ffi/type.wchar_t.html  |   10 -
 .../tvm_rt/to_function/ffi/union.TVMValue.html     |   10 -
 .../rust/tvm_rt/to_function/trait.ToFunction.html  |   10 -
 docs/api/rust/tvm_rt/to_function/trait.Typed.html  |   10 -
 docs/api/rust/tvm_rt/value/index.html              |    5 -
 docs/api/rust/tvm_rt/value/sidebar-items.js        |    1 -
 docs/api/rust/tvm_sys/all.html                     |    4 -
 docs/api/rust/tvm_sys/array/index.html             |    2 -
 docs/api/rust/tvm_sys/array/sidebar-items.js       |    1 -
 docs/api/rust/tvm_sys/byte_array/index.html        |    4 -
 docs/api/rust/tvm_sys/byte_array/sidebar-items.js  |    1 -
 .../rust/tvm_sys/byte_array/struct.ByteArray.html  |   28 -
 docs/api/rust/tvm_sys/context/enum.DeviceType.html |   48 -
 docs/api/rust/tvm_sys/context/index.html           |   20 -
 docs/api/rust/tvm_sys/context/sidebar-items.js     |    1 -
 docs/api/rust/tvm_sys/context/struct.Context.html  |   41 -
 .../context/struct.UnsupportedDeviceError.html     |   19 -
 .../tvm_sys/datatype/enum.ParseDataTypeError.html  |   25 -
 docs/api/rust/tvm_sys/datatype/index.html          |    4 -
 docs/api/rust/tvm_sys/datatype/sidebar-items.js    |    1 -
 .../api/rust/tvm_sys/datatype/struct.DataType.html |   45 -
 docs/api/rust/tvm_sys/errors/index.html            |    3 -
 docs/api/rust/tvm_sys/errors/sidebar-items.js      |    1 -
 .../rust/tvm_sys/errors/struct.FuncCallError.html  |   19 -
 .../tvm_sys/errors/struct.ValueDowncastError.html  |   23 -
 .../ffi/constant.DLDataTypeCode_kDLBfloat.html     |    2 -
 .../ffi/constant.DLDataTypeCode_kDLFloat.html      |    2 -
 .../ffi/constant.DLDataTypeCode_kDLInt.html        |    2 -
 .../ffi/constant.DLDataTypeCode_kDLUInt.html       |    2 -
 .../tvm_sys/ffi/constant.DLDeviceType_kDLCPU.html  |    3 -
 .../ffi/constant.DLDeviceType_kDLCPUPinned.html    |    4 -
 .../ffi/constant.DLDeviceType_kDLExtDev.html       |    5 -
 .../tvm_sys/ffi/constant.DLDeviceType_kDLGPU.html  |    3 -
 .../ffi/constant.DLDeviceType_kDLMetal.html        |    3 -
 .../ffi/constant.DLDeviceType_kDLOpenCL.html       |    3 -
 .../tvm_sys/ffi/constant.DLDeviceType_kDLROCM.html |    3 -
 .../tvm_sys/ffi/constant.DLDeviceType_kDLVPI.html  |    3 -
 .../ffi/constant.DLDeviceType_kDLVulkan.html       |    3 -
 .../rust/tvm_sys/ffi/constant.DLPACK_VERSION.html  |    2 -
 docs/api/rust/tvm_sys/ffi/constant.INT16_MAX.html  |    2 -
 docs/api/rust/tvm_sys/ffi/constant.INT16_MIN.html  |    2 -
 docs/api/rust/tvm_sys/ffi/constant.INT32_MAX.html  |    2 -
 docs/api/rust/tvm_sys/ffi/constant.INT32_MIN.html  |    2 -
 docs/api/rust/tvm_sys/ffi/constant.INT8_MAX.html   |    2 -
 docs/api/rust/tvm_sys/ffi/constant.INT8_MIN.html   |    2 -
 docs/api/rust/tvm_sys/ffi/constant.INTPTR_MAX.html |    2 -
 docs/api/rust/tvm_sys/ffi/constant.INTPTR_MIN.html |    2 -
 .../rust/tvm_sys/ffi/constant.INT_FAST16_MAX.html  |    2 -
 .../rust/tvm_sys/ffi/constant.INT_FAST16_MIN.html  |    2 -
 .../rust/tvm_sys/ffi/constant.INT_FAST32_MAX.html  |    2 -
 .../rust/tvm_sys/ffi/constant.INT_FAST32_MIN.html  |    2 -
 .../rust/tvm_sys/ffi/constant.INT_FAST8_MAX.html   |    2 -
 .../rust/tvm_sys/ffi/constant.INT_FAST8_MIN.html   |    2 -
 .../rust/tvm_sys/ffi/constant.INT_LEAST16_MAX.html |    2 -
 .../rust/tvm_sys/ffi/constant.INT_LEAST16_MIN.html |    2 -
 .../rust/tvm_sys/ffi/constant.INT_LEAST32_MAX.html |    2 -
 .../rust/tvm_sys/ffi/constant.INT_LEAST32_MIN.html |    2 -
 .../rust/tvm_sys/ffi/constant.INT_LEAST8_MAX.html  |    2 -
 .../rust/tvm_sys/ffi/constant.INT_LEAST8_MIN.html  |    2 -
 .../api/rust/tvm_sys/ffi/constant.PTRDIFF_MAX.html |    2 -
 .../api/rust/tvm_sys/ffi/constant.PTRDIFF_MIN.html |    2 -
 .../rust/tvm_sys/ffi/constant.SIG_ATOMIC_MAX.html  |    2 -
 .../rust/tvm_sys/ffi/constant.SIG_ATOMIC_MIN.html  |    2 -
 docs/api/rust/tvm_sys/ffi/constant.SIZE_MAX.html   |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMArgFloat.html  |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMArgInt.html    |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMBytes.html     |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMContext.html   |    2 -
 ...constant.TVMArgTypeCode_kTVMDLTensorHandle.html |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMDataType.html  |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMExtBegin.html  |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMExtEnd.html    |    2 -
 .../constant.TVMArgTypeCode_kTVMExtReserveEnd.html |    2 -
 .../constant.TVMArgTypeCode_kTVMModuleHandle.html  |    2 -
 .../constant.TVMArgTypeCode_kTVMNDArrayHandle.html |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMNNVMFirst.html |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMNNVMLast.html  |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMNullptr.html   |    2 -
 .../constant.TVMArgTypeCode_kTVMObjectHandle.html  |    2 -
 ...tant.TVMArgTypeCode_kTVMObjectRValueRefArg.html |    2 -
 .../constant.TVMArgTypeCode_kTVMOpaqueHandle.html  |    2 -
 ...nstant.TVMArgTypeCode_kTVMPackedFuncHandle.html |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMStr.html       |    2 -
 .../ffi/constant.TVMDeviceExtType_kDLAOCL.html     |    2 -
 .../ffi/constant.TVMDeviceExtType_kDLHexagon.html  |    2 -
 .../ffi/constant.TVMDeviceExtType_kDLMicroDev.html |    2 -
 .../ffi/constant.TVMDeviceExtType_kDLSDAccel.html  |    2 -
 .../ffi/constant.TVMDeviceExtType_kDLWebGPU.html   |    2 -
 .../ffi/constant.TVMDeviceExtType_kOpenGL.html     |    2 -
 .../api/rust/tvm_sys/ffi/constant.TVM_VERSION.html |    2 -
 docs/api/rust/tvm_sys/ffi/constant.UINT16_MAX.html |    2 -
 docs/api/rust/tvm_sys/ffi/constant.UINT32_MAX.html |    2 -
 docs/api/rust/tvm_sys/ffi/constant.UINT8_MAX.html  |    2 -
 .../api/rust/tvm_sys/ffi/constant.UINTPTR_MAX.html |    2 -
 .../rust/tvm_sys/ffi/constant.UINT_FAST16_MAX.html |    2 -
 .../rust/tvm_sys/ffi/constant.UINT_FAST32_MAX.html |    2 -
 .../rust/tvm_sys/ffi/constant.UINT_FAST8_MAX.html  |    2 -
 .../tvm_sys/ffi/constant.UINT_LEAST16_MAX.html     |    2 -
 .../tvm_sys/ffi/constant.UINT_LEAST32_MAX.html     |    2 -
 .../rust/tvm_sys/ffi/constant.UINT_LEAST8_MAX.html |    2 -
 docs/api/rust/tvm_sys/ffi/constant.WINT_MAX.html   |    2 -
 docs/api/rust/tvm_sys/ffi/constant.WINT_MIN.html   |    2 -
 .../rust/tvm_sys/ffi/constant._ATFILE_SOURCE.html  |    2 -
 .../rust/tvm_sys/ffi/constant._BITS_WCHAR_H.html   |    2 -
 .../rust/tvm_sys/ffi/constant._DEFAULT_SOURCE.html |    2 -
 .../api/rust/tvm_sys/ffi/constant._FEATURES_H.html |    2 -
 .../rust/tvm_sys/ffi/constant._POSIX_C_SOURCE.html |    2 -
 .../rust/tvm_sys/ffi/constant._POSIX_SOURCE.html   |    2 -
 .../rust/tvm_sys/ffi/constant._STDC_PREDEF_H.html  |    2 -
 docs/api/rust/tvm_sys/ffi/constant._STDINT_H.html  |    2 -
 .../rust/tvm_sys/ffi/constant._SYS_CDEFS_H.html    |    2 -
 .../rust/tvm_sys/ffi/constant.__GLIBC_MINOR__.html |    2 -
 docs/api/rust/tvm_sys/ffi/constant.__GLIBC__.html  |    2 -
 .../rust/tvm_sys/ffi/constant.__GNU_LIBRARY__.html |    2 -
 .../ffi/constant.__STDC_IEC_559_COMPLEX__.html     |    2 -
 .../tvm_sys/ffi/constant.__STDC_IEC_559__.html     |    2 -
 .../tvm_sys/ffi/constant.__STDC_ISO_10646__.html   |    2 -
 .../tvm_sys/ffi/constant.__STDC_NO_THREADS__.html  |    2 -
 .../tvm_sys/ffi/constant.__SYSCALL_WORDSIZE.html   |    2 -
 .../rust/tvm_sys/ffi/constant.__USE_ATFILE.html    |    2 -
 .../tvm_sys/ffi/constant.__USE_FORTIFY_LEVEL.html  |    2 -
 .../rust/tvm_sys/ffi/constant.__USE_ISOC11.html    |    2 -
 .../rust/tvm_sys/ffi/constant.__USE_ISOC95.html    |    2 -
 .../rust/tvm_sys/ffi/constant.__USE_ISOC99.html    |    2 -
 docs/api/rust/tvm_sys/ffi/constant.__USE_MISC.html |    2 -
 .../api/rust/tvm_sys/ffi/constant.__USE_POSIX.html |    2 -
 .../tvm_sys/ffi/constant.__USE_POSIX199309.html    |    2 -
 .../tvm_sys/ffi/constant.__USE_POSIX199506.html    |    2 -
 .../rust/tvm_sys/ffi/constant.__USE_POSIX2.html    |    2 -
 .../ffi/constant.__USE_POSIX_IMPLICITLY.html       |    2 -
 .../rust/tvm_sys/ffi/constant.__USE_XOPEN2K.html   |    2 -
 .../rust/tvm_sys/ffi/constant.__USE_XOPEN2K8.html  |    2 -
 docs/api/rust/tvm_sys/ffi/constant.__WORDSIZE.html |    2 -
 .../ffi/constant.__WORDSIZE_TIME64_COMPAT32.html   |    2 -
 .../rust/tvm_sys/ffi/fn.TVMAPISetLastError.html    |    5 -
 docs/api/rust/tvm_sys/ffi/fn.TVMArrayAlloc.html    |   13 -
 .../rust/tvm_sys/ffi/fn.TVMArrayCopyFromBytes.html |    7 -
 .../rust/tvm_sys/ffi/fn.TVMArrayCopyFromTo.html    |    7 -
 .../rust/tvm_sys/ffi/fn.TVMArrayCopyToBytes.html   |    7 -
 docs/api/rust/tvm_sys/ffi/fn.TVMArrayFree.html     |    5 -
 .../rust/tvm_sys/ffi/fn.TVMArrayFromDLPack.html    |    7 -
 docs/api/rust/tvm_sys/ffi/fn.TVMArrayToDLPack.html |    7 -
 .../tvm_sys/ffi/fn.TVMBackendAllocWorkspace.html   |   12 -
 .../tvm_sys/ffi/fn.TVMBackendFreeWorkspace.html    |    8 -
 .../tvm_sys/ffi/fn.TVMBackendGetFuncFromEnv.html   |    9 -
 .../tvm_sys/ffi/fn.TVMBackendParallelBarrier.html  |    6 -
 .../tvm_sys/ffi/fn.TVMBackendParallelLaunch.html   |    8 -
 .../ffi/fn.TVMBackendRegisterSystemLibSymbol.html  |    6 -
 .../api/rust/tvm_sys/ffi/fn.TVMBackendRunOnce.html |   10 -
 .../api/rust/tvm_sys/ffi/fn.TVMCFuncSetReturn.html |    9 -
 docs/api/rust/tvm_sys/ffi/fn.TVMCbArgToReturn.html |    8 -
 .../ffi/fn.TVMDLManagedTensorCallDeleter.html      |    4 -
 .../tvm_sys/ffi/fn.TVMDeviceAllocDataSpace.html    |   10 -
 .../tvm_sys/ffi/fn.TVMDeviceCopyDataFromTo.html    |   14 -
 .../tvm_sys/ffi/fn.TVMDeviceFreeDataSpace.html     |    6 -
 docs/api/rust/tvm_sys/ffi/fn.TVMFuncCall.html      |   16 -
 .../tvm_sys/ffi/fn.TVMFuncCreateFromCFunc.html     |    9 -
 docs/api/rust/tvm_sys/ffi/fn.TVMFuncFree.html      |    5 -
 docs/api/rust/tvm_sys/ffi/fn.TVMFuncGetGlobal.html |    7 -
 .../tvm_sys/ffi/fn.TVMFuncListGlobalNames.html     |    6 -
 .../rust/tvm_sys/ffi/fn.TVMFuncRegisterGlobal.html |    7 -
 .../rust/tvm_sys/ffi/fn.TVMFuncRemoveGlobal.html   |    4 -
 docs/api/rust/tvm_sys/ffi/fn.TVMGetLastError.html  |    8 -
 docs/api/rust/tvm_sys/ffi/fn.TVMModFree.html       |    9 -
 .../api/rust/tvm_sys/ffi/fn.TVMModGetFunction.html |    8 -
 docs/api/rust/tvm_sys/ffi/fn.TVMModImport.html     |    7 -
 .../rust/tvm_sys/ffi/fn.TVMModLoadFromFile.html    |    9 -
 .../rust/tvm_sys/ffi/fn.TVMObjectDerivedFrom.html  |    7 -
 docs/api/rust/tvm_sys/ffi/fn.TVMObjectFree.html    |    7 -
 .../rust/tvm_sys/ffi/fn.TVMObjectGetTypeIndex.html |    6 -
 docs/api/rust/tvm_sys/ffi/fn.TVMObjectRetain.html  |    6 -
 .../tvm_sys/ffi/fn.TVMObjectTypeKey2Index.html     |    6 -
 docs/api/rust/tvm_sys/ffi/fn.TVMSetStream.html     |   10 -
 docs/api/rust/tvm_sys/ffi/fn.TVMStreamCreate.html  |    7 -
 docs/api/rust/tvm_sys/ffi/fn.TVMStreamFree.html    |    7 -
 .../tvm_sys/ffi/fn.TVMStreamStreamSynchronize.html |    8 -
 docs/api/rust/tvm_sys/ffi/fn.TVMSynchronize.html   |    7 -
 docs/api/rust/tvm_sys/ffi/index.html               |  182 -
 docs/api/rust/tvm_sys/ffi/sidebar-items.js         |    1 -
 docs/api/rust/tvm_sys/ffi/struct.DLContext.html    |   35 -
 docs/api/rust/tvm_sys/ffi/struct.DLDataType.html   |   52 -
 .../rust/tvm_sys/ffi/struct.DLManagedTensor.html   |   38 -
 docs/api/rust/tvm_sys/ffi/struct.DLTensor.html     |   61 -
 docs/api/rust/tvm_sys/ffi/struct.TVMByteArray.html |   31 -
 .../tvm_sys/ffi/struct.TVMParallelGroupEnv.html    |   28 -
 .../rust/tvm_sys/ffi/type.BackendPackedCFunc.html  |    2 -
 docs/api/rust/tvm_sys/ffi/type.DLDataTypeCode.html |    3 -
 docs/api/rust/tvm_sys/ffi/type.DLDeviceType.html   |    4 -
 .../rust/tvm_sys/ffi/type.FTVMParallelLambda.html  |    6 -
 docs/api/rust/tvm_sys/ffi/type.TVMArgTypeCode.html |   14 -
 docs/api/rust/tvm_sys/ffi/type.TVMArrayHandle.html |    3 -
 .../tvm_sys/ffi/type.TVMBackendPackedCFunc.html    |   10 -
 docs/api/rust/tvm_sys/ffi/type.TVMContext.html     |   13 -
 .../rust/tvm_sys/ffi/type.TVMDeviceExtType.html    |    3 -
 .../tvm_sys/ffi/type.TVMExtensionFuncDeclarer.html |    7 -
 .../rust/tvm_sys/ffi/type.TVMFunctionHandle.html   |    3 -
 .../api/rust/tvm_sys/ffi/type.TVMModuleHandle.html |    3 -
 .../api/rust/tvm_sys/ffi/type.TVMObjectHandle.html |    3 -
 docs/api/rust/tvm_sys/ffi/type.TVMPackedCFunc.html |   10 -
 .../tvm_sys/ffi/type.TVMPackedCFuncFinalizer.html  |    4 -
 .../rust/tvm_sys/ffi/type.TVMRetValueHandle.html   |    3 -
 .../api/rust/tvm_sys/ffi/type.TVMStreamHandle.html |    4 -
 docs/api/rust/tvm_sys/ffi/type.int_fast16_t.html   |    2 -
 docs/api/rust/tvm_sys/ffi/type.int_fast32_t.html   |    2 -
 docs/api/rust/tvm_sys/ffi/type.int_fast64_t.html   |    2 -
 docs/api/rust/tvm_sys/ffi/type.int_fast8_t.html    |    2 -
 docs/api/rust/tvm_sys/ffi/type.int_least16_t.html  |    2 -
 docs/api/rust/tvm_sys/ffi/type.int_least32_t.html  |    2 -
 docs/api/rust/tvm_sys/ffi/type.int_least64_t.html  |    2 -
 docs/api/rust/tvm_sys/ffi/type.int_least8_t.html   |    2 -
 docs/api/rust/tvm_sys/ffi/type.intmax_t.html       |    2 -
 docs/api/rust/tvm_sys/ffi/type.tvm_index_t.html    |    3 -
 docs/api/rust/tvm_sys/ffi/type.uint_fast16_t.html  |    2 -
 docs/api/rust/tvm_sys/ffi/type.uint_fast32_t.html  |    2 -
 docs/api/rust/tvm_sys/ffi/type.uint_fast64_t.html  |    2 -
 docs/api/rust/tvm_sys/ffi/type.uint_fast8_t.html   |    2 -
 docs/api/rust/tvm_sys/ffi/type.uint_least16_t.html |    2 -
 docs/api/rust/tvm_sys/ffi/type.uint_least32_t.html |    2 -
 docs/api/rust/tvm_sys/ffi/type.uint_least64_t.html |    2 -
 docs/api/rust/tvm_sys/ffi/type.uint_least8_t.html  |    2 -
 docs/api/rust/tvm_sys/ffi/type.uintmax_t.html      |    2 -
 docs/api/rust/tvm_sys/ffi/type.wchar_t.html        |    2 -
 docs/api/rust/tvm_sys/ffi/union.TVMValue.html      |   57 -
 docs/api/rust/tvm_sys/index.html                   |   14 -
 docs/api/rust/tvm_sys/macro.call_packed!.html      |   10 -
 docs/api/rust/tvm_sys/macro.call_packed.html       |   10 -
 docs/api/rust/tvm_sys/macro.try_downcast!.html     |   10 -
 docs/api/rust/tvm_sys/macro.try_downcast.html      |    6 -
 .../rust/tvm_sys/packed_func/enum.ArgValue.html    |  146 -
 .../rust/tvm_sys/packed_func/enum.RetValue.html    |  117 -
 docs/api/rust/tvm_sys/packed_func/index.html       |    9 -
 docs/api/rust/tvm_sys/packed_func/sidebar-items.js |    1 -
 .../rust/tvm_sys/packed_func/trait.PackedFunc.html |    3 -
 docs/api/rust/tvm_sys/sidebar-items.js             |    1 -
 docs/api/rust/tvm_sys/value/index.html             |    3 -
 docs/api/rust/tvm_sys/value/sidebar-items.js       |    1 -
 .../value/struct.UnsupportedDeviceError.html       |   19 -
 docs/api/rust/tyck/all.html                        |    4 -
 docs/api/rust/tyck/fn.main.html                    |    2 -
 docs/api/rust/tyck/index.html                      |    4 -
 docs/api/rust/tyck/sidebar-items.js                |    1 -
 docs/api/rust/tyck/struct.Opt.html                 |   24 -
 docs/api/rust/wheel.svg                            |    1 -
 docs/api/typedoc/assets/js/search.json             |    2 +-
 docs/api/typedoc/classes/bytestreamreader.html     |   12 +-
 docs/api/typedoc/classes/cachedcallstack.html      |   34 +-
 docs/api/typedoc/classes/dlcontext.html            |   10 +-
 docs/api/typedoc/classes/dldatatype.html           |   12 +-
 docs/api/typedoc/classes/environment.html          |   12 +-
 docs/api/typedoc/classes/ffilibrary.html           |   20 +-
 docs/api/typedoc/classes/graphruntime.html         |   16 +-
 docs/api/typedoc/classes/instance.html             |   40 +-
 docs/api/typedoc/classes/memory.html               |   34 +-
 docs/api/typedoc/classes/module.html               |   10 +-
 docs/api/typedoc/classes/ndarray.html              |   22 +-
 docs/api/typedoc/classes/packedfunccell.html       |    6 +-
 docs/api/typedoc/classes/rpcserver.html            |   14 +-
 docs/api/typedoc/classes/scalar.html               |    6 +-
 docs/api/typedoc/classes/webgpucontext.html        |   12 +-
 docs/api/typedoc/enums/argtypecode.html            |   30 +-
 docs/api/typedoc/enums/aynccallbackcode.html       |    4 +-
 docs/api/typedoc/enums/dldatatypecode.html         |    8 +-
 docs/api/typedoc/enums/rpcserverstate.html         |   12 +-
 docs/api/typedoc/enums/sizeof.html                 |   18 +-
 docs/api/typedoc/index.html                        |  118 +-
 docs/api/typedoc/interfaces/disposable.html        |    2 +-
 docs/api/typedoc/interfaces/functioninfo.html      |    6 +-
 docs/api/typedoc/interfaces/libraryprovider.html   |    4 +-
 docs/contribute/code_guide.html                    |  187 +-
 docs/contribute/code_review.html                   |  185 +-
 docs/contribute/committer_guide.html               |  185 +-
 docs/contribute/community.html                     |  187 +-
 docs/contribute/document.html                      |  189 +-
 docs/contribute/error_handling.html                |  189 +-
 docs/contribute/git_howto.html                     |  215 +-
 docs/contribute/index.html                         |  187 +-
 docs/contribute/pull_request.html                  |  193 +-
 docs/contribute/release_process.html               |  197 +-
 docs/deploy/android.html                           |  190 +-
 docs/deploy/arm_compute_lib.html                   |  199 +-
 docs/deploy/cpp_deploy.html                        |  196 +-
 docs/deploy/hls.html                               |  188 +-
 docs/deploy/index.html                             |  196 +-
 docs/deploy/integrate.html                         |  211 +-
 docs/deploy/tensorrt.html                          |  717 --
 docs/dev/benchmark.html                            |  185 +-
 docs/dev/codebase_walkthrough.html                 |  185 +-
 docs/dev/convert_layout.html                       |  187 +-
 docs/dev/debugger.html                             |  185 +-
 docs/dev/frontend/tensorflow.html                  |  189 +-
 docs/dev/how_to.html                               |  187 +-
 docs/dev/hybrid_script.html                        |  197 +-
 docs/dev/index.html                                |  187 +-
 docs/dev/inferbound.html                           |  215 +-
 docs/dev/introduction_to_module_serialization.html |  189 +-
 docs/dev/pass_infra.html                           |  211 +-
 docs/dev/relay_add_op.html                         |  187 +-
 docs/dev/relay_add_pass.html                       |  193 +-
 docs/dev/relay_bring_your_own_codegen.html         |  199 +-
 docs/dev/relay_intro.html                          |  191 +-
 docs/dev/relay_op_strategy.html                    |  185 +-
 docs/dev/runtime.html                              |  205 +-
 docs/dev/security.html                             |  185 +-
 docs/dev/virtual_machine.html                      |  199 +-
 docs/faq.html                                      |  183 +-
 docs/genindex.html                                 |  358 +-
 docs/index.html                                    |  183 +-
 docs/install/docker.html                           |  187 +-
 docs/install/from_source.html                      |  191 +-
 docs/install/index.html                            |  183 +-
 docs/install/nnpack.html                           |  187 +-
 docs/langref/hybrid_script.html                    |  198 +-
 docs/langref/index.html                            |  183 +-
 docs/langref/relay_adt.html                        |  185 +-
 docs/langref/relay_expr.html                       |  185 +-
 docs/langref/relay_op.html                         |  185 +-
 docs/langref/relay_pattern.html                    |  187 +-
 docs/langref/relay_type.html                       |  185 +-
 docs/objects.inv                                   |  Bin 17289 -> 16936 bytes
 docs/py-modindex.html                              |  181 +-
 docs/search.html                                   |  185 +-
 docs/searchindex.js                                |    2 +-
 .../auto_scheduler/sg_execution_times.html         |  187 +-
 .../auto_scheduler/tune_conv2d_layer_cuda.html     | 1785 +----
 docs/tutorials/auto_scheduler/tune_matmul_x86.html |  650 +-
 docs/tutorials/autotvm/sg_execution_times.html     |  195 +-
 docs/tutorials/autotvm/tune_conv2d_cuda.html       |  232 +-
 docs/tutorials/autotvm/tune_relay_arm.html         |  198 +-
 docs/tutorials/autotvm/tune_relay_cuda.html        |  196 +-
 docs/tutorials/autotvm/tune_relay_mobile_gpu.html  |  198 +-
 docs/tutorials/autotvm/tune_relay_x86.html         |  188 +-
 docs/tutorials/autotvm/tune_simple_template.html   |  208 +-
 docs/tutorials/dev/bring_your_own_datatypes.html   |  819 --
 docs/tutorials/dev/low_level_custom_pass.html      |  256 +-
 docs/tutorials/dev/sg_execution_times.html         |  188 +-
 docs/tutorials/dev/use_pass_infra.html             | 3396 +--------
 docs/tutorials/frontend/build_gcn.html             |  254 +-
 .../frontend/deploy_model_on_android.html          |  199 +-
 docs/tutorials/frontend/deploy_model_on_rasp.html  |  193 +-
 .../frontend/deploy_object_detection_pytorch.html  |  217 +-
 docs/tutorials/frontend/deploy_prequantized.html   |  201 +-
 .../frontend/deploy_prequantized_tflite.html       |  193 +-
 docs/tutorials/frontend/deploy_quantized.html      |  193 +-
 docs/tutorials/frontend/deploy_sparse.html         |  187 +-
 docs/tutorials/frontend/deploy_ssd_gluoncv.html    |  191 +-
 docs/tutorials/frontend/from_caffe2.html           |  189 +-
 docs/tutorials/frontend/from_coreml.html           |  193 +-
 docs/tutorials/frontend/from_darknet.html          |  193 +-
 docs/tutorials/frontend/from_keras.html            |  224 +-
 docs/tutorials/frontend/from_mxnet.html            |  189 +-
 docs/tutorials/frontend/from_onnx.html             |  206 +-
 docs/tutorials/frontend/from_pytorch.html          |  195 +-
 docs/tutorials/frontend/from_tensorflow.html       |  193 +-
 docs/tutorials/frontend/from_tflite.html           |  189 +-
 docs/tutorials/frontend/sg_execution_times.html    |  221 +-
 docs/tutorials/frontend/using_external_lib.html    |  187 +-
 .../get_started/cross_compilation_and_rpc.html     |  192 +-
 docs/tutorials/get_started/relay_quick_start.html  |  298 +-
 docs/tutorials/get_started/sg_execution_times.html |  190 +-
 .../get_started/tensor_expr_get_started.html       |  193 +-
 .../get_started/tvmc_command_line_driver.html      |  660 --
 docs/tutorials/index.html                          |  391 +-
 docs/tutorials/language/extern_op.html             |  185 +-
 docs/tutorials/language/intrin_math.html           |  185 +-
 docs/tutorials/language/reduction.html             |  351 +-
 docs/tutorials/language/scan.html                  |  285 +-
 docs/tutorials/language/schedule_primitives.html   |  562 +-
 docs/tutorials/language/sg_execution_times.html    |  197 +-
 docs/tutorials/language/tedd.html                  |  191 +-
 docs/tutorials/language/tensorize.html             |  347 +-
 docs/tutorials/language/tuple_inputs.html          |  302 +-
 docs/tutorials/micro/micro_tflite.html             |  187 +-
 docs/tutorials/micro/sg_execution_times.html       |  185 +-
 docs/tutorials/optimize/opt_conv_cuda.html         |  193 +-
 docs/tutorials/optimize/opt_conv_tensorcore.html   |  257 +-
 docs/tutorials/optimize/opt_gemm.html              |  442 +-
 .../optimize/opt_matmul_auto_tensorcore.html       |  185 +-
 docs/tutorials/optimize/sg_execution_times.html    |  191 +-
 docs/tutorials/topi/intro_topi.html                |  559 +-
 docs/tutorials/topi/sg_execution_times.html        |  185 +-
 docs/vta/dev/config.html                           |  187 +-
 docs/vta/dev/hardware.html                         |  199 +-
 docs/vta/dev/index.html                            |  187 +-
 docs/vta/index.html                                |  185 +-
 docs/vta/install.html                              |  189 +-
 docs/vta/tutorials/autotvm/sg_execution_times.html |  185 +-
 docs/vta/tutorials/autotvm/tune_relay_vta.html     |  380 +-
 .../tutorials/frontend/deploy_classification.html  |  211 +-
 .../vta/tutorials/frontend/sg_execution_times.html |  185 +-
 docs/vta/tutorials/index.html                      |  185 +-
 docs/vta/tutorials/matrix_multiply.html            |  300 +-
 docs/vta/tutorials/optimize/convolution_opt.html   |  300 +-
 .../tutorials/optimize/matrix_multiply_opt.html    |  294 +-
 .../vta/tutorials/optimize/sg_execution_times.html |  187 +-
 docs/vta/tutorials/sg_execution_times.html         |  187 +-
 docs/vta/tutorials/vta_get_started.html            |  263 +-
 3604 files changed, 164830 insertions(+), 262593 deletions(-)

diff --git a/.gitignore b/.gitignore
index cf6401d..7aaaff4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,4 +5,3 @@ website.tgz
 *.bak
 .jekyll-cache
 docs.tgz
-Gemfile.lock
diff --git a/docs/_downloads/00a1355fcb7c30e9e70fc8fefc708f98/tuple_inputs.ipynb b/docs/_downloads/00a1355fcb7c30e9e70fc8fefc708f98/tuple_inputs.ipynb
index 7acb5de..dedcacd 100644
--- a/docs/_downloads/00a1355fcb7c30e9e70fc8fefc708f98/tuple_inputs.ipynb
+++ b/docs/_downloads/00a1355fcb7c30e9e70fc8fefc708f98/tuple_inputs.ipynb
@@ -114,7 +114,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.6.12"
+      "version": "3.6.10"
     }
   },
   "nbformat": 4,
diff --git a/docs/_downloads/02fc8627299fa0b05eb017773b471bfa/from_tflite.py b/docs/_downloads/02fc8627299fa0b05eb017773b471bfa/from_tflite.py
index a3014f9..ee7da62 100644
--- a/docs/_downloads/02fc8627299fa0b05eb017773b471bfa/from_tflite.py
+++ b/docs/_downloads/02fc8627299fa0b05eb017773b471bfa/from_tflite.py
@@ -105,7 +105,7 @@ from PIL import Image
 from matplotlib import pyplot as plt
 import numpy as np
 
-image_url = "https://github.com/dmlc/mxnet.js/blob/main/data/cat.png?raw=true"
+image_url = "https://github.com/dmlc/mxnet.js/blob/master/data/cat.png?raw=true"
 image_path = download_testdata(image_url, "cat.png", module="data")
 resized_image = Image.open(image_path).resize((224, 224))
 plt.imshow(resized_image)
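The hunk above sits in the TFLite tutorial's image-preprocessing step: download a test image, resize it to the 224x224 input the model expects, convert to float32, and add a batch dimension to get NHWC layout. A minimal standalone sketch of that pattern, assuming PIL and numpy are installed and a local cat.png stands in for the downloaded file:

    from PIL import Image
    import numpy as np

    resized_image = Image.open("cat.png").resize((224, 224))  # model expects 224x224
    image_data = np.asarray(resized_image).astype("float32")  # HWC, float32
    image_data = np.expand_dims(image_data, axis=0)           # add batch dim -> NHWC
    print(image_data.shape)                                   # (1, 224, 224, 3)
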
diff --git a/docs/_downloads/0bb862dbb3a4c434477f93fe2c147fbb/tune_simple_template.py b/docs/_downloads/0bb862dbb3a4c434477f93fe2c147fbb/tune_simple_template.py
index b5167b3..357abf1 100644
--- a/docs/_downloads/0bb862dbb3a4c434477f93fe2c147fbb/tune_simple_template.py
+++ b/docs/_downloads/0bb862dbb3a4c434477f93fe2c147fbb/tune_simple_template.py
@@ -26,10 +26,6 @@ The first step is defining a search space.
 The second step is running a search algorithm to explore through this space.
 In this tutorial, you can learn how to perform these two steps in TVM.
 The whole workflow is illustrated by a matrix multiplication example.
-
-Note that this tutorial will not run on Windows or recent versions of macOS. To
-get it to run, you will need to wrap the body of this tutorial in a :code:`if
-__name__ == "__main__":` block.
 """
 
 ######################################################################
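The note removed in the hunk above refers to the guard that AutoTVM tutorials need on platforms where Python multiprocessing spawns fresh interpreters (Windows, recent macOS), since the tuner's measurement workers re-import the script. A minimal sketch of the wrapper the note describes; the main() split is illustrative, not part of the tutorial itself:

    def main():
        # tutorial body: define the search space, then run the search algorithm
        ...

    if __name__ == "__main__":
        main()
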
diff --git a/docs/_downloads/0c30ce88b67b0e8d46494348ab36c9fb/from_tflite.ipynb b/docs/_downloads/0c30ce88b67b0e8d46494348ab36c9fb/from_tflite.ipynb
index e0ed6f8..2c3f32a 100644
--- a/docs/_downloads/0c30ce88b67b0e8d46494348ab36c9fb/from_tflite.ipynb
+++ b/docs/_downloads/0c30ce88b67b0e8d46494348ab36c9fb/from_tflite.ipynb
@@ -69,7 +69,7 @@
       },
       "outputs": [],
       "source": [
-        "from PIL import Image\nfrom matplotlib import pyplot as plt\nimport numpy as np\n\nimage_url = \"https://github.com/dmlc/mxnet.js/blob/main/data/cat.png?raw=true\"\nimage_path = download_testdata(image_url, \"cat.png\", module=\"data\")\nresized_image = Image.open(image_path).resize((224, 224))\nplt.imshow(resized_image)\nplt.show()\nimage_data = np.asarray(resized_image).astype(\"float32\")\n\n# Add a dimension to the image so that we have NHWC format layout\nimage_data = np.ex [...]
+        "from PIL import Image\nfrom matplotlib import pyplot as plt\nimport numpy as np\n\nimage_url = \"https://github.com/dmlc/mxnet.js/blob/master/data/cat.png?raw=true\"\nimage_path = download_testdata(image_url, \"cat.png\", module=\"data\")\nresized_image = Image.open(image_path).resize((224, 224))\nplt.imshow(resized_image)\nplt.show()\nimage_data = np.asarray(resized_image).astype(\"float32\")\n\n# Add a dimension to the image so that we have NHWC format layout\nimage_data = np. [...]
       ]
     },
     {
@@ -143,7 +143,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.6.12"
+      "version": "3.6.10"
     }
   },
   "nbformat": 4,
diff --git a/docs/_downloads/0d95a85fc279fdff660608ef305b9107/tune_simple_template.ipynb b/docs/_downloads/0d95a85fc279fdff660608ef305b9107/tune_simple_template.ipynb
index b49ccad..7c9c127 100644
--- a/docs/_downloads/0d95a85fc279fdff660608ef305b9107/tune_simple_template.ipynb
+++ b/docs/_downloads/0d95a85fc279fdff660608ef305b9107/tune_simple_template.ipynb
@@ -15,7 +15,7 @@
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "\nWriting tunable template and Using auto-tuner\n=============================================\n**Author**: `Lianmin Zheng <https://github.com/merrymercy>`_\n\nThis is an introduction tutorial to the auto-tuning module in TVM.\n\nThere are two steps in auto-tuning.\nThe first step is defining a search space.\nThe second step is running a search algorithm to explore through this space.\nIn this tutorial, you can learn how to perform these two steps in TVM.\nThe whole workflow is  [...]
+        "\nWriting tunable template and Using auto-tuner\n=============================================\n**Author**: `Lianmin Zheng <https://github.com/merrymercy>`_\n\nThis is an introduction tutorial to the auto-tuning module in TVM.\n\nThere are two steps in auto-tuning.\nThe first step is defining a search space.\nThe second step is running a search algorithm to explore through this space.\nIn this tutorial, you can learn how to perform these two steps in TVM.\nThe whole workflow is  [...]
       ]
     },
     {
@@ -182,7 +182,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.6.12"
+      "version": "3.6.10"
     }
   },
   "nbformat": 4,
diff --git a/docs/_downloads/1195277fef6a622db64b78b4ea799ed4/matrix_multiply.py b/docs/_downloads/1195277fef6a622db64b78b4ea799ed4/matrix_multiply.py
index 593ac3c..77fc805 100644
--- a/docs/_downloads/1195277fef6a622db64b78b4ea799ed4/matrix_multiply.py
+++ b/docs/_downloads/1195277fef6a622db64b78b4ea799ed4/matrix_multiply.py
@@ -40,7 +40,7 @@ from tvm import te
 import vta
 import numpy as np
 from tvm import rpc
-from tvm.contrib import utils
+from tvm.contrib import util
 from vta.testing import simulator
 
 # Load VTA parameters from the 3rdparty/vta-hw/config/vta_config.json file
@@ -86,7 +86,7 @@ elif env.TARGET in ["sim", "tsim"]:
 # The last operation is a cast and copy back to DRAM, into results tensor
 # :code:`C`.
 #
-# .. image:: https://raw.githubusercontent.com/uwsampl/web-data/main/vta/tutorial/gemm_dataflow.png
+# .. image:: https://raw.githubusercontent.com/uwsaml/web-data/master/vta/tutorial/gemm_dataflow.png
 #      :align: center
 
 ######################################################################
@@ -107,7 +107,7 @@ elif env.TARGET in ["sim", "tsim"]:
 #   adding the result matrix to an accumulator matrix, as shown in the
 #   figure below.
 #
-#   .. image:: https://raw.githubusercontent.com/uwsampl/web-data/main/vta/tutorial/tensor_core.png
+#   .. image:: https://raw.githubusercontent.com/uwsaml/web-data/master/vta/tutorial/tensor_core.png
 #        :align: center
 #        :width: 480px
 #
@@ -126,7 +126,7 @@ elif env.TARGET in ["sim", "tsim"]:
 #   contiguous.
 #   The resulting tiled tensor has a shape of (2, 4, 2, 2).
 #
-#   .. image:: https://raw.githubusercontent.com/uwsampl/web-data/main/vta/tutorial/data_tiling.png
+#   .. image:: https://raw.githubusercontent.com/uwsaml/web-data/master/vta/tutorial/data_tiling.png
 #        :align: center
 #        :width: 480px
 #
@@ -389,7 +389,7 @@ print(vta.lower(s, [A, B, C], simple_mode=True))
 my_gemm = vta.build(s, [A, B, C], "ext_dev", env.target_host, name="my_gemm")
 
 # Write the compiled module into an object file.
-temp = utils.tempdir()
+temp = util.tempdir()
 my_gemm.save(temp.relpath("gemm.o"))
 
 # Send the executable over RPC
diff --git a/docs/_downloads/13509e02380dbdb802e80921620e9b5c/use_pass_infra.ipynb b/docs/_downloads/13509e02380dbdb802e80921620e9b5c/use_pass_infra.ipynb
index c10a41f..936c740 100644
--- a/docs/_downloads/13509e02380dbdb802e80921620e9b5c/use_pass_infra.ipynb
+++ b/docs/_downloads/13509e02380dbdb802e80921620e9b5c/use_pass_infra.ipynb
@@ -251,7 +251,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.6.12"
+      "version": "3.6.10"
     }
   },
   "nbformat": 4,
diff --git a/docs/_downloads/143c743c62f58570eabd77fd3395ca8c/scan.py b/docs/_downloads/143c743c62f58570eabd77fd3395ca8c/scan.py
index 5f51320..fdb6ec9 100644
--- a/docs/_downloads/143c743c62f58570eabd77fd3395ca8c/scan.py
+++ b/docs/_downloads/143c743c62f58570eabd77fd3395ca8c/scan.py
@@ -25,7 +25,6 @@ Recurrent computing is a typical pattern in neural networks.
 from __future__ import absolute_import, print_function
 
 import tvm
-import tvm.testing
 from tvm import te
 import numpy as np
 
diff --git a/docs/_downloads/1604460dde2b82fb9db809bb388890f8/deploy_prequantized_tflite.ipynb b/docs/_downloads/1604460dde2b82fb9db809bb388890f8/deploy_prequantized_tflite.ipynb
index 7017b5d..db7c42f 100644
--- a/docs/_downloads/1604460dde2b82fb9db809bb388890f8/deploy_prequantized_tflite.ipynb
+++ b/docs/_downloads/1604460dde2b82fb9db809bb388890f8/deploy_prequantized_tflite.ipynb
@@ -94,7 +94,7 @@
       },
       "outputs": [],
       "source": [
-        "def get_real_image(im_height, im_width):\n    from PIL import Image\n\n    repo_base = \"https://github.com/dmlc/web-data/raw/main/tensorflow/models/InceptionV1/\"\n    img_name = \"elephant-299.jpg\"\n    image_url = os.path.join(repo_base, img_name)\n    img_path = download_testdata(image_url, img_name, module=\"data\")\n    image = Image.open(img_path).resize((im_height, im_width))\n    x = np.array(image).astype(\"uint8\")\n    data = np.reshape(x, (1, im_height, im_width, 3 [...]
+        "def get_real_image(im_height, im_width):\n    from PIL import Image\n\n    repo_base = \"https://github.com/dmlc/web-data/raw/master/tensorflow/models/InceptionV1/\"\n    img_name = \"elephant-299.jpg\"\n    image_url = os.path.join(repo_base, img_name)\n    img_path = download_testdata(image_url, img_name, module=\"data\")\n    image = Image.open(img_path).resize((im_height, im_width))\n    x = np.array(image).astype(\"uint8\")\n    data = np.reshape(x, (1, im_height, im_width, [...]
       ]
     },
     {
@@ -311,7 +311,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.6.12"
+      "version": "3.6.10"
     }
   },
   "nbformat": 4,
diff --git a/docs/_downloads/18fb1ab3ed0a0c9f304520f2beaf4fd6/tvmc_command_line_driver.py b/docs/_downloads/18fb1ab3ed0a0c9f304520f2beaf4fd6/tvmc_command_line_driver.py
deleted file mode 100644
index d844de5..0000000
--- a/docs/_downloads/18fb1ab3ed0a0c9f304520f2beaf4fd6/tvmc_command_line_driver.py
+++ /dev/null
@@ -1,336 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-Getting Started with TVM command line driver - TVMC
-===================================================
-**Authors**:
-`Leandro Nunes <https://github.com/leandron>`_,
-`Matthew Barrett <https://github.com/mbaret>`_
-
-This tutorial is an introduction to working with TVMC, the TVM command
-line driver. TVMC is a tool that exposes TVM features such as
-auto-tuning, compiling, profiling and execution of models, via a
-command line interface.
-
-In this tutorial we are going to use TVMC to compile, run and tune
-ResNet-50 on an x86 CPU.
-
-We are going to start by downloading ResNet-50 V2. Then, we are going
-to use TVMC to compile this model into a TVM module, and use the
-compiled module to generate predictions. Finally, we are going to experiment
-with the auto-tuning options that can be used to help the compiler
-improve network performance.
-
-The final goal is to give an overview of TVMC's capabilities and also
-some guidance on where to look for more information.
-"""
-
-######################################################################
-# Using TVMC
-# ----------
-#
-# TVMC is a Python application, part of the TVM Python package.
-# When you install TVM using a Python package, you will get TVMC
-# as a command line application called ``tvmc``.
-#
-# Alternatively, if you have TVM as a Python module on your
-# ``$PYTHONPATH``, you can access the command line driver functionality
-# via the executable python module, ``python -m tvm.driver.tvmc``.
-#
-# For simplicity, this tutorial refers to the TVMC command line as
-# ``tvmc <options>``, but the same results can be obtained with
-# ``python -m tvm.driver.tvmc <options>``.
-#
-# You can check the help page using:
-#
-# .. code-block:: bash
-#
-#   tvmc --help
-#
-#
-# As you can see in the help page, the main features are
-# accessible via the subcommands ``tune``, ``compile`` and ``run``.
-# To read about specific options under a given subcommand, use
-# ``tvmc <subcommand> --help``.
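-#
-# For example, these two invocations show the same help page for the
-# ``compile`` subcommand:
-#
-# .. code-block:: bash
-#
-#   tvmc compile --help
-#   python -m tvm.driver.tvmc compile --help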
-#
-# In the following sections we will use TVMC to tune, compile and
-# run a model. But first, we need a model.
-#
-
-
-######################################################################
-# Obtaining the model
-# -------------------
-#
-# We are going to use ResNet-50 V2 as an example to experiment with TVMC.
-# The version below is in ONNX format. To download the file, you can use
-# the command below:
-#
-# .. code-block:: bash
-#
-#   wget https://github.com/onnx/models/raw/master/vision/classification/resnet/model/resnet50-v2-7.onnx
-#
-#
-
-######################################################################
-# .. note:: Supported model formats
-#
-#   TVMC supports models created with Keras, ONNX, TensorFlow, TFLite
-#   and Torch. Use the option ``--model-format`` if you need to
-#   explicitly provide the model format you are using, as in the
-#   example below. See ``tvmc compile --help`` for more information.
-#
-
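-
-######################################################################
-# For instance, the model format can be stated explicitly. This is a
-# sketch reusing the ONNX file downloaded above; ``onnx`` as the value
-# accepted by ``--model-format`` is an assumption here, so check
-# ``tvmc compile --help`` for the exact names:
-#
-# .. code-block:: bash
-#
-#   tvmc compile \
-#     --model-format onnx \
-#     --target "llvm" \
-#     --output compiled_module.tar \
-#     resnet50-v2-7.onnx
-#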
-
-######################################################################
-# Compiling the model
-# -------------------
-#
-# The next step, once we've downloaded ResNet-50, is to compile it.
-# To accomplish that, we are going to use ``tvmc compile``. The
-# output we get from the compilation process is a TAR package
-# that can be used to run our model on the target device.
-#
-# .. code-block:: bash
-#
-#   tvmc compile \
-#     --target "llvm" \
-#     --output compiled_module.tar \
-#     resnet50-v2-7.onnx
-#
-# Once compilation finishes, the output ``compiled_module.tar`` will be created. This
-# can be directly loaded by your application and run via the TVM runtime APIs.
-#
-
-
-######################################################################
-# .. note:: Defining the correct target
-#
-#   Specifying the correct target (option ``--target``) can have a huge
-#   impact on the performance of the compiled module, as it can take
-#   advantage of hardware features available on the target. For more
-#   information, please refer to `Auto-tuning a convolutional network
-#   for x86 CPU <https://tvm.apache.org/docs/tutorials/autotvm/tune_relay_x86.html#define-network>`_.
-#
-
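-
-######################################################################
-# As an illustration, a more specific x86 target might look like the
-# sketch below. The ``-mcpu=skylake-avx512`` value is hypothetical --
-# it only helps if your CPU actually supports those features:
-#
-# .. code-block:: bash
-#
-#   tvmc compile \
-#     --target "llvm -mcpu=skylake-avx512" \
-#     --output compiled_module.tar \
-#     resnet50-v2-7.onnx
-#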
-
-######################################################################
-#
-# In the next step, we are going to use the compiled module, providing it
-# with some inputs, to generate some predictions.
-#
-
-
-######################################################################
-# Input pre-processing
-# --------------------
-#
-# In order to generate predictions, we will need two things:
-#
-# - the compiled module, which we just produced;
-# - a valid input to the model.
-#
-# Each model is particular when it comes to expected tensor shapes, formats
-# and data types. For this reason, most models require some pre- and
-# post-processing to ensure the input(s) are valid and to interpret the output(s).
-#
-# In TVMC, we adopted NumPy's ``.npz`` format for both input and output data.
-# This is a well-supported NumPy format to serialize multiple arrays into a file.
-#
-# We will use the usual cat image, similar to other TVM tutorials:
-#
-# .. image:: https://s3.amazonaws.com/model-server/inputs/kitten.jpg
-#    :height: 224px
-#    :width: 224px
-#    :align: center
-#
-# For our ResNet-50 V2 model, the input is expected to be in ImageNet format.
-# Here is an example of a script to pre-process an image for ResNet-50 V2.
-#
-from tvm.contrib.download import download_testdata
-from PIL import Image
-import numpy as np
-
-img_url = "https://s3.amazonaws.com/model-server/inputs/kitten.jpg"
-img_path = download_testdata(img_url, "imagenet_cat.png", module="data")
-
-# Resize it to 224x224
-resized_image = Image.open(img_path).resize((224, 224))
-img_data = np.asarray(resized_image).astype("float32")
-
-# ONNX expects NCHW input, so convert the array
-img_data = np.transpose(img_data, (2, 0, 1))
-
-# Normalize according to ImageNet
-imagenet_mean = np.array([0.485, 0.456, 0.406])
-imagenet_stddev = np.array([0.229, 0.224, 0.225])
-norm_img_data = np.zeros(img_data.shape).astype("float32")
-for i in range(img_data.shape[0]):
-    norm_img_data[i, :, :] = (img_data[i, :, :] / 255 - imagenet_mean[i]) / imagenet_stddev[i]
-
-# Add batch dimension
-img_data = np.expand_dims(norm_img_data, axis=0)
-
-# Save to .npz (outputs imagenet_cat.npz)
-np.savez("imagenet_cat", data=img_data)
-
-
-######################################################################
-# Running the compiled module
-# ---------------------------
-#
-# With both the compiled module and input file in hand, we can run the
-# model by invoking ``tvmc run``.
-#
-# .. code-block:: bash
-#
-#    tvmc run \
-#      --inputs imagenet_cat.npz \
-#      --output predictions.npz \
-#      compiled_module.tar
-#
-# When running the above command, a new file ``predictions.npz`` should
-# be produced. It contains the output tensors.
-#
-# In this example, we are running the model on the same machine that we used
-# for compilation. In some cases we might want to run it remotely via
-# an RPC Tracker. To read more about these options, please check
-# ``tvmc run --help``.
-#
-
-######################################################################
-# Output post-processing
-# ----------------------
-#
-# As previously mentioned, each model will have its own particular way
-# of providing output tensors.
-#
-# In our case, we need to run some post-processing to render the
-# outputs from ResNet-50 V2 into a more human-readable form.
-#
-# The script below shows an example of the post-processing to extract
-# labels from the output of our compiled module.
-#
-import os.path
-import numpy as np
-
-from scipy.special import softmax
-
-from tvm.contrib.download import download_testdata
-
-# Download a list of labels
-labels_url = "https://s3.amazonaws.com/onnx-model-zoo/synset.txt"
-labels_path = download_testdata(labels_url, "synset.txt", module="data")
-
-with open(labels_path, "r") as f:
-    labels = [l.rstrip() for l in f]
-
-output_file = "predictions.npz"
-
-# Open the output and read the output tensor
-if os.path.exists(output_file):
-    with np.load(output_file) as data:
-        # softmax turns the raw network outputs into probabilities
-        scores = softmax(data["output_0"])
-        scores = np.squeeze(scores)
-        # rank the classes from most to least likely, keeping the
-        # probabilities intact so they can be printed below
-        ranks = np.argsort(scores)[::-1]
-
-        for rank in ranks[0:5]:
-            print("class='%s' with probability=%f" % (labels[rank], scores[rank]))
-
-
-########################################################################
-# When running the script, a list of predictions should be printed similar
-# to the example below.
-#
-# .. code-block:: bash
-#
-#   $ python post_processing.py
-#   class='n02123045 tabby, tabby cat' with probability=0.610553
-#   class='n02123159 tiger cat' with probability=0.367179
-#   class='n02124075 Egyptian cat' with probability=0.019365
-#   class='n02129604 tiger, Panthera tigris' with probability=0.001273
-#   class='n04040759 radiator' with probability=0.000261
-#
-
-
-######################################################################
-# Tuning the model
-# ----------------
-#
-# In some cases, we might not get the expected performance when running
-# inferences using our compiled module. In cases like this, we can make use
-# of the auto-tuner, to find a better configuration for our model and
-# get a boost in performance.
-#
-# Tuning in TVM refers to the process by which a model is optimized
-# to run faster on a given target. This differs from training or
-# fine-tuning in that it does not affect the accuracy of the model,
-# but only the runtime performance.
-#
-# As part of the tuning process, TVM will try running many different
-# operator implementation variants to see which perform best. The
-# results of these runs are stored in a tuning records file, which is
-# ultimately the output of the ``tune`` subcommand.
-#
-# In the simplest form, tuning requires you to provide three things:
-#
-# - the target specification of the device you intend to run this model on;
-# - the path to an output file in which the tuning records will be stored, and finally,
-# - a path to the model to be tuned.
-#
-#
-# The example below demonstrates how that works in practice:
-#
-# .. code-block:: bash
-#
-#   tvmc tune \
-#     --target "llvm" \
-#     --output autotuner_records.json \
-#     resnet50-v2-7.onnx
-#
-#
-# Tuning sessions can take a long time, so ``tvmc tune`` offers many options to
-# customize your tuning process, in terms of number of repetitions (``--repeat`` and
-# ``--number``, for example), the tuning algorithm to be used, and so on.
-# Check ``tvmc tune --help`` for more information.
-#
-# As an output of the tuning process above, we obtained the tuning records stored
-# in ``autotuner_records.json``. This file can be used in two ways:
-#
-# - as an input to further tuning (via ``tvmc tune --tuning-records``), or
-# - as an input to the compiler.
-#
-# The compiler will use the results to generate high performance code for the model
-# on your specified target. To do that we can use ``tvmc compile --tuning-records``.
-# Check ``tvmc compile --help`` for more information.
-#
-
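-
-######################################################################
-# For example, a hypothetical recompilation that reuses the records
-# produced above (the output file name here is just illustrative):
-#
-# .. code-block:: bash
-#
-#   tvmc compile \
-#     --target "llvm" \
-#     --tuning-records autotuner_records.json \
-#     --output compiled_module_tuned.tar \
-#     resnet50-v2-7.onnx
-#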
-
-######################################################################
-# Final Remarks
-# -------------
-#
-# In this tutorial, we presented TVMC, a command line driver for TVM.
-# We demonstrated how to compile, run and tune a model, and we
-# discussed the need for pre- and post-processing of inputs and outputs.
-#
-# Here we presented a simple example using ResNet-50 V2 locally. However, TVMC
-# supports many more features including cross-compilation, remote execution and
-# profiling/benchmarking.
-#
-# To see what other options are available, please have a look at ``tvmc --help``.
-#
diff --git a/docs/_downloads/2354a24ad8bc07194943c49f2fb48874/tune_conv2d_cuda.ipynb b/docs/_downloads/2354a24ad8bc07194943c49f2fb48874/tune_conv2d_cuda.ipynb
index 8353b85..4994c69 100644
--- a/docs/_downloads/2354a24ad8bc07194943c49f2fb48874/tune_conv2d_cuda.ipynb
+++ b/docs/_downloads/2354a24ad8bc07194943c49f2fb48874/tune_conv2d_cuda.ipynb
@@ -15,7 +15,7 @@
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "\nTuning High Performance Convolution on NVIDIA GPUs\n=========================================================================\n**Author**: `Lianmin Zheng <https://github.com/merrymercy>`_\n\nThis is an advanced tutorial for writing high performance tunable template for\nNVIDIA GPU. By running auto-tuner on this template, we can outperform the\nvendor provided library CuDNN in many cases.\n\nNote that this tutorial will not run on Windows or recent versions of macOS. To\nget it [...]
+        "\nTuning High Performance Convolution on NVIDIA GPUs\n=========================================================================\n**Author**: `Lianmin Zheng <https://github.com/merrymercy>`_\n\nThis is an advanced tutorial for writing high performance tunable template for\nNVIDIA GPU. By running auto-tuner on this template, we can outperform the\nvendor provided library CuDNN in many cases.\n\n"
       ]
     },
     {
@@ -107,7 +107,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.6.12"
+      "version": "3.6.10"
     }
   },
   "nbformat": 4,
diff --git a/docs/_downloads/24a7471da81b18c4ba77d215289aed2f/relay_quick_start.ipynb b/docs/_downloads/24a7471da81b18c4ba77d215289aed2f/relay_quick_start.ipynb
index 1068889..19d8a89 100644
--- a/docs/_downloads/24a7471da81b18c4ba77d215289aed2f/relay_quick_start.ipynb
+++ b/docs/_downloads/24a7471da81b18c4ba77d215289aed2f/relay_quick_start.ipynb
@@ -22,7 +22,7 @@
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "Overview for Supported Hardware Backend of TVM\n----------------------------------------------\nThe image below shows hardware backend currently supported by TVM:\n\n![](https://github.com/dmlc/web-data/raw/main/tvm/tutorial/tvm_support_list.png)\n\n     :align: center\n\nIn this tutorial, we'll choose cuda and llvm as target backends.\nTo begin with, let's import Relay and TVM.\n\n"
+        "Overview for Supported Hardware Backend of TVM\n----------------------------------------------\nThe image below shows hardware backend currently supported by TVM:\n\n![](https://github.com/dmlc/web-data/raw/master/tvm/tutorial/tvm_support_list.png)\n\n     :align: center\n\nIn this tutorial, we'll choose cuda and llvm as target backends.\nTo begin with, let's import Relay and TVM.\n\n"
       ]
     },
     {
@@ -105,7 +105,7 @@
       },
       "outputs": [],
       "source": [
-        "# save the graph, lib and params into separate files\nfrom tvm.contrib import utils\n\ntemp = utils.tempdir()\npath_lib = temp.relpath(\"deploy_lib.tar\")\nlib.export_library(path_lib)\nprint(temp.listdir())"
+        "# save the graph, lib and params into separate files\nfrom tvm.contrib import util\n\ntemp = util.tempdir()\npath_lib = temp.relpath(\"deploy_lib.tar\")\nlib.export_library(path_lib)\nprint(temp.listdir())"
       ]
     },
     {
@@ -136,7 +136,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.6.12"
+      "version": "3.6.10"
     }
   },
   "nbformat": 4,
diff --git a/docs/_downloads/272a5a893d007658546dc0eaf0a7aeed/tune_relay_cuda.py b/docs/_downloads/272a5a893d007658546dc0eaf0a7aeed/tune_relay_cuda.py
index f9b8921..32ee266 100644
--- a/docs/_downloads/272a5a893d007658546dc0eaf0a7aeed/tune_relay_cuda.py
+++ b/docs/_downloads/272a5a893d007658546dc0eaf0a7aeed/tune_relay_cuda.py
@@ -33,10 +33,6 @@ these operators, it will query this log file to get the best knob values.
 We also released pre-tuned parameters for some NVIDIA GPUs. You can go to
 `NVIDIA GPU Benchmark <https://github.com/apache/incubator-tvm/wiki/Benchmark#nvidia-gpu>`_
 to see the results.
-
-Note that this tutorial will not run on Windows or recent versions of macOS. To
-get it to run, you will need to wrap the body of this tutorial in a :code:`if
-__name__ == "__main__":` block.
 """
 
 ######################################################################
@@ -69,7 +65,7 @@ from tvm import autotvm
 from tvm import relay
 import tvm.relay.testing
 from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner
-from tvm.contrib.utils import tempdir
+from tvm.contrib.util import tempdir
 import tvm.contrib.graph_runtime as runtime
 
 #################################################################
@@ -317,7 +313,7 @@ def tune_and_evaluate(tuning_opt):
 #      import logging
 #      logging.getLogger('autotvm').setLevel(logging.DEBUG)
 #
-#   Finally, always feel free to ask our community for help on https://discuss.tvm.apache.org
+#   Finally, always feel free to ask our community for help on https://discuss.tvm.ai
 
 
 #################################################################
@@ -326,7 +322,7 @@ def tune_and_evaluate(tuning_opt):
 #
 # If you have multiple devices, you can use all of them for measurement.
 # TVM uses the RPC Tracker to manage distributed devices.
-# The RPC Tracker is a centralized controller node. We can register all devices to
+# The RPC Tracker is a centralized master node. We can register all devices to
 # the tracker. For example, if we have 10 GPU cards, we can register all of them
 # to the tracker, and run 10 measurements in parallel, accelerating the tuning process.
 #
diff --git a/docs/_downloads/2c0ed53a9ebd68caf76cd8235fae2711/tune_relay_mobile_gpu.ipynb b/docs/_downloads/2c0ed53a9ebd68caf76cd8235fae2711/tune_relay_mobile_gpu.ipynb
index 52ce8d8..0e17ec1 100644
--- a/docs/_downloads/2c0ed53a9ebd68caf76cd8235fae2711/tune_relay_mobile_gpu.ipynb
+++ b/docs/_downloads/2c0ed53a9ebd68caf76cd8235fae2711/tune_relay_mobile_gpu.ipynb
@@ -15,7 +15,7 @@
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "\nAuto-tuning a convolutional network for Mobile GPU\n==================================================\n**Author**: `Lianmin Zheng <https://github.com/merrymercy>`_, `Eddie Yan <https://github.com/eqy>`_\n\nAuto-tuning for a specific device is critical for getting the best\nperformance. This is a tutorial about how to tune a whole convolutional\nnetwork.\n\nThe operator implementation for Mobile GPU in TVM is written in template form.\nThe template has many tunable knobs (tile [...]
+        "\nAuto-tuning a convolutional network for Mobile GPU\n==================================================\n**Author**: `Lianmin Zheng <https://github.com/merrymercy>`_, `Eddie Yan <https://github.com/eqy>`_\n\nAuto-tuning for a specific device is critical for getting the best\nperformance. This is a tutorial about how to tune a whole convolutional\nnetwork.\n\nThe operator implementation for Mobile GPU in TVM is written in template form.\nThe template has many tunable knobs (tile [...]
       ]
     },
     {
@@ -33,7 +33,7 @@
       },
       "outputs": [],
       "source": [
-        "import os\n\nimport numpy as np\n\nimport tvm\nfrom tvm import te\nfrom tvm import autotvm\nfrom tvm import relay\nimport tvm.relay.testing\nfrom tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner\nfrom tvm.contrib.utils import tempdir\nimport tvm.contrib.graph_runtime as runtime"
+        "import os\n\nimport numpy as np\n\nimport tvm\nfrom tvm import te\nfrom tvm import autotvm\nfrom tvm import relay\nimport tvm.relay.testing\nfrom tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner\nfrom tvm.contrib.util import tempdir\nimport tvm.contrib.graph_runtime as runtime"
       ]
     },
     {
@@ -58,14 +58,14 @@
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "Start RPC Tracker\n-----------------\nTVM uses RPC session to communicate with ARM boards.\nDuring tuning, the tuner will send the generated code to the board and\nmeasure the speed of code on the board.\n\nTo scale up the tuning, TVM uses RPC Tracker to manage distributed devices.\nThe RPC Tracker is a centralized controller node. We can register all devices to\nthe tracker. For example, if we have 10 phones, we can register all of them\nto the tracker, and run 10 measurements  [...]
+        "Start RPC Tracker\n-----------------\nTVM uses RPC session to communicate with ARM boards.\nDuring tuning, the tuner will send the generated code to the board and\nmeasure the speed of code on the board.\n\nTo scale up the tuning, TVM uses RPC Tracker to manage distributed devices.\nThe RPC Tracker is a centralized master node. We can register all devices to\nthe tracker. For example, if we have 10 phones, we can register all of them\nto the tracker, and run 10 measurements in p [...]
       ]
     },
     {
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "Register devices to RPC Tracker\n-----------------------------------\nNow we can register our devices to the tracker. The first step is to\nbuild the TVM runtime for the ARM devices.\n\n* For Linux:\n  Follow this section `build-tvm-runtime-on-device` to build\n  the TVM runtime on the device. Then register the device to tracker by\n\n  .. code-block:: bash\n\n    python -m tvm.exec.rpc_server --tracker=[HOST_IP]:9190 --key=rk3399\n\n  (replace :code:`[HOST_IP]` with the IP addr [...]
+        "Register devices to RPC Tracker\n-----------------------------------\nNow we can register our devices to the tracker. The first step is to\nbuild the TVM runtime for the ARM devices.\n\n* For Linux:\n  Follow this section `build-tvm-runtime-on-device` to build\n  the TVM runtime on the device. Then register the device to tracker by\n\n  .. code-block:: bash\n\n    python -m tvm.exec.rpc_server --tracker=[HOST_IP]:9190 --key=rk3399\n\n  (replace :code:`[HOST_IP]` with the IP addr [...]
       ]
     },
     {
@@ -140,7 +140,7 @@
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "<div class=\"alert alert-info\"><h4>Note</h4><p>**Experiencing Difficulties?**\n\n  The auto tuning module is error-prone. If you always see \" 0.00/ 0.00 GFLOPS\",\n  then there must be something wrong.\n\n  First, make sure you set the correct configuration of your device.\n  Then, you can print debug information by adding these lines in the beginning\n  of the script. It will print every measurement result, where you can find useful\n  error messages.\n\n  .. code-block:: pyt [...]
+        "<div class=\"alert alert-info\"><h4>Note</h4><p>**Experiencing Difficulties?**\n\n  The auto tuning module is error-prone. If you always see \" 0.00/ 0.00 GFLOPS\",\n  then there must be something wrong.\n\n  First, make sure you set the correct configuration of your device.\n  Then, you can print debug information by adding these lines in the beginning\n  of the script. It will print every measurement result, where you can find useful\n  error messages.\n\n  .. code-block:: pyt [...]
       ]
     }
   ],
@@ -160,7 +160,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.6.12"
+      "version": "3.6.10"
     }
   },
   "nbformat": 4,
diff --git a/docs/_downloads/2c8ef0390ad4c53ca85671fa36c33b26/tune_conv2d_cuda.py b/docs/_downloads/2c8ef0390ad4c53ca85671fa36c33b26/tune_conv2d_cuda.py
index b307077..ce9c198 100644
--- a/docs/_downloads/2c8ef0390ad4c53ca85671fa36c33b26/tune_conv2d_cuda.py
+++ b/docs/_downloads/2c8ef0390ad4c53ca85671fa36c33b26/tune_conv2d_cuda.py
@@ -22,10 +22,6 @@ Tuning High Performance Convolution on NVIDIA GPUs
 This is an advanced tutorial for writing high performance tunable template for
 NVIDIA GPU. By running auto-tuner on this template, we can outperform the
 vendor provided library CuDNN in many cases.
-
-Note that this tutorial will not run on Windows or recent versions of macOS. To
-get it to run, you will need to wrap the body of this tutorial in a :code:`if
-__name__ == "__main__":` block.
 """
 
 ######################################################################
diff --git a/docs/_downloads/2daaacf3c023a8ad30b14e52b9aaa635/matrix_multiply_opt.ipynb b/docs/_downloads/2daaacf3c023a8ad30b14e52b9aaa635/matrix_multiply_opt.ipynb
index 7a4a5ca..0cf7055 100644
--- a/docs/_downloads/2daaacf3c023a8ad30b14e52b9aaa635/matrix_multiply_opt.ipynb
+++ b/docs/_downloads/2daaacf3c023a8ad30b14e52b9aaa635/matrix_multiply_opt.ipynb
@@ -33,14 +33,14 @@
       },
       "outputs": [],
       "source": [
-        "from __future__ import absolute_import, print_function\n\nimport os\nimport tvm\nfrom tvm import te\nimport vta\nimport numpy as np\nfrom tvm import rpc\nfrom tvm.contrib import utils\nfrom vta.testing import simulator\n\n# Load VTA parameters from the 3rdparty/vta-hw/config/vta_config.json file\nenv = vta.get_env()\n\n# We read the Pynq RPC host IP address and port number from the OS environment\nhost = os.environ.get(\"VTA_RPC_HOST\", \"192.168.2.99\")\nport = int(os.environ.g [...]
+        "from __future__ import absolute_import, print_function\n\nimport os\nimport tvm\nfrom tvm import te\nimport vta\nimport numpy as np\nfrom tvm import rpc\nfrom tvm.contrib import util\nfrom vta.testing import simulator\n\n# Load VTA parameters from the 3rdparty/vta-hw/config/vta_config.json file\nenv = vta.get_env()\n\n# We read the Pynq RPC host IP address and port number from the OS environment\nhost = os.environ.get(\"VTA_RPC_HOST\", \"192.168.2.99\")\nport = int(os.environ.ge [...]
       ]
     },
     {
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "Computation Declaration\n-----------------------\nAs a first step, we need to describe our matrix multiplication computation.\nWe define the matrix multiplication as the computation one would find in a\nfully connected layer, defined by its batch size, input channels, and output\nchannels.\nThese have to be integer multiples of the VTA tensor shape:\n:code:`BATCH`, :code:`BLOCK_IN`, and :code:`BLOCK_OUT` respectively.\n\nWe've added extra operators to the matrix multiplication t [...]
+        "Computation Declaration\n-----------------------\nAs a first step, we need to describe our matrix multiplication computation.\nWe define the matrix multiplication as the computation one would find in a\nfully connected layer, defined by its batch size, input channels, and output\nchannels.\nThese have to be integer multiples of the VTA tensor shape:\n:code:`BATCH`, :code:`BLOCK_IN`, and :code:`BLOCK_OUT` respectively.\n\nWe've added extra operators to the matrix multiplication t [...]
       ]
     },
     {
@@ -76,7 +76,7 @@
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "Blocking the Computation\n~~~~~~~~~~~~~~~~~~~~~~~~\nThe matrix multiplication is by default too large for activations or weights\nto fit on VTA's on-chip buffers all at once.\nWe block the (1, 1024) by (1024, 1024) matrix multiplication into\nsmaller (1, 256) by (256, 256) matrix multiplications so the intermediate\ntensors can fit on the accelerator's on-chip SRAM.\nThis approach is similar to blocking techniques applied to CPUs and GPUs in\norder to increase cache hit rate.\n\ [...]
+        "Blocking the Computation\n~~~~~~~~~~~~~~~~~~~~~~~~\nThe matrix multiplication is by default too large for activations or weights\nto fit on VTA's on-chip buffers all at once.\nWe block the (1, 1024) by (1024, 1024) matrix multiplication into\nsmaller (1, 256) by (256, 256) matrix multiplications so the intermediate\ntensors can fit on the accelerator's on-chip SRAM.\nThis approach is similar to blocking techniques applied to CPUs and GPUs in\norder to increase cache hit rate.\n\ [...]
       ]
     },
     {
@@ -141,7 +141,7 @@
       },
       "outputs": [],
       "source": [
-        "# Compile the TVM module\nmy_gemm = vta.build(s, [data, weight, res], \"ext_dev\", env.target_host, name=\"my_gemm\")\ntemp = utils.tempdir()\nmy_gemm.save(temp.relpath(\"gemm.o\"))\nremote.upload(temp.relpath(\"gemm.o\"))\nf = remote.load_module(\"gemm.o\")\n\n# Get the remote device context\nctx = remote.ext_dev(0)\n\n# Initialize the data and weight arrays randomly in the int range of (-128, 128]\ndata_np = np.random.randint(-128, 128, size=(batch_size, in_channels)).astype(d [...]
+        "# Compile the TVM module\nmy_gemm = vta.build(s, [data, weight, res], \"ext_dev\", env.target_host, name=\"my_gemm\")\ntemp = util.tempdir()\nmy_gemm.save(temp.relpath(\"gemm.o\"))\nremote.upload(temp.relpath(\"gemm.o\"))\nf = remote.load_module(\"gemm.o\")\n\n# Get the remote device context\nctx = remote.ext_dev(0)\n\n# Initialize the data and weight arrays randomly in the int range of (-128, 128]\ndata_np = np.random.randint(-128, 128, size=(batch_size, in_channels)).astype(da [...]
       ]
     },
     {
@@ -168,7 +168,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.6.12"
+      "version": "3.6.10"
     }
   },
   "nbformat": 4,
diff --git a/docs/_downloads/2e974b05b6d59fcf944f96d27106b994/from_keras.ipynb b/docs/_downloads/2e974b05b6d59fcf944f96d27106b994/from_keras.ipynb
index 2380bf8..154de67 100644
--- a/docs/_downloads/2e974b05b6d59fcf944f96d27106b994/from_keras.ipynb
+++ b/docs/_downloads/2e974b05b6d59fcf944f96d27106b994/from_keras.ipynb
@@ -44,7 +44,7 @@
       },
       "outputs": [],
       "source": [
-        "if tuple(keras.__version__.split(\".\")) < (\"2\", \"4\", \"0\"):\n    weights_url = \"\".join(\n        [\n            \"https://github.com/fchollet/deep-learning-models/releases/\",\n            \"download/v0.2/resnet50_weights_tf_dim_ordering_tf_kernels.h5\",\n        ]\n    )\n    weights_file = \"resnet50_keras_old.h5\"\nelse:\n    weights_url = \"\".join(\n        [\n            \" https://storage.googleapis.com/tensorflow/keras-applications/\",\n            \"resnet/resne [...]
+        "weights_url = \"\".join(\n    [\n        \"https://github.com/fchollet/deep-learning-models/releases/\",\n        \"download/v0.2/resnet50_weights_tf_dim_ordering_tf_kernels.h5\",\n    ]\n)\nweights_file = \"resnet50_weights.h5\"\nweights_path = download_testdata(weights_url, weights_file, module=\"keras\")\nkeras_resnet50 = keras.applications.resnet50.ResNet50(\n    include_top=True, weights=None, input_shape=(224, 224, 3), classes=1000\n)\nkeras_resnet50.load_weights(weights_path)"
       ]
     },
     {
@@ -62,7 +62,7 @@
       },
       "outputs": [],
       "source": [
-        "from PIL import Image\nfrom matplotlib import pyplot as plt\nfrom keras.applications.resnet50 import preprocess_input\n\nimg_url = \"https://github.com/dmlc/mxnet.js/blob/main/data/cat.png?raw=true\"\nimg_path = download_testdata(img_url, \"cat.png\", module=\"data\")\nimg = Image.open(img_path).resize((224, 224))\nplt.imshow(img)\nplt.show()\n# input preprocess\ndata = np.array(img)[np.newaxis, :].astype(\"float32\")\ndata = preprocess_input(data).transpose([0, 3, 1, 2])\nprint [...]
+        "from PIL import Image\nfrom matplotlib import pyplot as plt\nfrom keras.applications.resnet50 import preprocess_input\n\nimg_url = \"https://github.com/dmlc/mxnet.js/blob/master/data/cat.png?raw=true\"\nimg_path = download_testdata(img_url, \"cat.png\", module=\"data\")\nimg = Image.open(img_path).resize((224, 224))\nplt.imshow(img)\nplt.show()\n# input preprocess\ndata = np.array(img)[np.newaxis, :].astype(\"float32\")\ndata = preprocess_input(data).transpose([0, 3, 1, 2])\npri [...]
       ]
     },
     {
@@ -136,7 +136,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.6.12"
+      "version": "3.6.10"
     }
   },
   "nbformat": 4,
diff --git a/docs/_downloads/2f6dcf56b15f857f94b6d320c1ace6e5/from_coreml.ipynb b/docs/_downloads/2f6dcf56b15f857f94b6d320c1ace6e5/from_coreml.ipynb
index 04f282f..f02c05b 100644
--- a/docs/_downloads/2f6dcf56b15f857f94b6d320c1ace6e5/from_coreml.ipynb
+++ b/docs/_downloads/2f6dcf56b15f857f94b6d320c1ace6e5/from_coreml.ipynb
@@ -62,7 +62,7 @@
       },
       "outputs": [],
       "source": [
-        "img_url = \"https://github.com/dmlc/mxnet.js/blob/main/data/cat.png?raw=true\"\nimg_path = download_testdata(img_url, \"cat.png\", module=\"data\")\nimg = Image.open(img_path).resize((224, 224))\n# Mobilenet.mlmodel's input is BGR format\nimg_bgr = np.array(img)[:, :, ::-1]\nx = np.transpose(img_bgr, (2, 0, 1))[np.newaxis, :]"
+        "img_url = \"https://github.com/dmlc/mxnet.js/blob/master/data/cat.png?raw=true\"\nimg_path = download_testdata(img_url, \"cat.png\", module=\"data\")\nimg = Image.open(img_path).resize((224, 224))\n# Mobilenet.mlmodel's input is BGR format\nimg_bgr = np.array(img)[:, :, ::-1]\nx = np.transpose(img_bgr, (2, 0, 1))[np.newaxis, :]"
       ]
     },
     {
@@ -136,7 +136,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.6.12"
+      "version": "3.6.10"
     }
   },
   "nbformat": 4,
diff --git a/docs/_downloads/30015213c2882505d466865fafaed52d/from_caffe2.ipynb b/docs/_downloads/30015213c2882505d466865fafaed52d/from_caffe2.ipynb
index 5f6fc42..e9447da 100644
--- a/docs/_downloads/30015213c2882505d466865fafaed52d/from_caffe2.ipynb
+++ b/docs/_downloads/30015213c2882505d466865fafaed52d/from_caffe2.ipynb
@@ -51,7 +51,7 @@
       },
       "outputs": [],
       "source": [
-        "from tvm.contrib.download import download_testdata\nfrom PIL import Image\nfrom matplotlib import pyplot as plt\nimport numpy as np\n\nimg_url = \"https://github.com/dmlc/mxnet.js/blob/main/data/cat.png?raw=true\"\nimg_path = download_testdata(img_url, \"cat.png\", module=\"data\")\nimg = Image.open(img_path).resize((224, 224))\nplt.imshow(img)\nplt.show()\n# input preprocess\ndef transform_image(image):\n    image = np.array(image) - np.array([123.0, 117.0, 104.0])\n    image / [...]
+        "from tvm.contrib.download import download_testdata\nfrom PIL import Image\nfrom matplotlib import pyplot as plt\nimport numpy as np\n\nimg_url = \"https://github.com/dmlc/mxnet.js/blob/master/data/cat.png?raw=true\"\nimg_path = download_testdata(img_url, \"cat.png\", module=\"data\")\nimg = Image.open(img_path).resize((224, 224))\nplt.imshow(img)\nplt.show()\n# input preprocess\ndef transform_image(image):\n    image = np.array(image) - np.array([123.0, 117.0, 104.0])\n    image [...]
       ]
     },
     {
@@ -125,7 +125,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.6.12"
+      "version": "3.6.10"
     }
   },
   "nbformat": 4,
diff --git a/docs/_downloads/37c76200603adf82ebeffc23bdef8d31/tensor_expr_get_started.py b/docs/_downloads/37c76200603adf82ebeffc23bdef8d31/tensor_expr_get_started.py
index 7f1bb6a..76e0262 100644
--- a/docs/_downloads/37c76200603adf82ebeffc23bdef8d31/tensor_expr_get_started.py
+++ b/docs/_downloads/37c76200603adf82ebeffc23bdef8d31/tensor_expr_get_started.py
@@ -30,7 +30,6 @@ the tensor expression language.
 from __future__ import absolute_import, print_function
 
 import tvm
-import tvm.testing
 from tvm import te
 import numpy as np
 
@@ -210,9 +209,9 @@ else:
 # - cc.create_shared calls a compiler (gcc) to create a shared library
 #
 from tvm.contrib import cc
-from tvm.contrib import utils
+from tvm.contrib import util
 
-temp = utils.tempdir()
+temp = util.tempdir()
 fadd.save(temp.relpath("myadd.o"))
 if tgt == "cuda":
     fadd.imported_modules[0].save(temp.relpath("myadd.ptx"))
diff --git a/docs/_downloads/38606228ff7130fbd6473b7c0625ddcd/deploy_model_on_android.ipynb b/docs/_downloads/38606228ff7130fbd6473b7c0625ddcd/deploy_model_on_android.ipynb
index d846f69..57871d3 100644
--- a/docs/_downloads/38606228ff7130fbd6473b7c0625ddcd/deploy_model_on_android.ipynb
+++ b/docs/_downloads/38606228ff7130fbd6473b7c0625ddcd/deploy_model_on_android.ipynb
@@ -26,7 +26,7 @@
       },
       "outputs": [],
       "source": [
-        "import os\nimport numpy as np\nfrom PIL import Image\nimport keras\nfrom keras.applications.mobilenet_v2 import MobileNetV2\nimport tvm\nfrom tvm import te\nimport tvm.relay as relay\nfrom tvm import rpc\nfrom tvm.contrib import utils, ndk, graph_runtime as runtime\nfrom tvm.contrib.download import download_testdata"
+        "import os\nimport numpy as np\nfrom PIL import Image\nimport keras\nfrom keras.applications.mobilenet_v2 import MobileNetV2\nimport tvm\nfrom tvm import te\nimport tvm.relay as relay\nfrom tvm import rpc\nfrom tvm.contrib import util, ndk, graph_runtime as runtime\nfrom tvm.contrib.download import download_testdata"
       ]
     },
     {
@@ -47,7 +47,7 @@
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "Register Android device to RPC Tracker\n--------------------------------------\nNow we can register our Android device to the tracker.\n\nFollow this `readme page <https://github.com/apache/incubator-tvm/tree/main/apps/android_rpc>`_ to\ninstall TVM RPC APK on the android device.\n\nHere is an example of config.mk. I enabled OpenCL and Vulkan.\n\n\n.. code-block:: bash\n\n  APP_ABI = arm64-v8a\n\n  APP_PLATFORM = android-24\n\n  # whether enable OpenCL during compile\n  USE_OPEN [...]
+        "Register Android device to RPC Tracker\n--------------------------------------\nNow we can register our Android device to the tracker.\n\nFollow this `readme page <https://github.com/apache/incubator-tvm/tree/master/apps/android_rpc>`_ to\ninstall TVM RPC APK on the android device.\n\nHere is an example of config.mk. I enabled OpenCL and Vulkan.\n\n\n.. code-block:: bash\n\n  APP_ABI = arm64-v8a\n\n  APP_PLATFORM = android-24\n\n  # whether enable OpenCL during compile\n  USE_OP [...]
       ]
     },
     {
@@ -83,7 +83,7 @@
       },
       "outputs": [],
       "source": [
-        "img_url = \"https://github.com/dmlc/mxnet.js/blob/main/data/cat.png?raw=true\"\nimg_name = \"cat.png\"\nimg_path = download_testdata(img_url, img_name, module=\"data\")\nimage = Image.open(img_path).resize((224, 224))\ndtype = \"float32\"\n\n\ndef transform_image(image):\n    image = np.array(image) - np.array([123.0, 117.0, 104.0])\n    image /= np.array([58.395, 57.12, 57.375])\n    image = image.transpose((2, 0, 1))\n    image = image[np.newaxis, :]\n    return image\n\n\nx = [...]
+        "img_url = \"https://github.com/dmlc/mxnet.js/blob/master/data/cat.png?raw=true\"\nimg_name = \"cat.png\"\nimg_path = download_testdata(img_url, img_name, module=\"data\")\nimage = Image.open(img_path).resize((224, 224))\ndtype = \"float32\"\n\n\ndef transform_image(image):\n    image = np.array(image) - np.array([123.0, 117.0, 104.0])\n    image /= np.array([58.395, 57.12, 57.375])\n    image = image.transpose((2, 0, 1))\n    image = image[np.newaxis, :]\n    return image\n\n\nx [...]
       ]
     },
     {
@@ -119,7 +119,7 @@
       },
       "outputs": [],
       "source": [
-        "local_demo = True\n\n# by default on CPU target will execute.\n# select 'cpu', 'opencl' and 'vulkan'\ntest_target = \"cpu\"\n\n# Change target configuration.\n# Run `adb shell cat /proc/cpuinfo` to find the arch.\narch = \"arm64\"\ntarget = \"llvm -mtriple=%s-linux-android\" % arch\ntarget_host = None\n\nif local_demo:\n    target_host = None\n    target = \"llvm\"\nelif test_target == \"opencl\":\n    target_host = target\n    target = \"opencl\"\nelif test_target == \"vulkan\" [...]
+        "local_demo = True\n\n# by default on CPU target will execute.\n# select 'cpu', 'opencl' and 'vulkan'\ntest_target = \"cpu\"\n\n# Change target configuration.\n# Run `adb shell cat /proc/cpuinfo` to find the arch.\narch = \"arm64\"\ntarget = \"llvm -mtriple=%s-linux-android\" % arch\ntarget_host = None\n\nif local_demo:\n    target_host = None\n    target = \"llvm\"\nelif test_target == \"opencl\":\n    target_host = target\n    target = \"opencl\"\nelif test_target == \"vulkan\" [...]
       ]
     },
     {
@@ -182,7 +182,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.6.12"
+      "version": "3.6.10"
     }
   },
   "nbformat": 4,
diff --git a/docs/_downloads/3961fdfa7abff1b6dc996faa43b4c40f/deploy_model_on_android.py b/docs/_downloads/3961fdfa7abff1b6dc996faa43b4c40f/deploy_model_on_android.py
index 35f989c..3bf55d9 100644
--- a/docs/_downloads/3961fdfa7abff1b6dc996faa43b4c40f/deploy_model_on_android.py
+++ b/docs/_downloads/3961fdfa7abff1b6dc996faa43b4c40f/deploy_model_on_android.py
@@ -34,7 +34,7 @@ import tvm
 from tvm import te
 import tvm.relay as relay
 from tvm import rpc
-from tvm.contrib import utils, ndk, graph_runtime as runtime
+from tvm.contrib import util, ndk, graph_runtime as runtime
 from tvm.contrib.download import download_testdata
 
 
@@ -106,7 +106,7 @@ from tvm.contrib.download import download_testdata
 # --------------------------------------
 # Now we can register our Android device to the tracker.
 #
-# Follow this `readme page <https://github.com/apache/incubator-tvm/tree/main/apps/android_rpc>`_ to
+# Follow this `readme page <https://github.com/apache/incubator-tvm/tree/master/apps/android_rpc>`_ to
 # install TVM RPC APK on the android device.
 #
 # Here is an example of config.mk. I enabled OpenCL and Vulkan.
@@ -139,7 +139,7 @@ from tvm.contrib.download import download_testdata
 #
 # .. note::
 #
-#   At this time, don't forget to `create a standalone toolchain <https://github.com/apache/incubator-tvm/tree/main/apps/android_rpc#architecture-and-android-standalone-toolchain>`_ .
+#   At this time, don't forget to `create a standalone toolchain <https://github.com/apache/incubator-tvm/tree/master/apps/android_rpc#architecture-and-android-standalone-toolchain>`_ .
 #
 #   for example
 #
@@ -206,7 +206,7 @@ keras_mobilenet_v2.load_weights(weights_path)
 ######################################################################
 # In order to test our model, here we download an image of cat and
 # transform its format.
-img_url = "https://github.com/dmlc/mxnet.js/blob/main/data/cat.png?raw=true"
+img_url = "https://github.com/dmlc/mxnet.js/blob/master/data/cat.png?raw=true"
 img_name = "cat.png"
 img_path = download_testdata(img_url, img_name, module="data")
 image = Image.open(img_path).resize((224, 224))
@@ -282,7 +282,7 @@ with tvm.transform.PassContext(opt_level=3):
 # change the parameters but keep the result of model as the same.
 
 # Save the library at local temporary directory.
-tmp = utils.tempdir()
+tmp = util.tempdir()
 lib_fname = tmp.relpath("net.so")
 fcompile = ndk.create_shared if not local_demo else None
 lib.export_library(lib_fname, fcompile)
diff --git a/docs/_downloads/440add54bfa6dfb4fa9ed5037187aa4c/opt_gemm.ipynb b/docs/_downloads/440add54bfa6dfb4fa9ed5037187aa4c/opt_gemm.ipynb
index 90cb47f..d664a74 100644
--- a/docs/_downloads/440add54bfa6dfb4fa9ed5037187aa4c/opt_gemm.ipynb
+++ b/docs/_downloads/440add54bfa6dfb4fa9ed5037187aa4c/opt_gemm.ipynb
@@ -166,7 +166,7 @@
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "Array Packing\n-------------\nAnother important trick is array packing. This trick is to reorder the storage dimension of the\narray to convert the continuous access pattern on certain dimension to a sequential pattern after\nflattening.\n\n![](https://github.com/dmlc/web-data/raw/main/tvm/tutorial/array-packing.png)\n\n     :align: center\n\n\n"
+        "Array Packing\n-------------\nAnother important trick is array packing. This trick is to reorder the storage dimension of the\narray to convert the continuous access pattern on certain dimension to a sequential pattern after\nflattening.\n\n![](https://github.com/dmlc/web-data/raw/master/tvm/tutorial/array-packing.png)\n\n     :align: center\n\n\n"
       ]
     },
     {
@@ -301,7 +301,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.6.12"
+      "version": "3.6.10"
     }
   },
   "nbformat": 4,
diff --git a/docs/_downloads/48bd751ebaae08fce134e559f86a25cc/tune_relay_vta.ipynb b/docs/_downloads/48bd751ebaae08fce134e559f86a25cc/tune_relay_vta.ipynb
index 9846b89..67f1eaa 100644
--- a/docs/_downloads/48bd751ebaae08fce134e559f86a25cc/tune_relay_vta.ipynb
+++ b/docs/_downloads/48bd751ebaae08fce134e559f86a25cc/tune_relay_vta.ipynb
@@ -33,7 +33,7 @@
       },
       "outputs": [],
       "source": [
-        "import os\nfrom mxnet.gluon.model_zoo import vision\nimport numpy as np\nfrom PIL import Image\n\nfrom tvm import topi\nimport tvm\nfrom tvm import te\nfrom tvm import rpc, autotvm, relay\nfrom tvm.contrib import graph_runtime, utils, download\nfrom tvm.autotvm.measure.measure_methods import request_remote\nfrom tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner\n\nimport vta\nfrom vta.testing import simulator\nfrom vta.top import graph_pack"
+        "import os\nfrom mxnet.gluon.model_zoo import vision\nimport numpy as np\nfrom PIL import Image\n\nfrom tvm import topi\nimport tvm\nfrom tvm import te\nfrom tvm import rpc, autotvm, relay\nfrom tvm.contrib import graph_runtime, util, download\nfrom tvm.autotvm.measure.measure_methods import request_remote\nfrom tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner\n\nimport vta\nfrom vta.testing import simulator\nfrom vta.top import graph_pack"
       ]
     },
     {
@@ -58,7 +58,7 @@
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "Start RPC Tracker\n-----------------\nTVM uses an RPC session to communicate with Pynq boards.\nDuring tuning, the tuner will send the generated code to the board and\nmeasure the speed of code on the board.\n\nTo scale up tuning, TVM uses an RPC Tracker to manage multiple devices.\nThe RPC Tracker is a centralized controller node. We can register all devices to\nthe tracker. For example, if we have 10 Pynq boards, we can register all of them\nto the tracker, and run 10 measurem [...]
+        "Start RPC Tracker\n-----------------\nTVM uses an RPC session to communicate with Pynq boards.\nDuring tuning, the tuner will send the generated code to the board and\nmeasure the speed of code on the board.\n\nTo scale up tuning, TVM uses an RPC Tracker to manage multiple devices.\nThe RPC Tracker is a centralized master node. We can register all devices to\nthe tracker. For example, if we have 10 Pynq boards, we can register all of them\nto the tracker, and run 10 measurements [...]
       ]
     },
     {
@@ -144,7 +144,7 @@
       },
       "outputs": [],
       "source": [
-        "def tune_and_evaluate(tuning_opt):\n\n    if env.TARGET != \"sim\":\n        # Get remote from fleet node\n        remote = autotvm.measure.request_remote(\n            env.TARGET, tracker_host, tracker_port, timeout=10000\n        )\n        # Reconfigure the JIT runtime and FPGA.\n        vta.reconfig_runtime(remote)\n        vta.program_fpga(remote, bitstream=None)\n    else:\n        # In simulation mode, host the RPC server locally.\n        remote = rpc.LocalSession()\n\n  [...]
+        "def tune_and_evaluate(tuning_opt):\n\n    if env.TARGET != \"sim\":\n        # Get remote from fleet node\n        remote = autotvm.measure.request_remote(\n            env.TARGET, tracker_host, tracker_port, timeout=10000\n        )\n        # Reconfigure the JIT runtime and FPGA.\n        vta.reconfig_runtime(remote)\n        vta.program_fpga(remote, bitstream=None)\n    else:\n        # In simulation mode, host the RPC server locally.\n        remote = rpc.LocalSession()\n\n  [...]
       ]
     },
     {
@@ -158,7 +158,7 @@
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "<div class=\"alert alert-info\"><h4>Note</h4><p>**Experiencing Difficulties?**\n\n  The auto tuning module is error-prone. If you always see \" 0.00/ 0.00 GFLOPS\",\n  then there must be something wrong.\n\n  First, make sure you set the correct configuration of your device.\n  Then, you can print debug information by adding these lines in the beginning\n  of the script. It will print every measurement result, where you can find useful\n  error messages.\n\n  .. code-block:: pyt [...]
+        "<div class=\"alert alert-info\"><h4>Note</h4><p>**Experiencing Difficulties?**\n\n  The auto tuning module is error-prone. If you always see \" 0.00/ 0.00 GFLOPS\",\n  then there must be something wrong.\n\n  First, make sure you set the correct configuration of your device.\n  Then, you can print debug information by adding these lines in the beginning\n  of the script. It will print every measurement result, where you can find useful\n  error messages.\n\n  .. code-block:: pyt [...]
       ]
     }
   ],
@@ -178,7 +178,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.6.12"
+      "version": "3.6.10"
     }
   },
   "nbformat": 4,
diff --git a/docs/_downloads/4dd41316d6ea7ff2b6993aab65428bf5/cross_compilation_and_rpc.ipynb b/docs/_downloads/4dd41316d6ea7ff2b6993aab65428bf5/cross_compilation_and_rpc.ipynb
index 0d6f59c..e5d7707 100644
--- a/docs/_downloads/4dd41316d6ea7ff2b6993aab65428bf5/cross_compilation_and_rpc.ipynb
+++ b/docs/_downloads/4dd41316d6ea7ff2b6993aab65428bf5/cross_compilation_and_rpc.ipynb
@@ -47,7 +47,7 @@
       },
       "outputs": [],
       "source": [
-        "import numpy as np\n\nimport tvm\nfrom tvm import te\nfrom tvm import rpc\nfrom tvm.contrib import utils\n\nn = tvm.runtime.convert(1024)\nA = te.placeholder((n,), name=\"A\")\nB = te.compute((n,), lambda i: A[i] + 1.0, name=\"B\")\ns = te.create_schedule(B.op)"
+        "import numpy as np\n\nimport tvm\nfrom tvm import te\nfrom tvm import rpc\nfrom tvm.contrib import util\n\nn = tvm.runtime.convert(1024)\nA = te.placeholder((n,), name=\"A\")\nB = te.compute((n,), lambda i: A[i] + 1.0, name=\"B\")\ns = te.create_schedule(B.op)"
       ]
     },
     {
@@ -65,7 +65,7 @@
       },
       "outputs": [],
       "source": [
-        "local_demo = True\n\nif local_demo:\n    target = \"llvm\"\nelse:\n    target = \"llvm -mtriple=armv7l-linux-gnueabihf\"\n\nfunc = tvm.build(s, [A, B], target=target, name=\"add_one\")\n# save the lib at a local temp folder\ntemp = utils.tempdir()\npath = temp.relpath(\"lib.tar\")\nfunc.export_library(path)"
+        "local_demo = True\n\nif local_demo:\n    target = \"llvm\"\nelse:\n    target = \"llvm -mtriple=armv7l-linux-gnueabihf\"\n\nfunc = tvm.build(s, [A, B], target=target, name=\"add_one\")\n# save the lib at a local temp folder\ntemp = util.tempdir()\npath = temp.relpath(\"lib.tar\")\nfunc.export_library(path)"
       ]
     },
     {
@@ -171,7 +171,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.6.12"
+      "version": "3.6.10"
     }
   },
   "nbformat": 4,
diff --git a/docs/_downloads/4e9540fc014621d8d3bd14869c1ab227/scan.ipynb b/docs/_downloads/4e9540fc014621d8d3bd14869c1ab227/scan.ipynb
index 2193c48..a6506cf 100644
--- a/docs/_downloads/4e9540fc014621d8d3bd14869c1ab227/scan.ipynb
+++ b/docs/_downloads/4e9540fc014621d8d3bd14869c1ab227/scan.ipynb
@@ -26,7 +26,7 @@
       },
       "outputs": [],
       "source": [
-        "from __future__ import absolute_import, print_function\n\nimport tvm\nimport tvm.testing\nfrom tvm import te\nimport numpy as np"
+        "from __future__ import absolute_import, print_function\n\nimport tvm\nfrom tvm import te\nimport numpy as np"
       ]
     },
     {
@@ -161,7 +161,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.6.12"
+      "version": "3.6.10"
     }
   },
   "nbformat": 4,
diff --git a/docs/_downloads/4f4a49a5483a0d0aa4af30f58c3c8664/deploy_quantized.ipynb b/docs/_downloads/4f4a49a5483a0d0aa4af30f58c3c8664/deploy_quantized.ipynb
index 94eabbc..fe29d13 100644
--- a/docs/_downloads/4f4a49a5483a0d0aa4af30f58c3c8664/deploy_quantized.ipynb
+++ b/docs/_downloads/4f4a49a5483a0d0aa4af30f58c3c8664/deploy_quantized.ipynb
@@ -15,7 +15,7 @@
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "\nDeploy a Quantized Model on Cuda\n================================\n**Author**: `Wuwei Lin <https://github.com/vinx13>`_\n\nThis article is an introductory tutorial of automatic quantization with TVM.\nAutomatic quantization is one of the quantization modes in TVM. More details on\nthe quantization story in TVM can be found\n`here <https://discuss.tvm.apache.org/t/quantization-story/3920>`_.\nIn this tutorial, we will import a GluonCV pre-trained model on ImageNet to\nRelay, q [...]
+        "\nDeploy a Quantized Model on Cuda\n================================\n**Author**: `Wuwei Lin <https://github.com/vinx13>`_\n\nThis article is an introductory tutorial of automatic quantization with TVM.\nAutomatic quantization is one of the quantization modes in TVM. More details on\nthe quantization story in TVM can be found\n`here <https://discuss.tvm.ai/t/quantization-story/3920>`_.\nIn this tutorial, we will import a GluonCV pre-trained model on ImageNet to\nRelay, quantize  [...]
       ]
     },
     {
@@ -136,7 +136,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.6.12"
+      "version": "3.6.10"
     }
   },
   "nbformat": 4,
diff --git a/docs/_downloads/50b174352ccf0a0defcbd8e6b40145e2/from_tensorflow.py b/docs/_downloads/50b174352ccf0a0defcbd8e6b40145e2/from_tensorflow.py
index 5cdc395..a3e8173 100644
--- a/docs/_downloads/50b174352ccf0a0defcbd8e6b40145e2/from_tensorflow.py
+++ b/docs/_downloads/50b174352ccf0a0defcbd8e6b40145e2/from_tensorflow.py
@@ -45,7 +45,7 @@ except ImportError:
 import tvm.relay.testing.tf as tf_testing
 
 # Base location for model related files.
-repo_base = "https://github.com/dmlc/web-data/raw/main/tensorflow/models/InceptionV1/"
+repo_base = "https://github.com/dmlc/web-data/raw/master/tensorflow/models/InceptionV1/"
 
 # Test image
 img_name = "elephant-299.jpg"
diff --git a/docs/_downloads/578004d7db54caef0007609ae5540c72/intro_topi.ipynb b/docs/_downloads/578004d7db54caef0007609ae5540c72/intro_topi.ipynb
index c027c43..d2bf573 100644
--- a/docs/_downloads/578004d7db54caef0007609ae5540c72/intro_topi.ipynb
+++ b/docs/_downloads/578004d7db54caef0007609ae5540c72/intro_topi.ipynb
@@ -26,7 +26,7 @@
       },
       "outputs": [],
       "source": [
-        "from __future__ import absolute_import, print_function\n\nimport tvm\nimport tvm.testing\nfrom tvm import te\nfrom tvm import topi\nimport numpy as np"
+        "from __future__ import absolute_import, print_function\n\nimport tvm\nfrom tvm import te\nfrom tvm import topi\nimport numpy as np"
       ]
     },
     {
@@ -222,7 +222,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.6.12"
+      "version": "3.6.10"
     }
   },
   "nbformat": 4,
diff --git a/docs/_downloads/5b32f1dc3e9e2fc5ac5be0918758b967/deploy_quantized.py b/docs/_downloads/5b32f1dc3e9e2fc5ac5be0918758b967/deploy_quantized.py
index e75f6e9..093bd73 100644
--- a/docs/_downloads/5b32f1dc3e9e2fc5ac5be0918758b967/deploy_quantized.py
+++ b/docs/_downloads/5b32f1dc3e9e2fc5ac5be0918758b967/deploy_quantized.py
@@ -22,7 +22,7 @@ Deploy a Quantized Model on Cuda
 This article is an introductory tutorial of automatic quantization with TVM.
 Automatic quantization is one of the quantization modes in TVM. More details on
 the quantization story in TVM can be found
-`here <https://discuss.tvm.apache.org/t/quantization-story/3920>`_.
+`here <https://discuss.tvm.ai/t/quantization-story/3920>`_.
 In this tutorial, we will import a GluonCV pre-trained model on ImageNet to
 Relay, quantize the Relay model and then perform the inference.
 """
diff --git a/docs/_downloads/5bd1bb9c6505ea40407fa19f01579414/reduction.py b/docs/_downloads/5bd1bb9c6505ea40407fa19f01579414/reduction.py
index cffa10e..ecefc28 100644
--- a/docs/_downloads/5bd1bb9c6505ea40407fa19f01579414/reduction.py
+++ b/docs/_downloads/5bd1bb9c6505ea40407fa19f01579414/reduction.py
@@ -28,7 +28,6 @@ In this tutorial, we will demonstrate how to do reduction in TVM.
 from __future__ import absolute_import, print_function
 
 import tvm
-import tvm.testing
 from tvm import te
 import numpy as np
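The reduction pattern this tutorial covers, including the `rfactor` primitive it introduces, in a self-contained sketch:

.. code-block:: python

    import tvm
    from tvm import te

    n = te.var("n")
    m = te.var("m")
    A = te.placeholder((n, m), name="A")
    k = te.reduce_axis((0, m), "k")
    B = te.compute((n,), lambda i: te.sum(A[i, k], axis=k), name="B")

    s = te.create_schedule(B.op)
    # Split the reduction axis, then rfactor to expose parallel partial sums
    ko, ki = s[B].split(B.op.reduce_axis[0], factor=16)
    BF = s.rfactor(B, ki)
    print(tvm.lower(s, [A, B], simple_mode=True))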
 
diff --git a/docs/_downloads/5c443f88ea44ce77c5ccade429af6e74/deploy_prequantized_tflite.py b/docs/_downloads/5c443f88ea44ce77c5ccade429af6e74/deploy_prequantized_tflite.py
index 121ad9d..52321b1 100644
--- a/docs/_downloads/5c443f88ea44ce77c5ccade429af6e74/deploy_prequantized_tflite.py
+++ b/docs/_downloads/5c443f88ea44ce77c5ccade429af6e74/deploy_prequantized_tflite.py
@@ -101,7 +101,7 @@ extract(model_path)
 def get_real_image(im_height, im_width):
     from PIL import Image
 
-    repo_base = "https://github.com/dmlc/web-data/raw/main/tensorflow/models/InceptionV1/"
+    repo_base = "https://github.com/dmlc/web-data/raw/master/tensorflow/models/InceptionV1/"
     img_name = "elephant-299.jpg"
     image_url = os.path.join(repo_base, img_name)
     img_path = download_testdata(image_url, img_name, module="data")
diff --git a/docs/_downloads/5df1a8bfe653027789c10728e74a65c0/intrin_math.ipynb b/docs/_downloads/5df1a8bfe653027789c10728e74a65c0/intrin_math.ipynb
index 9fec5f6..15d5c32 100644
--- a/docs/_downloads/5df1a8bfe653027789c10728e74a65c0/intrin_math.ipynb
+++ b/docs/_downloads/5df1a8bfe653027789c10728e74a65c0/intrin_math.ipynb
@@ -161,7 +161,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.6.12"
+      "version": "3.6.10"
     }
   },
   "nbformat": 4,
diff --git a/docs/_downloads/612f9e42b0247df5c8ab277534e2af65/tune_relay_vta.py b/docs/_downloads/612f9e42b0247df5c8ab277534e2af65/tune_relay_vta.py
index 7f04424..41fd04e 100644
--- a/docs/_downloads/612f9e42b0247df5c8ab277534e2af65/tune_relay_vta.py
+++ b/docs/_downloads/612f9e42b0247df5c8ab277534e2af65/tune_relay_vta.py
@@ -62,7 +62,7 @@ from tvm import topi
 import tvm
 from tvm import te
 from tvm import rpc, autotvm, relay
-from tvm.contrib import graph_runtime, utils, download
+from tvm.contrib import graph_runtime, util, download
 from tvm.autotvm.measure.measure_methods import request_remote
 from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner
 
@@ -119,7 +119,7 @@ def compile_network(env, target, model, start_pack, stop_pack):
 # measure the speed of code on the board.
 #
 # To scale up tuning, TVM uses an RPC Tracker to manage multiple devices.
-# The RPC Tracker is a centralized controller node. We can register all devices to
+# The RPC Tracker is a centralized master node. We can register all devices to
 # the tracker. For example, if we have 10 Pynq boards, we can register all of them
 # to the tracker, and run 10 measurements in parallel, accelerating the tuning process.
 #
@@ -424,7 +424,7 @@ def tune_and_evaluate(tuning_opt):
 
         # Export library
         print("Upload...")
-        temp = utils.tempdir()
+        temp = util.tempdir()
         lib.save(temp.relpath("graphlib.o"))
         remote.upload(temp.relpath("graphlib.o"))
         lib = remote.load_module("graphlib.o")
@@ -507,4 +507,4 @@ tune_and_evaluate(tuning_option)
 #      import logging
 #      logging.getLogger('autotvm').setLevel(logging.DEBUG)
 #
-#   Finally, always feel free to ask our community for help on https://discuss.tvm.apache.org
+#   Finally, always feel free to ask our community for help on https://discuss.tvm.ai
diff --git a/docs/_downloads/64a7765a4ac55f228cf82b8462944a61/schedule_primitives.ipynb b/docs/_downloads/64a7765a4ac55f228cf82b8462944a61/schedule_primitives.ipynb
index 0b6fc25..2386da3 100644
--- a/docs/_downloads/64a7765a4ac55f228cf82b8462944a61/schedule_primitives.ipynb
+++ b/docs/_downloads/64a7765a4ac55f228cf82b8462944a61/schedule_primitives.ipynb
@@ -276,7 +276,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.6.12"
+      "version": "3.6.10"
     }
   },
   "nbformat": 4,
diff --git a/docs/_downloads/65bd9927a152de6eed3444185b24287f/tensorize.ipynb b/docs/_downloads/65bd9927a152de6eed3444185b24287f/tensorize.ipynb
index da58016..87eb568 100644
--- a/docs/_downloads/65bd9927a152de6eed3444185b24287f/tensorize.ipynb
+++ b/docs/_downloads/65bd9927a152de6eed3444185b24287f/tensorize.ipynb
@@ -116,7 +116,7 @@
       },
       "outputs": [],
       "source": [
-        "def gemv_impl():\n    cc_code = \"\"\"\n      extern \"C\" int gemv_update(float *cc, float *aa, float *bb, int m, int l, int stride) {\n        for (int i = 0; i < m; ++i) {\n            for (int j = 0; j < l; ++j) {\n                cc[i] += aa[j] * bb[i * stride + j];\n            }\n        }\n        return 0;\n      }\n    \"\"\"\n    from tvm.contrib import utils, clang\n\n    temp = utils.tempdir()\n    ll_path = temp.relpath(\"temp.ll\")\n    # Create LLVM ir from c sou [...]
+        "def gemv_impl():\n    cc_code = \"\"\"\n      extern \"C\" int gemv_update(float *cc, float *aa, float *bb, int m, int l, int stride) {\n        for (int i = 0; i < m; ++i) {\n            for (int j = 0; j < l; ++j) {\n                cc[i] += aa[j] * bb[i * stride + j];\n            }\n        }\n        return 0;\n      }\n    \"\"\"\n    from tvm.contrib import util, clang\n\n    temp = util.tempdir()\n    ll_path = temp.relpath(\"temp.ll\")\n    # Create LLVM ir from c sourc [...]
       ]
     },
     {
@@ -152,7 +152,7 @@
       },
       "outputs": [],
       "source": [
-        "func = tvm.build(s, [A, B, C], target=\"llvm\", name=\"gemv\")\n\nfrom tvm.topi.utils import get_const_tuple\n\ndtype = A.dtype\nctx = tvm.context(\"cpu\", 0)\na = np.random.uniform(size=get_const_tuple(A.shape)).astype(dtype)\nb = np.random.uniform(size=get_const_tuple(B.shape)).astype(dtype)\nc = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=dtype), ctx)\nfunc(tvm.nd.array(a, ctx), tvm.nd.array(b, ctx), c)\ntvm.testing.assert_allclose(c.asnumpy(), np.dot(a, b.T), rtol=1e-3)"
+        "func = tvm.build(s, [A, B, C], target=\"llvm\", name=\"gemv\")\n\nfrom tvm.topi.util import get_const_tuple\n\ndtype = A.dtype\nctx = tvm.context(\"cpu\", 0)\na = np.random.uniform(size=get_const_tuple(A.shape)).astype(dtype)\nb = np.random.uniform(size=get_const_tuple(B.shape)).astype(dtype)\nc = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=dtype), ctx)\nfunc(tvm.nd.array(a, ctx), tvm.nd.array(b, ctx), c)\ntvm.testing.assert_allclose(c.asnumpy(), np.dot(a, b.T), rtol=1e-3)"
       ]
     },
     {
@@ -188,7 +188,7 @@
       },
       "outputs": [],
       "source": [
-        "def gemv_impl():\n    cc_code = \"\"\"\n      extern \"C\" int gemv_update(float *cc, float *aa, float *bb, int m, int l, int stride) {\n        for (int i = 0; i < m; ++i) {\n            for (int j = 0; j < l; ++j) {\n                cc[i] += aa[j] * bb[i * stride + j];\n            }\n        }\n        return 0;\n      }\n      extern \"C\" int gemv_reset(float *cc, int m) {\n        for (int i = 0; i < m; ++i) {\n            cc[i] = 0.0;\n        }\n        return 0;\n       [...]
+        "def gemv_impl():\n    cc_code = \"\"\"\n      extern \"C\" int gemv_update(float *cc, float *aa, float *bb, int m, int l, int stride) {\n        for (int i = 0; i < m; ++i) {\n            for (int j = 0; j < l; ++j) {\n                cc[i] += aa[j] * bb[i * stride + j];\n            }\n        }\n        return 0;\n      }\n      extern \"C\" int gemv_reset(float *cc, int m) {\n        for (int i = 0; i < m; ++i) {\n            cc[i] = 0.0;\n        }\n        return 0;\n       [...]
       ]
     },
     {
@@ -233,7 +233,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.6.12"
+      "version": "3.6.10"
     }
   },
   "nbformat": 4,
diff --git a/docs/_downloads/6748a8fb7e82692825b259c20af8372a/opt_conv_cuda.ipynb b/docs/_downloads/6748a8fb7e82692825b259c20af8372a/opt_conv_cuda.ipynb
index a7716f2..f720e0a 100644
--- a/docs/_downloads/6748a8fb7e82692825b259c20af8372a/opt_conv_cuda.ipynb
+++ b/docs/_downloads/6748a8fb7e82692825b259c20af8372a/opt_conv_cuda.ipynb
@@ -40,7 +40,7 @@
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "Memory Hierarchy\n----------------\n\nWe first specify the memory hierarchy for buffers. The figure below shows the\nGPU memory hierarchy. One important difference from CPU memory hierarchy is\nthat GPU provides a cache buffer called shared memory, which is managed by\nprogrammers. Thus how to maximize the data reuse in the shared memory is\ncritical to achieve high performance in GPU kernels.\n\n![](https://github.com/dmlc/web-data/raw/main/tvm/tutorial/gpu_memory_hierarchy.png [...]
+        "Memory Hierarchy\n----------------\n\nWe first specify the memory hierarchy for buffers. The figure below shows the\nGPU memory hierarchy. One important difference from CPU memory hierarchy is\nthat GPU provides a cache buffer called shared memory, which is managed by\nprogrammers. Thus how to maximize the data reuse in the shared memory is\ncritical to achieve high performance in GPU kernels.\n\n![](https://github.com/dmlc/web-data/raw/master/tvm/tutorial/gpu_memory_hierarchy.p [...]
       ]
     },
     {
@@ -58,7 +58,7 @@
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "Blocking\n--------\n\nThe following code splits the workload into thread blocks and individual\nthreads. We follow the blocking scheme in the matrix multiply. As shown in the\nfigure below, given a pixel coordinate (y, x), a thread block is responsible\nfor computing a region of block_factor x block_factor (64 x 64) for output\nchannels and batch. Due to the limit of shared memory space, we only load step\nx block_factor (8 x 64) data from Apad and B each time to buffers in the\ [...]
+        "Blocking\n--------\n\nThe following code splits the workload into thread blocks and individual\nthreads. We follow the blocking scheme in the matrix multiply. As shown in the\nfigure below, given a pixel coordinate (y, x), a thread block is responsible\nfor computing a region of block_factor x block_factor (64 x 64) for output\nchannels and batch. Due to the limit of shared memory space, we only load step\nx block_factor (8 x 64) data from Apad and B each time to buffers in the\ [...]
       ]
     },
     {
@@ -76,7 +76,7 @@
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "Virtual Thread Split\n--------------------\n\nWe further split the workload from a thread block to individual threads. To\navoid *memory bank conflict*, we use virtual thread to split the area into 4\nparts, and then tile into 8x8 grids. Therefore, shown in the figure below,\neach thread computes 4 strided grids, where size of each grid is 4 x 4.\n\n![](https://github.com/dmlc/web-data/raw/main/tvm/tutorial/conv_gpu_vthread.png)\n\n     :align: center\n     :height: 188px\n      [...]
+        "Virtual Thread Split\n--------------------\n\nWe further split the workload from a thread block to individual threads. To\navoid *memory bank conflict*, we use virtual thread to split the area into 4\nparts, and then tile into 8x8 grids. Therefore, shown in the figure below,\neach thread computes 4 strided grids, where size of each grid is 4 x 4.\n\n![](https://github.com/dmlc/web-data/raw/master/tvm/tutorial/conv_gpu_vthread.png)\n\n     :align: center\n     :height: 188px\n    [...]
       ]
     },
     {
@@ -143,7 +143,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.6.12"
+      "version": "3.6.10"
     }
   },
   "nbformat": 4,
diff --git a/docs/_downloads/678f3c372a599a18d909aed0fefb30be/tune_conv2d_layer_cuda.py b/docs/_downloads/678f3c372a599a18d909aed0fefb30be/tune_conv2d_layer_cuda.py
index 42273bf..74b3775 100644
--- a/docs/_downloads/678f3c372a599a18d909aed0fefb30be/tune_conv2d_layer_cuda.py
+++ b/docs/_downloads/678f3c372a599a18d909aed0fefb30be/tune_conv2d_layer_cuda.py
@@ -25,22 +25,17 @@ Auto-scheduling a convolution layer for GPU
 
 Different from the existing :ref:`autotvm <tutorials-autotvm-sec>` which relies on 
 manual templates to define the search space, the auto-scheduler does not require any templates.
-Users only need to write the computation declaration without any schedule commands or templates.
-The auto-scheduler can automatically generate a large search space and
-find a good schedule in the space.
+The auto-scheduler is template-free, so users only need to write the computation declaration without
+any schedule commands or templates.
+The auto-scheduler can automatically generate a large
+search space and find a good schedule in the space.
 
 We use a convolution layer as an example in this tutorial.
-
-Note that this tutorial will not run on Windows or recent versions of macOS. To
-get it to run, you will need to wrap the body of this tutorial in a :code:`if
-__name__ == "__main__":` block.
 """
 
-import os
-
 import numpy as np
 import tvm
-from tvm import te, auto_scheduler, topi
+from tvm import te, testing, auto_scheduler, topi
 from tvm.topi.testing import conv2d_nchw_python
 
 ######################################################################
@@ -68,7 +63,7 @@ def conv2d_layer(N, H, W, CO, CI, KH, KW, stride, padding):
 
 target = tvm.target.Target("cuda")
 
-# Use the last layer in ResNet-50
+# the last layer in resnet
 N, H, W, CO, CI, KH, KW, strides, padding = 1, 7, 7, 512, 512, 3, 3, (1, 1), (1, 1)
 task = auto_scheduler.create_task(conv2d_layer, (N, H, W, CO, CI, KH, KW, strides, padding), target)
 
@@ -94,12 +89,11 @@ print(task.compute_dag)
 # * see :any:`auto_scheduler.TuningOptions`,
 #   :any:`auto_scheduler.LocalRPCMeasureContext` for more parameters.
 
-log_file = "conv2d.json"
 measure_ctx = auto_scheduler.LocalRPCMeasureContext(min_repeat_ms=300)
 tune_option = auto_scheduler.TuningOptions(
     num_measure_trials=10,
     runner=measure_ctx.runner,
-    measure_callbacks=[auto_scheduler.RecordToFile(log_file)],
+    measure_callbacks=[auto_scheduler.RecordToFile("conv2d.json")],
 )
 
 ######################################################################
@@ -111,9 +105,6 @@ tune_option = auto_scheduler.TuningOptions(
 
 sch, args = auto_scheduler.auto_schedule(task, tuning_options=tune_option)
 
-# Kill the process for measurement
-del measure_ctx
-
 ######################################################################
 # We can lower the schedule to see the IR after auto-scheduling.
 # The auto-scheduler correctly performs optimizations including multi-level tiling,
@@ -128,7 +119,7 @@ print(tvm.lower(sch, args, simple_mode=True))
 
 func = tvm.build(sch, args, target)
 
-# Check correctness
+# check correctness
 data_np = np.random.uniform(size=(N, CI, H, W)).astype(np.float32)
 weight_np = np.random.uniform(size=(CO, CI, KH, KW)).astype(np.float32)
 bias_np = np.random.uniform(size=(1, CO, 1, 1)).astype(np.float32)
@@ -143,7 +134,7 @@ out_tvm = tvm.nd.empty(out_np.shape, ctx=ctx)
 func(data_tvm, weight_tvm, bias_tvm, out_tvm)
 
 # Check results
-np.testing.assert_allclose(out_np, out_tvm.asnumpy(), rtol=1e-3)
+tvm.testing.assert_allclose(out_np, out_tvm.asnumpy(), rtol=1e-3)
 
 # Evaluate execution time
 evaluator = func.time_evaluator(func.entry_name, ctx, min_repeat_ms=500)
@@ -164,7 +155,7 @@ print(
 # print the equivalent python schedule API, and build the binary again.
 
 # Load the measurement record for the best schedule
-inp, res = auto_scheduler.load_best(log_file, task.workload_key)
+inp, res = auto_scheduler.load_best("conv2d.json", task.workload_key)
 
 # Print equivalent python schedule API. This can be used for debugging and
 # learning the behavior of the auto-scheduler.
@@ -183,12 +174,12 @@ func = tvm.build(sch, args, target)
 # In the example below we resume the search and do 5 more trials.
 
 
+log_file = "conv2d.json"
 cost_model = auto_scheduler.XGBModel()
 cost_model.update_from_file(log_file)
 search_policy = auto_scheduler.SketchPolicy(
     task, cost_model, init_search_callbacks=[auto_scheduler.PreloadMeasuredStates(log_file)]
 )
-measure_ctx = auto_scheduler.LocalRPCMeasureContext(min_repeat_ms=300)
 tune_option = auto_scheduler.TuningOptions(
     num_measure_trials=5,
     runner=measure_ctx.runner,
@@ -196,5 +187,5 @@ tune_option = auto_scheduler.TuningOptions(
 )
 sch, args = auto_scheduler.auto_schedule(task, search_policy, tuning_options=tune_option)
 
-# Kill the measurement process
+# kill the measurement process
 del measure_ctx
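Rebuilding a kernel from the tuning log, as the "Load the measurement record" step above does, takes only a few lines. A sketch reusing `task`, `target`, and the imports from this script:

.. code-block:: python

    # Replay the best schedule found during tuning and rebuild the kernel
    inp, res = auto_scheduler.load_best("conv2d.json", task.workload_key)
    sch, args = task.compute_dag.apply_steps_from_state(inp.state)
    func = tvm.build(sch, args, target)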
diff --git a/docs/_downloads/67c18c78b0f12c3be5dc41b22637d719/matrix_multiply_opt.py b/docs/_downloads/67c18c78b0f12c3be5dc41b22637d719/matrix_multiply_opt.py
index c9d1c13..28600d4 100644
--- a/docs/_downloads/67c18c78b0f12c3be5dc41b22637d719/matrix_multiply_opt.py
+++ b/docs/_downloads/67c18c78b0f12c3be5dc41b22637d719/matrix_multiply_opt.py
@@ -43,7 +43,7 @@ from tvm import te
 import vta
 import numpy as np
 from tvm import rpc
-from tvm.contrib import utils
+from tvm.contrib import util
 from vta.testing import simulator
 
 # Load VTA parameters from the 3rdparty/vta-hw/config/vta_config.json file
@@ -88,7 +88,7 @@ elif env.TARGET in ["sim", "tsim"]:
 # matrix multiplication followed by a rectified linear activation.
 # We describe the TVM dataflow graph of the fully connected layer below:
 #
-# .. image:: https://raw.githubusercontent.com/uwsampl/web-data/main/vta/tutorial/fc_dataflow.png
+# .. image:: https://raw.githubusercontent.com/uwsaml/web-data/master/vta/tutorial/fc_dataflow.png
 #      :align: center
 #
 # This computation is intentionally too large to fit onto VTA's on-chip
@@ -183,7 +183,7 @@ print(tvm.lower(s, [data, weight, res], simple_mode=True))
 # We show the outcome of blocking on the computation schedule in the diagram
 # below:
 #
-# .. image:: https://raw.githubusercontent.com/uwsampl/web-data/main/vta/tutorial/blocking.png
+# .. image:: https://raw.githubusercontent.com/uwsaml/web-data/master/vta/tutorial/blocking.png
 #      :align: center
 #      :width: 480px
 #
@@ -311,7 +311,7 @@ print(vta.lower(s, [data, weight, res], simple_mode=True))
 
 # Compile the TVM module
 my_gemm = vta.build(s, [data, weight, res], "ext_dev", env.target_host, name="my_gemm")
-temp = utils.tempdir()
+temp = util.tempdir()
 my_gemm.save(temp.relpath("gemm.o"))
 remote.upload(temp.relpath("gemm.o"))
 f = remote.load_module("gemm.o")
diff --git a/docs/_downloads/696dd37904ef92773435ca321ff41bfb/from_onnx.py b/docs/_downloads/696dd37904ef92773435ca321ff41bfb/from_onnx.py
index 1557ea5..e68a398 100644
--- a/docs/_downloads/696dd37904ef92773435ca321ff41bfb/from_onnx.py
+++ b/docs/_downloads/696dd37904ef92773435ca321ff41bfb/from_onnx.py
@@ -63,7 +63,7 @@ onnx_model = onnx.load(model_path)
 # A single cat dominates the examples!
 from PIL import Image
 
-img_url = "https://github.com/dmlc/mxnet.js/blob/main/data/cat.png?raw=true"
+img_url = "https://github.com/dmlc/mxnet.js/blob/master/data/cat.png?raw=true"
 img_path = download_testdata(img_url, "cat.png", module="data")
 img = Image.open(img_path).resize((224, 224))
 img_ycbcr = img.convert("YCbCr")  # convert to YCbCr
@@ -103,12 +103,3 @@ canvas[0:224, 0:224, :] = np.asarray(img)
 canvas[:, 672:, :] = np.asarray(result)
 plt.imshow(canvas.astype(np.uint8))
 plt.show()
-
-######################################################################
-# Notes
-# ---------------------------------------------
-# By default, ONNX defines models in terms of dynamic shapes. The ONNX importer
-# retains that dynamism upon import, and the compiler attempts to convert the model
-# into static shapes at compile time. If this fails, there may still be dynamic
-# operations in the model. Not all TVM kernels currently support dynamic shapes,
-# please file an issue on discuss.tvm.apache.org if you hit an error with dynamic kernels.
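For reference, the Relay import call this tutorial centers on is shown below; `onnx_model` and the preprocessed array `x` come from the script above, and the input name is model-specific ("1" is what this super-resolution model uses):

.. code-block:: python

    from tvm import relay

    # Map the model's input name to the shape of the preprocessed input
    shape_dict = {"1": x.shape}
    mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)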
diff --git a/docs/_downloads/6a91d98d4242322072303282a1f2de9c/relay_quick_start.py b/docs/_downloads/6a91d98d4242322072303282a1f2de9c/relay_quick_start.py
index 6da62f5..5c7f933 100644
--- a/docs/_downloads/6a91d98d4242322072303282a1f2de9c/relay_quick_start.py
+++ b/docs/_downloads/6a91d98d4242322072303282a1f2de9c/relay_quick_start.py
@@ -31,7 +31,7 @@ Notice that you need to build TVM with cuda and llvm enabled.
 # ----------------------------------------------
 # The image below shows hardware backend currently supported by TVM:
 #
-# .. image:: https://github.com/dmlc/web-data/raw/main/tvm/tutorial/tvm_support_list.png
+# .. image:: https://github.com/dmlc/web-data/raw/master/tvm/tutorial/tvm_support_list.png
 #      :align: center
 #
 # In this tutorial, we'll choose cuda and llvm as target backends.
@@ -129,9 +129,9 @@ print(out.flatten()[0:10])
 ####################################################
 
 # save the graph, lib and params into separate files
-from tvm.contrib import utils
+from tvm.contrib import util
 
-temp = utils.tempdir()
+temp = util.tempdir()
 path_lib = temp.relpath("deploy_lib.tar")
 lib.export_library(path_lib)
 print(temp.listdir())
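The counterpart to the export step above is loading the artifacts back and running them; a sketch assuming `path_lib` from this script and a CUDA device:

.. code-block:: python

    import numpy as np
    import tvm
    from tvm.contrib import graph_runtime

    # Load the compiled library back and run it on the GPU context
    loaded_lib = tvm.runtime.load_module(path_lib)
    ctx = tvm.gpu()
    module = graph_runtime.GraphModule(loaded_lib["default"](ctx))
    data = np.random.uniform(-1, 1, size=(1, 3, 224, 224)).astype("float32")
    module.run(data=data)
    out = module.get_output(0).asnumpy()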
diff --git a/docs/_downloads/6be1519353297beeea03fe17712dc16f/using_external_lib.ipynb b/docs/_downloads/6be1519353297beeea03fe17712dc16f/using_external_lib.ipynb
index 682f8cd..0806646 100644
--- a/docs/_downloads/6be1519353297beeea03fe17712dc16f/using_external_lib.ipynb
+++ b/docs/_downloads/6be1519353297beeea03fe17712dc16f/using_external_lib.ipynb
@@ -139,7 +139,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.6.12"
+      "version": "3.6.10"
     }
   },
   "nbformat": 4,
diff --git a/docs/_downloads/6c8a9d3bc4c689f8680a968349965ee5/from_pytorch.ipynb b/docs/_downloads/6c8a9d3bc4c689f8680a968349965ee5/from_pytorch.ipynb
index 6cb27ba..3023e84 100644
--- a/docs/_downloads/6c8a9d3bc4c689f8680a968349965ee5/from_pytorch.ipynb
+++ b/docs/_downloads/6c8a9d3bc4c689f8680a968349965ee5/from_pytorch.ipynb
@@ -15,7 +15,7 @@
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "\nCompile PyTorch Models\n======================\n**Author**: `Alex Wong <https://github.com/alexwong/>`_\n\nThis article is an introductory tutorial to deploy PyTorch models with Relay.\n\nFor us to begin with, PyTorch should be installed.\nTorchVision is also required since we will be using it as our model zoo.\n\nA quick solution is to install via pip\n\n.. code-block:: bash\n\n    pip install torch==1.7.0\n    pip install torchvision==0.8.1\n\nor please refer to official sit [...]
+        "\nCompile PyTorch Models\n======================\n**Author**: `Alex Wong <https://github.com/alexwong/>`_\n\nThis article is an introductory tutorial to deploy PyTorch models with Relay.\n\nFor us to begin with, PyTorch should be installed.\nTorchVision is also required since we will be using it as our model zoo.\n\nA quick solution is to install via pip\n\n.. code-block:: bash\n\n    pip install torch==1.4.0\n    pip install torchvision==0.5.0\n\nor please refer to official sit [...]
       ]
     },
     {
@@ -62,7 +62,7 @@
       },
       "outputs": [],
       "source": [
-        "from PIL import Image\n\nimg_url = \"https://github.com/dmlc/mxnet.js/blob/main/data/cat.png?raw=true\"\nimg_path = download_testdata(img_url, \"cat.png\", module=\"data\")\nimg = Image.open(img_path).resize((224, 224))\n\n# Preprocess the image and convert to tensor\nfrom torchvision import transforms\n\nmy_preprocess = transforms.Compose(\n    [\n        transforms.Resize(256),\n        transforms.CenterCrop(224),\n        transforms.ToTensor(),\n        transforms.Normalize(m [...]
+        "from PIL import Image\n\nimg_url = \"https://github.com/dmlc/mxnet.js/blob/master/data/cat.png?raw=true\"\nimg_path = download_testdata(img_url, \"cat.png\", module=\"data\")\nimg = Image.open(img_path).resize((224, 224))\n\n# Preprocess the image and convert to tensor\nfrom torchvision import transforms\n\nmy_preprocess = transforms.Compose(\n    [\n        transforms.Resize(256),\n        transforms.CenterCrop(224),\n        transforms.ToTensor(),\n        transforms.Normalize [...]
       ]
     },
     {
@@ -154,7 +154,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.6.12"
+      "version": "3.6.10"
     }
   },
   "nbformat": 4,
diff --git a/docs/_downloads/70a0767409e81bb5aaa9ce4e7a151dec/tensor_expr_get_started.ipynb b/docs/_downloads/70a0767409e81bb5aaa9ce4e7a151dec/tensor_expr_get_started.ipynb
index 6032e06..03e14a2 100644
--- a/docs/_downloads/70a0767409e81bb5aaa9ce4e7a151dec/tensor_expr_get_started.ipynb
+++ b/docs/_downloads/70a0767409e81bb5aaa9ce4e7a151dec/tensor_expr_get_started.ipynb
@@ -26,7 +26,7 @@
       },
       "outputs": [],
       "source": [
-        "from __future__ import absolute_import, print_function\n\nimport tvm\nimport tvm.testing\nfrom tvm import te\nimport numpy as np\n\n# Global declarations of environment.\n\ntgt_host = \"llvm\"\n# Change it to respective GPU if gpu is enabled Ex: cuda, opencl, rocm\ntgt = \"cuda\""
+        "from __future__ import absolute_import, print_function\n\nimport tvm\nfrom tvm import te\nimport numpy as np\n\n# Global declarations of environment.\n\ntgt_host = \"llvm\"\n# Change it to respective GPU if gpu is enabled Ex: cuda, opencl, rocm\ntgt = \"cuda\""
       ]
     },
     {
@@ -184,7 +184,7 @@
       },
       "outputs": [],
       "source": [
-        "from tvm.contrib import cc\nfrom tvm.contrib import utils\n\ntemp = utils.tempdir()\nfadd.save(temp.relpath(\"myadd.o\"))\nif tgt == \"cuda\":\n    fadd.imported_modules[0].save(temp.relpath(\"myadd.ptx\"))\nif tgt == \"rocm\":\n    fadd.imported_modules[0].save(temp.relpath(\"myadd.hsaco\"))\nif tgt.startswith(\"opencl\"):\n    fadd.imported_modules[0].save(temp.relpath(\"myadd.cl\"))\ncc.create_shared(temp.relpath(\"myadd.so\"), [temp.relpath(\"myadd.o\")])\nprint(temp.listdir())"
+        "from tvm.contrib import cc\nfrom tvm.contrib import util\n\ntemp = util.tempdir()\nfadd.save(temp.relpath(\"myadd.o\"))\nif tgt == \"cuda\":\n    fadd.imported_modules[0].save(temp.relpath(\"myadd.ptx\"))\nif tgt == \"rocm\":\n    fadd.imported_modules[0].save(temp.relpath(\"myadd.hsaco\"))\nif tgt.startswith(\"opencl\"):\n    fadd.imported_modules[0].save(temp.relpath(\"myadd.cl\"))\ncc.create_shared(temp.relpath(\"myadd.so\"), [temp.relpath(\"myadd.o\")])\nprint(temp.listdir())"
       ]
     },
     {
@@ -279,7 +279,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.6.12"
+      "version": "3.6.10"
     }
   },
   "nbformat": 4,
diff --git a/docs/_downloads/70d345c5409f99cb5de9dc44f147ff6f/build_gcn.py b/docs/_downloads/70d345c5409f99cb5de9dc44f147ff6f/build_gcn.py
index b832d18..5c571ef 100644
--- a/docs/_downloads/70d345c5409f99cb5de9dc44f147ff6f/build_gcn.py
+++ b/docs/_downloads/70d345c5409f99cb5de9dc44f147ff6f/build_gcn.py
@@ -242,9 +242,7 @@ import networkx as nx
 
 def prepare_params(g, data):
     params = {}
-    params["infeats"] = data.features.numpy().astype(
-        "float32"
-    )  # Only support float32 as feature for now
+    params["infeats"] = data.features.astype("float32")  # Only support float32 as feature for now
 
     # Generate adjacency matrix
     adjacency = nx.to_scipy_sparse_matrix(g)
@@ -352,7 +350,5 @@ test_mask = data.test_mask
 acc = evaluate(data, logits_tvm)
 print("Test accuracy of TVM results: {:.2%}".format(acc))
 
-import tvm.testing
-
 # Verify the results with the DGL model
 tvm.testing.assert_allclose(logits_torch, logits_tvm, atol=1e-3)
diff --git a/docs/_downloads/72871483681951fd0400ddc905113f11/from_caffe2.py b/docs/_downloads/72871483681951fd0400ddc905113f11/from_caffe2.py
index 34581c6..4f6f647 100644
--- a/docs/_downloads/72871483681951fd0400ddc905113f11/from_caffe2.py
+++ b/docs/_downloads/72871483681951fd0400ddc905113f11/from_caffe2.py
@@ -61,7 +61,7 @@ from PIL import Image
 from matplotlib import pyplot as plt
 import numpy as np
 
-img_url = "https://github.com/dmlc/mxnet.js/blob/main/data/cat.png?raw=true"
+img_url = "https://github.com/dmlc/mxnet.js/blob/master/data/cat.png?raw=true"
 img_path = download_testdata(img_url, "cat.png", module="data")
 img = Image.open(img_path).resize((224, 224))
 plt.imshow(img)
diff --git a/docs/_downloads/739deb9ab034a5315ce6ba6bf7e5ff44/tune_relay_cuda.ipynb b/docs/_downloads/739deb9ab034a5315ce6ba6bf7e5ff44/tune_relay_cuda.ipynb
index 8fb25bf..59311bb 100644
--- a/docs/_downloads/739deb9ab034a5315ce6ba6bf7e5ff44/tune_relay_cuda.ipynb
+++ b/docs/_downloads/739deb9ab034a5315ce6ba6bf7e5ff44/tune_relay_cuda.ipynb
@@ -15,7 +15,7 @@
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "\nAuto-tuning a convolutional network for NVIDIA GPU\n==================================================\n**Author**: `Lianmin Zheng <https://github.com/merrymercy>`_, `Eddie Yan <https://github.com/eqy/>`_\n\nAuto-tuning for specific devices and workloads is critical for getting the\nbest performance. This is a tutorial on how to tune a whole convolutional\nnetwork for NVIDIA GPU.\n\nThe operator implementation for NVIDIA GPU in TVM is written in template form.\nThe template ha [...]
+        "\nAuto-tuning a convolutional network for NVIDIA GPU\n==================================================\n**Author**: `Lianmin Zheng <https://github.com/merrymercy>`_, `Eddie Yan <https://github.com/eqy/>`_\n\nAuto-tuning for specific devices and workloads is critical for getting the\nbest performance. This is a tutorial on how to tune a whole convolutional\nnetwork for NVIDIA GPU.\n\nThe operator implementation for NVIDIA GPU in TVM is written in template form.\nThe template ha [...]
       ]
     },
     {
@@ -33,7 +33,7 @@
       },
       "outputs": [],
       "source": [
-        "import os\n\nimport numpy as np\n\nimport tvm\nfrom tvm import te\nfrom tvm import autotvm\nfrom tvm import relay\nimport tvm.relay.testing\nfrom tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner\nfrom tvm.contrib.utils import tempdir\nimport tvm.contrib.graph_runtime as runtime"
+        "import os\n\nimport numpy as np\n\nimport tvm\nfrom tvm import te\nfrom tvm import autotvm\nfrom tvm import relay\nimport tvm.relay.testing\nfrom tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner\nfrom tvm.contrib.util import tempdir\nimport tvm.contrib.graph_runtime as runtime"
       ]
     },
     {
@@ -126,14 +126,14 @@
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "<div class=\"alert alert-info\"><h4>Note</h4><p>**Experiencing Difficulties?**\n\n  The auto tuning module is error-prone. If you always see \" 0.00/ 0.00 GFLOPS\",\n  then there must be something wrong.\n\n  First, make sure you set the correct configuration of your device.\n  Then, you can print debug information by adding these lines in the beginning\n  of the script. It will print every measurement result, where you can find useful\n  error messages.\n\n  .. code-block:: pyt [...]
+        "<div class=\"alert alert-info\"><h4>Note</h4><p>**Experiencing Difficulties?**\n\n  The auto tuning module is error-prone. If you always see \" 0.00/ 0.00 GFLOPS\",\n  then there must be something wrong.\n\n  First, make sure you set the correct configuration of your device.\n  Then, you can print debug information by adding these lines in the beginning\n  of the script. It will print every measurement result, where you can find useful\n  error messages.\n\n  .. code-block:: pyt [...]
       ]
     },
     {
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "Scale up measurement by using multiple devices\n----------------------------------------------\n\nIf you have multiple devices, you can use all of them for measurement.\nTVM uses the RPC Tracker to manage distributed devices.\nThe RPC Tracker is a centralized controller node. We can register all devices to\nthe tracker. For example, if we have 10 GPU cards, we can register all of them\nto the tracker, and run 10 measurements in parallel, accelerating the tuning process.\n\nTo st [...]
+        "Scale up measurement by using multiple devices\n----------------------------------------------\n\nIf you have multiple devices, you can use all of them for measurement.\nTVM uses the RPC Tracker to manage distributed devices.\nThe RPC Tracker is a centralized master node. We can register all devices to\nthe tracker. For example, if we have 10 GPU cards, we can register all of them\nto the tracker, and run 10 measurements in parallel, accelerating the tuning process.\n\nTo start  [...]
       ]
     },
     {
@@ -164,7 +164,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.6.12"
+      "version": "3.6.10"
     }
   },
   "nbformat": 4,
diff --git a/docs/_downloads/78da213eae381b8ff94cc356ee7c5423/deploy_prequantized.ipynb b/docs/_downloads/78da213eae381b8ff94cc356ee7c5423/deploy_prequantized.ipynb
index d7017cc..c508906 100644
--- a/docs/_downloads/78da213eae381b8ff94cc356ee7c5423/deploy_prequantized.ipynb
+++ b/docs/_downloads/78da213eae381b8ff94cc356ee7c5423/deploy_prequantized.ipynb
@@ -15,7 +15,7 @@
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "\nDeploy a Framework-prequantized Model with TVM\n==============================================\n**Author**: `Masahiro Masuda <https://github.com/masahi>`_\n\nThis is a tutorial on loading models quantized by deep learning frameworks into TVM.\nPre-quantized model import is one of the quantization support we have in TVM. More details on\nthe quantization story in TVM can be found\n`here <https://discuss.tvm.apache.org/t/quantization-story/3920>`_.\n\nHere, we demonstrate how to [...]
+        "\nDeploy a Framework-prequantized Model with TVM\n==============================================\n**Author**: `Masahiro Masuda <https://github.com/masahi>`_\n\nThis is a tutorial on loading models quantized by deep learning frameworks into TVM.\nPre-quantized model import is one of the quantization support we have in TVM. More details on\nthe quantization story in TVM can be found\n`here <https://discuss.tvm.ai/t/quantization-story/3920>`_.\n\nHere, we demonstrate how to load an [...]
       ]
     },
     {
@@ -51,7 +51,7 @@
       },
       "outputs": [],
       "source": [
-        "def get_transform():\n    import torchvision.transforms as transforms\n\n    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])\n    return transforms.Compose(\n        [\n            transforms.Resize(256),\n            transforms.CenterCrop(224),\n            transforms.ToTensor(),\n            normalize,\n        ]\n    )\n\n\ndef get_real_image(im_height, im_width):\n    img_url = \"https://github.com/dmlc/mxnet.js/blob/main/data/cat.png? [...]
+        "def get_transform():\n    import torchvision.transforms as transforms\n\n    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])\n    return transforms.Compose(\n        [\n            transforms.Resize(256),\n            transforms.CenterCrop(224),\n            transforms.ToTensor(),\n            normalize,\n        ]\n    )\n\n\ndef get_real_image(im_height, im_width):\n    img_url = \"https://github.com/dmlc/mxnet.js/blob/master/data/cat.pn [...]
       ]
     },
     {
@@ -279,7 +279,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.6.12"
+      "version": "3.6.10"
     }
   },
   "nbformat": 4,
diff --git a/docs/_downloads/8246644805c8dfcb0b33ca356cc1fafc/deploy_ssd_gluoncv.ipynb b/docs/_downloads/8246644805c8dfcb0b33ca356cc1fafc/deploy_ssd_gluoncv.ipynb
index fdeded2..17b24fd 100644
--- a/docs/_downloads/8246644805c8dfcb0b33ca356cc1fafc/deploy_ssd_gluoncv.ipynb
+++ b/docs/_downloads/8246644805c8dfcb0b33ca356cc1fafc/deploy_ssd_gluoncv.ipynb
@@ -62,7 +62,7 @@
       },
       "outputs": [],
       "source": [
-        "im_fname = download_testdata(\n    \"https://github.com/dmlc/web-data/blob/main/\" + \"gluoncv/detection/street_small.jpg?raw=true\",\n    \"street_small.jpg\",\n    module=\"data\",\n)\nx, img = data.transforms.presets.ssd.load_test(im_fname, short=512)"
+        "im_fname = download_testdata(\n    \"https://github.com/dmlc/web-data/blob/master/\" + \"gluoncv/detection/street_small.jpg?raw=true\",\n    \"street_small.jpg\",\n    module=\"data\",\n)\nx, img = data.transforms.presets.ssd.load_test(im_fname, short=512)"
       ]
     },
     {
@@ -136,7 +136,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.6.12"
+      "version": "3.6.10"
     }
   },
   "nbformat": 4,
diff --git a/docs/_downloads/835a4def1e256b7a1f711621fc031418/from_darknet.ipynb b/docs/_downloads/835a4def1e256b7a1f711621fc031418/from_darknet.ipynb
index c19a0fa..8e154ef 100644
--- a/docs/_downloads/835a4def1e256b7a1f711621fc031418/from_darknet.ipynb
+++ b/docs/_downloads/835a4def1e256b7a1f711621fc031418/from_darknet.ipynb
@@ -62,7 +62,7 @@
       },
       "outputs": [],
       "source": [
-        "CFG_NAME = MODEL_NAME + \".cfg\"\nWEIGHTS_NAME = MODEL_NAME + \".weights\"\nREPO_URL = \"https://github.com/dmlc/web-data/blob/main/darknet/\"\nCFG_URL = REPO_URL + \"cfg/\" + CFG_NAME + \"?raw=true\"\nWEIGHTS_URL = \"https://pjreddie.com/media/files/\" + WEIGHTS_NAME\n\ncfg_path = download_testdata(CFG_URL, CFG_NAME, module=\"darknet\")\nweights_path = download_testdata(WEIGHTS_URL, WEIGHTS_NAME, module=\"darknet\")\n\n# Download and Load darknet library\nif sys.platform in [\" [...]
+        "CFG_NAME = MODEL_NAME + \".cfg\"\nWEIGHTS_NAME = MODEL_NAME + \".weights\"\nREPO_URL = \"https://github.com/dmlc/web-data/blob/master/darknet/\"\nCFG_URL = REPO_URL + \"cfg/\" + CFG_NAME + \"?raw=true\"\nWEIGHTS_URL = \"https://pjreddie.com/media/files/\" + WEIGHTS_NAME\n\ncfg_path = download_testdata(CFG_URL, CFG_NAME, module=\"darknet\")\nweights_path = download_testdata(WEIGHTS_URL, WEIGHTS_NAME, module=\"darknet\")\n\n# Download and Load darknet library\nif sys.platform in [ [...]
       ]
     },
     {
@@ -116,7 +116,7 @@
       },
       "outputs": [],
       "source": [
-        "from tvm.contrib import graph_runtime\n\nm = graph_runtime.GraphModule(lib[\"default\"](ctx))\n\n# set inputs\nm.set_input(\"data\", tvm.nd.array(data.astype(dtype)))\n# execute\nprint(\"Running the test image...\")\n\n# detection\n# thresholds\nthresh = 0.5\nnms_thresh = 0.45\n\nm.run()\n# get outputs\ntvm_out = []\nif MODEL_NAME == \"yolov2\":\n    layer_out = {}\n    layer_out[\"type\"] = \"Region\"\n    # Get the region layer attributes (n, out_c, out_h, out_w, classes, coor [...]
+        "from tvm.contrib import graph_runtime\n\nm = graph_runtime.GraphModule(lib[\"default\"](ctx))\n\n# set inputs\nm.set_input(\"data\", tvm.nd.array(data.astype(dtype)))\n# execute\nprint(\"Running the test image...\")\n\n# detection\n# thresholds\nthresh = 0.5\nnms_thresh = 0.45\n\nm.run()\n# get outputs\ntvm_out = []\nif MODEL_NAME == \"yolov2\":\n    layer_out = {}\n    layer_out[\"type\"] = \"Region\"\n    # Get the region layer attributes (n, out_c, out_h, out_w, classes, coor [...]
       ]
     }
   ],
@@ -136,7 +136,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.6.12"
+      "version": "3.6.10"
     }
   },
   "nbformat": 4,
diff --git a/docs/_downloads/836dc3852acf09662e9eb37c4c5e1e1b/opt_gemm.py b/docs/_downloads/836dc3852acf09662e9eb37c4c5e1e1b/opt_gemm.py
index 971269d..ead6660 100644
--- a/docs/_downloads/836dc3852acf09662e9eb37c4c5e1e1b/opt_gemm.py
+++ b/docs/_downloads/836dc3852acf09662e9eb37c4c5e1e1b/opt_gemm.py
@@ -231,7 +231,7 @@ print(tvm.lower(s, [A, B, C], simple_mode=True))
 # array to convert the continuous access pattern on certain dimension to a sequential pattern after
 # flattening.
 #
-# .. image:: https://github.com/dmlc/web-data/raw/main/tvm/tutorial/array-packing.png
+# .. image:: https://github.com/dmlc/web-data/raw/master/tvm/tutorial/array-packing.png
 #      :align: center
 #
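The array-packing layout described above, written out as a self-contained sketch: B is repacked so that the innermost `bn`-wide dimension is contiguous after flattening.

.. code-block:: python

    import tvm
    from tvm import te

    M = K = N = 1024
    bn = 32
    A = te.placeholder((M, K), name="A")
    B = te.placeholder((K, N), name="B")
    k = te.reduce_axis((0, K), name="k")

    # Repack B as [N/bn, K, bn] so consecutive j-indices hit consecutive memory
    packedB = te.compute((N // bn, K, bn), lambda x, y, z: B[y, x * bn + z], name="packedB")
    C = te.compute(
        (M, N),
        lambda x, y: te.sum(A[x, k] * packedB[y // bn, k, tvm.tir.indexmod(y, bn)], axis=k),
        name="C",
    )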
 
diff --git a/docs/_downloads/83dedc6352b4016772e17480ef01345d/deploy_model_on_rasp.py b/docs/_downloads/83dedc6352b4016772e17480ef01345d/deploy_model_on_rasp.py
index 3687991..c6e2d8f 100644
--- a/docs/_downloads/83dedc6352b4016772e17480ef01345d/deploy_model_on_rasp.py
+++ b/docs/_downloads/83dedc6352b4016772e17480ef01345d/deploy_model_on_rasp.py
@@ -30,7 +30,7 @@ import tvm
 from tvm import te
 import tvm.relay as relay
 from tvm import rpc
-from tvm.contrib import utils, graph_runtime as runtime
+from tvm.contrib import util, graph_runtime as runtime
 from tvm.contrib.download import download_testdata
 
 ######################################################################
@@ -109,7 +109,7 @@ block = get_model("resnet18_v1", pretrained=True)
 ######################################################################
 # In order to test our model, here we download an image of cat and
 # transform its format.
-img_url = "https://github.com/dmlc/mxnet.js/blob/main/data/cat.png?raw=true"
+img_url = "https://github.com/dmlc/mxnet.js/blob/master/data/cat.png?raw=true"
 img_name = "cat.png"
 img_path = download_testdata(img_url, img_name, module="data")
 image = Image.open(img_path).resize((224, 224))
@@ -193,7 +193,7 @@ with tvm.transform.PassContext(opt_level=3):
 # change the parameters but keep the result of model as the same.
 
 # Save the library at local temporary directory.
-tmp = utils.tempdir()
+tmp = util.tempdir()
 lib_fname = tmp.relpath("net.tar")
 lib.export_library(lib_fname)
 
diff --git a/docs/_downloads/85ba00b8ada85b8c5367f37b526a8caa/tune_relay_x86.py b/docs/_downloads/85ba00b8ada85b8c5367f37b526a8caa/tune_relay_x86.py
index b1b7ca2..1dd947f 100644
--- a/docs/_downloads/85ba00b8ada85b8c5367f37b526a8caa/tune_relay_x86.py
+++ b/docs/_downloads/85ba00b8ada85b8c5367f37b526a8caa/tune_relay_x86.py
@@ -23,10 +23,6 @@ Auto-tuning a convolutional network for x86 CPU
 
 This is a tutorial about how to tune a convolutional neural network
 for x86 CPU.
-
-Note that this tutorial will not run on Windows or recent versions of macOS. To
-get it to run, you will need to wrap the body of this tutorial in a :code:`if
-__name__ == "__main__":` block.
 """
 import os
 import numpy as np
diff --git a/docs/_downloads/8631d5082613ab80110d8237562cd480/extern_op.ipynb b/docs/_downloads/8631d5082613ab80110d8237562cd480/extern_op.ipynb
index 3d42d66..ee19072 100644
--- a/docs/_downloads/8631d5082613ab80110d8237562cd480/extern_op.ipynb
+++ b/docs/_downloads/8631d5082613ab80110d8237562cd480/extern_op.ipynb
@@ -125,7 +125,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.6.12"
+      "version": "3.6.10"
     }
   },
   "nbformat": 4,
diff --git a/docs/_downloads/870680567a5bf1e4697356b416e302b4/opt_matmul_auto_tensorcore.ipynb b/docs/_downloads/870680567a5bf1e4697356b416e302b4/opt_matmul_auto_tensorcore.ipynb
index dbcf32f..72fdd6f 100644
--- a/docs/_downloads/870680567a5bf1e4697356b416e302b4/opt_matmul_auto_tensorcore.ipynb
+++ b/docs/_downloads/870680567a5bf1e4697356b416e302b4/opt_matmul_auto_tensorcore.ipynb
@@ -103,7 +103,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.6.12"
+      "version": "3.6.10"
     }
   },
   "nbformat": 4,
diff --git a/docs/_downloads/87b9e8307245d848689e4cdc3e6fa9bf/deploy_sparse.ipynb b/docs/_downloads/87b9e8307245d848689e4cdc3e6fa9bf/deploy_sparse.ipynb
index c3dc776..a66da3c 100644
--- a/docs/_downloads/87b9e8307245d848689e4cdc3e6fa9bf/deploy_sparse.ipynb
+++ b/docs/_downloads/87b9e8307245d848689e4cdc3e6fa9bf/deploy_sparse.ipynb
@@ -179,7 +179,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.6.12"
+      "version": "3.6.10"
     }
   },
   "nbformat": 4,
diff --git a/docs/_downloads/8a7f17665207908e373e8146da09443a/deploy_prequantized.py b/docs/_downloads/8a7f17665207908e373e8146da09443a/deploy_prequantized.py
index beba332..81959db 100644
--- a/docs/_downloads/8a7f17665207908e373e8146da09443a/deploy_prequantized.py
+++ b/docs/_downloads/8a7f17665207908e373e8146da09443a/deploy_prequantized.py
@@ -22,7 +22,7 @@ Deploy a Framework-prequantized Model with TVM
 This is a tutorial on loading models quantized by deep learning frameworks into TVM.
 Pre-quantized model import is one of the quantization support we have in TVM. More details on
 the quantization story in TVM can be found
-`here <https://discuss.tvm.apache.org/t/quantization-story/3920>`_.
+`here <https://discuss.tvm.ai/t/quantization-story/3920>`_.
 
 Here, we demonstrate how to load and run models quantized by PyTorch, MXNet, and TFLite.
 Once loaded, we can run compiled, quantized models on any hardware TVM supports.
@@ -59,7 +59,7 @@ def get_transform():
 
 
 def get_real_image(im_height, im_width):
-    img_url = "https://github.com/dmlc/mxnet.js/blob/main/data/cat.png?raw=true"
+    img_url = "https://github.com/dmlc/mxnet.js/blob/master/data/cat.png?raw=true"
     img_path = download_testdata(img_url, "cat.png", module="data")
     return Image.open(img_path).resize((im_height, im_width))
 
diff --git a/docs/_downloads/91b0339c8f3cc2594cee580dc450149a/tune_matmul_x86.py b/docs/_downloads/91b0339c8f3cc2594cee580dc450149a/tune_matmul_x86.py
index 0f2ebe0..e5f9d7e 100644
--- a/docs/_downloads/91b0339c8f3cc2594cee580dc450149a/tune_matmul_x86.py
+++ b/docs/_downloads/91b0339c8f3cc2594cee580dc450149a/tune_matmul_x86.py
@@ -22,22 +22,17 @@ Auto-scheduling matrix multiplication for CPU
 
 Different from the existing :ref:`autotvm <tutorials-autotvm-sec>` which relies on 
 manual templates to define the search space, the auto-scheduler does not require any templates.
-Users only need to write the computation declaration without any schedule commands or templates.
-The auto-scheduler can automatically generate a large search space and
-find a good schedule in the space.
+The auto-scheduler is template-free, so users only need to write the computation declaration without
+any schedule commands or templates.
+The auto-scheduler can automatically generate a large
+search space and find a good schedule in the space.
 
 We use matrix multiplication as an example in this tutorial.
-
-Note that this tutorial will not run on Windows or recent versions of macOS. To
-get it to run, you will need to wrap the body of this tutorial in a :code:`if
-__name__ == "__main__":` block.
 """
 
-import os
-
 import numpy as np
 import tvm
-from tvm import te, auto_scheduler
+from tvm import te, testing, auto_scheduler
 
 ######################################################################
 # Define the computation
@@ -86,9 +81,8 @@ print(task.compute_dag)
 #   and do more analyses later.
 # * see :any:`auto_scheduler.TuningOptions` for more parameters
 
-log_file = "matmul.json"
 tune_option = auto_scheduler.TuningOptions(
-    num_measure_trials=10, measure_callbacks=[auto_scheduler.RecordToFile(log_file)]
+    num_measure_trials=10, measure_callbacks=[auto_scheduler.RecordToFile("matmul.json")]
 )
 
 ######################################################################
@@ -126,7 +120,7 @@ out_tvm = tvm.nd.empty(out_np.shape, ctx=ctx)
 func(a_tvm, b_tvm, c_tvm, out_tvm)
 
 # Check results
-np.testing.assert_allclose(out_np, out_tvm.asnumpy(), rtol=1e-3)
+tvm.testing.assert_allclose(out_np, out_tvm.asnumpy(), rtol=1e-3)
 
 # Evaluate execution time.
 evaluator = func.time_evaluator(func.entry_name, ctx, min_repeat_ms=500)
@@ -148,7 +142,7 @@ print(
 # print the equivalent python schedule API, and build the binary again.
 
 # Load the measurement record for the best schedule
-inp, res = auto_scheduler.load_best(log_file, task.workload_key)
+inp, res = auto_scheduler.load_best("matmul.json", task.workload_key)
 
 # Print equivalent python schedule API. This can be used for debugging and
 # learning the behavior of the auto-scheduler.
@@ -167,21 +161,19 @@ func = tvm.build(sch, args)
 # In the example below we resume the search and do 5 more trials.
 
 
-def resume_search(task, log_file_name):
+def resume_search(task, log_file):
     cost_model = auto_scheduler.XGBModel()
-    cost_model.update_from_file(log_file_name)
+    cost_model.update_from_file(log_file)
     search_policy = auto_scheduler.SketchPolicy(
-        task,
-        cost_model,
-        init_search_callbacks=[auto_scheduler.PreloadMeasuredStates(log_file_name)],
+        task, cost_model, init_search_callbacks=[auto_scheduler.PreloadMeasuredStates(log_file)]
     )
     tune_option = auto_scheduler.TuningOptions(
-        num_measure_trials=5, measure_callbacks=[auto_scheduler.RecordToFile(log_file_name)]
+        num_measure_trials=5, measure_callbacks=[auto_scheduler.RecordToFile(log_file)]
     )
     sch, args = auto_scheduler.auto_schedule(task, search_policy, tuning_options=tune_option)
 
 
-# resume_search(task, log_file)
+# resume_search(task, "matmul.json")
 
 ######################################################################
 # .. note::
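The template-free entry point this script relies on, reduced to its skeleton (a sketch; the script's real workload differs in detail):

.. code-block:: python

    import tvm
    from tvm import te, auto_scheduler

    # Registering the compute definition is all the auto-scheduler needs
    @auto_scheduler.register_workload
    def matmul(N, L, M, dtype):
        A = te.placeholder((N, L), name="A", dtype=dtype)
        B = te.placeholder((L, M), name="B", dtype=dtype)
        k = te.reduce_axis((0, L), name="k")
        C = te.compute((N, M), lambda i, j: te.sum(A[i, k] * B[k, j], axis=k), name="C")
        return [A, B, C]

    target = tvm.target.Target("llvm")
    task = auto_scheduler.create_task(matmul, (1024, 1024, 1024, "float32"), target)
    print(task.compute_dag)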
diff --git a/docs/_downloads/9a7956336431664ba6d628347b08f5cb/opt_conv_tensorcore.ipynb b/docs/_downloads/9a7956336431664ba6d628347b08f5cb/opt_conv_tensorcore.ipynb
index 4f351a5..77a0d41 100644
--- a/docs/_downloads/9a7956336431664ba6d628347b08f5cb/opt_conv_tensorcore.ipynb
+++ b/docs/_downloads/9a7956336431664ba6d628347b08f5cb/opt_conv_tensorcore.ipynb
@@ -157,7 +157,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.6.12"
+      "version": "3.6.10"
     }
   },
   "nbformat": 4,
diff --git a/docs/_downloads/9a950897eeef498440fbe2f0afe2601f/tedd.py b/docs/_downloads/9a950897eeef498440fbe2f0afe2601f/tedd.py
index 34ad43c..e0b8038 100644
--- a/docs/_downloads/9a950897eeef498440fbe2f0afe2601f/tedd.py
+++ b/docs/_downloads/9a950897eeef498440fbe2f0afe2601f/tedd.py
@@ -81,7 +81,7 @@ tedd.viz_dataflow_graph(s, dot_file_path="/tmp/dfg.dot")
 # tedd.viz_dataflow_graph(s, show_svg = True)
 
 ######################################################################
-# .. image:: https://github.com/dmlc/web-data/raw/main/tvm/tutorial/tedd_dfg.png
+# .. image:: https://github.com/dmlc/web-data/raw/master/tvm/tutorial/tedd_dfg.png
 #      :align: center
 #
 # The first one is a dataflow graph.  Every node represents a stage with name and memory
@@ -105,7 +105,7 @@ tedd.viz_schedule_tree(s, dot_file_path="/tmp/scheduletree2.dot")
 # tedd.viz_schedule_tree(s, show_svg = True)
 
 ######################################################################
-# .. image:: https://github.com/dmlc/web-data/raw/main/tvm/tutorial/tedd_st.png
+# .. image:: https://github.com/dmlc/web-data/raw/master/tvm/tutorial/tedd_st.png
 #      :align: center
 #
 # Now, let us take a close look at the second schedule tree.  Every block under ROOT
@@ -138,7 +138,7 @@ tedd.viz_itervar_relationship_graph(s, dot_file_path="/tmp/itervar.dot")
 # tedd.viz_itervar_relationship_graph(s, show_svg = True)
 
 ######################################################################
-# .. image:: https://github.com/dmlc/web-data/raw/main/tvm/tutorial/tedd_itervar_rel.png
+# .. image:: https://github.com/dmlc/web-data/raw/master/tvm/tutorial/tedd_itervar_rel.png
 #      :align: center
 #
 # The last one is an IterVar Relationship Graph.  Every subgraph represents a
diff --git a/docs/_downloads/9b0365fd5723f7c4d4e996637ab9a487/intro_topi.py b/docs/_downloads/9b0365fd5723f7c4d4e996637ab9a487/intro_topi.py
index 2459cf2..c9812ff 100644
--- a/docs/_downloads/9b0365fd5723f7c4d4e996637ab9a487/intro_topi.py
+++ b/docs/_downloads/9b0365fd5723f7c4d4e996637ab9a487/intro_topi.py
@@ -26,7 +26,6 @@ In this tutorial, we will see how TOPI can save us from writing boilerplates cod
 from __future__ import absolute_import, print_function
 
 import tvm
-import tvm.testing
 from tvm import te
 from tvm import topi
 import numpy as np
diff --git a/docs/_downloads/a2f661bf234a167b5458fa28d8fafedc/tedd.ipynb b/docs/_downloads/a2f661bf234a167b5458fa28d8fafedc/tedd.ipynb
index 4bc376e..9dbb495 100644
--- a/docs/_downloads/a2f661bf234a167b5458fa28d8fafedc/tedd.ipynb
+++ b/docs/_downloads/a2f661bf234a167b5458fa28d8fafedc/tedd.ipynb
@@ -69,7 +69,7 @@
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "![](https://github.com/dmlc/web-data/raw/main/tvm/tutorial/tedd_dfg.png)\n\n     :align: center\n\nThe first one is a dataflow graph.  Every node represents a stage with name and memory\nscope shown in the middle and inputs/outputs information on the sides.\nEdges show nodes' dependency.\n\n\n"
+        "![](https://github.com/dmlc/web-data/raw/master/tvm/tutorial/tedd_dfg.png)\n\n     :align: center\n\nThe first one is a dataflow graph.  Every node represents a stage with name and memory\nscope shown in the middle and inputs/outputs information on the sides.\nEdges show nodes' dependency.\n\n\n"
       ]
     },
     {
@@ -105,7 +105,7 @@
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "![](https://github.com/dmlc/web-data/raw/main/tvm/tutorial/tedd_st.png)\n\n     :align: center\n\nNow, let us take a close look at the second schedule tree.  Every block under ROOT\nrepresents a\nstage.  Stage name shows in the top row and compute shows in the bottom row.\nThe middle rows are for IterVars, the higher the outer, the lower the inner.\nAn IterVar row contains its index, name, type, and other optional information.\nLet's use the W.shared stage as an example.  The to [...]
+        "![](https://github.com/dmlc/web-data/raw/master/tvm/tutorial/tedd_st.png)\n\n     :align: center\n\nNow, let us take a close look at the second schedule tree.  Every block under ROOT\nrepresents a\nstage.  Stage name shows in the top row and compute shows in the bottom row.\nThe middle rows are for IterVars, the higher the outer, the lower the inner.\nAn IterVar row contains its index, name, type, and other optional information.\nLet's use the W.shared stage as an example.  The  [...]
       ]
     },
     {
@@ -130,7 +130,7 @@
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "![](https://github.com/dmlc/web-data/raw/main/tvm/tutorial/tedd_itervar_rel.png)\n\n     :align: center\n\nThe last one is an IterVar Relationship Graph.  Every subgraph represents a\nstage and contains IterVar nodes and transformation nodes.  For example,\nW.shared has three split nodes and three fuse nodes.  The rest are IterVar\nnodes of the same format as the IterVar rows in Schedule Trees.  Root\nIterVars are those not driven by any transformation node, such as ax0; leaf\nI [...]
+        "![](https://github.com/dmlc/web-data/raw/master/tvm/tutorial/tedd_itervar_rel.png)\n\n     :align: center\n\nThe last one is an IterVar Relationship Graph.  Every subgraph represents a\nstage and contains IterVar nodes and transformation nodes.  For example,\nW.shared has three split nodes and three fuse nodes.  The rest are IterVar\nnodes of the same format as the IterVar rows in Schedule Trees.  Root\nIterVars are those not driven by any transformation node, such as ax0; leaf\ [...]
       ]
     },
     {
@@ -157,7 +157,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.6.12"
+      "version": "3.6.10"
     }
   },
   "nbformat": 4,
diff --git a/docs/_downloads/a7ce44923ffcc359fd2e532ac1f62c9e/from_darknet.py b/docs/_downloads/a7ce44923ffcc359fd2e532ac1f62c9e/from_darknet.py
index fc77079..bbfb410 100644
--- a/docs/_downloads/a7ce44923ffcc359fd2e532ac1f62c9e/from_darknet.py
+++ b/docs/_downloads/a7ce44923ffcc359fd2e532ac1f62c9e/from_darknet.py
@@ -60,7 +60,7 @@ MODEL_NAME = "yolov3"
 # Download cfg and weights file if first time.
 CFG_NAME = MODEL_NAME + ".cfg"
 WEIGHTS_NAME = MODEL_NAME + ".weights"
-REPO_URL = "https://github.com/dmlc/web-data/blob/main/darknet/"
+REPO_URL = "https://github.com/dmlc/web-data/blob/master/darknet/"
 CFG_URL = REPO_URL + "cfg/" + CFG_NAME + "?raw=true"
 WEIGHTS_URL = "https://pjreddie.com/media/files/" + WEIGHTS_NAME
 
@@ -195,7 +195,6 @@ with open(coco_path) as f:
 
 names = [x.strip() for x in content]
 
-tvm.relay.testing.yolo_detection.show_detections(img, dets, thresh, names, last_layer.classes)
 tvm.relay.testing.yolo_detection.draw_detections(
     font_path, img, dets, thresh, names, last_layer.classes
 )
diff --git a/docs/_downloads/b4d760859f6d9338f70bdb79ddfa3aa8/opt_conv_cuda.py b/docs/_downloads/b4d760859f6d9338f70bdb79ddfa3aa8/opt_conv_cuda.py
index 9cb29b5..f50d302 100644
--- a/docs/_downloads/b4d760859f6d9338f70bdb79ddfa3aa8/opt_conv_cuda.py
+++ b/docs/_downloads/b4d760859f6d9338f70bdb79ddfa3aa8/opt_conv_cuda.py
@@ -91,7 +91,7 @@ B = te.compute(
 # programmers. Thus how to maximize the data reuse in the shared memory is
 # critical to achieve high performance in GPU kernels.
 #
-# .. image:: https://github.com/dmlc/web-data/raw/main/tvm/tutorial/gpu_memory_hierarchy.png
+# .. image:: https://github.com/dmlc/web-data/raw/master/tvm/tutorial/gpu_memory_hierarchy.png
 #      :align: center
 #      :height: 319px
 #      :width: 271px
@@ -125,7 +125,7 @@ BL = s.cache_write(B, "local")
 # x block_factor (8 x 64) data from Apad and B each time to buffers in the
 # shared memory.
 #
-# .. image:: https://github.com/dmlc/web-data/raw/main/tvm/tutorial/conv_gpu_blocking.png
+# .. image:: https://github.com/dmlc/web-data/raw/master/tvm/tutorial/conv_gpu_blocking.png
 #      :align: center
 #      :height: 308px
 #      :width: 317px
@@ -167,7 +167,7 @@ s[B].bind(bx, block_x)
 # parts, and then tile into 8x8 grids. Therefore, shown in the figure below,
 # each thread computes 4 strided grids, where size of each grid is 4 x 4.
 #
-# .. image:: https://github.com/dmlc/web-data/raw/main/tvm/tutorial/conv_gpu_vthread.png
+# .. image:: https://github.com/dmlc/web-data/raw/master/tvm/tutorial/conv_gpu_vthread.png
 #      :align: center
 #      :height: 188px
 #      :width: 268px
diff --git a/docs/_downloads/b7eedb9d280f012e9fad34189017330e/bring_your_own_datatypes.py b/docs/_downloads/b7eedb9d280f012e9fad34189017330e/bring_your_own_datatypes.py
deleted file mode 100644
index c85ec07..0000000
--- a/docs/_downloads/b7eedb9d280f012e9fad34189017330e/bring_your_own_datatypes.py
+++ /dev/null
@@ -1,411 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-Bring Your Own Datatypes to TVM
-===============================
-**Authors**: `Gus Smith <https://github.com/gussmith23>`_, `Andrew Liu <https://github.com/hypercubestart>`_
-
-In this tutorial, we will show you how to utilize the Bring Your Own Datatypes framework to use your own custom datatypes in TVM.
-Note that the Bring Your Own Datatypes framework currently only handles **software emulated versions of datatypes**.
-The framework does not support compiling for custom accelerator datatypes out-of-the-box.
-
-Datatype Libraries
-------------------
-
-The Bring Your Own Datatypes framework allows users to register their own datatype implementations alongside TVM's native datatypes (such as ``float``).
-In the wild, these datatype implementations often appear as libraries.
-For example:
-
-- `libposit <https://github.com/cjdelisle/libposit>`_, a posit library
-- `Stillwater Universal <https://github.com/stillwater-sc/universal>`_, a library with posits, fixed-point numbers, and other types
-- `SoftFloat <https://github.com/ucb-bar/berkeley-softfloat-3>`_, Berkeley's software implementation of IEEE 754 floating-point
-
-The Bring Your Own Datatypes framework enables users to plug these datatype implementations into TVM!
-
-In this section, we will use an example library we have already implemented, located at ``3rdparty/byodt/myfloat.cc``.
-This datatype, which we dubbed "myfloat", is really just an IEEE 754 float under the hood, but it serves as a useful example
-to show that any datatype can be used in the BYODT framework.
-
-Setup
------
-
-Since we do not use an external third-party library here, there is no setup needed.
-
-If you would like to try this with your own datatype library, first bring the library's functions into the process space with ``CDLL``:
-
-.. code-block :: python
-
-    ctypes.CDLL('my-datatype-lib.so', ctypes.RTLD_GLOBAL)
-"""
-
-######################
-# A Simple TVM Program
-# --------------------
-#
-# We'll begin by writing a simple program in TVM; afterwards, we will re-write it to use custom datatypes.
-import tvm
-from tvm import relay
-
-# Our basic program: Z = X + Y
-x = relay.var("x", shape=(3,), dtype="float32")
-y = relay.var("y", shape=(3,), dtype="float32")
-z = x + y
-program = relay.Function([x, y], z)
-module = tvm.IRModule.from_expr(program)
-
-######################################################################
-# Now, we create random inputs to feed into this program using numpy:
-
-import numpy as np
-
-np.random.seed(23)  # for reproducibility
-
-x_input = np.random.rand(3).astype("float32")
-y_input = np.random.rand(3).astype("float32")
-print("x: {}".format(x_input))
-print("y: {}".format(y_input))
-
-######################################################################
-# Finally, we're ready to run the program:
-
-ex = relay.create_executor(mod=module)
-
-z_output = ex.evaluate()(x_input, y_input)
-print("z: {}".format(z_output))
-
-######################################################################
-# Adding Custom Datatypes
-# -----------------------
-# Now, we will do the same, but we will use a custom datatype for our intermediate computation.
-#
-# We use the same input variables ``x`` and ``y`` as above, but before adding ``x + y``, we first cast both ``x`` and ``y`` to a custom datatype via the ``relay.cast(...)`` call.
-#
-# Note how we specify the custom datatype: we indicate it using the special ``custom[...]`` syntax.
-# Additionally, note the "32" after the datatype: this is the bitwidth of the custom datatype. This tells TVM that each instance of ``myfloat`` is 32 bits wide.
-
-try:
-    with tvm.transform.PassContext(config={"tir.disable_vectorize": True}):
-        x_myfloat = relay.cast(x, dtype="custom[myfloat]32")
-        y_myfloat = relay.cast(y, dtype="custom[myfloat]32")
-        z_myfloat = x_myfloat + y_myfloat
-        z = relay.cast(z_myfloat, dtype="float32")
-except tvm.TVMError as e:
-    # Print last line of error
-    print(str(e).split("\n")[-1])
-
-######################################################################
-# Trying to generate this program throws an error from TVM.
-# TVM does not know how to handle any custom datatype out of the box!
-# We first have to register the custom type with TVM, giving it a name and a type code:
-
-tvm.target.datatype.register("myfloat", 150)
-
-######################################################################
-# Note that the type code, 150, is currently chosen manually by the user.
-# See ``TVMTypeCode::kCustomBegin`` in `include/tvm/runtime/data_type.h <https://github.com/apache/incubator-tvm/blob/main/include/tvm/runtime/data_type.h>`_.
-# Now we can generate our program again:
-
-x_myfloat = relay.cast(x, dtype="custom[myfloat]32")
-y_myfloat = relay.cast(y, dtype="custom[myfloat]32")
-z_myfloat = x_myfloat + y_myfloat
-z = relay.cast(z_myfloat, dtype="float32")
-program = relay.Function([x, y], z)
-module = tvm.IRModule.from_expr(program)
-module = relay.transform.InferType()(module)
-
-######################################################################
-# Now we have a Relay program that uses myfloat!
-print(program)
-
-######################################################################
-# Now that we can express our program without errors, let's try running it!
-try:
-    with tvm.transform.PassContext(config={"tir.disable_vectorize": True}):
-        ex = relay.create_executor("graph", mod=module)
-        z_output_myfloat = ex.evaluate()(x_input, y_input)
-        print("z: {}".format(y_myfloat))
-except tvm.TVMError as e:
-    # Print last line of error
-    print(str(e).split("\n")[-1])
-
-######################################################################
-# Now, trying to compile this program throws an error.
-# Let's dissect this error.
-#
-# The error is occurring during the process of lowering the custom datatype code to code that TVM can compile and run.
-# TVM is telling us that it cannot find a *lowering function* for the ``Cast`` operation, when casting from source type 2 (``float``, in TVM), to destination type 150 (our custom datatype).
-# When lowering custom datatypes, if TVM encounters an operation over a custom datatype, it looks for a user-registered *lowering function*, which tells it how to lower the operation to an operation over datatypes it understands.
-# We have not told TVM how to lower ``Cast`` operations for our custom datatypes; this is the source of the error.
-#
-# To fix this error, we simply need to specify a lowering function:
-
-tvm.target.datatype.register_op(
-    tvm.target.datatype.create_lower_func(
-        {
-            (32, 32): "FloatToCustom32",  # cast from float32 to myfloat32
-        }
-    ),
-    "Cast",
-    "llvm",
-    "float",
-    "myfloat",
-)
-
-######################################################################
-# The ``register_op(...)`` call takes a lowering function, and a number of parameters which specify exactly the operation which should be lowered with the provided lowering function.
-# In this case, the arguments we pass specify that this lowering function is for lowering a ``Cast`` from ``float`` to ``myfloat`` for target ``"llvm"``.
-#
-# The lowering function passed into this call is very general: it should take an operation of the specified type (in this case, `Cast`) and return another operation which only uses datatypes that TVM understands.
-#
-# In the general case, we expect users to implement operations over their custom datatypes using calls to an external library.
-# In our example, our ``myfloat`` library implements a ``Cast`` from ``float`` to 32-bit ``myfloat`` in the function ``FloatToCustom32``.
-# To provide for the general case, we have made a helper function, ``create_lower_func(...)``,
-# which does just this: given a dictionary, it replaces the given operation with a ``Call`` to the appropriate function name, chosen based on the op and the bit widths.
-# It additionally removes usages of the custom datatype by storing the custom datatype in an opaque ``uint`` of the appropriate width; in our case, a ``uint32_t``.
-# For more information, see `the source code <https://github.com/apache/incubator-tvm/blob/main/python/tvm/target/datatype.py>`_.
-
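-# For intuition, the function that ``create_lower_func`` generates for the entry
-# above could also be written by hand. A rough sketch, not part of the tutorial
-# (the ``uint32`` storage type and the ``FloatToCustom32`` symbol follow the
-# surrounding text; the exact node attributes are assumptions):
-#
-# .. code-block:: python
-#
-#     import tvm.tir
-#
-#     def lower_float_to_myfloat32(op):
-#         # op is the tir.Cast node being lowered; op.value is its float operand.
-#         # Store the resulting myfloat32 in an opaque uint32 and delegate the
-#         # actual conversion to the external emulation library.
-#         return tvm.tir.call_pure_extern("uint32", "FloatToCustom32", op.value)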
-# We can now re-try running the program:
-try:
-    with tvm.transform.PassContext(config={"tir.disable_vectorize": True}):
-        ex = relay.create_executor("graph", mod=module)
-        z_output_myfloat = ex.evaluate()(x_input, y_input)
-        print("z: {}".format(z_output_myfloat))
-except tvm.TVMError as e:
-    # Print last line of error
-    print(str(e).split("\n")[-1])
-
-######################################################################
-# This new error tells us that the ``Add`` lowering function is not found, which is good news, as it's no longer complaining about the ``Cast``!
-# We know what to do from here: we just need to register the lowering functions for the other operations in our program.
-#
-# Note that for ``Add``, ``create_lower_func`` takes in a dict where the key is an integer.
-# For ``Cast`` operations, we require a 2-tuple to specify the ``src_bit_length`` and the ``dest_bit_length``,
-# while for all other operations, the bit length is the same between the operands so we only require one integer to specify ``bit_length``.
-tvm.target.datatype.register_op(
-    tvm.target.datatype.create_lower_func({32: "Custom32Add"}),
-    "Add",
-    "llvm",
-    "myfloat",
-)
-tvm.target.datatype.register_op(
-    tvm.target.datatype.create_lower_func({(32, 32): "Custom32ToFloat"}),
-    "Cast",
-    "llvm",
-    "myfloat",
-    "float",
-)
-
-# Now, we can run our program without errors.
-with tvm.transform.PassContext(config={"tir.disable_vectorize": True}):
-    compiled = ex.evaluate(program)
-    z_output_myfloat = compiled(x_input, y_input)
-print("z: {}".format(z_output_myfloat))
-
-print("x:\t\t{}".format(x_input))
-print("y:\t\t{}".format(y_input))
-print("z (float32):\t{}".format(z_output))
-print("z (myfloat32):\t{}".format(z_output_myfloat))
-
-# Perhaps as expected, the ``myfloat32`` and ``float32`` results are exactly the same!
-
-######################################################################
-# Running Models With Custom Datatypes
-# ------------------------------------
-#
-# We will first choose the model which we would like to run with myfloat.
-# In this case we use `Mobilenet <https://arxiv.org/abs/1704.04861>`_.
-# We choose Mobilenet due to its small size.
-# In this alpha state of the Bring Your Own Datatypes framework, we have not implemented any software optimizations for running software emulations of custom datatypes; the result is poor performance due to many calls into our datatype emulation library.
-#
-# First let us define two helper functions to get the mobilenet model and a cat image.
-
-
-def get_mobilenet():
-    dshape = (1, 3, 224, 224)
-    from mxnet.gluon.model_zoo.vision import get_model
-
-    block = get_model("mobilenet0.25", pretrained=True)
-    shape_dict = {"data": dshape}
-    return relay.frontend.from_mxnet(block, shape_dict)
-
-
-def get_cat_image():
-    from tvm.contrib.download import download_testdata
-    from PIL import Image
-
-    url = "https://gist.githubusercontent.com/zhreshold/bcda4716699ac97ea44f791c24310193/raw/fa7ef0e9c9a5daea686d6473a62aacd1a5885849/cat.png"
-    dst = "cat.png"
-    real_dst = download_testdata(url, dst, module="data")
-    img = Image.open(real_dst).resize((224, 224))
-    # CoreML's standard model image format is BGR
-    img_bgr = np.array(img)[:, :, ::-1]
-    img = np.transpose(img_bgr, (2, 0, 1))[np.newaxis, :]
-    return np.asarray(img, dtype="float32")
-
-
-module, params = get_mobilenet()
-
-######################################################################
-# It's easy to execute MobileNet with native TVM:
-
-ex = tvm.relay.create_executor("graph", mod=module)
-input = get_cat_image()
-result = ex.evaluate()(input, **params).asnumpy()
-# print first 10 elements
-print(result.flatten()[:10])
-
-######################################################################
-# Now, we would like to change the model to use myfloat internally. To do so, we need to convert the network. We first define a function that will help us convert tensors:
-
-
-def convert_ndarray(dst_dtype, array):
-    """Converts an NDArray into the specified datatype"""
-    x = relay.var("x", shape=array.shape, dtype=str(array.dtype))
-    cast = relay.Function([x], x.astype(dst_dtype))
-    with tvm.transform.PassContext(config={"tir.disable_vectorize": True}):
-        return relay.create_executor("graph").evaluate(cast)(array)
-
-
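-# Usage of this helper mirrors the parameter and input conversion a few lines
-# below; for a single tensor it looks like this (a sketch, reusing the
-# ``dst_dtype`` string defined shortly afterwards):
-#
-# .. code-block:: python
-#
-#     x_input_myfloat = convert_ndarray("custom[myfloat]32", x_input)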
-######################################################################
-# Now, to actually convert the entire network, we have written `a pass in Relay <https://github.com/gussmith23/tvm/blob/ea174c01c54a2529e19ca71e125f5884e728da6e/python/tvm/relay/frontend/change_datatype.py#L21>`_ which simply converts all nodes within the model to use the new datatype.
-
-from tvm.relay.frontend.change_datatype import ChangeDatatype
-
-src_dtype = "float32"
-dst_dtype = "custom[myfloat]32"
-
-module = relay.transform.InferType()(module)
-
-# Currently, custom datatypes only work if you run simplify_inference beforehand
-module = tvm.relay.transform.SimplifyInference()(module)
-
-# Run type inference before changing datatype
-module = tvm.relay.transform.InferType()(module)
-
-# Change datatype from float to myfloat and re-infer types
-cdtype = ChangeDatatype(src_dtype, dst_dtype)
-expr = cdtype.visit(module["main"])
-module = tvm.relay.transform.InferType()(module)
-
-# We also convert the parameters:
-params = {k: convert_ndarray(dst_dtype, v) for k, v in params.items()}
-
-# We also need to convert our input:
-input = convert_ndarray(dst_dtype, input)
-
-# Finally, we can try to run the converted model:
-try:
-    # Vectorization is not implemented with custom datatypes.
-    with tvm.transform.PassContext(config={"tir.disable_vectorize": True}):
-        result_myfloat = ex.evaluate(expr)(input, **params)
-except tvm.TVMError as e:
-    print(str(e).split("\n")[-1])
-
-######################################################################
-# When we attempt to run the model, we get a familiar error telling us that more functions need to be registered for myfloat.
-#
-# Because this is a neural network, many more operations are required.
-# Here, we register all the needed functions:
-
-tvm.target.datatype.register_op(
-    tvm.target.datatype.create_lower_func({32: "FloatToCustom32"}),
-    "FloatImm",
-    "llvm",
-    "myfloat",
-)
-
-tvm.target.datatype.register_op(
-    tvm.target.datatype.lower_ite, "Call", "llvm", "myfloat", intrinsic_name="tir.if_then_else"
-)
-
-tvm.target.datatype.register_op(
-    tvm.target.datatype.lower_call_pure_extern,
-    "Call",
-    "llvm",
-    "myfloat",
-    intrinsic_name="tir.call_pure_extern",
-)
-
-tvm.target.datatype.register_op(
-    tvm.target.datatype.create_lower_func({32: "Custom32Mul"}),
-    "Mul",
-    "llvm",
-    "myfloat",
-)
-tvm.target.datatype.register_op(
-    tvm.target.datatype.create_lower_func({32: "Custom32Div"}),
-    "Div",
-    "llvm",
-    "myfloat",
-)
-
-tvm.target.datatype.register_op(
-    tvm.target.datatype.create_lower_func({32: "Custom32Sqrt"}),
-    "Call",
-    "llvm",
-    "myfloat",
-    intrinsic_name="tir.sqrt",
-)
-
-tvm.target.datatype.register_op(
-    tvm.target.datatype.create_lower_func({32: "Custom32Sub"}),
-    "Sub",
-    "llvm",
-    "myfloat",
-)
-
-tvm.target.datatype.register_op(
-    tvm.target.datatype.create_lower_func({32: "Custom32Exp"}),
-    "Call",
-    "llvm",
-    "myfloat",
-    intrinsic_name="tir.exp",
-)
-
-tvm.target.datatype.register_op(
-    tvm.target.datatype.create_lower_func({32: "Custom32Max"}),
-    "Max",
-    "llvm",
-    "myfloat",
-)
-
-tvm.target.datatype.register_min_func(
-    tvm.target.datatype.create_min_lower_func({32: "MinCustom32"}, "myfloat"),
-    "myfloat",
-)
-
-######################################################################
-# Note we are making use of two new functions: ``register_min_func`` and ``create_min_lower_func``.
-#
-# ``register_min_func`` registers a function that takes an integer ``num_bits`` for the bit length and returns an operation
-# representing the minimum finite representable value for the custom data type with the specified bit length.
-#
-# Similar to ``register_op`` and ``create_lower_func``, the ``create_min_lower_func`` handles the general case
-# where the minimum representable custom datatype value is implemented using calls to an external library.
-#
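-# As with the lowering functions, the function that ``create_min_lower_func``
-# generates could be written by hand. A sketch under stated assumptions (that
-# the external ``MinCustom32`` routine registered above takes no arguments, and
-# that the value is stored in an opaque ``uint`` of the matching width):
-#
-# .. code-block:: python
-#
-#     def my_min_func(num_bits):
-#         # Return the minimum finite myfloat value of the given bit width,
-#         # delegating the computation to the emulation library.
-#         return tvm.tir.call_pure_extern("uint" + str(num_bits), "MinCustom32")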
-# Now we can finally run the model:
-
-# Vectorization is not implemented with custom datatypes.
-with tvm.transform.PassContext(config={"tir.disable_vectorize": True}):
-    result_myfloat = ex.evaluate(expr)(input, **params)
-    result_myfloat = convert_ndarray(src_dtype, result_myfloat).asnumpy()
-    # print first 10 elements
-    print(result_myfloat.flatten()[:10])
-
-# Again, note that the output using 32-bit myfloat is exactly the same as with 32-bit floats,
-# because myfloat is exactly a float!
-np.testing.assert_array_equal(result, result_myfloat)
diff --git a/docs/_downloads/b9891d1a23f84eec3271025d99d005f7/tune_relay_x86.ipynb b/docs/_downloads/b9891d1a23f84eec3271025d99d005f7/tune_relay_x86.ipynb
index f072ef2..ec69e18 100644
--- a/docs/_downloads/b9891d1a23f84eec3271025d99d005f7/tune_relay_x86.ipynb
+++ b/docs/_downloads/b9891d1a23f84eec3271025d99d005f7/tune_relay_x86.ipynb
@@ -15,7 +15,7 @@
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "\n\nAuto-tuning a convolutional network for x86 CPU\n===============================================\n**Author**: `Yao Wang <https://github.com/kevinthesun>`_, `Eddie Yan <https://github.com/eqy>`_\n\nThis is a tutorial about how to tune convolution neural network\nfor x86 CPU.\n\nNote that this tutorial will not run on Windows or recent versions of macOS. To\nget it to run, you will need to wrap the body of this tutorial in a :code:`if\n__name__ == \"__main__\":` block.\n\n"
+        "\n\nAuto-tuning a convolutional network for x86 CPU\n===============================================\n**Author**: `Yao Wang <https://github.com/kevinthesun>`_, `Eddie Yan <https://github.com/eqy>`_\n\nThis is a tutorial about how to tune convolution neural network\nfor x86 CPU.\n\n"
       ]
     },
     {
@@ -107,7 +107,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.6.12"
+      "version": "3.6.10"
     }
   },
   "nbformat": 4,
diff --git a/docs/_downloads/baa4de13ce6d932de43e0eb5c4cb8f16/tensorize.py b/docs/_downloads/baa4de13ce6d932de43e0eb5c4cb8f16/tensorize.py
index e91cfe4..601adb8 100644
--- a/docs/_downloads/baa4de13ce6d932de43e0eb5c4cb8f16/tensorize.py
+++ b/docs/_downloads/baa4de13ce6d932de43e0eb5c4cb8f16/tensorize.py
@@ -160,9 +160,9 @@ def gemv_impl():
         return 0;
       }
     """
-    from tvm.contrib import utils, clang
+    from tvm.contrib import util, clang
 
-    temp = utils.tempdir()
+    temp = util.tempdir()
     ll_path = temp.relpath("temp.ll")
     # Create LLVM ir from c source code
     ll_code = clang.create_llvm(cc_code, output=ll_path)
@@ -182,7 +182,7 @@ print(tvm.lower(s, [A, B, C], simple_mode=True))
 #
 func = tvm.build(s, [A, B, C], target="llvm", name="gemv")
 
-from tvm.topi.utils import get_const_tuple
+from tvm.topi.util import get_const_tuple
 
 dtype = A.dtype
 ctx = tvm.context("cpu", 0)
@@ -228,9 +228,9 @@ def gemv_impl():
         return 0;
       }
     """
-    from tvm.contrib import utils, clang
+    from tvm.contrib import util, clang
 
-    temp = utils.tempdir()
+    temp = util.tempdir()
     ll_path = temp.relpath("temp.ll")
     # Create LLVM ir from c source code
     ll_code = clang.create_llvm(cc_code, output=ll_path)
diff --git a/docs/_downloads/baf1373314e0e040008107ff2571b4cd/tune_relay_arm.py b/docs/_downloads/baf1373314e0e040008107ff2571b4cd/tune_relay_arm.py
index 7514ee7..a336870 100644
--- a/docs/_downloads/baf1373314e0e040008107ff2571b4cd/tune_relay_arm.py
+++ b/docs/_downloads/baf1373314e0e040008107ff2571b4cd/tune_relay_arm.py
@@ -35,10 +35,6 @@ these operators, it will query this log file to get the best knob values.
 We also released pre-tuned parameters for some arm devices. You can go to
 `ARM CPU Benchmark <https://github.com/apache/incubator-tvm/wiki/Benchmark#arm-cpu>`_
 to see the results.
-
-Note that this tutorial will not run on Windows or recent versions of macOS. To
-get it to run, you will need to wrap the body of this tutorial in a :code:`if
-__name__ == "__main__":` block.
 """
 
 ######################################################################
@@ -71,7 +67,7 @@ from tvm import autotvm
 from tvm import relay
 import tvm.relay.testing
 from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner
-from tvm.contrib.utils import tempdir
+from tvm.contrib.util import tempdir
 import tvm.contrib.graph_runtime as runtime
 
 #################################################################
@@ -131,7 +127,7 @@ def get_network(name, batch_size):
 # measure the speed of code on the board.
 #
 # To scale up the tuning, TVM uses RPC Tracker to manage distributed devices.
-# The RPC Tracker is a centralized controller node. We can register all devices to
+# The RPC Tracker is a centralized master node. We can register all devices to
 # the tracker. For example, if we have 10 phones, we can register all of them
 # to the tracker, and run 10 measurements in parallel, accelerating the tuning process.
 #
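 # For reference, the tracker and the per-device registration described here are
 # started with TVM's standard RPC commands (port ``9190`` and the device key are
 # conventional examples; replace ``[HOST_IP]`` as described in the next hunk):
 #
 # .. code-block:: bash
 #
 #     # On the host machine: start the tracker
 #     python -m tvm.exec.rpc_tracker --host=0.0.0.0 --port=9190
 #
 #     # On each device: register with the tracker under a key
 #     python -m tvm.exec.rpc_server --tracker=[HOST_IP]:9190 --key=my-device-key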
@@ -166,7 +162,7 @@ def get_network(name, batch_size):
 #   (replace :code:`[HOST_IP]` with the IP address of your host machine)
 #
 # * For Android:
-#   Follow this `readme page <https://github.com/apache/incubator-tvm/tree/main/apps/android_rpc>`_ to
+#   Follow this `readme page <https://github.com/apache/incubator-tvm/tree/master/apps/android_rpc>`_ to
 #   install the TVM RPC APK on the android device. Make sure you can pass the android rpc test.
 #   Then you have already registered your device. During tuning, you have to go to developer option
 #   and enable "Keep screen awake during changing" and charge your phone to make it stable.
@@ -416,4 +412,4 @@ def tune_and_evaluate(tuning_opt):
 #      import logging
 #      logging.getLogger('autotvm').setLevel(logging.DEBUG)
 #
-#   Finally, always feel free to ask our community for help on https://discuss.tvm.apache.org
+#   Finally, always feel free to ask our community for help on https://discuss.tvm.ai
diff --git a/docs/_downloads/bba10250678e70879823196c946734fc/vta_get_started.py b/docs/_downloads/bba10250678e70879823196c946734fc/vta_get_started.py
index 1a097b8..46b050f 100644
--- a/docs/_downloads/bba10250678e70879823196c946734fc/vta_get_started.py
+++ b/docs/_downloads/bba10250678e70879823196c946734fc/vta_get_started.py
@@ -67,7 +67,7 @@ env = vta.get_env()
 
 # We'll need the TVM RPC module and the VTA simulator module
 from tvm import rpc
-from tvm.contrib import utils
+from tvm.contrib import util
 from vta.testing import simulator
 
 # We read the Pynq RPC host IP address and port number from the OS environment
@@ -115,7 +115,7 @@ elif env.TARGET == "sim":
 # The last operation is a cast and copy back to DRAM, into results tensor
 # :code:`C`.
 #
-# .. image:: https://raw.githubusercontent.com/uwsampl/web-data/main/vta/tutorial/vadd_dataflow.png
+# .. image:: https://raw.githubusercontent.com/uwsaml/web-data/master/vta/tutorial/vadd_dataflow.png
 #      :align: center
 
 ######################################################################
@@ -320,7 +320,7 @@ my_vadd = vta.build(s, [A, B, C], "ext_dev", env.target_host, name="my_vadd")
 # execution.
 
 # Write the compiled module into an object file.
-temp = utils.tempdir()
+temp = util.tempdir()
 my_vadd.save(temp.relpath("vadd.o"))
 
 # Send the executable over RPC
diff --git a/docs/_downloads/bcb4a24e8acc1ca84214bc8d7fb7954b/tune_conv2d_layer_cuda.ipynb b/docs/_downloads/bcb4a24e8acc1ca84214bc8d7fb7954b/tune_conv2d_layer_cuda.ipynb
index 6960e9b..c19a19c 100644
--- a/docs/_downloads/bcb4a24e8acc1ca84214bc8d7fb7954b/tune_conv2d_layer_cuda.ipynb
+++ b/docs/_downloads/bcb4a24e8acc1ca84214bc8d7fb7954b/tune_conv2d_layer_cuda.ipynb
@@ -15,7 +15,7 @@
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "\n\nAuto-scheduling a convolution layer for GPU\n===========================================\n**Author**: `Lianmin Zheng <https://github.com/merrymercy>`_,             `Chengfan Jia <https://github.com/jcf94/>`_\n\n\nDifferent from the existing `autotvm <tutorials-autotvm-sec>` which relies on \nmanual templates to define the search space, the auto-scheduler does not require any templates.\nUsers only need to write the computation declaration without any schedule commands or tem [...]
+        "\n\nAuto-scheduling a convolution layer for GPU\n===========================================\n**Author**: `Lianmin Zheng <https://github.com/merrymercy>`_,             `Chengfan Jia <https://github.com/jcf94/>`_\n\n\nDifferent from the existing `autotvm <tutorials-autotvm-sec>` which relies on \nmanual templates to define the search space, the auto-scheduler does not require any templates.\nThe auto-scheduler is template-free, so users only need to write the computation declarat [...]
       ]
     },
     {
@@ -26,7 +26,7 @@
       },
       "outputs": [],
       "source": [
-        "import os\n\nimport numpy as np\nimport tvm\nfrom tvm import te, auto_scheduler, topi\nfrom tvm.topi.testing import conv2d_nchw_python"
+        "import numpy as np\nimport tvm\nfrom tvm import te, testing, auto_scheduler, topi\nfrom tvm.topi.testing import conv2d_nchw_python"
       ]
     },
     {
@@ -62,7 +62,7 @@
       },
       "outputs": [],
       "source": [
-        "target = tvm.target.Target(\"cuda\")\n\n# Use the last layer in ResNet-50\nN, H, W, CO, CI, KH, KW, strides, padding = 1, 7, 7, 512, 512, 3, 3, (1, 1), (1, 1)\ntask = auto_scheduler.create_task(conv2d_layer, (N, H, W, CO, CI, KH, KW, strides, padding), target)\n\n# Inspect the computational graph\nprint(task.compute_dag)"
+        "target = tvm.target.Target(\"cuda\")\n\n# the last layer in resnet\nN, H, W, CO, CI, KH, KW, strides, padding = 1, 7, 7, 512, 512, 3, 3, (1, 1), (1, 1)\ntask = auto_scheduler.create_task(conv2d_layer, (N, H, W, CO, CI, KH, KW, strides, padding), target)\n\n# Inspect the computational graph\nprint(task.compute_dag)"
       ]
     },
     {
@@ -80,7 +80,7 @@
       },
       "outputs": [],
       "source": [
-        "log_file = \"conv2d.json\"\nmeasure_ctx = auto_scheduler.LocalRPCMeasureContext(min_repeat_ms=300)\ntune_option = auto_scheduler.TuningOptions(\n    num_measure_trials=10,\n    runner=measure_ctx.runner,\n    measure_callbacks=[auto_scheduler.RecordToFile(log_file)],\n)"
+        "measure_ctx = auto_scheduler.LocalRPCMeasureContext(min_repeat_ms=300)\ntune_option = auto_scheduler.TuningOptions(\n    num_measure_trials=10,\n    runner=measure_ctx.runner,\n    measure_callbacks=[auto_scheduler.RecordToFile(\"conv2d.json\")],\n)"
       ]
     },
     {
@@ -98,7 +98,7 @@
       },
       "outputs": [],
       "source": [
-        "sch, args = auto_scheduler.auto_schedule(task, tuning_options=tune_option)\n\n# Kill the process for measurement\ndel measure_ctx"
+        "sch, args = auto_scheduler.auto_schedule(task, tuning_options=tune_option)"
       ]
     },
     {
@@ -134,7 +134,7 @@
       },
       "outputs": [],
       "source": [
-        "func = tvm.build(sch, args, target)\n\n# Check correctness\ndata_np = np.random.uniform(size=(N, CI, H, W)).astype(np.float32)\nweight_np = np.random.uniform(size=(CO, CI, KH, KW)).astype(np.float32)\nbias_np = np.random.uniform(size=(1, CO, 1, 1)).astype(np.float32)\nconv_np = conv2d_nchw_python(data_np, weight_np, strides, padding)\nout_np = np.maximum(conv_np + bias_np, 0.0)\n\nctx = tvm.gpu()\ndata_tvm = tvm.nd.array(data_np, ctx=ctx)\nweight_tvm = tvm.nd.array(weight_np, ct [...]
+        "func = tvm.build(sch, args, target)\n\n# check correctness\ndata_np = np.random.uniform(size=(N, CI, H, W)).astype(np.float32)\nweight_np = np.random.uniform(size=(CO, CI, KH, KW)).astype(np.float32)\nbias_np = np.random.uniform(size=(1, CO, 1, 1)).astype(np.float32)\nconv_np = conv2d_nchw_python(data_np, weight_np, strides, padding)\nout_np = np.maximum(conv_np + bias_np, 0.0)\n\nctx = tvm.gpu()\ndata_tvm = tvm.nd.array(data_np, ctx=ctx)\nweight_tvm = tvm.nd.array(weight_np, ct [...]
       ]
     },
     {
@@ -159,7 +159,7 @@
       },
       "outputs": [],
       "source": [
-        "# Load the measuremnt record for the best schedule\ninp, res = auto_scheduler.load_best(log_file, task.workload_key)\n\n# Print equivalent python schedule API. This can be used for debugging and\n# learning the behavior of the auto-scheduler.\nprint(\"Equivalent python schedule:\")\nprint(task.compute_dag.print_python_code_from_state(inp.state))\n\n# Rebuild the binary. This shows how you can apply the best schedule from a\n# log file without reruning the search again.\nsch, arg [...]
+        "# Load the measuremnt record for the best schedule\ninp, res = auto_scheduler.load_best(\"conv2d.json\", task.workload_key)\n\n# Print equivalent python schedule API. This can be used for debugging and\n# learning the behavior of the auto-scheduler.\nprint(\"Equivalent python schedule:\")\nprint(task.compute_dag.print_python_code_from_state(inp.state))\n\n# Rebuild the binary. This shows how you can apply the best schedule from a\n# log file without reruning the search again.\ns [...]
       ]
     },
     {
@@ -177,7 +177,7 @@
       },
       "outputs": [],
       "source": [
-        "cost_model = auto_scheduler.XGBModel()\ncost_model.update_from_file(log_file)\nsearch_policy = auto_scheduler.SketchPolicy(\n    task, cost_model, init_search_callbacks=[auto_scheduler.PreloadMeasuredStates(log_file)]\n)\nmeasure_ctx = auto_scheduler.LocalRPCMeasureContext(min_repeat_ms=300)\ntune_option = auto_scheduler.TuningOptions(\n    num_measure_trials=5,\n    runner=measure_ctx.runner,\n    measure_callbacks=[auto_scheduler.RecordToFile(log_file)],\n)\nsch, args = auto_s [...]
+        "log_file = \"conv2d.json\"\ncost_model = auto_scheduler.XGBModel()\ncost_model.update_from_file(log_file)\nsearch_policy = auto_scheduler.SketchPolicy(\n    task, cost_model, init_search_callbacks=[auto_scheduler.PreloadMeasuredStates(log_file)]\n)\ntune_option = auto_scheduler.TuningOptions(\n    num_measure_trials=5,\n    runner=measure_ctx.runner,\n    measure_callbacks=[auto_scheduler.RecordToFile(log_file)],\n)\nsch, args = auto_scheduler.auto_schedule(task, search_policy,  [...]
       ]
     }
   ],
@@ -197,7 +197,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.6.12"
+      "version": "3.6.10"
     }
   },
   "nbformat": 4,
diff --git a/docs/_downloads/beb2188d497d67b66bcfbc2c254dccb7/deploy_model_on_rasp.ipynb b/docs/_downloads/beb2188d497d67b66bcfbc2c254dccb7/deploy_model_on_rasp.ipynb
index b6c4e85..24e2202 100644
--- a/docs/_downloads/beb2188d497d67b66bcfbc2c254dccb7/deploy_model_on_rasp.ipynb
+++ b/docs/_downloads/beb2188d497d67b66bcfbc2c254dccb7/deploy_model_on_rasp.ipynb
@@ -26,7 +26,7 @@
       },
       "outputs": [],
       "source": [
-        "import tvm\nfrom tvm import te\nimport tvm.relay as relay\nfrom tvm import rpc\nfrom tvm.contrib import utils, graph_runtime as runtime\nfrom tvm.contrib.download import download_testdata"
+        "import tvm\nfrom tvm import te\nimport tvm.relay as relay\nfrom tvm import rpc\nfrom tvm.contrib import util, graph_runtime as runtime\nfrom tvm.contrib.download import download_testdata"
       ]
     },
     {
@@ -76,7 +76,7 @@
       },
       "outputs": [],
       "source": [
-        "img_url = \"https://github.com/dmlc/mxnet.js/blob/main/data/cat.png?raw=true\"\nimg_name = \"cat.png\"\nimg_path = download_testdata(img_url, img_name, module=\"data\")\nimage = Image.open(img_path).resize((224, 224))\n\n\ndef transform_image(image):\n    image = np.array(image) - np.array([123.0, 117.0, 104.0])\n    image /= np.array([58.395, 57.12, 57.375])\n    image = image.transpose((2, 0, 1))\n    image = image[np.newaxis, :]\n    return image\n\n\nx = transform_image(image)"
+        "img_url = \"https://github.com/dmlc/mxnet.js/blob/master/data/cat.png?raw=true\"\nimg_name = \"cat.png\"\nimg_path = download_testdata(img_url, img_name, module=\"data\")\nimage = Image.open(img_path).resize((224, 224))\n\n\ndef transform_image(image):\n    image = np.array(image) - np.array([123.0, 117.0, 104.0])\n    image /= np.array([58.395, 57.12, 57.375])\n    image = image.transpose((2, 0, 1))\n    image = image[np.newaxis, :]\n    return image\n\n\nx = transform_image(image)"
       ]
     },
     {
@@ -155,7 +155,7 @@
       },
       "outputs": [],
       "source": [
-        "local_demo = True\n\nif local_demo:\n    target = tvm.target.Target(\"llvm\")\nelse:\n    target = tvm.target.arm_cpu(\"rasp3b\")\n    # The above line is a simple form of\n    # target = tvm.target.Target('llvm -device=arm_cpu -model=bcm2837 -mtriple=armv7l-linux-gnueabihf -mattr=+neon')\n\nwith tvm.transform.PassContext(opt_level=3):\n    lib = relay.build(func, target, params=params)\n\n# After `relay.build`, you will get three return values: graph,\n# library and the new par [...]
+        "local_demo = True\n\nif local_demo:\n    target = tvm.target.Target(\"llvm\")\nelse:\n    target = tvm.target.arm_cpu(\"rasp3b\")\n    # The above line is a simple form of\n    # target = tvm.target.Target('llvm -device=arm_cpu -model=bcm2837 -mtriple=armv7l-linux-gnueabihf -mattr=+neon')\n\nwith tvm.transform.PassContext(opt_level=3):\n    lib = relay.build(func, target, params=params)\n\n# After `relay.build`, you will get three return values: graph,\n# library and the new par [...]
       ]
     },
     {
@@ -193,7 +193,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.6.12"
+      "version": "3.6.10"
     }
   },
   "nbformat": 4,
diff --git a/docs/_downloads/bfe6d0353011c263c40646792b85a5d6/bring_your_own_datatypes.ipynb b/docs/_downloads/bfe6d0353011c263c40646792b85a5d6/bring_your_own_datatypes.ipynb
deleted file mode 100644
index dff4e19..0000000
--- a/docs/_downloads/bfe6d0353011c263c40646792b85a5d6/bring_your_own_datatypes.ipynb
+++ /dev/null
@@ -1,349 +0,0 @@
-{
-  "cells": [
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "collapsed": false
-      },
-      "outputs": [],
-      "source": [
-        "%matplotlib inline"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "\nBring Your Own Datatypes to TVM\n===============================\n**Authors**: `Gus Smith <https://github.com/gussmith23>`_, `Andrew Liu <https://github.com/hypercubestart>`_\n\nIn this tutorial, we will show you how to utilize the Bring Your Own Datatypes framework to use your own custom datatypes in TVM.\nNote that the Bring Your Own Datatypes framework currently only handles **software emulated versions of datatypes**.\nThe framework does not support compiling for custom ac [...]
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "A Simple TVM Program\n--------------------\n\nWe'll begin by writing a simple program in TVM; afterwards, we will re-write it to use custom datatypes.\n\n"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "collapsed": false
-      },
-      "outputs": [],
-      "source": [
-        "import tvm\nfrom tvm import relay\n\n# Our basic program: Z = X + Y\nx = relay.var(\"x\", shape=(3,), dtype=\"float32\")\ny = relay.var(\"y\", shape=(3,), dtype=\"float32\")\nz = x + y\nprogram = relay.Function([x, y], z)\nmodule = tvm.IRModule.from_expr(program)"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "Now, we create random inputs to feed into this program using numpy:\n\n"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "collapsed": false
-      },
-      "outputs": [],
-      "source": [
-        "import numpy as np\n\nnp.random.seed(23)  # for reproducibility\n\nx_input = np.random.rand(3).astype(\"float32\")\ny_input = np.random.rand(3).astype(\"float32\")\nprint(\"x: {}\".format(x_input))\nprint(\"y: {}\".format(y_input))"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "Finally, we're ready to run the program:\n\n"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "collapsed": false
-      },
-      "outputs": [],
-      "source": [
-        "ex = relay.create_executor(mod=module)\n\nz_output = ex.evaluate()(x_input, y_input)\nprint(\"z: {}\".format(z_output))"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "Adding Custom Datatypes\n-----------------------\nNow, we will do the same, but we will use a custom datatype for our intermediate computation.\n\nWe use the same input variables ``x`` and ``y`` as above, but before adding ``x + y``, we first cast both ``x`` and ``y`` to a custom datatype via the ``relay.cast(...)`` call.\n\nNote how we specify the custom datatype: we indicate it using the special ``custom[...]`` syntax.\nAdditionally, note the \"32\" after the datatype: this is [...]
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "collapsed": false
-      },
-      "outputs": [],
-      "source": [
-        "try:\n    with tvm.transform.PassContext(config={\"tir.disable_vectorize\": True}):\n        x_myfloat = relay.cast(x, dtype=\"custom[myfloat]32\")\n        y_myfloat = relay.cast(y, dtype=\"custom[myfloat]32\")\n        z_myfloat = x_myfloat + y_myfloat\n        z = relay.cast(z_myfloat, dtype=\"float32\")\nexcept tvm.TVMError as e:\n    # Print last line of error\n    print(str(e).split(\"\\n\")[-1])"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "Trying to generate this program throws an error from TVM.\nTVM does not know how to handle any custom datatype out of the box!\nWe first have to register the custom type with TVM, giving it a name and a type code:\n\n"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "collapsed": false
-      },
-      "outputs": [],
-      "source": [
-        "tvm.target.datatype.register(\"myfloat\", 150)"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "Note that the type code, 150, is currently chosen manually by the user.\nSee ``TVMTypeCode::kCustomBegin`` in `include/tvm/runtime/c_runtime_api.h <https://github.com/apache/incubator-tvm/blob/main/include/tvm/runtime/data_type.h>`_.\nNow we can generate our program again:\n\n"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "collapsed": false
-      },
-      "outputs": [],
-      "source": [
-        "x_myfloat = relay.cast(x, dtype=\"custom[myfloat]32\")\ny_myfloat = relay.cast(y, dtype=\"custom[myfloat]32\")\nz_myfloat = x_myfloat + y_myfloat\nz = relay.cast(z_myfloat, dtype=\"float32\")\nprogram = relay.Function([x, y], z)\nmodule = tvm.IRModule.from_expr(program)\nmodule = relay.transform.InferType()(module)"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "Now we have a Relay program that uses myfloat!\n\n"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "collapsed": false
-      },
-      "outputs": [],
-      "source": [
-        "print(program)"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "Now that we can express our program without errors, let's try running it!\n\n"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "collapsed": false
-      },
-      "outputs": [],
-      "source": [
-        "try:\n    with tvm.transform.PassContext(config={\"tir.disable_vectorize\": True}):\n        ex = relay.create_executor(\"graph\", mod=module)\n        z_output_myfloat = ex.evaluate()(x_input, y_input)\n        print(\"z: {}\".format(y_myfloat))\nexcept tvm.TVMError as e:\n    # Print last line of error\n    print(str(e).split(\"\\n\")[-1])"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "Now, trying to compile this program throws an error.\nLet's dissect this error.\n\nThe error is occurring during the process of lowering the custom datatype code to code that TVM can compile and run.\nTVM is telling us that it cannot find a *lowering function* for the ``Cast`` operation, when casting from source type 2 (``float``, in TVM), to destination type 150 (our custom datatype).\nWhen lowering custom datatypes, if TVM encounters an operation over a custom datatype, it loo [...]
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "collapsed": false
-      },
-      "outputs": [],
-      "source": [
-        "tvm.target.datatype.register_op(\n    tvm.target.datatype.create_lower_func(\n        {\n            (32, 32): \"FloatToCustom32\",  # cast from float32 to myfloat32\n        }\n    ),\n    \"Cast\",\n    \"llvm\",\n    \"float\",\n    \"myfloat\",\n)"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "The ``register_op(...)`` call takes a lowering function, and a number of parameters which specify exactly the operation which should be lowered with the provided lowering function.\nIn this case, the arguments we pass specify that this lowering function is for lowering a ``Cast`` from ``float`` to ``myfloat`` for target ``\"llvm\"``.\n\nThe lowering function passed into this call is very general: it should take an operation of the specified type (in this case, `Cast`) and return [...]
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "collapsed": false
-      },
-      "outputs": [],
-      "source": [
-        "# We can now re-try running the program:\ntry:\n    with tvm.transform.PassContext(config={\"tir.disable_vectorize\": True}):\n        ex = relay.create_executor(\"graph\", mod=module)\n        z_output_myfloat = ex.evaluate()(x_input, y_input)\n        print(\"z: {}\".format(z_output_myfloat))\nexcept tvm.TVMError as e:\n    # Print last line of error\n    print(str(e).split(\"\\n\")[-1])"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "This new error tells us that the ``Add`` lowering function is not found, which is good news, as it's no longer complaining about the ``Cast``!\nWe know what to do from here: we just need to register the lowering functions for the other operations in our program.\n\nNote that for ``Add``, ``create_lower_func`` takes in a dict where the key is an integer.\nFor ``Cast`` operations, we require a 2-tuple to specify the ``src_bit_length`` and the ``dest_bit_length``,\nwhile for all ot [...]
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "collapsed": false
-      },
-      "outputs": [],
-      "source": [
-        "tvm.target.datatype.register_op(\n    tvm.target.datatype.create_lower_func({32: \"Custom32Add\"}),\n    \"Add\",\n    \"llvm\",\n    \"myfloat\",\n)\ntvm.target.datatype.register_op(\n    tvm.target.datatype.create_lower_func({(32, 32): \"Custom32ToFloat\"}),\n    \"Cast\",\n    \"llvm\",\n    \"myfloat\",\n    \"float\",\n)\n\n# Now, we can run our program without errors.\nwith tvm.transform.PassContext(config={\"tir.disable_vectorize\": True}):\n    compiled = ex.evaluate(pro [...]
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "Running Models With Custom Datatypes\n------------------------------------\n\nWe will first choose the model which we would like to run with myfloat.\nIn this case we use `Mobilenet <https://arxiv.org/abs/1704.04861>`_.\nWe choose Mobilenet due to its small size.\nIn this alpha state of the Bring Your Own Datatypes framework, we have not implemented any software optimizations for running software emulations of custom datatypes; the result is poor performance due to many calls in [...]
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "collapsed": false
-      },
-      "outputs": [],
-      "source": [
-        "def get_mobilenet():\n    dshape = (1, 3, 224, 224)\n    from mxnet.gluon.model_zoo.vision import get_model\n\n    block = get_model(\"mobilenet0.25\", pretrained=True)\n    shape_dict = {\"data\": dshape}\n    return relay.frontend.from_mxnet(block, shape_dict)\n\n\ndef get_cat_image():\n    from tvm.contrib.download import download_testdata\n    from PIL import Image\n\n    url = \"https://gist.githubusercontent.com/zhreshold/bcda4716699ac97ea44f791c24310193/raw/fa7ef0e9c9a5da [...]
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "It's easy to execute MobileNet with native TVM:\n\n"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "collapsed": false
-      },
-      "outputs": [],
-      "source": [
-        "ex = tvm.relay.create_executor(\"graph\", mod=module)\ninput = get_cat_image()\nresult = ex.evaluate()(input, **params).asnumpy()\n# print first 10 elements\nprint(result.flatten()[:10])"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "Now, we would like to change the model to use myfloat internally. To do so, we need to convert the network. To do this, we first define a function which will help us convert tensors:\n\n"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "collapsed": false
-      },
-      "outputs": [],
-      "source": [
-        "def convert_ndarray(dst_dtype, array):\n    \"\"\"Converts an NDArray into the specified datatype\"\"\"\n    x = relay.var(\"x\", shape=array.shape, dtype=str(array.dtype))\n    cast = relay.Function([x], x.astype(dst_dtype))\n    with tvm.transform.PassContext(config={\"tir.disable_vectorize\": True}):\n        return relay.create_executor(\"graph\").evaluate(cast)(array)"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "Now, to actually convert the entire network, we have written `a pass in Relay <https://github.com/gussmith23/tvm/blob/ea174c01c54a2529e19ca71e125f5884e728da6e/python/tvm/relay/frontend/change_datatype.py#L21>`_ which simply converts all nodes within the model to use the new datatype.\n\n"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "collapsed": false
-      },
-      "outputs": [],
-      "source": [
-        "from tvm.relay.frontend.change_datatype import ChangeDatatype\n\nsrc_dtype = \"float32\"\ndst_dtype = \"custom[myfloat]32\"\n\nmodule = relay.transform.InferType()(module)\n\n# Currently, custom datatypes only work if you run simplify_inference beforehand\nmodule = tvm.relay.transform.SimplifyInference()(module)\n\n# Run type inference before changing datatype\nmodule = tvm.relay.transform.InferType()(module)\n\n# Change datatype from float to myfloat and re-infer types\ncdtype  [...]
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "When we attempt to run the model, we get a familiar error telling us that more funcions need to be registerd for myfloat.\n\nBecause this is a neural network, many more operations are required.\nHere, we register all the needed functions:\n\n"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "collapsed": false
-      },
-      "outputs": [],
-      "source": [
-        "tvm.target.datatype.register_op(\n    tvm.target.datatype.create_lower_func({32: \"FloatToCustom32\"}),\n    \"FloatImm\",\n    \"llvm\",\n    \"myfloat\",\n)\n\ntvm.target.datatype.register_op(\n    tvm.target.datatype.lower_ite, \"Call\", \"llvm\", \"myfloat\", intrinsic_name=\"tir.if_then_else\"\n)\n\ntvm.target.datatype.register_op(\n    tvm.target.datatype.lower_call_pure_extern,\n    \"Call\",\n    \"llvm\",\n    \"myfloat\",\n    intrinsic_name=\"tir.call_pure_extern\",\n [...]
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "Note we are making use of two new functions: ``register_min_func`` and ``create_min_lower_func``.\n\n``register_min_func`` takes in an integer ``num_bits`` for the bit length, and should return an operation\nrepresenting the minimum finite representable value for the custom data type with the specified bit length.\n\nSimilar to ``register_op`` and ``create_lower_func``, the ``create_min_lower_func`` handles the general case\nwhere the minimum representable custom datatype value  [...]
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "collapsed": false
-      },
-      "outputs": [],
-      "source": [
-        "# Vectorization is not implemented with custom datatypes.\nwith tvm.transform.PassContext(config={\"tir.disable_vectorize\": True}):\n    result_myfloat = ex.evaluate(expr)(input, **params)\n    result_myfloat = convert_ndarray(src_dtype, result_myfloat).asnumpy()\n    # print first 10 elements\n    print(result_myfloat.flatten()[:10])\n\n# Again, note that the output using 32-bit myfloat exactly the same as 32-bit floats,\n# because myfloat is exactly a float!\nnp.testing.asser [...]
-      ]
-    }
-  ],
-  "metadata": {
-    "kernelspec": {
-      "display_name": "Python 3",
-      "language": "python",
-      "name": "python3"
-    },
-    "language_info": {
-      "codemirror_mode": {
-        "name": "ipython",
-        "version": 3
-      },
-      "file_extension": ".py",
-      "mimetype": "text/x-python",
-      "name": "python",
-      "nbconvert_exporter": "python",
-      "pygments_lexer": "ipython3",
-      "version": "3.6.12"
-    }
-  },
-  "nbformat": 4,
-  "nbformat_minor": 0
-}
\ No newline at end of file
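
The deleted notebook above exercises the ``tvm.target.datatype`` registration API. As a quick reference, here is a minimal, self-contained sketch of that flow, using only the calls visible in the cells above; the type code 150 is an arbitrary choice that simply must not collide with TVM's built-in type codes:

.. code-block:: python

    import tvm

    # Make "myfloat" known to TVM under an unused custom type code.
    tvm.target.datatype.register("myfloat", 150)

    # Lower additions of two 32-bit myfloat values to a call into the
    # external emulation library function "Custom32Add".
    tvm.target.datatype.register_op(
        tvm.target.datatype.create_lower_func({32: "Custom32Add"}),
        "Add",
        "llvm",
        "myfloat",
    )

    # Lower casts from 32-bit myfloat back to 32-bit float via
    # "Custom32ToFloat".
    tvm.target.datatype.register_op(
        tvm.target.datatype.create_lower_func({(32, 32): "Custom32ToFloat"}),
        "Cast",
        "llvm",
        "myfloat",
        "float",
    )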
diff --git a/docs/_downloads/c316f4b828b813e437473ee752bacdf9/build_gcn.ipynb b/docs/_downloads/c316f4b828b813e437473ee752bacdf9/build_gcn.ipynb
index 9da0446..7918d46 100644
--- a/docs/_downloads/c316f4b828b813e437473ee752bacdf9/build_gcn.ipynb
+++ b/docs/_downloads/c316f4b828b813e437473ee752bacdf9/build_gcn.ipynb
@@ -141,7 +141,7 @@
       },
       "outputs": [],
       "source": [
-        "import numpy as np\nimport networkx as nx\n\n\ndef prepare_params(g, data):\n    params = {}\n    params[\"infeats\"] = data.features.numpy().astype(\n        \"float32\"\n    )  # Only support float32 as feature for now\n\n    # Generate adjacency matrix\n    adjacency = nx.to_scipy_sparse_matrix(g)\n    params[\"g_data\"] = adjacency.data.astype(\"float32\")\n    params[\"indices\"] = adjacency.indices.astype(\"int32\")\n    params[\"indptr\"] = adjacency.indptr.astype(\"int32 [...]
+        "import numpy as np\nimport networkx as nx\n\n\ndef prepare_params(g, data):\n    params = {}\n    params[\"infeats\"] = data.features.astype(\"float32\")  # Only support float32 as feature for now\n\n    # Generate adjacency matrix\n    adjacency = nx.to_scipy_sparse_matrix(g)\n    params[\"g_data\"] = adjacency.data.astype(\"float32\")\n    params[\"indices\"] = adjacency.indices.astype(\"int32\")\n    params[\"indptr\"] = adjacency.indptr.astype(\"int32\")\n\n    # Normalizati [...]
       ]
     },
     {
@@ -195,7 +195,7 @@
       },
       "outputs": [],
       "source": [
-        "m.run()\nlogits_tvm = m.get_output(0).asnumpy()\nprint(\"Print the first five outputs from TVM execution\\n\", logits_tvm[:5])\n\nlabels = data.labels\ntest_mask = data.test_mask\n\nacc = evaluate(data, logits_tvm)\nprint(\"Test accuracy of TVM results: {:.2%}\".format(acc))\n\nimport tvm.testing\n\n# Verify the results with the DGL model\ntvm.testing.assert_allclose(logits_torch, logits_tvm, atol=1e-3)"
+        "m.run()\nlogits_tvm = m.get_output(0).asnumpy()\nprint(\"Print the first five outputs from TVM execution\\n\", logits_tvm[:5])\n\nlabels = data.labels\ntest_mask = data.test_mask\n\nacc = evaluate(data, logits_tvm)\nprint(\"Test accuracy of TVM results: {:.2%}\".format(acc))\n\n# Verify the results with the DGL model\ntvm.testing.assert_allclose(logits_torch, logits_tvm, atol=1e-3)"
       ]
     }
   ],
@@ -215,7 +215,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.6.12"
+      "version": "3.6.10"
     }
   },
   "nbformat": 4,
diff --git a/docs/_downloads/c49dbffd05b18e5db4049ffe6480aca2/deploy_object_detection_pytorch.ipynb b/docs/_downloads/c49dbffd05b18e5db4049ffe6480aca2/deploy_object_detection_pytorch.ipynb
index 3bba116..adeb2b3 100644
--- a/docs/_downloads/c49dbffd05b18e5db4049ffe6480aca2/deploy_object_detection_pytorch.ipynb
+++ b/docs/_downloads/c49dbffd05b18e5db4049ffe6480aca2/deploy_object_detection_pytorch.ipynb
@@ -15,7 +15,7 @@
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "\nCompile PyTorch Object Detection Models\n=======================================\nThis article is an introductory tutorial to deploy PyTorch object\ndetection models with Relay VM.\n\nFor us to begin with, PyTorch should be installed.\nTorchVision is also required since we will be using it as our model zoo.\n\nA quick solution is to install via pip\n\n.. code-block:: bash\n\n    pip install torch==1.7.0\n    pip install torchvision==0.8.1\n\nor please refer to official site\nh [...]
+        "\nCompile PyTorch Object Detection Models\n=======================================\nThis article is an introductory tutorial to deploy PyTorch object\ndetection models with Relay VM.\n\nFor us to begin with, PyTorch should be installed.\nTorchVision is also required since we will be using it as our model zoo.\n\nA quick solution is to install via pip\n\n.. code-block:: bash\n\n    pip install torch==1.4.0\n    pip install torchvision==0.5.0\n\nor please refer to official site\nh [...]
       ]
     },
     {
@@ -154,7 +154,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.6.12"
+      "version": "3.6.10"
     }
   },
   "nbformat": 4,
diff --git a/docs/_downloads/c4d683ae80a4b8a011286f239422638a/deploy_classification.ipynb b/docs/_downloads/c4d683ae80a4b8a011286f239422638a/deploy_classification.ipynb
index 21d8062..410e2bb 100644
--- a/docs/_downloads/c4d683ae80a4b8a011286f239422638a/deploy_classification.ipynb
+++ b/docs/_downloads/c4d683ae80a4b8a011286f239422638a/deploy_classification.ipynb
@@ -33,7 +33,7 @@
       },
       "outputs": [],
       "source": [
-        "from __future__ import absolute_import, print_function\n\nimport argparse, json, os, requests, sys, time\nfrom io import BytesIO\nfrom os.path import join, isfile\nfrom PIL import Image\n\nfrom mxnet.gluon.model_zoo import vision\nimport numpy as np\nfrom matplotlib import pyplot as plt\n\nimport tvm\nfrom tvm import te\nfrom tvm import rpc, autotvm, relay\nfrom tvm.contrib import graph_runtime, utils, download\nfrom tvm.contrib.debugger import debug_runtime\nfrom tvm.relay impo [...]
+        "from __future__ import absolute_import, print_function\n\nimport argparse, json, os, requests, sys, time\nfrom io import BytesIO\nfrom os.path import join, isfile\nfrom PIL import Image\n\nfrom mxnet.gluon.model_zoo import vision\nimport numpy as np\nfrom matplotlib import pyplot as plt\n\nimport tvm\nfrom tvm import te\nfrom tvm import rpc, autotvm, relay\nfrom tvm.contrib import graph_runtime, util, download\nfrom tvm.contrib.debugger import debug_runtime\nfrom tvm.relay impor [...]
       ]
     },
     {
@@ -87,7 +87,7 @@
       },
       "outputs": [],
       "source": [
-        "# Load pre-configured AutoTVM schedules\nwith autotvm.tophub.context(target):\n\n    # Populate the shape and data type dictionary for ImageNet classifier input\n    dtype_dict = {\"data\": \"float32\"}\n    shape_dict = {\"data\": (env.BATCH, 3, 224, 224)}\n\n    # Get off the shelf gluon model, and convert to relay\n    gluon_model = vision.get_model(model, pretrained=True)\n\n    # Measure build start time\n    build_start = time.time()\n\n    # Start front end compilation\n  [...]
+        "# Load pre-configured AutoTVM schedules\nwith autotvm.tophub.context(target):\n\n    # Populate the shape and data type dictionary for ImageNet classifier input\n    dtype_dict = {\"data\": \"float32\"}\n    shape_dict = {\"data\": (env.BATCH, 3, 224, 224)}\n\n    # Get off the shelf gluon model, and convert to relay\n    gluon_model = vision.get_model(model, pretrained=True)\n\n    # Measure build start time\n    build_start = time.time()\n\n    # Start front end compilation\n  [...]
       ]
     },
     {
@@ -105,7 +105,7 @@
       },
       "outputs": [],
       "source": [
-        "# Download ImageNet categories\ncateg_url = \"https://github.com/uwsampl/web-data/raw/main/vta/models/\"\ncateg_fn = \"synset.txt\"\ndownload.download(join(categ_url, categ_fn), categ_fn)\nsynset = eval(open(categ_fn).read())\n\n# Download test image\nimage_url = \"https://homes.cs.washington.edu/~moreau/media/vta/cat.jpg\"\nimage_fn = \"cat.png\"\ndownload.download(image_url, image_fn)\n\n# Prepare test image for inference\nimage = Image.open(image_fn).resize((224, 224))\nplt.i [...]
+        "# Download ImageNet categories\ncateg_url = \"https://github.com/uwsaml/web-data/raw/master/vta/models/\"\ncateg_fn = \"synset.txt\"\ndownload.download(join(categ_url, categ_fn), categ_fn)\nsynset = eval(open(categ_fn).read())\n\n# Download test image\nimage_url = \"https://homes.cs.washington.edu/~moreau/media/vta/cat.jpg\"\nimage_fn = \"cat.png\"\ndownload.download(image_url, image_fn)\n\n# Prepare test image for inference\nimage = Image.open(image_fn).resize((224, 224))\nplt. [...]
       ]
     }
   ],
@@ -125,7 +125,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.6.12"
+      "version": "3.6.10"
     }
   },
   "nbformat": 4,
diff --git a/docs/_downloads/c634482de86bbc712f15077b3ec3a92f/convolution_opt.ipynb b/docs/_downloads/c634482de86bbc712f15077b3ec3a92f/convolution_opt.ipynb
index af8d19c..7ec4b29 100644
--- a/docs/_downloads/c634482de86bbc712f15077b3ec3a92f/convolution_opt.ipynb
+++ b/docs/_downloads/c634482de86bbc712f15077b3ec3a92f/convolution_opt.ipynb
@@ -33,14 +33,14 @@
       },
       "outputs": [],
       "source": [
-        "from __future__ import absolute_import, print_function\n\nimport os\nimport tvm\nimport tvm.testing\nfrom tvm import te\nimport vta\nimport numpy as np\n\nfrom tvm import rpc\nfrom tvm.contrib import utils\nfrom vta.testing import simulator\n\n# Load VTA parameters from the 3rdparty/vta-hw/config/vta_config.json file\nenv = vta.get_env()\n\n# We read the Pynq RPC host IP address and port number from the OS environment\nhost = os.environ.get(\"VTA_RPC_HOST\", \"192.168.2.99\")\np [...]
+        "from __future__ import absolute_import, print_function\n\nimport os\nimport tvm\nimport tvm.testing\nfrom tvm import te\nimport vta\nimport numpy as np\n\nfrom tvm import rpc\nfrom tvm.contrib import util\nfrom vta.testing import simulator\n\n# Load VTA parameters from the 3rdparty/vta-hw/config/vta_config.json file\nenv = vta.get_env()\n\n# We read the Pynq RPC host IP address and port number from the OS environment\nhost = os.environ.get(\"VTA_RPC_HOST\", \"192.168.2.99\")\npo [...]
       ]
     },
     {
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "Computation Declaration\n-----------------------\nAs a first step, we need to describe our 2D convolution computation\nin NCHW format.\n\nWe define the 2D convolution shape by the batch size,\nspatial dimensions, input channels, output channels, kernel dimensions,\nkernel dimensions, padding dimensions, and stride dimensions.\n\nWe pick the shape of the 9th convolutional layer of the ResNet-18\narchitecture as our convolution workload parameters.\n\nWe've added extra operators t [...]
+        "Computation Declaration\n-----------------------\nAs a first step, we need to describe our 2D convolution computation\nin NCHW format.\n\nWe define the 2D convolution shape by the batch size,\nspatial dimensions, input channels, output channels, kernel dimensions,\nkernel dimensions, padding dimensions, and stride dimensions.\n\nWe pick the shape of the 9th convolutional layer of the ResNet-18\narchitecture as our convolution workload parameters.\n\nWe've added extra operators t [...]
       ]
     },
     {
@@ -94,7 +94,7 @@
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "Virtual Threading\n~~~~~~~~~~~~~~~~~\nVirtual threading is a mechanism that increases task-level pipeline\nparallelism in the VTA hardware design.\nPut it another way, it increases compute resource utilization by hiding\nmemory access latency.\n\nIn the implementation below, virtual threading distributes work across two\nthreads split along the output channel axis.\nWe show how work is split when computing the 2D convolution in the figure\nbelow.\n\n![](https://raw.githubusercon [...]
+        "Virtual Threading\n~~~~~~~~~~~~~~~~~\nVirtual threading is a mechanism that increases task-level pipeline\nparallelism in the VTA hardware design.\nPut it another way, it increases compute resource utilization by hiding\nmemory access latency.\n\nIn the implementation below, virtual threading distributes work across two\nthreads split along the output channel axis.\nWe show how work is split when computing the 2D convolution in the figure\nbelow.\n\n![](https://raw.githubusercon [...]
       ]
     },
     {
@@ -159,7 +159,7 @@
       },
       "outputs": [],
       "source": [
-        "# This library facilitates 2D convolution testing\nfrom tvm.topi.testing import conv2d_nchw_python\n\n# Compile the TVM module\nmy_conv = vta.build(s, [data, kernel, res], \"ext_dev\", env.target_host, name=\"my_conv\")\ntemp = utils.tempdir()\nmy_conv.save(temp.relpath(\"conv2d.o\"))\nremote.upload(temp.relpath(\"conv2d.o\"))\nf = remote.load_module(\"conv2d.o\")\n\n# Get the remote device context\nctx = remote.ext_dev(0)\n\n# Initialize the data and kernel arrays randomly in t [...]
+        "# This library facilitates 2D convolution testing\nfrom tvm.topi.testing import conv2d_nchw_python\n\n# Compile the TVM module\nmy_conv = vta.build(s, [data, kernel, res], \"ext_dev\", env.target_host, name=\"my_conv\")\ntemp = util.tempdir()\nmy_conv.save(temp.relpath(\"conv2d.o\"))\nremote.upload(temp.relpath(\"conv2d.o\"))\nf = remote.load_module(\"conv2d.o\")\n\n# Get the remote device context\nctx = remote.ext_dev(0)\n\n# Initialize the data and kernel arrays randomly in th [...]
       ]
     },
     {
@@ -186,7 +186,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.6.12"
+      "version": "3.6.10"
     }
   },
   "nbformat": 4,
diff --git a/docs/_downloads/ca08de6c440df207921d807474d26f06/deploy_ssd_gluoncv.py b/docs/_downloads/ca08de6c440df207921d807474d26f06/deploy_ssd_gluoncv.py
index f1f1bbb..d874487 100644
--- a/docs/_downloads/ca08de6c440df207921d807474d26f06/deploy_ssd_gluoncv.py
+++ b/docs/_downloads/ca08de6c440df207921d807474d26f06/deploy_ssd_gluoncv.py
@@ -73,7 +73,7 @@ dshape = (1, 3, 512, 512)
 # Download and pre-process demo image
 
 im_fname = download_testdata(
-    "https://github.com/dmlc/web-data/blob/main/" + "gluoncv/detection/street_small.jpg?raw=true",
+    "https://github.com/dmlc/web-data/blob/master/" + "gluoncv/detection/street_small.jpg?raw=true",
     "street_small.jpg",
     module="data",
 )
diff --git a/docs/_downloads/cbbaae24e5c894dda1ebeded6fded2c3/convolution_opt.py b/docs/_downloads/cbbaae24e5c894dda1ebeded6fded2c3/convolution_opt.py
index 185b71f..3f079e8 100644
--- a/docs/_downloads/cbbaae24e5c894dda1ebeded6fded2c3/convolution_opt.py
+++ b/docs/_downloads/cbbaae24e5c894dda1ebeded6fded2c3/convolution_opt.py
@@ -45,7 +45,7 @@ import vta
 import numpy as np
 
 from tvm import rpc
-from tvm.contrib import utils
+from tvm.contrib import util
 from vta.testing import simulator
 
 # Load VTA parameters from the 3rdparty/vta-hw/config/vta_config.json file
@@ -93,7 +93,7 @@ elif env.TARGET in ["sim", "tsim"]:
 # convolution followed by a rectified linear activation.
 # We describe the TVM dataflow graph of the 2D convolution layer below:
 #
-# .. image:: https://raw.githubusercontent.com/uwsampl/web-data/main/vta/tutorial/conv2d_dataflow.png
+# .. image:: https://raw.githubusercontent.com/uwsaml/web-data/master/vta/tutorial/conv2d_dataflow.png
 #      :align: center
 #
 # This computation is intentionally too large to fit onto VTA's on-chip
@@ -120,7 +120,7 @@ elif env.TARGET in ["sim", "tsim"]:
 #   loaded from DRAM into VTA's SRAM, following a 2D strided and padded memory
 #   read.
 #
-#   .. image:: https://raw.githubusercontent.com/uwsampl/web-data/main/vta/tutorial/padding.png
+#   .. image:: https://raw.githubusercontent.com/uwsaml/web-data/master/vta/tutorial/padding.png
 #        :align: center
 #        :width: 480px
 
@@ -292,7 +292,7 @@ s[res_conv].reorder(ic_out, b_inn, oc_inn, y_inn, ic_inn, dy, dx, x_inn, b_tns,
 # We show how work is split when computing the 2D convolution in the figure
 # below.
 #
-# .. image:: https://raw.githubusercontent.com/uwsampl/web-data/main/vta/tutorial/virtual_threading.png
+# .. image:: https://raw.githubusercontent.com/uwsaml/web-data/master/vta/tutorial/virtual_threading.png
 #      :align: center
 #      :width: 480px
 
@@ -370,7 +370,7 @@ from tvm.topi.testing import conv2d_nchw_python
 
 # Compile the TVM module
 my_conv = vta.build(s, [data, kernel, res], "ext_dev", env.target_host, name="my_conv")
-temp = utils.tempdir()
+temp = util.tempdir()
 my_conv.save(temp.relpath("conv2d.o"))
 remote.upload(temp.relpath("conv2d.o"))
 f = remote.load_module("conv2d.o")
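
Both sides of this diff rely on the same save-and-upload idiom for getting a compiled module onto the board; only the ``util``/``utils`` module name differs. A condensed sketch, assuming ``remote`` is an established ``tvm.rpc`` session and ``my_conv`` the module built above:

.. code-block:: python

    from tvm.contrib import util  # named "utils" in newer TVM releases

    # Save the compiled module to a temporary object file,
    # ship it to the device over RPC, and load it back as a module.
    temp = util.tempdir()
    my_conv.save(temp.relpath("conv2d.o"))
    remote.upload(temp.relpath("conv2d.o"))
    f = remote.load_module("conv2d.o")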
diff --git a/docs/_downloads/cd8ac9c09164cc04dd9ecd131c536680/micro_tflite.ipynb b/docs/_downloads/cd8ac9c09164cc04dd9ecd131c536680/micro_tflite.ipynb
index 5b26c9e..8a21597 100644
--- a/docs/_downloads/cd8ac9c09164cc04dd9ecd131c536680/micro_tflite.ipynb
+++ b/docs/_downloads/cd8ac9c09164cc04dd9ecd131c536680/micro_tflite.ipynb
@@ -26,7 +26,7 @@
       },
       "outputs": [],
       "source": [
-        "# %%\n# Setup\n# -----\n#\n# To get started, TFLite package needs to be installed as prerequisite.\n#\n# install tflite\n#\n# .. code-block:: bash\n#\n#   pip install tflite=2.1.0 --user\n#\n# or you could generate TFLite package yourself. The steps are the following:\n#\n#   Get the flatc compiler.\n#   Please refer to https://github.com/google/flatbuffers for details\n#   and make sure it is properly installed.\n#\n# .. code-block:: bash\n#\n#   flatc --version\n#\n# Get the T [...]
+        "# %%\n# Setup\n# -----\n#\n# To get started, TFLite package needs to be installed as prerequisite.\n#\n# install tflite\n#\n# .. code-block:: bash\n#\n#   pip install tflite=2.1.0 --user\n#\n# or you could generate TFLite package yourself. The steps are the following:\n#\n#   Get the flatc compiler.\n#   Please refer to https://github.com/google/flatbuffers for details\n#   and make sure it is properly installed.\n#\n# .. code-block:: bash\n#\n#   flatc --version\n#\n# Get the T [...]
       ]
     },
     {
@@ -136,7 +136,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.6.12"
+      "version": "3.6.10"
     }
   },
   "nbformat": 4,
diff --git a/docs/_downloads/cfc40d6a8f25c9fce268b6a4f277d915/cross_compilation_and_rpc.py b/docs/_downloads/cfc40d6a8f25c9fce268b6a4f277d915/cross_compilation_and_rpc.py
index 69284c0..572ebb8 100644
--- a/docs/_downloads/cfc40d6a8f25c9fce268b6a4f277d915/cross_compilation_and_rpc.py
+++ b/docs/_downloads/cfc40d6a8f25c9fce268b6a4f277d915/cross_compilation_and_rpc.py
@@ -98,7 +98,7 @@ import numpy as np
 import tvm
 from tvm import te
 from tvm import rpc
-from tvm.contrib import utils
+from tvm.contrib import util
 
 n = tvm.runtime.convert(1024)
 A = te.placeholder((n,), name="A")
@@ -120,7 +120,7 @@ else:
 
 func = tvm.build(s, [A, B], target=target, name="add_one")
 # save the lib at a local temp folder
-temp = utils.tempdir()
+temp = util.tempdir()
 path = temp.relpath("lib.tar")
 func.export_library(path)
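
For context, the remote half of this flow follows the standard ``tvm.rpc`` API: connect, upload the cross-compiled library saved above, and load it as a runtime module. A sketch, assuming ``host`` holds the device address and an RPC server listens on port 9090:

.. code-block:: python

    from tvm import rpc

    remote = rpc.connect(host, 9090)  # host/port are deployment-specific
    remote.upload(path)               # path = temp.relpath("lib.tar") from above
    func = remote.load_module("lib.tar")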
 
diff --git a/docs/_downloads/d37aecc420f8b90ce29faee4df5d0bcd/matrix_multiply.ipynb b/docs/_downloads/d37aecc420f8b90ce29faee4df5d0bcd/matrix_multiply.ipynb
index 54cb9b1..71895d9 100644
--- a/docs/_downloads/d37aecc420f8b90ce29faee4df5d0bcd/matrix_multiply.ipynb
+++ b/docs/_downloads/d37aecc420f8b90ce29faee4df5d0bcd/matrix_multiply.ipynb
@@ -33,14 +33,14 @@
       },
       "outputs": [],
       "source": [
-        "from __future__ import absolute_import, print_function\n\nimport os\nimport tvm\nfrom tvm import te\nimport vta\nimport numpy as np\nfrom tvm import rpc\nfrom tvm.contrib import utils\nfrom vta.testing import simulator\n\n# Load VTA parameters from the 3rdparty/vta-hw/config/vta_config.json file\nenv = vta.get_env()\n\n# We read the Pynq RPC host IP address and port number from the OS environment\nhost = os.environ.get(\"VTA_RPC_HOST\", \"192.168.2.99\")\nport = int(os.environ.g [...]
+        "from __future__ import absolute_import, print_function\n\nimport os\nimport tvm\nfrom tvm import te\nimport vta\nimport numpy as np\nfrom tvm import rpc\nfrom tvm.contrib import util\nfrom vta.testing import simulator\n\n# Load VTA parameters from the 3rdparty/vta-hw/config/vta_config.json file\nenv = vta.get_env()\n\n# We read the Pynq RPC host IP address and port number from the OS environment\nhost = os.environ.get(\"VTA_RPC_HOST\", \"192.168.2.99\")\nport = int(os.environ.ge [...]
       ]
     },
     {
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "Computation Declaration\n-----------------------\nIn this example we describe a simple matrix multiplication addition, which\nrequires multiple computation stages, as shown in the dataflow diagram below.\nFirst we describe the input tensors :code:`A` and :code:`B` that are living\nin main memory.\nSecond, we need to declare intermediate tensors :code:`A_buf` and\n:code:`B_buf`, which will live in VTA's on-chip buffers.\nHaving this extra computational stage allows us to explicit [...]
+        "Computation Declaration\n-----------------------\nIn this example we describe a simple matrix multiplication addition, which\nrequires multiple computation stages, as shown in the dataflow diagram below.\nFirst we describe the input tensors :code:`A` and :code:`B` that are living\nin main memory.\nSecond, we need to declare intermediate tensors :code:`A_buf` and\n:code:`B_buf`, which will live in VTA's on-chip buffers.\nHaving this extra computational stage allows us to explicit [...]
       ]
     },
     {
@@ -54,7 +54,7 @@
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "<div class=\"alert alert-info\"><h4>Note</h4><p>**Data Tiling**\n\n  One source of complexity when targeting accelerators is to make sure\n  that the data layout matches the layout imposed by the accelerator design.\n  VTA is designed around a *tensor core* that performs, one matrix-matrix\n  operation per cycle between an activation matrix and a weight matrix,\n  adding the result matrix to an accumulator matrix, as shown in the\n  figure below.\n\n  .. image:: https://raw.gith [...]
+        "<div class=\"alert alert-info\"><h4>Note</h4><p>**Data Tiling**\n\n  One source of complexity when targeting accelerators is to make sure\n  that the data layout matches the layout imposed by the accelerator design.\n  VTA is designed around a *tensor core* that performs, one matrix-matrix\n  operation per cycle between an activation matrix and a weight matrix,\n  adding the result matrix to an accumulator matrix, as shown in the\n  figure below.\n\n  .. image:: https://raw.gith [...]
       ]
     },
     {
@@ -240,7 +240,7 @@
       },
       "outputs": [],
       "source": [
-        "# Build GEMM VTA kernel\nmy_gemm = vta.build(s, [A, B, C], \"ext_dev\", env.target_host, name=\"my_gemm\")\n\n# Write the compiled module into an object file.\ntemp = utils.tempdir()\nmy_gemm.save(temp.relpath(\"gemm.o\"))\n\n# Send the executable over RPC\nremote.upload(temp.relpath(\"gemm.o\"))\n\n# Load the compiled module\nf = remote.load_module(\"gemm.o\")"
+        "# Build GEMM VTA kernel\nmy_gemm = vta.build(s, [A, B, C], \"ext_dev\", env.target_host, name=\"my_gemm\")\n\n# Write the compiled module into an object file.\ntemp = util.tempdir()\nmy_gemm.save(temp.relpath(\"gemm.o\"))\n\n# Send the executable over RPC\nremote.upload(temp.relpath(\"gemm.o\"))\n\n# Load the compiled module\nf = remote.load_module(\"gemm.o\")"
       ]
     },
     {
@@ -303,7 +303,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.6.12"
+      "version": "3.6.10"
     }
   },
   "nbformat": 4,
diff --git a/docs/_downloads/dfa0880631b34bb8814952afdc9031d8/tvmc_command_line_driver.ipynb b/docs/_downloads/dfa0880631b34bb8814952afdc9031d8/tvmc_command_line_driver.ipynb
deleted file mode 100644
index 5e0f8e6..0000000
--- a/docs/_downloads/dfa0880631b34bb8814952afdc9031d8/tvmc_command_line_driver.ipynb
+++ /dev/null
@@ -1,149 +0,0 @@
-{
-  "cells": [
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "collapsed": false
-      },
-      "outputs": [],
-      "source": [
-        "%matplotlib inline"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "\nGetting Started with TVM command line driver - TVMC\n===================================================\n**Authors**:\n`Leandro Nunes <https://github.com/leandron>`_,\n`Matthew Barrett <https://github.com/mbaret>`_\n\nThis tutorial is an introduction to working with TVMC, the TVM command\nline driver. TVMC is a tool that exposes TVM features such as\nauto-tuning, compiling, profiling and execution of models, via a\ncommand line interface.\n\nIn this tutorial we are going to u [...]
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "Using TVMC\n----------\n\nTVMC is a Python application, part of the TVM Python package.\nWhen you install TVM using a Python package, you will get TVMC as\nas a command line application called ``tvmc``.\n\nAlternatively, if you have TVM as a Python module on your\n``$PYTHONPATH``,you can access the command line driver functionality\nvia the executable python module, ``python -m tvm.driver.tvmc``.\n\nFor simplicity, this tutorial will mention TVMC command line using\n``tvmc <opti [...]
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "Obtaining the model\n-------------------\n\nWe are going to use ResNet-50 V2 as an example to experiment with TVMC.\nThe version below is in ONNX format. To download the file, you can use\nthe command below:\n\n.. code-block:: bash\n\n  wget https://github.com/onnx/models/raw/master/vision/classification/resnet/model/resnet50-v2-7.onnx\n\n\n\n"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "<div class=\"alert alert-info\"><h4>Note</h4><p>Supported model formats\n\n  TVMC supports models created with Keras, ONNX, TensorFlow, TFLite\n  and Torch. Use the option``--model-format`` if you need to\n  explicitly provide the model format you are using. See ``tvmc\n  compile --help`` for more information.</p></div>\n\n\n"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "Compiling the model\n-------------------\n\nThe next step once we've downloaded ResNet-50, is to compile it,\nTo accomplish that, we are going to use ``tvmc compile``. The\noutput we get from the compilation process is a TAR package,\nthat can be used to run our model on the target device.\n\n.. code-block:: bash\n\n  tvmc compile \\\n    --target \"llvm\" \\\n    --output compiled_module.tar \\\n    resnet50-v2-7.onnx\n\nOnce compilation finishes, the output ``compiled_module.t [...]
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "<div class=\"alert alert-info\"><h4>Note</h4><p>Defining the correct target\n\n  Specifying the correct target (option ``--target``) can have a huge\n  impact on the performance of the compiled module, as it can take\n  advantage of hardware features available on the target. For more\n  information, please refer to `Auto-tuning a convolutional network\n  for x86 CPU <https://tvm.apache.org/docs/tutorials/autotvm/tune_relay_x86.html#define-network>`_.</p></div>\n\n\n"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "In the next step, we are going to use the compiled module, providing it\nwith some inputs, to generate some predictions.\n\n\n"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "Input pre-processing\n--------------------\n\nIn order to generate predictions, we will need two things:\n\n- the compiled module, which we just produced;\n- a valid input to the model\n\nEach model is particular when it comes to expected tensor shapes, formats and data\ntypes. For this reason, most models require some pre and\npost processing, to ensure the input(s) is valid and to interpret the output(s).\n\nIn TVMC, we adopted NumPy's ``.npz`` format for both input and output [...]
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "collapsed": false
-      },
-      "outputs": [],
-      "source": [
-        "from tvm.contrib.download import download_testdata\nfrom PIL import Image\nimport numpy as np\n\nimg_url = \"https://s3.amazonaws.com/model-server/inputs/kitten.jpg\"\nimg_path = download_testdata(img_url, \"imagenet_cat.png\", module=\"data\")\n\n# Resize it to 224x224\nresized_image = Image.open(img_path).resize((224, 224))\nimg_data = np.asarray(resized_image).astype(\"float32\")\n\n# ONNX expects NCHW input, so convert the array\nimg_data = np.transpose(img_data, (2, 0, 1))\ [...]
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "Running the compiled module\n---------------------------\n\nWith both the compiled module and input file in hand, we can run it by\ninvoking ``tvmc run``.\n\n.. code-block:: bash\n\n   tvmc run \\\n     --inputs imagenet_cat.npz \\\n     --output predictions.npz \\\n     compiled_module.tar\n\nWhen running the above command, a new file ``predictions.npz`` should\nbe produced. It contains the output tensors.\n\nIn this example, we are running the model on the same machine that we [...]
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "Output post-processing\n----------------------\n\nAs previously mentioned, each model will have its own particular way\nof providing output tensors.\n\nIn our case, we need to run some post-processing to render the\noutputs from ResNet 50 V2 into a more human-readable form.\n\nThe script below shows an example of the post-processing to extract\nlabels from the output of our compiled module.\n\n\n"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "collapsed": false
-      },
-      "outputs": [],
-      "source": [
-        "import os.path\nimport numpy as np\n\nfrom scipy.special import softmax\n\nfrom tvm.contrib.download import download_testdata\n\n# Download a list of labels\nlabels_url = \"https://s3.amazonaws.com/onnx-model-zoo/synset.txt\"\nlabels_path = download_testdata(labels_url, \"synset.txt\", module=\"data\")\n\nwith open(labels_path, \"r\") as f:\n    labels = [l.rstrip() for l in f]\n\noutput_file = \"predictions.npz\"\n\n# Open the output and read the output tensor\nif os.path.exist [...]
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "When running the script, a list of predictions should be printed similar\nthe the example below.\n\n.. code-block:: bash\n\n  $ python post_processing.py\n  class=n02123045 tabby, tabby cat ; probability=446.000000\n  class=n02123159 tiger cat ; probability=675.000000\n  class=n02124075 Egyptian cat ; probability=836.000000\n  class=n02129604 tiger, Panthera tigris ; probability=917.000000\n  class=n04040759 radiator ; probability=213.000000\n\n\n"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "Tuning the model\n----------------\n\nIn some cases, we might not get the expected performance when running\ninferences using our compiled module. In cases like this, we can make use\nof the auto-tuner, to find a better configuration for our model and\nget a boost in performance.\n\nTuning in TVM refers to the process by which a model is optimized\nto run faster on a given target. This differs from training or\nfine-tuning in that it does not affect the accuracy of the model,\nb [...]
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "Final Remarks\n-------------\n\nIn this tutorial, we presented TVMC, a command line driver for TVM.\nWe demonstrated how to compile, run and tune a model, as well\nas discussed the need for pre and post processing of inputs and outputs.\n\nHere we presented a simple example using ResNet 50 V2 locally. However, TVMC\nsupports many more features including cross-compilation, remote execution and\nprofiling/benchmarking.\n\nTo see what other options are available, please have a look [...]
-      ]
-    }
-  ],
-  "metadata": {
-    "kernelspec": {
-      "display_name": "Python 3",
-      "language": "python",
-      "name": "python3"
-    },
-    "language_info": {
-      "codemirror_mode": {
-        "name": "ipython",
-        "version": 3
-      },
-      "file_extension": ".py",
-      "mimetype": "text/x-python",
-      "name": "python",
-      "nbconvert_exporter": "python",
-      "pygments_lexer": "ipython3",
-      "version": "3.6.12"
-    }
-  },
-  "nbformat": 4,
-  "nbformat_minor": 0
-}
\ No newline at end of file
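
The post-processing cell of the deleted TVMC notebook is truncated above; the readout it describes amounts to a softmax plus a top-5 ranking. A sketch, assuming ``tvmc run`` stored the model's single output in ``predictions.npz`` under the key ``"output_0"`` (the key name is an assumption, not confirmed by the cells above):

.. code-block:: python

    import numpy as np
    from scipy.special import softmax

    # Load the tensor written by `tvmc run` and rank the classes.
    data = np.load("predictions.npz")
    scores = np.squeeze(softmax(data["output_0"]))  # key name assumed
    ranks = np.argsort(scores)[::-1]

    # A label list would come from synset.txt, as downloaded earlier.
    for rank in ranks[0:5]:
        print("class=%d; probability=%f" % (rank, scores[rank]))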
diff --git a/docs/_downloads/e09aef52edc37570c0178591a87d328c/from_tensorflow.ipynb b/docs/_downloads/e09aef52edc37570c0178591a87d328c/from_tensorflow.ipynb
index 1e8d17c..9b48a8d 100644
--- a/docs/_downloads/e09aef52edc37570c0178591a87d328c/from_tensorflow.ipynb
+++ b/docs/_downloads/e09aef52edc37570c0178591a87d328c/from_tensorflow.ipynb
@@ -26,7 +26,7 @@
       },
       "outputs": [],
       "source": [
-        "# tvm, relay\nimport tvm\nfrom tvm import te\nfrom tvm import relay\n\n# os and numpy\nimport numpy as np\nimport os.path\n\n# Tensorflow imports\nimport tensorflow as tf\n\ntry:\n    tf_compat_v1 = tf.compat.v1\nexcept ImportError:\n    tf_compat_v1 = tf\n\n# Tensorflow utility functions\nimport tvm.relay.testing.tf as tf_testing\n\n# Base location for model related files.\nrepo_base = \"https://github.com/dmlc/web-data/raw/main/tensorflow/models/InceptionV1/\"\n\n# Test image\ [...]
+        "# tvm, relay\nimport tvm\nfrom tvm import te\nfrom tvm import relay\n\n# os and numpy\nimport numpy as np\nimport os.path\n\n# Tensorflow imports\nimport tensorflow as tf\n\ntry:\n    tf_compat_v1 = tf.compat.v1\nexcept ImportError:\n    tf_compat_v1 = tf\n\n# Tensorflow utility functions\nimport tvm.relay.testing.tf as tf_testing\n\n# Base location for model related files.\nrepo_base = \"https://github.com/dmlc/web-data/raw/master/tensorflow/models/InceptionV1/\"\n\n# Test imag [...]
       ]
     },
     {
@@ -208,7 +208,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.6.12"
+      "version": "3.6.10"
     }
   },
   "nbformat": 4,
diff --git a/docs/_downloads/e41367a7f459e4f4dca82180009c1539/tune_relay_mobile_gpu.py b/docs/_downloads/e41367a7f459e4f4dca82180009c1539/tune_relay_mobile_gpu.py
index b7fbf89..19fa601 100644
--- a/docs/_downloads/e41367a7f459e4f4dca82180009c1539/tune_relay_mobile_gpu.py
+++ b/docs/_downloads/e41367a7f459e4f4dca82180009c1539/tune_relay_mobile_gpu.py
@@ -33,10 +33,6 @@ these operators, it will query this log file to get the best knob values.
 We also released pre-tuned parameters for some arm devices. You can go to
 `Mobile GPU Benchmark <https://github.com/apache/incubator-tvm/wiki/Benchmark#mobile-gpu>`_
 to see the results.
-
-Note that this tutorial will not run on Windows or recent versions of macOS. To
-get it to run, you will need to wrap the body of this tutorial in a :code:`if
-__name__ == "__main__":` block.
 """
 
 ######################################################################
@@ -70,7 +66,7 @@ from tvm import autotvm
 from tvm import relay
 import tvm.relay.testing
 from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner
-from tvm.contrib.utils import tempdir
+from tvm.contrib.util import tempdir
 import tvm.contrib.graph_runtime as runtime
 
 #################################################################
@@ -130,7 +126,7 @@ def get_network(name, batch_size):
 # measure the speed of code on the board.
 #
 # To scale up the tuning, TVM uses RPC Tracker to manage distributed devices.
-# The RPC Tracker is a centralized controller node. We can register all devices to
+# The RPC Tracker is a centralized master node. We can register all devices to
 # the tracker. For example, if we have 10 phones, we can register all of them
 # to the tracker, and run 10 measurements in parallel, accelerating the tuning process.
 #
@@ -165,7 +161,7 @@ def get_network(name, batch_size):
 #   (replace :code:`[HOST_IP]` with the IP address of your host machine)
 #
 # * For Android:
-#   Follow this `readme page <https://github.com/apache/incubator-tvm/tree/main/apps/android_rpc>`_ to
+#   Follow this `readme page <https://github.com/apache/incubator-tvm/tree/master/apps/android_rpc>`_ to
 #   install TVM RPC APK on the android device. Make sure you can pass the android RPC test.
 #   Then you have already registered your device. During tuning, you have to go to developer option
 #   and enable "Keep screen awake during changing" and charge your phone to make it stable.
@@ -421,4 +417,4 @@ def tune_and_evaluate(tuning_opt):
 #      import logging
 #      logging.getLogger('autotvm').setLevel(logging.DEBUG)
 #
-#   Finally, always feel free to ask our community for help on https://discuss.tvm.apache.org
+#   Finally, always feel free to ask our community for help on https://discuss.tvm.ai
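
The tracker described in this tutorial is plain ``tvm.rpc`` infrastructure. A short sketch of how a tuning script borrows a registered device from it, assuming a tracker on ``127.0.0.1:9190`` and a device registered under the key ``"android"`` (both deployment-specific):

.. code-block:: python

    from tvm import rpc

    # Ask the tracker (started with `python -m tvm.exec.rpc_tracker`)
    # for any free device registered under the key "android".
    tracker = rpc.connect_tracker("127.0.0.1", 9190)
    remote = tracker.request("android", priority=1, session_timeout=60)
    print(remote.cpu(0))  # a context on the borrowed device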
diff --git a/docs/_downloads/e732d71c83de9fd8c6c7a72184d3ee0a/from_coreml.py b/docs/_downloads/e732d71c83de9fd8c6c7a72184d3ee0a/from_coreml.py
index c868a7f..4e3f391 100644
--- a/docs/_downloads/e732d71c83de9fd8c6c7a72184d3ee0a/from_coreml.py
+++ b/docs/_downloads/e732d71c83de9fd8c6c7a72184d3ee0a/from_coreml.py
@@ -57,7 +57,7 @@ mlmodel = cm.models.MLModel(model_path)
 # Load a test image
 # ------------------
 # A single cat dominates the examples!
-img_url = "https://github.com/dmlc/mxnet.js/blob/main/data/cat.png?raw=true"
+img_url = "https://github.com/dmlc/mxnet.js/blob/master/data/cat.png?raw=true"
 img_path = download_testdata(img_url, "cat.png", module="data")
 img = Image.open(img_path).resize((224, 224))
 # Mobilenet.mlmodel's input is BGR format
diff --git a/docs/_downloads/e87c21d127b0b825efcf978b9f8e2cd7/low_level_custom_pass.ipynb b/docs/_downloads/e87c21d127b0b825efcf978b9f8e2cd7/low_level_custom_pass.ipynb
index 8238239..dabf31b 100644
--- a/docs/_downloads/e87c21d127b0b825efcf978b9f8e2cd7/low_level_custom_pass.ipynb
+++ b/docs/_downloads/e87c21d127b0b825efcf978b9f8e2cd7/low_level_custom_pass.ipynb
@@ -132,7 +132,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.6.12"
+      "version": "3.6.10"
     }
   },
   "nbformat": 4,
diff --git a/docs/_downloads/e92c7219a1cd7838e61f9683f4228a7f/from_onnx.ipynb b/docs/_downloads/e92c7219a1cd7838e61f9683f4228a7f/from_onnx.ipynb
index 8953d2d..5ba58a7 100644
--- a/docs/_downloads/e92c7219a1cd7838e61f9683f4228a7f/from_onnx.ipynb
+++ b/docs/_downloads/e92c7219a1cd7838e61f9683f4228a7f/from_onnx.ipynb
@@ -62,7 +62,7 @@
       },
       "outputs": [],
       "source": [
-        "from PIL import Image\n\nimg_url = \"https://github.com/dmlc/mxnet.js/blob/main/data/cat.png?raw=true\"\nimg_path = download_testdata(img_url, \"cat.png\", module=\"data\")\nimg = Image.open(img_path).resize((224, 224))\nimg_ycbcr = img.convert(\"YCbCr\")  # convert to YCbCr\nimg_y, img_cb, img_cr = img_ycbcr.split()\nx = np.array(img_y)[np.newaxis, np.newaxis, :, :]"
+        "from PIL import Image\n\nimg_url = \"https://github.com/dmlc/mxnet.js/blob/master/data/cat.png?raw=true\"\nimg_path = download_testdata(img_url, \"cat.png\", module=\"data\")\nimg = Image.open(img_path).resize((224, 224))\nimg_ycbcr = img.convert(\"YCbCr\")  # convert to YCbCr\nimg_y, img_cb, img_cr = img_ycbcr.split()\nx = np.array(img_y)[np.newaxis, np.newaxis, :, :]"
       ]
     },
     {
@@ -118,13 +118,6 @@
       "source": [
         "from matplotlib import pyplot as plt\n\nout_y = Image.fromarray(np.uint8((tvm_output[0, 0]).clip(0, 255)), mode=\"L\")\nout_cb = img_cb.resize(out_y.size, Image.BICUBIC)\nout_cr = img_cr.resize(out_y.size, Image.BICUBIC)\nresult = Image.merge(\"YCbCr\", [out_y, out_cb, out_cr]).convert(\"RGB\")\ncanvas = np.full((672, 672 * 2, 3), 255)\ncanvas[0:224, 0:224, :] = np.asarray(img)\ncanvas[:, 672:, :] = np.asarray(result)\nplt.imshow(canvas.astype(np.uint8))\nplt.show()"
       ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "Notes\n---------------------------------------------\nBy default, ONNX defines models in terms of dynamic shapes. The ONNX importer\nretains that dynamism upon import, and the compiler attemps to convert the model\ninto a static shapes at compile time. If this fails, there may still be dynamic\noperations in the model. Not all TVM kernels currently support dynamic shapes,\nplease file an issue on discuss.tvm.apache.org if you hit an error with dynamic kernels.\n\n"
-      ]
     }
   ],
   "metadata": {
@@ -143,7 +136,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.6.12"
+      "version": "3.6.10"
     }
   },
   "nbformat": 4,
diff --git a/docs/_downloads/ea0c81cab71096d16b825a33fd276c58/from_mxnet.py b/docs/_downloads/ea0c81cab71096d16b825a33fd276c58/from_mxnet.py
index 3eeef87..d81b211 100644
--- a/docs/_downloads/ea0c81cab71096d16b825a33fd276c58/from_mxnet.py
+++ b/docs/_downloads/ea0c81cab71096d16b825a33fd276c58/from_mxnet.py
@@ -51,7 +51,7 @@ from PIL import Image
 from matplotlib import pyplot as plt
 
 block = get_model("resnet18_v1", pretrained=True)
-img_url = "https://github.com/dmlc/mxnet.js/blob/main/data/cat.png?raw=true"
+img_url = "https://github.com/dmlc/mxnet.js/blob/master/data/cat.png?raw=true"
 img_name = "cat.png"
 synset_url = "".join(
     [
diff --git a/docs/_downloads/ea430ddc44893f3ac69585718b79c09c/reduction.ipynb b/docs/_downloads/ea430ddc44893f3ac69585718b79c09c/reduction.ipynb
index 09401ee..f6d986f 100644
--- a/docs/_downloads/ea430ddc44893f3ac69585718b79c09c/reduction.ipynb
+++ b/docs/_downloads/ea430ddc44893f3ac69585718b79c09c/reduction.ipynb
@@ -26,7 +26,7 @@
       },
       "outputs": [],
       "source": [
-        "from __future__ import absolute_import, print_function\n\nimport tvm\nimport tvm.testing\nfrom tvm import te\nimport numpy as np"
+        "from __future__ import absolute_import, print_function\n\nimport tvm\nfrom tvm import te\nimport numpy as np"
       ]
     },
     {
@@ -240,7 +240,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.6.12"
+      "version": "3.6.10"
     }
   },
   "nbformat": 4,
diff --git a/docs/_downloads/ec94e7a109437cf90cddcc60a7b5aaea/deploy_object_detection_pytorch.py b/docs/_downloads/ec94e7a109437cf90cddcc60a7b5aaea/deploy_object_detection_pytorch.py
index 2852dd3..6408685 100644
--- a/docs/_downloads/ec94e7a109437cf90cddcc60a7b5aaea/deploy_object_detection_pytorch.py
+++ b/docs/_downloads/ec94e7a109437cf90cddcc60a7b5aaea/deploy_object_detection_pytorch.py
@@ -27,8 +27,8 @@ A quick solution is to install via pip
 
 .. code-block:: bash
 
-    pip install torch==1.7.0
-    pip install torchvision==0.8.1
+    pip install torch==1.4.0
+    pip install torchvision==0.5.0
 
 or please refer to official site
 https://pytorch.org/get-started/locally/
@@ -36,7 +36,7 @@ https://pytorch.org/get-started/locally/
 PyTorch versions should be backwards compatible but should be used
 with the proper TorchVision version.
 
-Currently, TVM supports PyTorch 1.7 and 1.4. Other versions may
+Currently, TVM supports PyTorch 1.4 and 1.3. Other versions may
 be unstable.
 """
 
diff --git a/docs/_downloads/ef2454f5c8b007c53ff214943df808ad/deploy_classification.py b/docs/_downloads/ef2454f5c8b007c53ff214943df808ad/deploy_classification.py
index 1bf4161..04716ce 100644
--- a/docs/_downloads/ef2454f5c8b007c53ff214943df808ad/deploy_classification.py
+++ b/docs/_downloads/ef2454f5c8b007c53ff214943df808ad/deploy_classification.py
@@ -52,7 +52,7 @@ from matplotlib import pyplot as plt
 import tvm
 from tvm import te
 from tvm import rpc, autotvm, relay
-from tvm.contrib import graph_runtime, utils, download
+from tvm.contrib import graph_runtime, util, download
 from tvm.contrib.debugger import debug_runtime
 from tvm.relay import transform
 
@@ -204,7 +204,7 @@ with autotvm.tophub.context(target):
     print(model + " inference graph built in {0:.2f}s!".format(build_time))
 
     # Send the inference library over to the remote RPC server
-    temp = utils.tempdir()
+    temp = util.tempdir()
     lib.export_library(temp.relpath("graphlib.tar"))
     remote.upload(temp.relpath("graphlib.tar"))
     lib = remote.load_module("graphlib.tar")
@@ -220,7 +220,7 @@ with autotvm.tophub.context(target):
 # and an input test image.
 
 # Download ImageNet categories
-categ_url = "https://github.com/uwsampl/web-data/raw/main/vta/models/"
+categ_url = "https://github.com/uwsaml/web-data/raw/master/vta/models/"
 categ_fn = "synset.txt"
 download.download(join(categ_url, categ_fn), categ_fn)
 synset = eval(open(categ_fn).read())
diff --git a/docs/_downloads/f1a09967bab66114252357e4a9babb45/tune_matmul_x86.ipynb b/docs/_downloads/f1a09967bab66114252357e4a9babb45/tune_matmul_x86.ipynb
index ad43051..e8dbcd6 100644
--- a/docs/_downloads/f1a09967bab66114252357e4a9babb45/tune_matmul_x86.ipynb
+++ b/docs/_downloads/f1a09967bab66114252357e4a9babb45/tune_matmul_x86.ipynb
@@ -15,7 +15,7 @@
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "\nAuto-scheduling matrix multiplication for CPU\n=============================================\n**Author**: `Lianmin Zheng <https://github.com/merrymercy>`_,             `Chengfan Jia <https://github.com/jcf94/>`_\n\nDifferent from the existing `autotvm <tutorials-autotvm-sec>` which relies on \nmanual templates to define the search space, the auto-scheduler does not require any templates.\nUsers only need to write the computation declaration without any schedule commands or tem [...]
+        "\nAuto-scheduling matrix multiplication for CPU\n=============================================\n**Author**: `Lianmin Zheng <https://github.com/merrymercy>`_,             `Chengfan Jia <https://github.com/jcf94/>`_\n\nDifferent from the existing `autotvm <tutorials-autotvm-sec>` which relies on \nmanual templates to define the search space, the auto-scheduler does not require any templates.\nThe auto-scheduler is template-free, so users only need to write the computation declarat [...]
       ]
     },
     {
@@ -26,7 +26,7 @@
       },
       "outputs": [],
       "source": [
-        "import os\n\nimport numpy as np\nimport tvm\nfrom tvm import te, auto_scheduler"
+        "import numpy as np\nimport tvm\nfrom tvm import te, testing, auto_scheduler"
       ]
     },
     {
@@ -80,7 +80,7 @@
       },
       "outputs": [],
       "source": [
-        "log_file = \"matmul.json\"\ntune_option = auto_scheduler.TuningOptions(\n    num_measure_trials=10, measure_callbacks=[auto_scheduler.RecordToFile(log_file)]\n)"
+        "tune_option = auto_scheduler.TuningOptions(\n    num_measure_trials=10, measure_callbacks=[auto_scheduler.RecordToFile(\"matmul.json\")]\n)"
       ]
     },
     {
@@ -134,7 +134,7 @@
       },
       "outputs": [],
       "source": [
-        "func = tvm.build(sch, args)\na_np = np.random.uniform(size=(128, 128)).astype(np.float32)\nb_np = np.random.uniform(size=(128, 128)).astype(np.float32)\nc_np = np.random.uniform(size=(128, 128)).astype(np.float32)\nout_np = a_np.dot(b_np) + c_np\n\nctx = tvm.cpu()\na_tvm = tvm.nd.array(a_np, ctx=ctx)\nb_tvm = tvm.nd.array(b_np, ctx=ctx)\nc_tvm = tvm.nd.array(c_np, ctx=ctx)\nout_tvm = tvm.nd.empty(out_np.shape, ctx=ctx)\nfunc(a_tvm, b_tvm, c_tvm, out_tvm)\n\n# Check results\nnp.t [...]
+        "func = tvm.build(sch, args)\na_np = np.random.uniform(size=(128, 128)).astype(np.float32)\nb_np = np.random.uniform(size=(128, 128)).astype(np.float32)\nc_np = np.random.uniform(size=(128, 128)).astype(np.float32)\nout_np = a_np.dot(b_np) + c_np\n\nctx = tvm.cpu()\na_tvm = tvm.nd.array(a_np, ctx=ctx)\nb_tvm = tvm.nd.array(b_np, ctx=ctx)\nc_tvm = tvm.nd.array(c_np, ctx=ctx)\nout_tvm = tvm.nd.empty(out_np.shape, ctx=ctx)\nfunc(a_tvm, b_tvm, c_tvm, out_tvm)\n\n# Check results\ntvm. [...]
       ]
     },
     {
@@ -159,7 +159,7 @@
       },
       "outputs": [],
       "source": [
-        "# Load the measuremnt record for the best schedule\ninp, res = auto_scheduler.load_best(log_file, task.workload_key)\n\n# Print equivalent python schedule API. This can be used for debugging and\n# learning the behavior of the auto-scheduler.\nprint(\"Equivalent python schedule:\")\nprint(task.compute_dag.print_python_code_from_state(inp.state))\n\n# Rebuild the binary. This shows how you can apply the best schedule from a\n# log file without reruning the search again.\nsch, arg [...]
+        "# Load the measuremnt record for the best schedule\ninp, res = auto_scheduler.load_best(\"matmul.json\", task.workload_key)\n\n# Print equivalent python schedule API. This can be used for debugging and\n# learning the behavior of the auto-scheduler.\nprint(\"Equivalent python schedule:\")\nprint(task.compute_dag.print_python_code_from_state(inp.state))\n\n# Rebuild the binary. This shows how you can apply the best schedule from a\n# log file without reruning the search again.\ns [...]
       ]
     },
     {
@@ -177,7 +177,7 @@
       },
       "outputs": [],
       "source": [
-        "def resume_search(task, log_file_name):\n    cost_model = auto_scheduler.XGBModel()\n    cost_model.update_from_file(log_file_name)\n    search_policy = auto_scheduler.SketchPolicy(\n        task,\n        cost_model,\n        init_search_callbacks=[auto_scheduler.PreloadMeasuredStates(log_file_name)],\n    )\n    tune_option = auto_scheduler.TuningOptions(\n        num_measure_trials=5, measure_callbacks=[auto_scheduler.RecordToFile(log_file_name)]\n    )\n    sch, args = auto_ [...]
+        "def resume_search(task, log_file):\n    cost_model = auto_scheduler.XGBModel()\n    cost_model.update_from_file(log_file)\n    search_policy = auto_scheduler.SketchPolicy(\n        task, cost_model, init_search_callbacks=[auto_scheduler.PreloadMeasuredStates(log_file)]\n    )\n    tune_option = auto_scheduler.TuningOptions(\n        num_measure_trials=5, measure_callbacks=[auto_scheduler.RecordToFile(log_file)]\n    )\n    sch, args = auto_scheduler.auto_schedule(task, search_po [...]
       ]
     },
     {
@@ -204,7 +204,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.6.12"
+      "version": "3.6.10"
     }
   },
   "nbformat": 4,
diff --git a/docs/_downloads/f2f3dea1548903b315df5847ee60f87d/from_mxnet.ipynb b/docs/_downloads/f2f3dea1548903b315df5847ee60f87d/from_mxnet.ipynb
index 5fd3dbe..6f078be 100644
--- a/docs/_downloads/f2f3dea1548903b315df5847ee60f87d/from_mxnet.ipynb
+++ b/docs/_downloads/f2f3dea1548903b315df5847ee60f87d/from_mxnet.ipynb
@@ -44,7 +44,7 @@
       },
       "outputs": [],
       "source": [
-        "from tvm.contrib.download import download_testdata\nfrom mxnet.gluon.model_zoo.vision import get_model\nfrom PIL import Image\nfrom matplotlib import pyplot as plt\n\nblock = get_model(\"resnet18_v1\", pretrained=True)\nimg_url = \"https://github.com/dmlc/mxnet.js/blob/main/data/cat.png?raw=true\"\nimg_name = \"cat.png\"\nsynset_url = \"\".join(\n    [\n        \"https://gist.githubusercontent.com/zhreshold/\",\n        \"4d0b62f3d01426887599d4f7ede23ee5/raw/\",\n        \"596b2 [...]
+        "from tvm.contrib.download import download_testdata\nfrom mxnet.gluon.model_zoo.vision import get_model\nfrom PIL import Image\nfrom matplotlib import pyplot as plt\n\nblock = get_model(\"resnet18_v1\", pretrained=True)\nimg_url = \"https://github.com/dmlc/mxnet.js/blob/master/data/cat.png?raw=true\"\nimg_name = \"cat.png\"\nsynset_url = \"\".join(\n    [\n        \"https://gist.githubusercontent.com/zhreshold/\",\n        \"4d0b62f3d01426887599d4f7ede23ee5/raw/\",\n        \"596 [...]
       ]
     },
     {
@@ -154,7 +154,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.6.12"
+      "version": "3.6.10"
     }
   },
   "nbformat": 4,
diff --git a/docs/_downloads/f59fd8b968f7dcde34ed872c8527c192/from_keras.py b/docs/_downloads/f59fd8b968f7dcde34ed872c8527c192/from_keras.py
index 25a1e5c..a68df55 100644
--- a/docs/_downloads/f59fd8b968f7dcde34ed872c8527c192/from_keras.py
+++ b/docs/_downloads/f59fd8b968f7dcde34ed872c8527c192/from_keras.py
@@ -45,25 +45,13 @@ import numpy as np
 # Load pretrained keras model
 # ----------------------------
 # We load a pretrained resnet-50 classification model provided by keras.
-
-if tuple(keras.__version__.split(".")) < ("2", "4", "0"):
-    weights_url = "".join(
-        [
-            "https://github.com/fchollet/deep-learning-models/releases/",
-            "download/v0.2/resnet50_weights_tf_dim_ordering_tf_kernels.h5",
-        ]
-    )
-    weights_file = "resnet50_keras_old.h5"
-else:
-    weights_url = "".join(
-        [
-            " https://storage.googleapis.com/tensorflow/keras-applications/",
-            "resnet/resnet50_weights_tf_dim_ordering_tf_kernels.h5",
-        ]
-    )
-    weights_file = "resnet50_keras_new.h5"
-
-
+weights_url = "".join(
+    [
+        "https://github.com/fchollet/deep-learning-models/releases/",
+        "download/v0.2/resnet50_weights_tf_dim_ordering_tf_kernels.h5",
+    ]
+)
+weights_file = "resnet50_weights.h5"
 weights_path = download_testdata(weights_url, weights_file, module="keras")
 keras_resnet50 = keras.applications.resnet50.ResNet50(
     include_top=True, weights=None, input_shape=(224, 224, 3), classes=1000
@@ -78,7 +66,7 @@ from PIL import Image
 from matplotlib import pyplot as plt
 from keras.applications.resnet50 import preprocess_input
 
-img_url = "https://github.com/dmlc/mxnet.js/blob/main/data/cat.png?raw=true"
+img_url = "https://github.com/dmlc/mxnet.js/blob/master/data/cat.png?raw=true"
 img_path = download_testdata(img_url, "cat.png", module="data")
 img = Image.open(img_path).resize((224, 224))
 plt.imshow(img)
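
For context, the tutorial step that follows the snippet above converts the image and the Keras model to Relay. A minimal sketch, assuming the default Keras input tensor name ``input_1`` and the NHWC-to-NCHW transpose this tutorial uses (neither is shown in the hunk above):

.. code:: python

    import numpy as np
    from tvm import relay

    # Apply ResNet-50's own preprocessing, then move channels first since
    # the Relay Keras frontend consumes NCHW data.
    data = np.array(img)[np.newaxis, :].astype("float32")
    data = preprocess_input(data).transpose([0, 3, 1, 2])

    # "input_1" is the assumed default Keras input tensor name.
    shape_dict = {"input_1": data.shape}
    mod, params = relay.frontend.from_keras(keras_resnet50, shape_dict)
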
diff --git a/docs/_downloads/f5b39ff89c432f7750c338ebf15407f6/vta_get_started.ipynb b/docs/_downloads/f5b39ff89c432f7750c338ebf15407f6/vta_get_started.ipynb
index 9163c5e..c6fa24a 100644
--- a/docs/_downloads/f5b39ff89c432f7750c338ebf15407f6/vta_get_started.ipynb
+++ b/docs/_downloads/f5b39ff89c432f7750c338ebf15407f6/vta_get_started.ipynb
@@ -62,14 +62,14 @@
       },
       "outputs": [],
       "source": [
-        "# We'll need the TVM RPC module and the VTA simulator module\nfrom tvm import rpc\nfrom tvm.contrib import utils\nfrom vta.testing import simulator\n\n# We read the Pynq RPC host IP address and port number from the OS environment\nhost = os.environ.get(\"VTA_RPC_HOST\", \"192.168.2.99\")\nport = int(os.environ.get(\"VTA_RPC_PORT\", \"9091\"))\n\n# We configure both the bitstream and the runtime system on the Pynq\n# to match the VTA configuration specified by the vta_config.json [...]
+        "# We'll need the TVM RPC module and the VTA simulator module\nfrom tvm import rpc\nfrom tvm.contrib import util\nfrom vta.testing import simulator\n\n# We read the Pynq RPC host IP address and port number from the OS environment\nhost = os.environ.get(\"VTA_RPC_HOST\", \"192.168.2.99\")\nport = int(os.environ.get(\"VTA_RPC_PORT\", \"9091\"))\n\n# We configure both the bitstream and the runtime system on the Pynq\n# to match the VTA configuration specified by the vta_config.json  [...]
       ]
     },
     {
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "Computation Declaration\n-----------------------\nAs a first step, we need to describe our computation.\nTVM adopts tensor semantics, with each intermediate result\nrepresented as multi-dimensional array. The user needs to describe\nthe computation rule that generates the output tensors.\n\nIn this example we describe a vector addition, which requires multiple\ncomputation stages, as shown in the dataflow diagram below.\nFirst we describe the input tensors :code:`A` and :code:`B [...]
+        "Computation Declaration\n-----------------------\nAs a first step, we need to describe our computation.\nTVM adopts tensor semantics, with each intermediate result\nrepresented as multi-dimensional array. The user needs to describe\nthe computation rule that generates the output tensors.\n\nIn this example we describe a vector addition, which requires multiple\ncomputation stages, as shown in the dataflow diagram below.\nFirst we describe the input tensors :code:`A` and :code:`B [...]
       ]
     },
     {
@@ -284,7 +284,7 @@
       },
       "outputs": [],
       "source": [
-        "# Write the compiled module into an object file.\ntemp = utils.tempdir()\nmy_vadd.save(temp.relpath(\"vadd.o\"))\n\n# Send the executable over RPC\nremote.upload(temp.relpath(\"vadd.o\"))"
+        "# Write the compiled module into an object file.\ntemp = util.tempdir()\nmy_vadd.save(temp.relpath(\"vadd.o\"))\n\n# Send the executable over RPC\nremote.upload(temp.relpath(\"vadd.o\"))"
       ]
     },
     {
@@ -365,7 +365,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.6.12"
+      "version": "3.6.10"
     }
   },
   "nbformat": 4,
diff --git a/docs/_downloads/f83f0c3da8a2ab10657c61e034b7218d/from_pytorch.py b/docs/_downloads/f83f0c3da8a2ab10657c61e034b7218d/from_pytorch.py
index b5bcdf6..2328651 100644
--- a/docs/_downloads/f83f0c3da8a2ab10657c61e034b7218d/from_pytorch.py
+++ b/docs/_downloads/f83f0c3da8a2ab10657c61e034b7218d/from_pytorch.py
@@ -28,8 +28,8 @@ A quick solution is to install via pip
 
 .. code-block:: bash
 
-    pip install torch==1.7.0
-    pip install torchvision==0.8.1
+    pip install torch==1.4.0
+    pip install torchvision==0.5.0
 
 or please refer to official site
 https://pytorch.org/get-started/locally/
@@ -37,7 +37,7 @@ https://pytorch.org/get-started/locally/
 PyTorch versions should be backwards compatible but should be used
 with the proper TorchVision version.
 
-Currently, TVM supports PyTorch 1.7 and 1.4. Other versions may
+Currently, TVM supports PyTorch 1.4 and 1.3. Other versions may
 be unstable.
 """
 
@@ -70,7 +70,7 @@ scripted_model = torch.jit.trace(model, input_data).eval()
 # Classic cat example!
 from PIL import Image
 
-img_url = "https://github.com/dmlc/mxnet.js/blob/main/data/cat.png?raw=true"
+img_url = "https://github.com/dmlc/mxnet.js/blob/master/data/cat.png?raw=true"
 img_path = download_testdata(img_url, "cat.png", module="data")
 img = Image.open(img_path).resize((224, 224))
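
For context, the next step in this tutorial hands the traced module to the Relay PyTorch frontend. A minimal sketch; the input name ``input0`` is an assumption following the tutorial's convention:

.. code:: python

    from tvm import relay

    # The (name, shape) pairs must match the inputs of the traced graph.
    shape_list = [("input0", (1, 3, 224, 224))]
    mod, params = relay.frontend.from_pytorch(scripted_model, shape_list)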
 
diff --git a/docs/_downloads/f8f7a2adf30f5033603d79cdbacd9235/tune_relay_arm.ipynb b/docs/_downloads/f8f7a2adf30f5033603d79cdbacd9235/tune_relay_arm.ipynb
index b8bf4f4..93b3a2f 100644
--- a/docs/_downloads/f8f7a2adf30f5033603d79cdbacd9235/tune_relay_arm.ipynb
+++ b/docs/_downloads/f8f7a2adf30f5033603d79cdbacd9235/tune_relay_arm.ipynb
@@ -15,7 +15,7 @@
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "\n\nAuto-tuning a convolutional network for ARM CPU\n===============================================\n**Author**: `Lianmin Zheng <https://github.com/merrymercy>`_, `Zhao Wu <https://github.com/FrozenGene>`_, `Eddie Yan <https://github.com/eqy>`_\n\nAuto-tuning for a specific ARM device is critical for getting the best\nperformance. This is a tutorial about how to tune a whole convolutional\nnetwork.\n\nThe operator implementation for ARM CPU in TVM is written in template form.\n [...]
+        "\n\nAuto-tuning a convolutional network for ARM CPU\n===============================================\n**Author**: `Lianmin Zheng <https://github.com/merrymercy>`_, `Zhao Wu <https://github.com/FrozenGene>`_, `Eddie Yan <https://github.com/eqy>`_\n\nAuto-tuning for a specific ARM device is critical for getting the best\nperformance. This is a tutorial about how to tune a whole convolutional\nnetwork.\n\nThe operator implementation for ARM CPU in TVM is written in template form.\n [...]
       ]
     },
     {
@@ -33,7 +33,7 @@
       },
       "outputs": [],
       "source": [
-        "import os\n\nimport numpy as np\nimport tvm\nfrom tvm import te\nfrom tvm import autotvm\nfrom tvm import relay\nimport tvm.relay.testing\nfrom tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner\nfrom tvm.contrib.utils import tempdir\nimport tvm.contrib.graph_runtime as runtime"
+        "import os\n\nimport numpy as np\nimport tvm\nfrom tvm import te\nfrom tvm import autotvm\nfrom tvm import relay\nimport tvm.relay.testing\nfrom tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner\nfrom tvm.contrib.util import tempdir\nimport tvm.contrib.graph_runtime as runtime"
       ]
     },
     {
@@ -58,14 +58,14 @@
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "Start RPC Tracker\n-----------------\nTVM uses RPC session to communicate with ARM boards.\nDuring tuning, the tuner will send the generated code to the board and\nmeasure the speed of code on the board.\n\nTo scale up the tuning, TVM uses RPC Tracker to manage distributed devices.\nThe RPC Tracker is a centralized controller node. We can register all devices to\nthe tracker. For example, if we have 10 phones, we can register all of them\nto the tracker, and run 10 measurements  [...]
+        "Start RPC Tracker\n-----------------\nTVM uses RPC session to communicate with ARM boards.\nDuring tuning, the tuner will send the generated code to the board and\nmeasure the speed of code on the board.\n\nTo scale up the tuning, TVM uses RPC Tracker to manage distributed devices.\nThe RPC Tracker is a centralized master node. We can register all devices to\nthe tracker. For example, if we have 10 phones, we can register all of them\nto the tracker, and run 10 measurements in p [...]
       ]
     },
     {
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "Register devices to RPC Tracker\n-----------------------------------\nNow we can register our devices to the tracker. The first step is to\nbuild the TVM runtime for the ARM devices.\n\n* For Linux:\n  Follow this section `build-tvm-runtime-on-device` to build\n  the TVM runtime on the device. Then register the device to tracker by\n\n  .. code-block:: bash\n\n    python -m tvm.exec.rpc_server --tracker=[HOST_IP]:9190 --key=rk3399\n\n  (replace :code:`[HOST_IP]` with the IP addr [...]
+        "Register devices to RPC Tracker\n-----------------------------------\nNow we can register our devices to the tracker. The first step is to\nbuild the TVM runtime for the ARM devices.\n\n* For Linux:\n  Follow this section `build-tvm-runtime-on-device` to build\n  the TVM runtime on the device. Then register the device to tracker by\n\n  .. code-block:: bash\n\n    python -m tvm.exec.rpc_server --tracker=[HOST_IP]:9190 --key=rk3399\n\n  (replace :code:`[HOST_IP]` with the IP addr [...]
       ]
     },
     {
@@ -140,7 +140,7 @@
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "<div class=\"alert alert-info\"><h4>Note</h4><p>**Experiencing Difficulties?**\n\n  The auto tuning module is error-prone. If you always see \" 0.00/ 0.00 GFLOPS\",\n  then there must be something wrong.\n\n  First, make sure you set the correct configuration of your device.\n  Then, you can print debug information by adding these lines in the beginning\n  of the script. It will print every measurement result, where you can find useful\n  error messages.\n\n  .. code-block:: pyt [...]
+        "<div class=\"alert alert-info\"><h4>Note</h4><p>**Experiencing Difficulties?**\n\n  The auto tuning module is error-prone. If you always see \" 0.00/ 0.00 GFLOPS\",\n  then there must be something wrong.\n\n  First, make sure you set the correct configuration of your device.\n  Then, you can print debug information by adding these lines in the beginning\n  of the script. It will print every measurement result, where you can find useful\n  error messages.\n\n  .. code-block:: pyt [...]
       ]
     }
   ],
@@ -160,7 +160,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.6.12"
+      "version": "3.6.10"
     }
   },
   "nbformat": 4,
diff --git a/docs/_downloads/fd012fa7b67f4e333acce1d25a8e62bc/micro_tflite.py b/docs/_downloads/fd012fa7b67f4e333acce1d25a8e62bc/micro_tflite.py
index 293f95c..6fd2de1 100644
--- a/docs/_downloads/fd012fa7b67f4e333acce1d25a8e62bc/micro_tflite.py
+++ b/docs/_downloads/fd012fa7b67f4e333acce1d25a8e62bc/micro_tflite.py
@@ -98,7 +98,7 @@ import numpy as np
 import tvm
 import tvm.micro as micro
 from tvm.contrib.download import download_testdata
-from tvm.contrib import graph_runtime, utils
+from tvm.contrib import graph_runtime, util
 from tvm import relay
 
 # %%
diff --git a/docs/_images/sphx_glr_bring_your_own_datatypes_thumb.png b/docs/_images/sphx_glr_bring_your_own_datatypes_thumb.png
deleted file mode 100644
index 233f8e6..0000000
Binary files a/docs/_images/sphx_glr_bring_your_own_datatypes_thumb.png and /dev/null differ
diff --git a/docs/_images/sphx_glr_deploy_classification_001.png b/docs/_images/sphx_glr_deploy_classification_001.png
index b6a5ddf..06c3d77 100644
Binary files a/docs/_images/sphx_glr_deploy_classification_001.png and b/docs/_images/sphx_glr_deploy_classification_001.png differ
diff --git a/docs/_images/sphx_glr_deploy_ssd_gluoncv_001.png b/docs/_images/sphx_glr_deploy_ssd_gluoncv_001.png
index e56c192..7667886 100644
Binary files a/docs/_images/sphx_glr_deploy_ssd_gluoncv_001.png and b/docs/_images/sphx_glr_deploy_ssd_gluoncv_001.png differ
diff --git a/docs/_images/sphx_glr_from_caffe2_001.png b/docs/_images/sphx_glr_from_caffe2_001.png
index b6a5ddf..06c3d77 100644
Binary files a/docs/_images/sphx_glr_from_caffe2_001.png and b/docs/_images/sphx_glr_from_caffe2_001.png differ
diff --git a/docs/_images/sphx_glr_from_darknet_001.png b/docs/_images/sphx_glr_from_darknet_001.png
index ad8ed68..2c1866c 100644
Binary files a/docs/_images/sphx_glr_from_darknet_001.png and b/docs/_images/sphx_glr_from_darknet_001.png differ
diff --git a/docs/_images/sphx_glr_from_darknet_thumb.png b/docs/_images/sphx_glr_from_darknet_thumb.png
index b42eabc..5059f17 100644
Binary files a/docs/_images/sphx_glr_from_darknet_thumb.png and b/docs/_images/sphx_glr_from_darknet_thumb.png differ
diff --git a/docs/_images/sphx_glr_from_keras_001.png b/docs/_images/sphx_glr_from_keras_001.png
index b6a5ddf..06c3d77 100644
Binary files a/docs/_images/sphx_glr_from_keras_001.png and b/docs/_images/sphx_glr_from_keras_001.png differ
diff --git a/docs/_images/sphx_glr_from_mxnet_001.png b/docs/_images/sphx_glr_from_mxnet_001.png
index b6a5ddf..06c3d77 100644
Binary files a/docs/_images/sphx_glr_from_mxnet_001.png and b/docs/_images/sphx_glr_from_mxnet_001.png differ
diff --git a/docs/_images/sphx_glr_from_onnx_001.png b/docs/_images/sphx_glr_from_onnx_001.png
index 61d41f9..1138eca 100644
Binary files a/docs/_images/sphx_glr_from_onnx_001.png and b/docs/_images/sphx_glr_from_onnx_001.png differ
diff --git a/docs/_images/sphx_glr_from_tflite_001.png b/docs/_images/sphx_glr_from_tflite_001.png
index b6a5ddf..06c3d77 100644
Binary files a/docs/_images/sphx_glr_from_tflite_001.png and b/docs/_images/sphx_glr_from_tflite_001.png differ
diff --git a/docs/_images/sphx_glr_tvmc_command_line_driver_thumb.png b/docs/_images/sphx_glr_tvmc_command_line_driver_thumb.png
deleted file mode 100644
index 233f8e6..0000000
Binary files a/docs/_images/sphx_glr_tvmc_command_line_driver_thumb.png and /dev/null differ
diff --git a/docs/_sources/api/python/contrib.rst.txt b/docs/_sources/api/python/contrib.rst.txt
index 0eb3024..8ac4e1f 100644
--- a/docs/_sources/api/python/contrib.rst.txt
+++ b/docs/_sources/api/python/contrib.rst.txt
@@ -122,9 +122,9 @@ tvm.contrib.tar
     :members:
 
 
-tvm.contrib.utils
-~~~~~~~~~~~~~~~~~
-.. automodule:: tvm.contrib.utils
+tvm.contrib.util
+~~~~~~~~~~~~~~~~
+.. automodule:: tvm.contrib.util
     :members:
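
The renamed module above is the temporary-directory helper used throughout these tutorials. A minimal usage sketch under the restored ``tvm.contrib.util`` name:

.. code:: python

    from tvm.contrib import util

    # tempdir() creates a scratch directory that is cleaned up when the
    # handle is garbage-collected; relpath() joins a file name inside it.
    temp = util.tempdir()
    lib_path = temp.relpath("deploy_lib.tar")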
 
 
diff --git a/docs/_sources/contribute/code_guide.rst.txt b/docs/_sources/contribute/code_guide.rst.txt
index dbcddf7..d790ce6 100644
--- a/docs/_sources/contribute/code_guide.rst.txt
+++ b/docs/_sources/contribute/code_guide.rst.txt
@@ -36,7 +36,7 @@ C++ Code Styles
 
 We use `clang-format` to enforce the code style. Because the formatting
 produced by clang-format can differ across versions, it is recommended to use the same
-version of clang-format as the main one.
+version of clang-format as the master.
 You can also use the following command via docker.
 
 .. code:: bash
diff --git a/docs/_sources/contribute/community.rst.txt b/docs/_sources/contribute/community.rst.txt
index fd6df0f..f6ea514 100644
--- a/docs/_sources/contribute/community.rst.txt
+++ b/docs/_sources/contribute/community.rst.txt
@@ -20,7 +20,7 @@
 TVM Community Guideline
 =======================
 
-TVM adopts the Apache style model and governs by merit. We believe that it is important to create an inclusive community where everyone can use, contribute to, and influence the direction of the project. See `CONTRIBUTORS.md <https://github.com/apache/incubator-tvm/blob/main/CONTRIBUTORS.md>`_ for the current list of contributors.
+TVM adopts the Apache style model and governs by merit. We believe that it is important to create an inclusive community where everyone can use, contribute to, and influence the direction of the project. See `CONTRIBUTORS.md <https://github.com/apache/incubator-tvm/blob/master/CONTRIBUTORS.md>`_ for the current list of contributors.
 
 
 
diff --git a/docs/_sources/contribute/document.rst.txt b/docs/_sources/contribute/document.rst.txt
index 1bfab1e..a6f54dc 100644
--- a/docs/_sources/contribute/document.rst.txt
+++ b/docs/_sources/contribute/document.rst.txt
@@ -68,7 +68,7 @@ Be careful to leave blank lines between sections of your documents.
 In the above case, there has to be a blank line before `Parameters`, `Returns` and `Examples`
 in order for the doc to be built correctly. To add a new function to the doc,
 we need to add the `sphinx.autodoc <http://www.sphinx-doc.org/en/master/ext/autodoc.html>`_
-rules to the `docs/api/python <https://github.com/apache/incubator-tvm/tree/main/docs/api/python>`_.
+rules to the `docs/api/python <https://github.com/apache/incubator-tvm/tree/master/docs/api/python>`_.
 You can refer to the existing files under this folder on how to add the functions.
 
 
@@ -96,7 +96,7 @@ to add comments about code logics to improve readability.
 Write Tutorials
 ---------------
 We use the `sphinx-gallery <https://sphinx-gallery.github.io/>`_ to build python tutorials.
-The source code under `tutorials <https://github.com/apache/incubator-tvm/tree/main/tutorials>`_ is quite self-explanatory.
+The source code under `tutorials <https://github.com/apache/incubator-tvm/tree/master/tutorials>`_ is quite self-explanatory.
 One thing worth noting is that the comment blocks are written in reStructuredText instead of markdown, so be aware of the syntax.
 
 The tutorial code will run on our build server to generate the document page.
diff --git a/docs/_sources/contribute/error_handling.rst.txt b/docs/_sources/contribute/error_handling.rst.txt
index d31b401..8f71ee6 100644
--- a/docs/_sources/contribute/error_handling.rst.txt
+++ b/docs/_sources/contribute/error_handling.rst.txt
@@ -37,14 +37,14 @@ raise an error of the corresponding type.
 Note that you do not have to add a new type;
 :py:class:`tvm.error.TVMError` will be raised by default when
 there is no error type prefix in the message.
-This mechanism works for both ``LOG(FATAL)`` and ``ICHECK`` macros.
+This mechanism works for both ``LOG(FATAL)`` and ``CHECK`` macros.
 The following code gives an example on how to do so.
 
 .. code:: c
 
   // src/api_test.cc
   void ErrorTest(int x, int y) {
-    ICHECK_EQ(x, y) << "ValueError: expect x and y to be equal.";
+    CHECK_EQ(x, y) << "ValueError: expect x and y to be equal.";
     if (x == 1) {
       LOG(FATAL) << "InternalError: cannot reach here";
     }
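
On the Python side, the error-type prefix selects the matching Python exception. A sketch of what a caller would observe, assuming the ``ErrorTest`` function above is exposed as ``tvm.testing.ErrorTest`` (that registration is not shown in this hunk):

.. code:: python

    import tvm

    try:
        tvm.testing.ErrorTest(0, 1)  # assumed binding of the C++ ErrorTest
    except ValueError as err:
        # The "ValueError:" prefix in the C++ message maps to this type.
        print("caught:", err)
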
diff --git a/docs/_sources/contribute/git_howto.rst.txt b/docs/_sources/contribute/git_howto.rst.txt
index 4585736..6bb0399 100644
--- a/docs/_sources/contribute/git_howto.rst.txt
+++ b/docs/_sources/contribute/git_howto.rst.txt
@@ -23,16 +23,16 @@ Git Usage Tips
 
 Here are some tips for git workflow.
 
-## How to resolve conflict with main
+## How to resolve conflict with master
 
-- First rebase to most recent main
+- First rebase to most recent master
 
 .. code:: bash
 
   # The first two steps can be skipped after you do it once.
   git remote add upstream [url to tvm repo]
   git fetch upstream
-  git rebase upstream/main
+  git rebase upstream/master
 
 
 - The git may show some conflicts it cannot merge, say `conflicted.py`.
@@ -84,16 +84,16 @@ to create a PR with set of meaningful commits. You can do it by following steps.
   git push --force
 
 
-Reset to the most recent main branch
-------------------------------------
+Reset to the most recent master
+-------------------------------
 
-You can always use git reset to reset your version to the most recent main.
+You can always use git reset to reset your version to the most recent master.
 Note that all your ***local changes will get lost***.
 So only do it when you do not have local changes or when your pull request just get merged.
 
 .. code:: bash
 
-  git reset --hard [hash tag of main]
+  git reset --hard [hash tag of master]
 
 
 Recover a Previous Commit after Reset
@@ -110,12 +110,12 @@ Once you get the right hashtag, you can use git reset again to change
 the head to the right commit.
 
 
-Apply only k-Latest Commits on to the main
-------------------------------------------
+Apply only k-Latest Commits on to the master
+--------------------------------------------
 
-Sometimes it is useful to only apply your k-latest changes on top of the main.
+Sometimes it is useful to only apply your k-latest changes on top of the master.
 This usually happens when you have other m-commits that are already merged
-before these k-commits. Directly rebasing against the main might cause merge conflicts
+before these k-commits. Directly rebasing against the master might cause merge conflicts
 on these first m-commits (which can be safely discarded).
 
 You can instead use the following command
@@ -124,9 +124,9 @@ You can instead use the following command
 
   # k is the concrete number
   # e.g. put HEAD~2 to apply the last 2 commits.
-  git rebase --onto upstream/main HEAD~k
+  git rebase --onto upstream/master HEAD~k
 
-You can then force push to the main. Note that the above command will discard
+You can then force push to the master. Note that the above command will discard
 all the commits before the last k ones.
 
 
diff --git a/docs/_sources/contribute/pull_request.rst.txt b/docs/_sources/contribute/pull_request.rst.txt
index e498c70..935f2d5 100644
--- a/docs/_sources/contribute/pull_request.rst.txt
+++ b/docs/_sources/contribute/pull_request.rst.txt
@@ -20,13 +20,13 @@ Submit a Pull Request
 
 This is a quick guide to submit a pull request, please also refer to the detailed guidelines.
 
-- Before submit, please rebase your code on the most recent version of main, you can do it by
+- Before submit, please rebase your code on the most recent version of master, you can do it by
 
   .. code:: bash
 
     git remote add upstream [url to tvm repo]
     git fetch upstream
-    git rebase upstream/main
+    git rebase upstream/master
 
 - Make sure code style check pass by typing the following command, and all the existing test-cases pass.
 
@@ -48,8 +48,8 @@ This is a quick guide to submit a pull request, please also refer to the detaile
 
   .. code:: bash
 
-    # Run clang-format check for all the files that changed since upstream/main
-    docker/bash.sh tvmai/ci-lint ./tests/lint/git-clang-format.sh upstream/main
+    # Run clang-format check for all the files that changed since upstream/master
+    docker/bash.sh tvmai/ci-lint ./tests/lint/git-clang-format.sh upstream/master
 
 - Add test-cases to cover the new features or bugfix the patch introduces.
 - Document the code you wrote, see more at :ref:`doc_guide`
diff --git a/docs/_sources/contribute/release_process.rst.txt b/docs/_sources/contribute/release_process.rst.txt
index 0f1e515..3e2239f 100644
--- a/docs/_sources/contribute/release_process.rst.txt
+++ b/docs/_sources/contribute/release_process.rst.txt
@@ -59,18 +59,7 @@ After generating the gpg key, you need to upload your key to a public key server
 
 If you want to do the release on another machine, you can transfer your gpg key to that machine via the :code:`gpg --export` and :code:`gpg --import` commands.
 
-The last step is to update the KEYS file with your code signing key https://www.apache.org/dev/openpgp.html#export-public-key. Check in the changes to the TVM main branch, as well as ASF SVN,
-
-.. code-block:: bash
-
-	# the --depth=files will avoid checkout existing folders
-	svn co --depth=files "https://dist.apache.org/repos/dist/dev/incubator/tvm" svn-tvm
-	cd svn-tvm
-	# edit KEYS file
-	svn ci --username $ASF_USERNAME --password "$ASF_PASSWORD" -m "Update KEYS"
-	# update downloads.apache.org
-	svn rm --username $ASF_USERNAME --password "$ASF_PASSWORD" https://dist.apache.org/repos/dist/release/incubator/tvm/KEYS -m "Update KEYS"
-	svn cp --username $ASF_USERNAME --password "$ASF_PASSWORD" https://dist.apache.org/repos/dist/dev/incubator/tvm/KEYS https://dist.apache.org/repos/dist/release/incubator/tvm/ -m "Update KEYS"
+The last step is to update the KEYS file with your code signing key https://www.apache.org/dev/openpgp.html#export-public-key. Check in the changes to the master branch.
 
 
 Cut a Release Candidate
@@ -90,7 +79,7 @@ To cut a release candidate, one needs to first cut a branch using selected versi
 Go to the GitHub repositories "releases" tab and click "Draft a new release",
 
 - Provide the release tag in the form of “v1.0.0.rc0” where 0 means it’s the first release candidate
-- Select the commit by clicking Target: branch > Recent commits > $commit_hash
+- Select the commit by clicking Target: branch > Recent commits > $commit_hash 
 - Copy and paste release note draft into the description box
 - Select "This is a pre-release"
 - Click "Publish release"
@@ -115,7 +104,7 @@ Create source code artifacts,
 	rm -rf .DS_Store
 	find . -name ".git*" -print0 | xargs -0 rm -rf
 	cd ..
-	brew install gnu-tar
+	brew install gnu-tar 
 	gtar -czvf apache-tvm-src-v0.6.0.rc0-incubating.tar.gz apache-tvm-src-v0.6.0.rc0-incubating
 
 Use your GPG key to sign the created artifact. First make sure your GPG is set to use the correct private key,
@@ -147,7 +136,7 @@ The release manager also needs to upload the artifacts to ASF SVN,
 	cd svn-tvm
 	mkdir tvm-v0.6.0-rc0
 	# copy files into it
-	svn add tvm-0.6.0-rc0
+	svn add tvm-0.6.0-rc0 
 	svn ci --username $ASF_USERNAME --password "$ASF_PASSWORD" -m "Add RC"
 
 
diff --git a/docs/_sources/deploy/android.rst.txt b/docs/_sources/deploy/android.rst.txt
index e28eef3..c724eab 100644
--- a/docs/_sources/deploy/android.rst.txt
+++ b/docs/_sources/deploy/android.rst.txt
@@ -38,5 +38,5 @@ deploy_lib.so, deploy_graph.json, deploy_param.params will go to android target.
 TVM Runtime for Android Target
 ------------------------------
 
-Refer `here <https://github.com/apache/incubator-tvm/blob/main/apps/android_deploy/README.md#build-and-installation>`_ to build the CPU/OpenCL flavor of the TVM runtime for the android target.
-To load a model and execute it from the android java TVM API, refer to this `java <https://github.com/apache/incubator-tvm/blob/main/apps/android_deploy/app/src/main/java/org/apache/tvm/android/demo/MainActivity.java>`_ sample source.
+Refer `here <https://github.com/apache/incubator-tvm/blob/master/apps/android_deploy/README.md#build-and-installation>`_ to build the CPU/OpenCL flavor of the TVM runtime for the android target.
+To load a model and execute it from the android java TVM API, refer to this `java <https://github.com/apache/incubator-tvm/blob/master/apps/android_deploy/app/src/main/java/org/apache/tvm/android/demo/MainActivity.java>`_ sample source.
diff --git a/docs/_sources/deploy/arm_compute_lib.rst.txt b/docs/_sources/deploy/arm_compute_lib.rst.txt
index 5dd0076..e3399c5 100644
--- a/docs/_sources/deploy/arm_compute_lib.rst.txt
+++ b/docs/_sources/deploy/arm_compute_lib.rst.txt
@@ -232,12 +232,6 @@ Operator support
 +----------------------+-------------------------------------------------------------------------+
 | reshape              | fp32, uint8                                                             |
 +----------------------+-------------------------------------------------------------------------+
-| maximum              | fp32                                                                    |
-+----------------------+-------------------------------------------------------------------------+
-| add                  | fp32                                                                    |
-+----------------------+-------------------------------------------------------------------------+
-| qnn.add              | uint8                                                                   |
-+----------------------+-------------------------------------------------------------------------+
 
 .. note::
     A composite operator is a series of operators that map to a single Arm Compute Library operator. You can view this
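
For context, offloading to Arm Compute Library is driven by the partitioning helper this guide documents. A minimal sketch, assuming a Relay module ``mod`` obtained from any frontend and the AArch64 target this guide uses elsewhere:

.. code:: python

    from tvm import relay
    from tvm.relay.op.contrib.arm_compute_lib import partition_for_arm_compute_lib

    # Supported subgraphs are annotated and partitioned out for ACL;
    # everything else goes through the regular TVM compilation path.
    mod = partition_for_arm_compute_lib(mod)
    lib = relay.build(mod, target="llvm -mtriple=aarch64-linux-gnu -mattr=+neon")
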
diff --git a/docs/_sources/deploy/cpp_deploy.rst.txt b/docs/_sources/deploy/cpp_deploy.rst.txt
index f3de69d..a298f95 100644
--- a/docs/_sources/deploy/cpp_deploy.rst.txt
+++ b/docs/_sources/deploy/cpp_deploy.rst.txt
@@ -19,7 +19,7 @@
 Deploy TVM Module using C++ API
 ===============================
 
-We provide an example on how to deploy TVM modules in `apps/howto_deploy <https://github.com/apache/incubator-tvm/tree/main/apps/howto_deploy>`_
+We provide an example on how to deploy TVM modules in `apps/howto_deploy <https://github.com/apache/incubator-tvm/tree/master/apps/howto_deploy>`_
 
 To run the example, you can use the following command
 
@@ -38,17 +38,17 @@ TVM provides a minimum runtime, which costs around 300K to 600K depending on how
 In most cases, we can use ``libtvm_runtime.so`` that comes with the build.
 
 If somehow you find it is hard to build ``libtvm_runtime``, checkout
-`tvm_runtime_pack.cc <https://github.com/apache/incubator-tvm/tree/main/apps/howto_deploy/tvm_runtime_pack.cc>`_.
+`tvm_runtime_pack.cc <https://github.com/apache/incubator-tvm/tree/master/apps/howto_deploy/tvm_runtime_pack.cc>`_.
 It is an all-in-one example file that gives you the TVM runtime.
 You can compile this file using your build system and include it in your project.
 
-You can also checkout `apps <https://github.com/apache/incubator-tvm/tree/main/apps/>`_ for example applications built with TVM on iOS, Android and others.
+You can also checkout `apps <https://github.com/apache/incubator-tvm/tree/master/apps/>`_ for example applications built with TVM on iOS, Android and others.
 
 Dynamic Library vs. System Module
 ---------------------------------
 TVM provides two ways to use the compiled library.
-You can checkout `prepare_test_libs.py <https://github.com/apache/incubator-tvm/tree/main/apps/howto_deploy/prepare_test_libs.py>`_
-on how to generate the library and `cpp_deploy.cc <https://github.com/apache/incubator-tvm/tree/main/apps/howto_deploy/cpp_deploy.cc>`_ on how to use them.
+You can checkout `prepare_test_libs.py <https://github.com/apache/incubator-tvm/tree/master/apps/howto_deploy/prepare_test_libs.py>`_
+on how to generate the library and `cpp_deploy.cc <https://github.com/apache/incubator-tvm/tree/master/apps/howto_deploy/cpp_deploy.cc>`_ on how to use them.
 
 - Store library as a shared library and dynamically load the library into your project.
 - Bundle the compiled library into your project in system module mode.
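
For the dynamic-library path, the Python side only needs to build and export a module. A minimal sketch loosely following ``prepare_test_libs.py`` (the kernel and file name here are illustrative):

.. code:: python

    import tvm
    from tvm import te

    # Declare and schedule a trivial "add one" kernel.
    n = 1024
    A = te.placeholder((n,), name="A")
    B = te.compute((n,), lambda i: A[i] + 1.0, name="B")
    s = te.create_schedule(B.op)

    # Compile for CPU and export the shared library that cpp_deploy.cc loads.
    fadd = tvm.build(s, [A, B], "llvm", name="addone")
    fadd.export_library("test_addone_dll.so")
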
diff --git a/docs/_sources/deploy/hls.rst.txt b/docs/_sources/deploy/hls.rst.txt
index a8faf64..da1721d 100644
--- a/docs/_sources/deploy/hls.rst.txt
+++ b/docs/_sources/deploy/hls.rst.txt
@@ -45,7 +45,7 @@ We use two python scripts for this tutorial.
       s = te.create_schedule(C.op)
       px, x = s[C].split(C.op.axis[0], nparts=1)
 
-      s[C].bind(px, tvm.te.thread_axis("pipeline"))
+      s[C].bind(px, tvm.thread_axis("pipeline"))
 
       fadd = tvm.build(s, [A, B, C], tgt, target_host=tgt_host, name="myadd")
 
diff --git a/docs/_sources/deploy/index.rst.txt b/docs/_sources/deploy/index.rst.txt
index 68843ba..b38a7f5 100644
--- a/docs/_sources/deploy/index.rst.txt
+++ b/docs/_sources/deploy/index.rst.txt
@@ -69,4 +69,3 @@ target device without relying on RPC. see the following resources on how to do s
    integrate
    hls
    arm_compute_lib
-   tensorrt
diff --git a/docs/_sources/deploy/integrate.rst.txt b/docs/_sources/deploy/integrate.rst.txt
index fe40f68..99c968f 100644
--- a/docs/_sources/deploy/integrate.rst.txt
+++ b/docs/_sources/deploy/integrate.rst.txt
@@ -43,8 +43,7 @@ The only thing we have to do in C++ is to convert your array to DLTensor and pas
 ``DLTensor*`` to the generated function.
 
 
-Integrate User Defined Python Array
------------------------------------
+## Integrate User Defined Python Array
 
 Assume you have a python object ``MyArray``. There are three things that you need to do
 
@@ -65,6 +64,6 @@ Assume you have a python object ``MyArray``. There are three things that you nee
            dltensor_addr = self.get_dltensor_addr()
            return dltensor_addr
 
-   # You can put registration step in a separate file mypkg.tvm.py
-   # and only optionally import that if you only want optional dependency.
-   tvm.register_extension(MyArray)
+       # You can put registration step in a separate file mypkg.tvm.py
+       # and only optionally import that if you only want optional dependency.
+  tvm.register_extension(MyArray)
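
Putting the three steps together, a sketch of the full extension class; the ``_tvm_tcode``/``_tvm_handle`` attribute names follow the fragment above, and ``get_dltensor_addr`` is assumed to return the ``DLTensor*`` address as a Python int:

.. code:: python

    import tvm

    class MyArray(object):
        # Tell TVM to treat the handle as a DLTensor*.
        _tvm_tcode = tvm.TypeCode.ARRAY_HANDLE

        @property
        def _tvm_handle(self):
            dltensor_addr = self.get_dltensor_addr()
            return dltensor_addr

    tvm.register_extension(MyArray)
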
diff --git a/docs/_sources/deploy/tensorrt.rst.txt b/docs/_sources/deploy/tensorrt.rst.txt
deleted file mode 100644
index 27f11e9..0000000
--- a/docs/_sources/deploy/tensorrt.rst.txt
+++ /dev/null
@@ -1,297 +0,0 @@
-..  Licensed to the Apache Software Foundation (ASF) under one
-    or more contributor license agreements.  See the NOTICE file
-    distributed with this work for additional information
-    regarding copyright ownership.  The ASF licenses this file
-    to you under the Apache License, Version 2.0 (the
-    "License"); you may not use this file except in compliance
-    with the License.  You may obtain a copy of the License at
-
-..    http://www.apache.org/licenses/LICENSE-2.0
-
-..  Unless required by applicable law or agreed to in writing,
-    software distributed under the License is distributed on an
-    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-    KIND, either express or implied.  See the License for the
-    specific language governing permissions and limitations
-    under the License.
-
-Relay TensorRT Integration
-==========================
-**Author**: `Trevor Morris <https://github.com/trevor-m>`_
-
-Introduction
-------------
-
-NVIDIA TensorRT is a library for optimized deep learning inference. This integration will offload as
-many operators as possible from Relay to TensorRT, providing a performance boost on NVIDIA GPUs
-without the need to tune schedules.
-
-This guide will demonstrate how to install TensorRT and build TVM with TensorRT BYOC and runtime
-enabled. It will also provide example code to compile and run a ResNet-18 model using TensorRT and
-how to configure the compilation and runtime settings. Finally, we document the supported operators
-and how to extend the integration to support other operators.
-
-Installing TensorRT
--------------------
-
-In order to download TensorRT, you will need to create an NVIDIA Developer program account. Please
-see NVIDIA's documentation for more info:
-https://docs.nvidia.com/deeplearning/tensorrt/install-guide/index.html. If you have a Jetson device
-such as a TX1, TX2, Xavier, or Nano, TensorRT will already be installed on the device via the
-JetPack SDK.
-
-There are two methods to install TensorRT:
-
-* System install via deb or rpm package.
-* Tar file installation.
-
-With the tar file installation method, you must provide the path of the extracted tar archive to
-USE_TENSORRT_RUNTIME=/path/to/TensorRT. With the system install method,
-USE_TENSORRT_RUNTIME=ON will automatically locate your installation.
-
-Building TVM with TensorRT support
-----------------------------------
-
-There are two separate build flags for TensorRT integration in TVM. These flags also enable
-cross-compilation: USE_TENSORRT_CODEGEN=ON will allow you to build a module with TensorRT support on
-a host machine, while USE_TENSORRT_RUNTIME=ON will enable the TVM runtime on an edge device to
-execute the TensorRT module. You should enable both if you want to compile and also execute models
-with the same TVM build.
-
-* USE_TENSORRT_CODEGEN=ON/OFF - This flag will enable compiling a TensorRT module, which does not require any
-  TensorRT library.
-* USE_TENSORRT_RUNTIME=ON/OFF/path-to-TensorRT - This flag will enable the TensorRT runtime module.
-  This will build TVM against the installed TensorRT library.
-
-Example setting in config.cmake file:
-
-.. code:: cmake
-
-    set(USE_TENSORRT_CODEGEN ON)
-    set(USE_TENSORRT_RUNTIME /home/ubuntu/TensorRT-7.0.0.11)
-
-
-Build and Deploy ResNet-18 with TensorRT
-----------------------------------------
-
-Create a Relay graph from a MXNet ResNet-18 model.
-
-.. code:: python
-
-    import tvm
-    from tvm import relay
-    import mxnet
-    from mxnet.gluon.model_zoo.vision import get_model
-
-    dtype = "float32"
-    input_shape = (1, 3, 224, 224)
-    block = get_model('resnet18_v1', pretrained=True)
-    mod, params = relay.frontend.from_mxnet(block, shape={'data': input_shape}, dtype=dtype)
-
-
-Annotate and partition the graph for TensorRT. All ops which are supported by the TensorRT
-integration will be marked and offloaded to TensorRT. The rest of the ops will go through the
-regular TVM CUDA compilation and code generation.
-
-.. code:: python
-
-    from tvm.relay.op.contrib.tensorrt import partition_for_tensorrt
-    mod, config = partition_for_tensorrt(mod, params)
-
-
-Build the Relay graph, using the new module and config returned by partition_for_tensorrt. The
-target must always be a cuda target. ``partition_for_tensorrt`` will automatically fill out the
-required values in the config, so there is no need to modify it - just pass it along to the
-PassContext so the values can be read during compilation.
-
-.. code:: python
-
-    target = "cuda"
-    with tvm.transform.PassContext(opt_level=3, config={'relay.ext.tensorrt.options': config}):
-        lib = relay.build(mod, target=target, params=params)
-
-
-Export the module.
-
-.. code:: python
-
-    lib.export_library('compiled.so')
-
-
-Load module and run inference on the target machine, which must be built with
-``USE_TENSORRT_RUNTIME`` enabled. The first run will take longer because the TensorRT engine will
-have to be built.
-
-.. code:: python
-
-    ctx = tvm.gpu(0)
-    loaded_lib = tvm.runtime.load_module('compiled.so')
-    gen_module = tvm.contrib.graph_runtime.GraphModule(loaded_lib['default'](ctx))
-    input_data = np.random.uniform(0, 1, input_shape).astype(dtype)
-    gen_module.run(data=input_data)
-
-
-Partitioning and Compilation Settings
--------------------------------------
-
-There are some options which can be configured in ``partition_for_tensorrt``.
-
-* ``version`` - TensorRT version to target as tuple of (major, minor, patch). If TVM is compiled
-  with USE_TENSORRT_RUNTIME=ON, the linked TensorRT version will be used instead. The version
-  will affect which ops can be partitioned to TensorRT.
-* ``use_implicit_batch`` - Use TensorRT implicit batch mode (default true). Setting to false will
-  enable explicit batch mode which will widen supported operators to include those which modify the
-  batch dimension, but may reduce performance for some models.
-* ``remove_no_mac_subgraphs`` - A heuristic to improve performance. Removes subgraphs which have
-  been partitioned for TensorRT if they do not have any multiply-accumulate operations. The removed
-  subgraphs will go through TVM's standard compilation instead.
-* ``max_workspace_size`` - How many bytes of workspace size to allow each subgraph to use for
-  TensorRT engine creation. See TensorRT documentation for more info. Can be overridden at runtime.
-
-
-Runtime Settings
-----------------
-
-There are some additional options which can be configured at runtime using environment variables.
-
-* Automatic FP16 Conversion - Environment variable ``TVM_TENSORRT_USE_FP16=1`` can be set to
-  automatically convert the TensorRT components of your model to 16-bit floating point precision.
-  This can greatly increase performance, but may cause some slight loss in the model accuracy.
-* Caching TensorRT Engines - During the first inference, the runtime will invoke the TensorRT API
-  to build an engine. This can be time consuming, so you can set ``TVM_TENSORRT_CACHE_DIR`` to
-  point to a directory to save these built engines to on the disk. The next time you load the model
-  and give it the same directory, the runtime will load the already built engines to avoid the long
-  warmup time. A unique directory is required for each model.
-* TensorRT has a parameter to configure the maximum amount of scratch space that each layer in the
-  model can use. It is generally best to use the highest value which does not cause you to run out
-  of memory. You can use ``TVM_TENSORRT_MAX_WORKSPACE_SIZE`` to override this by specifying the
-  workspace size in bytes you would like to use.
-
-
-Operator support
-----------------
-+------------------------+------------------------------------+
-|       Relay Node       |              Remarks               |
-+========================+====================================+
-| nn.relu                |                                    |
-+------------------------+------------------------------------+
-| sigmoid                |                                    |
-+------------------------+------------------------------------+
-| tanh                   |                                    |
-+------------------------+------------------------------------+
-| nn.batch_norm          |                                    |
-+------------------------+------------------------------------+
-| nn.softmax             |                                    |
-+------------------------+------------------------------------+
-| nn.conv2d              |                                    |
-+------------------------+------------------------------------+
-| nn.dense               |                                    |
-+------------------------+------------------------------------+
-| nn.bias_add            |                                    |
-+------------------------+------------------------------------+
-| add                    |                                    |
-+------------------------+------------------------------------+
-| subtract               |                                    |
-+------------------------+------------------------------------+
-| multiply               |                                    |
-+------------------------+------------------------------------+
-| divide                 |                                    |
-+------------------------+------------------------------------+
-| power                  |                                    |
-+------------------------+------------------------------------+
-| maximum                |                                    |
-+------------------------+------------------------------------+
-| minimum                |                                    |
-+------------------------+------------------------------------+
-| nn.max_pool2d          |                                    |
-+------------------------+------------------------------------+
-| nn.avg_pool2d          |                                    |
-+------------------------+------------------------------------+
-| nn.global_max_pool2d   |                                    |
-+------------------------+------------------------------------+
-| nn.global_avg_pool2d   |                                    |
-+------------------------+------------------------------------+
-| exp                    |                                    |
-+------------------------+------------------------------------+
-| log                    |                                    |
-+------------------------+------------------------------------+
-| sqrt                   |                                    |
-+------------------------+------------------------------------+
-| abs                    |                                    |
-+------------------------+------------------------------------+
-| negative               |                                    |
-+------------------------+------------------------------------+
-| nn.batch_flatten       |                                    |
-+------------------------+------------------------------------+
-| expand_dims            |                                    |
-+------------------------+------------------------------------+
-| squeeze                |                                    |
-+------------------------+------------------------------------+
-| concatenate            |                                    |
-+------------------------+------------------------------------+
-| nn.conv2d_transpose    |                                    |
-+------------------------+------------------------------------+
-| transpose              |                                    |
-+------------------------+------------------------------------+
-| layout_transform       |                                    |
-+------------------------+------------------------------------+
-| reshape                |                                    |
-+------------------------+------------------------------------+
-| nn.pad                 |                                    |
-+------------------------+------------------------------------+
-| sum                    |                                    |
-+------------------------+------------------------------------+
-| prod                   |                                    |
-+------------------------+------------------------------------+
-| max                    |                                    |
-+------------------------+------------------------------------+
-| min                    |                                    |
-+------------------------+------------------------------------+
-| mean                   |                                    |
-+------------------------+------------------------------------+
-| nn.adaptive_max_pool2d |                                    |
-+------------------------+------------------------------------+
-| nn.adaptive_avg_pool2d |                                    |
-+------------------------+------------------------------------+
-| clip                   | Requires TensorRT 5.1.5 or greater |
-+------------------------+------------------------------------+
-| nn.leaky_relu          | Requires TensorRT 5.1.5 or greater |
-+------------------------+------------------------------------+
-| sin                    | Requires TensorRT 5.1.5 or greater |
-+------------------------+------------------------------------+
-| cos                    | Requires TensorRT 5.1.5 or greater |
-+------------------------+------------------------------------+
-| atan                   | Requires TensorRT 5.1.5 or greater |
-+------------------------+------------------------------------+
-| ceil                   | Requires TensorRT 5.1.5 or greater |
-+------------------------+------------------------------------+
-| floor                  | Requires TensorRT 5.1.5 or greater |
-+------------------------+------------------------------------+
-| strided_slice          | Requires TensorRT 5.1.5 or greater |
-+------------------------+------------------------------------+
-| nn.conv3d              | Requires TensorRT 6.0.1 or greater |
-+------------------------+------------------------------------+
-| nn.max_pool3d          | Requires TensorRT 6.0.1 or greater |
-+------------------------+------------------------------------+
-| nn.avg_pool3d          | Requires TensorRT 6.0.1 or greater |
-+------------------------+------------------------------------+
-| nn.conv3d_transpose    | Requires TensorRT 6.0.1 or greater |
-+------------------------+------------------------------------+
-
-
-Adding a new operator
----------------------
-To add support for a new operator, there are several files we need to make changes to:
-
-* `src/runtime/contrib/tensorrt/tensorrt_ops.cc` Create a new op converter class which
-  implements the ``TensorRTOpConverter`` interface. You must implement the constructor to specify how
-  many inputs there are and whether they are tensors or weights. You must also implement the
-  ``Convert`` method to perform the conversion. This is done by using the inputs, attributes, and
-  network from params to add the new TensorRT layers and push the layer outputs. You can use the
-  existing converters as an example. Finally, register your new op converter in the
-  ``GetOpConverters()`` map.
-* `python/relay/op/contrib/tensorrt.py` This file contains the annotation rules for TensorRT. These
-  determine which operators and which of their attributes are supported. You must register an annotation
-  function for the relay operator and specify which attributes are supported by your converter, by
-  checking the attributes and returning true or false.
-* `tests/python/contrib/test_tensorrt.py` Add unit tests for the given operator.
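
Tying the runtime settings in the removed guide together, a sketch of loading the exported module with FP16 conversion and engine caching enabled (paths and shapes are illustrative; the target machine must be built with ``USE_TENSORRT_RUNTIME``):

.. code:: python

    import os
    import numpy as np
    import tvm
    from tvm.contrib import graph_runtime

    # Both environment variables are read by the TensorRT runtime described above.
    os.environ["TVM_TENSORRT_USE_FP16"] = "1"
    os.environ["TVM_TENSORRT_CACHE_DIR"] = "./trt_cache"

    ctx = tvm.gpu(0)
    loaded_lib = tvm.runtime.load_module("compiled.so")
    module = graph_runtime.GraphModule(loaded_lib["default"](ctx))
    module.run(data=np.random.uniform(0, 1, (1, 3, 224, 224)).astype("float32"))
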
diff --git a/docs/_sources/dev/convert_layout.rst.txt b/docs/_sources/dev/convert_layout.rst.txt
index 6c9890f..07ebc20 100644
--- a/docs/_sources/dev/convert_layout.rst.txt
+++ b/docs/_sources/dev/convert_layout.rst.txt
@@ -157,7 +157,7 @@ First example is for layout agnostic operators. These operators do not have any
       Layout ret;
 
       if (new_in_layouts.defined()) {
-        ICHECK_GE(new_in_layouts.size(), 1);
+        CHECK_GE(new_in_layouts.size(), 1);
         ret = new_in_layouts[0];
       } else {
         for (size_t i = 0; i < old_in_layouts.size(); ++i) {
diff --git a/docs/_sources/dev/frontend/tensorflow.rst.txt b/docs/_sources/dev/frontend/tensorflow.rst.txt
index b234ed7..bca0fc1 100644
--- a/docs/_sources/dev/frontend/tensorflow.rst.txt
+++ b/docs/_sources/dev/frontend/tensorflow.rst.txt
@@ -57,7 +57,7 @@ Export
 
 TensorFlow frontend expects a frozen protobuf (.pb) or saved model as input. It currently does not support checkpoint (.ckpt). The graphdef needed by the TensorFlow frontend can be extracted from the active session, or by using the `TFParser`_ helper class.
 
-.. _TFParser: https://github.com/apache/incubator-tvm/blob/main/python/tvm/relay/frontend/tensorflow_parser.py
+.. _TFParser: https://github.com/apache/incubator-tvm/blob/master/python/tvm/relay/frontend/tensorflow_parser.py
 
 The model should be exported with a number of transformations to prepare the model for inference. It is also important to set ```add_shapes=True```, as this will embed the output shapes of each node into the graph. Here is one function to export a model as a protobuf given a session:
 
@@ -101,7 +101,7 @@ Import the Model
 Explicit Shape:
 ~~~~~~~~~~~~~~~
 
-To ensure shapes can be known throughout the entire graph, pass the ```shape``` argument to ```from_tensorflow```. This dictionary maps input names to input shapes. Please refer to these `test cases <https://github.com/apache/incubator-tvm/blob/main/tests/python/frontend/tensorflow/test_forward.py#L36>`_ for examples.
+To ensure shapes can be known throughout the entire graph, pass the ```shape``` argument to ```from_tensorflow```. This dictionary maps input names to input shapes. Please refer to these `test cases <https://github.com/apache/incubator-tvm/blob/master/tests/python/frontend/tensorflow/test_forward.py#L36>`_ for examples.
 
 Data Layout
 ~~~~~~~~~~~
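
A minimal sketch of the import described above, from frozen protobuf to Relay; the file name and the input name/shape are assumptions:

.. code:: python

    import tensorflow as tf
    from tvm import relay

    # Load a frozen GraphDef exported with add_shapes=True.
    with tf.io.gfile.GFile("frozen_model.pb", "rb") as f:
        graph_def = tf.compat.v1.GraphDef()
        graph_def.ParseFromString(f.read())

    # Mapping input names to shapes lets shapes propagate through the graph.
    mod, params = relay.frontend.from_tensorflow(
        graph_def, shape={"input": (1, 224, 224, 3)}
    )
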
diff --git a/docs/_sources/dev/hybrid_script.rst.txt b/docs/_sources/dev/hybrid_script.rst.txt
index 33a65f2..939cf05 100644
--- a/docs/_sources/dev/hybrid_script.rst.txt
+++ b/docs/_sources/dev/hybrid_script.rst.txt
@@ -31,7 +31,7 @@ Features
 Software Emulation
 ~~~~~~~~~~~~~~~~~~
 
-In software emulation, the most interesting thing is the decorator ``tvm.te.hybrid.script``.
+In software emulation, the most interesting thing is the decorator ``tvm.hybrid.script``.
 This decorator helps with 2 things:
 
 1. Importing runtime variables
@@ -40,7 +40,7 @@ This decorator helps 2 things:
 
 Correct me if I am wrong: I believe that how 1. is implemented is dangerous, but I have no
 choice. What I did is to add those names into python dict ``func.__global__`` and after
-the call to ``func`` is done, those names will be cleaned up.
+the call to ``func`` is done, those names will be cleaned up. 
 
 Overload is simple: the decorator checks the arguments' types and determines which function
 should be actually called.
@@ -49,16 +49,16 @@ should be actually called.
 Backend Compilation
 ~~~~~~~~~~~~~~~~~~~
 
-Compilation is a large module, you can see ``python/tvm/te/hybrid/`` for more
-details. The first stage determines the usage, or more accurately the
-declaration of each variable and the second stage does the actual IR
-generation.
+Compilation is a large module, you can see ``python/tvm/hybrid/var_decl.py`` and
+``python/tvm/hybrid/parser.py`` for more details. The first stage determines the
+usage, or more accurately the declaration of each variable and the second stage does
+the actual IR generation.
 
 Attributes
 ~~~~~~~~~~
 
 So far, ONLY tensors' `shape` attribute is supported. You can see ``visit_Subscript``
-in ``python/tvm/te/hybrid/parser.py`` for more details. This is a hacky solution, I just
+in ``python/tvm/hybrid/parser.py`` for more details. This is a hacky solution, I just
 check the attributes when visiting a subscript.
 
 Loops
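
A minimal hybrid-script sketch under the ``tvm.hybrid`` namespace this build documents (``output_tensor`` is one of the runtime names the decorator injects):

.. code:: python

    import tvm
    from tvm import hybrid

    @hybrid.script
    def outer_product(a, b):
        c = output_tensor((a.shape[0], b.shape[0]), "float32")
        for i in range(a.shape[0]):
            for j in range(b.shape[0]):
                c[i, j] = a[i] * b[j]
        return c
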
diff --git a/docs/_sources/dev/index.rst.txt b/docs/_sources/dev/index.rst.txt
index d70b90a..2e577df 100644
--- a/docs/_sources/dev/index.rst.txt
+++ b/docs/_sources/dev/index.rst.txt
@@ -49,7 +49,7 @@ In this guide, we will study an example compilation flow in the compiler. The fi
 - Runtime Execution: the user loads back a `runtime.Module` and runs the compiled functions in the supported runtime environment.
 
 
-.. figure:: https://raw.githubusercontent.com/tvmai/web-data/main/images/design/tvm_dyn_workflow.svg
+.. figure:: https://raw.githubusercontent.com/tvmai/web-data/master/images/design/tvm_dyn_workflow.svg
    :align: center
    :width: 85%
 
@@ -201,7 +201,7 @@ except that the data structure of interest changes from the numpy.ndarray to tvm
 Logical Architecture Components
 -------------------------------
 
-.. figure:: https://raw.githubusercontent.com/tvmai/web-data/main/images/design/tvm_static_overview.svg
+.. figure:: https://raw.githubusercontent.com/tvmai/web-data/master/images/design/tvm_static_overview.svg
    :align: center
    :width: 85%
 
diff --git a/docs/_sources/dev/inferbound.rst.txt b/docs/_sources/dev/inferbound.rst.txt
index 7d0127a..6956600 100644
--- a/docs/_sources/dev/inferbound.rst.txt
+++ b/docs/_sources/dev/inferbound.rst.txt
@@ -22,7 +22,7 @@ InferBound Pass
 *******************************************
 
 
-The InferBound pass is run after normalize, and before ScheduleOps `build_module.py <https://github.com/apache/incubator-tvm/blob/main/python/tvm/driver/build_module.py>`_. The main job of InferBound is to create the bounds map, which specifies a Range for each IterVar in the program. These bounds are then passed to ScheduleOps, where they are used to set the extents of For loops, see `MakeLoopNest <https://github.com/apache/incubator-tvm/blob/main/src/te/operation/op_util.cc>`_, and to  [...]
+The InferBound pass is run after normalize, and before ScheduleOps `build_module.py <https://github.com/apache/incubator-tvm/blob/master/python/tvm/driver/build_module.py>`_. The main job of InferBound is to create the bounds map, which specifies a Range for each IterVar in the program. These bounds are then passed to ScheduleOps, where they are used to set the extents of For loops, see `MakeLoopNest <https://github.com/apache/incubator-tvm/blob/master/src/te/operation/op_util.cc>`_, and [...]
 
 The output of InferBound is a map from IterVar to Range:
 
@@ -53,9 +53,9 @@ Therefore, let's review the Range and IterVar classes:
    	};
    }
 
-Note that IterVarNode also contains a Range ``dom``. This ``dom`` may or may not have a meaningful value, depending on when the IterVar was created. For example, when ``tvm.compute`` is called, an `IterVar is created <https://github.com/apache/incubator-tvm/blob/main/src/te/operation/compute_op.cc>`_ for each axis and reduce axis, with dom's equal to the shape supplied in the call to ``tvm.compute``.
+Note that IterVarNode also contains a Range ``dom``. This ``dom`` may or may not have a meaningful value, depending on when the IterVar was created. For example, when ``tvm.compute`` is called, an `IterVar is created <https://github.com/apache/incubator-tvm/blob/master/src/te/operation/compute_op.cc>`_ for each axis and reduce axis, with dom's equal to the shape supplied in the call to ``tvm.compute``.
 
-On the other hand, when ``tvm.split`` is called, `IterVars are created <https://github.com/apache/incubator-tvm/blob/main/src/te/schedule/schedule_lang.cc>`_ for the inner and outer axes, but these IterVars are not given a meaningful ``dom`` value.
+On the other hand, when ``tvm.split`` is called, `IterVars are created <https://github.com/apache/incubator-tvm/blob/master/src/te/schedule/schedule_lang.cc>`_ for the inner and outer axes, but these IterVars are not given a meaningful ``dom`` value.
 
 In any case, the ``dom`` member of an IterVar is never modified during InferBound. However, keep in mind that the ``dom`` member of an IterVar is sometimes used as a default value for the Ranges InferBound computes.
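
 As a quick illustration, here is a minimal sketch (assuming a recent ``tvm.te`` API; the tensors ``A`` and ``B`` are just for this example) of ``dom`` being populated by ``compute`` but not by ``split``:

 .. code:: python

    import tvm
    from tvm import te

    n = te.var("n")
    A = te.placeholder((n, 16), name="A")
    B = te.compute((n, 16), lambda i, j: A[i, j] * 2.0, name="B")

    # Each axis of the ComputeOp is an IterVar whose dom was set from the shape.
    for iv in B.op.axis:
        print(iv.dom)  # e.g. range(min=0, ext=n), then range(min=0, ext=16)

    # After a split, the newly created IterVars carry no meaningful dom.
    s = te.create_schedule(B.op)
    xo, xi = s[B].split(B.op.axis[0], factor=4)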
 
@@ -117,14 +117,14 @@ Tensors haven't been mentioned yet, but in the context of TVM, a Tensor represen
    	int value_index;
    };
 
-In the Operation class declaration above, we can see that each operation also has a list of InputTensors. Thus the stages of the schedule form a DAG, where each stage is a node in the graph. There is an edge in the graph from Stage A to Stage B, if the operation of Stage B has an input tensor whose source operation is the op of Stage A. Put simply, there is an edge from A to B, if B consumes a tensor produced by A. See the diagram below. This graph is created at the beginning of InferBou [...]
+In the Operation class declaration above, we can see that each operation also has a list of InputTensors. Thus the stages of the schedule form a DAG, where each stage is a node in the graph. There is an edge in the graph from Stage A to Stage B, if the operation of Stage B has an input tensor whose source operation is the op of Stage A. Put simply, there is an edge from A to B, if B consumes a tensor produced by A. See the diagram below. This graph is created at the beginning of InferBou [...]
 
-.. image:: https://raw.githubusercontent.com/tvmai/tvmai.github.io/main/images/docs/inferbound/stage_graph.png
+.. image:: https://raw.githubusercontent.com/tvmai/tvmai.github.io/master/images/docs/inferbound/stage_graph.png
     :align: center
 
 InferBound makes one pass through the graph, visiting each stage exactly once. InferBound starts from the output stages (i.e., the solid blue nodes in the graph above), and moves upwards (in the opposite direction of the edges). This is achieved by performing a reverse topological sort on the nodes of the graph. Therefore, when InferBound visits a stage, each of its consumer stages has already been visited.
 
-.. image:: https://raw.githubusercontent.com/tvmai/tvmai.github.io/main/images/docs/inferbound/inferbound_traversal.png
+.. image:: https://raw.githubusercontent.com/tvmai/tvmai.github.io/master/images/docs/inferbound/inferbound_traversal.png
     :align: center
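
 For intuition, the traversal order itself can be sketched in a few lines of Python (a hypothetical illustration, not TVM's actual implementation):

 .. code:: python

    def reverse_topo_order(stages, consumers):
        """Visit each stage only after all of its consumers (sketch)."""
        order, visited = [], set()

        def visit(stage):
            if stage in visited:
                return
            visited.add(stage)
            for consumer in consumers.get(stage, []):
                visit(consumer)
            order.append(stage)

        for stage in stages:
            visit(stage)
        return order

    # Stage graph: A feeds B, and B feeds the output stage C.
    print(reverse_topo_order(["A", "B", "C"], {"A": ["B"], "B": ["C"]}))
    # ['C', 'B', 'A']: every consumer is visited before its producer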
 
 The InferBound pass is shown in the following pseudo-code:
@@ -161,7 +161,7 @@ The InferBound pass traverses the stage graph, as described above. However, with
 
 Recall that all IterVars of the stage are related by IterVarRelations. The IterVarRelations of a stage form a directed acyclic hyper-graph, where each node of the graph corresponds to an IterVar, and each hyper-edge corresponds to an IterVarRelation. We can also represent this hyper-graph as a DAG, which is simpler to visualize as shown below.
 
-.. image:: https://raw.githubusercontent.com/tvmai/tvmai.github.io/main/images/docs/inferbound/relations.png
+.. image:: https://raw.githubusercontent.com/tvmai/tvmai.github.io/master/images/docs/inferbound/relations.png
     :align: center
 
 
@@ -206,7 +206,7 @@ This process can seem complicated. One reason is that a stage can have more than
 
 As mentioned above, a consumer may only require a small number of elements from each tensor. The consumers can be thought of as making requests to the stage for certain regions of its output tensors. The job of Phases 1-3 is to establish the regions of each output tensor that are required by each consumer.
 
-.. image:: https://raw.githubusercontent.com/tvmai/tvmai.github.io/main/images/docs/inferbound/inferbound_phases.png
+.. image:: https://raw.githubusercontent.com/tvmai/tvmai.github.io/master/images/docs/inferbound/inferbound_phases.png
     :align: center
 
 IntSets
@@ -320,13 +320,13 @@ A ComputeOp has only a single output Tensor, whose axes correspond to the axis v
    // i is the dimension
    rmap[axis[i]] = arith::Union(tmap[output][i]).cover_range(axis[i]->dom);
 
-.. image:: https://raw.githubusercontent.com/tvmai/tvmai.github.io/main/images/docs/inferbound/gatherbound.png
+.. image:: https://raw.githubusercontent.com/tvmai/tvmai.github.io/master/images/docs/inferbound/gatherbound.png
     :align: center
 
 
 The union of IntSets is computed by converting each IntSet to an Interval, and then taking the minimum of all of these intervals' minimums and the maximum of all of their maximums.
 
-.. image:: https://raw.githubusercontent.com/tvmai/tvmai.github.io/main/images/docs/inferbound/union.png
+.. image:: https://raw.githubusercontent.com/tvmai/tvmai.github.io/master/images/docs/inferbound/union.png
     :align: center
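
 As a toy illustration of this rule (a hypothetical helper, not TVM's ``arith::Union``), with each Interval written as a ``(min, max)`` pair:

 .. code:: python

    def interval_union(intervals):
        """Cover a set of (min, max) intervals by one interval (sketch)."""
        lo = min(i[0] for i in intervals)
        hi = max(i[1] for i in intervals)
        return (lo, hi)

    # Two disjoint requests are covered by a single wide interval,
    # so elements 3..5 end up being computed unnecessarily.
    print(interval_union([(0, 2), (6, 8)]))  # (0, 8)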
 
 
@@ -335,7 +335,7 @@ This clearly results in some unnecessary computation, i.e., tensor elements will
 Unfortunately, even if we're lucky and the IntervalSet unions do not produce unnecessary computation, the fact that GatherBound considers each dimension of the tensor separately can also cause unnecessary computation. For example, in the diagram below the two consumers A and B require disjoint regions of the 2D tensor: consumer A requires T[0:2, 0:2], and consumer B requires T[2:4, 2:4]. GatherBound operates on each dimension of the tensor separately. For the first dimension of the tenso [...]
 
 
-.. image:: https://raw.githubusercontent.com/tvmai/tvmai.github.io/main/images/docs/inferbound/gatherbound_problem.png
+.. image:: https://raw.githubusercontent.com/tvmai/tvmai.github.io/master/images/docs/inferbound/gatherbound_problem.png
     :align: center
 
 .. _InferBoundCA:
@@ -691,7 +691,7 @@ Determining the amount of B that must be computed is the responsibility of Infer
 When InferRootBound is working on stage B, it visits B's consumer stage C to find out how much of B is requested by C. C has root_iter_vars ci and cj, which have been fused and then split. This results in the following :ref:`IterVarHyperGraph` for stage C.
 
 
-.. image:: https://raw.githubusercontent.com/tvmai/tvmai.github.io/main/images/docs/inferbound/passupdomain_problem.png
+.. image:: https://raw.githubusercontent.com/tvmai/tvmai.github.io/master/images/docs/inferbound/passupdomain_problem.png
     :align: center
 
 
@@ -750,16 +750,16 @@ This example shows that schedules containing a split of fused axes are difficult
 
 If the split factor is 4 or 8 in the above example, the region of B needed in each iteration of the outer loop is rectangular.
 
-.. image:: https://raw.githubusercontent.com/tvmai/tvmai.github.io/main/images/docs/inferbound/passupdomain_div.png
+.. image:: https://raw.githubusercontent.com/tvmai/tvmai.github.io/master/images/docs/inferbound/passupdomain_div.png
     :align: center
 
 However, if the split factor is changed from 4 to 3 in the example above, it is easy to see that the region of B that C needs can no longer be described by an independent Range for each of its axes.
 
 
-.. image:: https://raw.githubusercontent.com/tvmai/tvmai.github.io/main/images/docs/inferbound/passupdomain_nodiv.png
+.. image:: https://raw.githubusercontent.com/tvmai/tvmai.github.io/master/images/docs/inferbound/passupdomain_nodiv.png
     :align: center
 
 The best that can be done with rectangular regions is shown in the following diagram. The orange regions are the minimum rectangular regions covering the region of B that needs to be computed, at each iteration of the outer loop.
 
-.. image:: https://raw.githubusercontent.com/tvmai/tvmai.github.io/main/images/docs/inferbound/passupdomain_min.png
+.. image:: https://raw.githubusercontent.com/tvmai/tvmai.github.io/master/images/docs/inferbound/passupdomain_min.png
     :align: center
diff --git a/docs/_sources/dev/introduction_to_module_serialization.rst.txt b/docs/_sources/dev/introduction_to_module_serialization.rst.txt
index 6b2f2ad..5451b84 100644
--- a/docs/_sources/dev/introduction_to_module_serialization.rst.txt
+++ b/docs/_sources/dev/introduction_to_module_serialization.rst.txt
@@ -32,7 +32,7 @@ Let us build one ResNet-18 workload for GPU as an example first.
 
    from tvm import relay
    from tvm.relay import testing
-   from tvm.contrib import utils
+   from tvm.contrib import util
    import tvm
 
    # Resnet18 workload
@@ -43,7 +43,7 @@ Let us build one ResNet-18 workload for GPU as an example first.
        _, resnet18_lib, _ = relay.build_module.build(resnet18_mod, "cuda", params=resnet18_params)
 
    # create a temporary directory
-   temp = utils.tempdir()
+   temp = util.tempdir()
 
    # path lib
    file_name = "deploy.so"
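
 A typical continuation of this snippet (a sketch of the usual export/load workflow, not necessarily the elided lines of this file) would be:

 .. code:: python

    # Export the compiled library into the temporary directory,
    # then load it back as a runtime.Module.
    path_lib = temp.relpath(file_name)
    resnet18_lib.export_library(path_lib)
    loaded_lib = tvm.runtime.load_module(path_lib)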
diff --git a/docs/_sources/dev/pass_infra.rst.txt b/docs/_sources/dev/pass_infra.rst.txt
index 898e517..6fd150d 100644
--- a/docs/_sources/dev/pass_infra.rst.txt
+++ b/docs/_sources/dev/pass_infra.rst.txt
@@ -196,7 +196,7 @@ optimizations (IPO), which are similar to the module pass used in LLVM. Some
 typical passes in Relay that need the global picture of a module, such as
 A-normal form conversion and lambda lifting, etc., fall into this set. At this
 level, users can even add and/or delete functions in a module. Note that all
-passes
+passes 
 
 .. code:: c++
 
@@ -276,12 +276,12 @@ order that they were appended to the pass list.
                                       const PassContext& pass_ctx) const {
       Module mod = module;
       for (const Pass& pass : passes) {
-        ICHECK(pass.defined()) << "Found undefined pass for optimization.";
+        CHECK(pass.defined()) << "Found undefined pass for optimization.";
         const PassInfo& pass_info = pass->Info();
         if (!PassEnabled(pass_info))  continue;
         for (const auto& it : pass_info->required) {
           const auto* name = it.as<tvm::ir::StringImm>();
-          ICHECK(name);
+          CHECK(name);
           mod = GetPass(name->value)(mod, pass_ctx);
         }
         mod = pass(mod, pass_ctx);
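
 The same sequential behavior is visible from the Python frontend. Here is a minimal sketch (assuming the standard Relay passes) of composing passes and running them under a ``PassContext``:

 .. code:: python

    import tvm
    from tvm import relay

    x = relay.var("x", shape=(2, 2))
    mod = tvm.IRModule.from_expr(relay.Function([x], x + relay.const(1.0)))

    # Passes run in the order they were appended, as in the C++ loop above.
    seq = tvm.transform.Sequential(
        [relay.transform.InferType(), relay.transform.FoldConstant()]
    )
    with tvm.transform.PassContext(opt_level=3):
        mod = seq(mod)
    print(mod)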
@@ -306,7 +306,7 @@ pass is registered with an API endpoint as we will show later.
       using tvm::runtime::Registry;
       std::string fpass_name = "relay._transform." + pass_name;
       const auto* f = Registry::Get(fpass_name);
-      ICHECK(f != nullptr) << "Cannot find " << fpass_name
+      CHECK(f != nullptr) << "Cannot find " << fpass_name
                           << "to create the pass " << pass_name;
       return (*f)();
     }
@@ -530,20 +530,20 @@ optimization pipeline and debug Relay and tir passes, please refer to the
 
 .. _Block: https://mxnet.incubator.apache.org/api/python/docs/api/gluon/block.html#gluon-block
 
-.. _include/tvm/ir/transform.h: https://github.com/apache/incubator-tvm/blob/main/include/tvm/ir/transform.h
+.. _include/tvm/ir/transform.h: https://github.com/apache/incubator-tvm/blob/master/include/tvm/ir/transform.h
 
-.. _src/relay/ir/transform.cc: https://github.com/apache/incubator-tvm/blob/main/src/relay/ir/transform.cc
+.. _src/relay/ir/transform.cc: https://github.com/apache/incubator-tvm/blob/master/src/relay/ir/transform.cc
 
-.. _src/ir/transform.cc: https://github.com/apache/incubator-tvm/blob/main/src/ir/transform.cc
+.. _src/ir/transform.cc: https://github.com/apache/incubator-tvm/blob/master/src/ir/transform.cc
 
-.. _src/relay/pass/fold_constant.cc: https://github.com/apache/incubator-tvm/blob/main/src/relay/pass/fold_constant.cc
+.. _src/relay/pass/fold_constant.cc: https://github.com/apache/incubator-tvm/blob/master/src/relay/pass/fold_constant.cc
 
-.. _python/tvm/relay/transform.py: https://github.com/apache/incubator-tvm/blob/main/python/tvm/relay/transform.py
+.. _python/tvm/relay/transform.py: https://github.com/apache/incubator-tvm/blob/master/python/tvm/relay/transform.py
 
-.. _include/tvm/relay/transform.h: https://github.com/apache/incubator-tvm/blob/main/include/tvm/relay/transform.h
+.. _include/tvm/relay/transform.h: https://github.com/apache/incubator-tvm/blob/master/include/tvm/relay/transform.h
 
-.. _python/tvm/ir/transform.py: https://github.com/apache/incubator-tvm/blob/main/python/tvm/ir/transform.py
+.. _python/tvm/ir/transform.py: https://github.com/apache/incubator-tvm/blob/master/python/tvm/ir/transform.py
 
-.. _src/tir/transforms/unroll_loop.cc: https://github.com/apache/incubator-tvm/blob/main/src/tir/transforms/unroll_loop.cc
+.. _src/tir/transforms/unroll_loop.cc: https://github.com/apache/incubator-tvm/blob/master/src/tir/transforms/unroll_loop.cc
 
-.. _use pass infra: https://github.com/apache/incubator-tvm/blob/main/tutorials/dev/use_pass_infra.py
+.. _use pass infra: https://github.com/apache/incubator-tvm/blob/master/tutorials/dev/use_pass_infra.py
diff --git a/docs/_sources/dev/relay_add_op.rst.txt b/docs/_sources/dev/relay_add_op.rst.txt
index 0697939..7dca251 100644
--- a/docs/_sources/dev/relay_add_op.rst.txt
+++ b/docs/_sources/dev/relay_add_op.rst.txt
@@ -231,7 +231,7 @@ Adding a Gradient in C++
 Adding a gradient in C++ is similar to adding one in Python, but the
 interface for registering is slightly different.
 
-First, make sure ``src/relay/pass/pattern_utils.h`` is included. It provides
+First, make sure ``src/relay/pass/pattern_util.h`` is included. It provides
 helper functions for creating nodes in the Relay AST. Then, define the
 gradient in a similar fashion as in the Python example:
 
diff --git a/docs/_sources/dev/relay_add_pass.rst.txt b/docs/_sources/dev/relay_add_pass.rst.txt
index 02c0ba2..e1a5e7e 100644
--- a/docs/_sources/dev/relay_add_pass.rst.txt
+++ b/docs/_sources/dev/relay_add_pass.rst.txt
@@ -399,8 +399,8 @@ information about the pass manager interface can be found in :ref:`pass-infra`.
 Relay's standard passes are listed in `include/tvm/relay/transform.h`_ and implemented
 in `src/relay/pass/`_.
 
-.. _include/tvm/relay/transform.h: https://github.com/apache/incubator-tvm/blob/main/include/tvm/relay/transform.h
+.. _include/tvm/relay/transform.h: https://github.com/apache/incubator-tvm/blob/master/include/tvm/relay/transform.h
 
-.. _src/relay/pass/: https://github.com/apache/incubator-tvm/tree/main/src/relay/pass
+.. _src/relay/pass/: https://github.com/apache/incubator-tvm/tree/master/src/relay/pass
 
-.. _src/relay/transforms/fold_constant.cc: https://github.com/apache/incubator-tvm/blob/main/src/relay/transforms/fold_constant.cc
+.. _src/relay/transforms/fold_constant.cc: https://github.com/apache/incubator-tvm/blob/master/src/relay/transforms/fold_constant.cc
diff --git a/docs/_sources/dev/relay_bring_your_own_codegen.rst.txt b/docs/_sources/dev/relay_bring_your_own_codegen.rst.txt
index a4d4ebd..3dc56ce 100644
--- a/docs/_sources/dev/relay_bring_your_own_codegen.rst.txt
+++ b/docs/_sources/dev/relay_bring_your_own_codegen.rst.txt
@@ -137,7 +137,7 @@ Here we highlight the notes marked in the above code:
 
 * **Note 3** is a TVM runtime compatible wrapper function. It accepts a list of input tensors and one output tensor (the last argument), casts them to the right data type, and invokes the subgraph function described in Note 2. In addition, ``TVM_DLL_EXPORT_TYPED_FUNC`` is a TVM macro that generates another function ``gcc_0`` with unified function arguments by packing all tensors into ``TVMArgs``. As a result, the TVM runtime can directly invoke ``gcc_0`` to execute the subgraph without [...]
 
-In the rest of this section, we will implement a codegen step-by-step to generate the above code. Your own codegen has to be located at ``src/relay/backend/contrib/<your-codegen-name>/``. In our example, we name our codegen "codegen_c" and put it under `/src/relay/backend/contrib/codegen_c/ <https://github.com/apache/incubator-tvm/blob/main/src/relay/backend/contrib/codegen_c/codegen.cc>`_. Feel free to check this file for a complete implementation.
+In the rest of this section, we will implement a codegen step-by-step to generate the above code. Your own codegen has to be located at ``src/relay/backend/contrib/<your-codegen-name>/``. In our example, we name our codegen "codegen_c" and put it under `/src/relay/backend/contrib/codegen_c/ <https://github.com/apache/incubator-tvm/blob/master/src/relay/backend/contrib/codegen_c/codegen.cc>`_. Feel free to check this file for a complete implementation.
 
 Specifically, we are going to implement two classes in this file and here is their relationship:
 
@@ -296,7 +296,7 @@ As mentioned in the previous step, in addition to the subgraph input and output
 
     // This example only supports single output.
     auto type_node = call->checked_type().as<TensorTypeNode>();
-    ICHECK(type_node != nullptr && runtime::TypeMatch(type_node->dtype, kDLFloat, 32))
+    CHECK(type_node != nullptr && runtime::TypeMatch(type_node->dtype, kDLFloat, 32))
           << "Only support single output tensor with float type";
 
     // Generate a unique buffer name.
@@ -410,7 +410,7 @@ Implement GenCFunc
 .. code-block:: c++
 
   void GenCFunc(const Function& func) {
-    ICHECK(func.defined()) << "Input error: expect a Relay function.";
+    CHECK(func.defined()) << "Input error: expect a Relay function.";
 
     // Record the external symbol for runtime lookup.
     auto sid = GetExtSymbol(func);
@@ -474,7 +474,7 @@ This function creates a runtime module for the external library. In this example
 
     // Create a CSourceModule
     const auto* pf = runtime::Registry::Get("module.csource_module_create");
-    ICHECK(pf != nullptr) << "Cannot find csource module to create the external runtime module";
+    CHECK(pf != nullptr) << "Cannot find csource module to create the external runtime module";
     return (*pf)(code_stream_.str(), "cc");
   }
 
@@ -556,7 +556,7 @@ In this section, our goal is to implement the following customized TVM runtime m
       ExampleJsonCodeGen codegen(ref);
       std::string code = codegen.gen(); // Note 1
       const auto* pf = runtime::Registry::Get("module.examplejson_module_create"); // Note 2
-      ICHECK(pf != nullptr) << "Cannot find ExampleJson module to create the external runtime module";
+      CHECK(pf != nullptr) << "Cannot find ExampleJson module to create the external runtime module";
       return (*pf)(code);
   }
   TVM_REGISTER_GLOBAL("relay.ext.examplejsoncompiler").set_body_typed(ExampleJsonCompiler);
@@ -785,7 +785,7 @@ After the construction, we should have the above class variables ready. We then
 
         // Copy input tensors to corresponding data entries.
         for (auto i = 0; i < args.size(); ++i) {
-          ICHECK(args[i].type_code() == kNDArrayContainer || args[i].type_code() == kArrayHandle)
+          CHECK(args[i].type_code() == kNDArrayContainer || args[i].type_code() == kArrayHandle)
               << "Expect NDArray or DLTensor as inputs\n";
           if (args[i].type_code() == kArrayHandle) {
             DLTensor* arg = args[i];
@@ -800,7 +800,7 @@ After the construction, we should have the above class variables ready. We then
         for (const auto& it : this->graph_[this->curr_subgraph_]) {
           this->Run(it.id, it.inputs, it.output);
         }
-        ICHECK_GT(graph_.count(this->curr_subgraph_), 0U);
+        CHECK_GT(graph_.count(this->curr_subgraph_), 0U);
 
         // Copy the output from a data entry back to TVM runtime argument.
         auto out_idx = graph_[this->curr_subgraph_].back().output;
diff --git a/docs/_sources/dev/relay_intro.rst.txt b/docs/_sources/dev/relay_intro.rst.txt
index 87f68fc..fac4479 100644
--- a/docs/_sources/dev/relay_intro.rst.txt
+++ b/docs/_sources/dev/relay_intro.rst.txt
@@ -37,7 +37,7 @@ Though dataflow graphs are limited in terms of the computations they are capable
 lacking control flow, their simplicity makes it easier to implement automatic differentiation and
 compile for heterogeneous execution environments (e.g., executing parts of the graph on specialized hardware).
 
-.. image:: https://raw.githubusercontent.com/tvmai/tvmai.github.io/main/images/relay/dataflow.png
+.. image:: https://raw.githubusercontent.com/tvmai/tvmai.github.io/master/images/relay/dataflow.png
     :align: center
 
 
@@ -127,7 +127,7 @@ it to the var, then return the evaluated result in the body expression.
 You can use a sequence of let bindings to construct a program that is logically equivalent to a dataflow program.
 The code example below shows one program with two forms side by side.
 
-.. image:: https://raw.githubusercontent.com/tvmai/tvmai.github.io/main/images/relay/dataflow_vs_func.png
+.. image:: https://raw.githubusercontent.com/tvmai/tvmai.github.io/master/images/relay/dataflow_vs_func.png
     :align: center
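
 The same two forms can also be written down directly with the Relay Python API; a minimal sketch:

 .. code:: python

    from tvm import relay

    x = relay.var("x", shape=(2,))

    # Dataflow form: the node log(x) is simply reused twice.
    v1 = relay.log(x)
    f_dataflow = relay.Function([x], relay.add(v1, v1))

    # Equivalent let-bound form: bind log(x) to a variable first.
    t = relay.var("t")
    f_let = relay.Function([x], relay.Let(t, relay.log(x), relay.add(t, t)))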
 
 
@@ -151,7 +151,7 @@ Why We Might Need Let Binding
 One key usage of let binding is that it specifies the scope of computation. Let us take a look at the following example,
 which does not use let bindings.
 
-.. image:: https://raw.githubusercontent.com/tvmai/tvmai.github.io/main/images/relay/let_scope.png
+.. image:: https://raw.githubusercontent.com/tvmai/tvmai.github.io/master/images/relay/let_scope.png
     :align: center
 
 The problem comes when we try to decide where we should evaluate node ``%1``. In particular, while the text format seems
diff --git a/docs/_sources/dev/runtime.rst.txt b/docs/_sources/dev/runtime.rst.txt
index 91b19ee..7a001fa 100644
--- a/docs/_sources/dev/runtime.rst.txt
+++ b/docs/_sources/dev/runtime.rst.txt
@@ -45,7 +45,7 @@ PackedFunc
 `PackedFunc`_ is a simple but elegant solution
 we found to solve the challenges listed. The following code block provides an example in C++
 
-.. _PackedFunc: https://github.com/apache/incubator-tvm/blob/main/include/tvm/runtime/packed_func.h
+.. _PackedFunc: https://github.com/apache/incubator-tvm/blob/master/include/tvm/runtime/packed_func.h
 
 .. code:: c
 
@@ -131,9 +131,9 @@ which allows us to embed the PackedFunc into any languages. Besides python, so f
 `java`_ and `javascript`_.
 This philosophy of an embedded API is much like Lua's, except that we don't create a new language, but instead use C++.
 
-.. _minimum C API: https://github.com/apache/incubator-tvm/blob/main/include/tvm/runtime/c_runtime_api.h
-.. _java: https://github.com/apache/incubator-tvm/tree/main/jvm
-.. _javascript: https://github.com/apache/incubator-tvm/tree/main/web
+.. _minimum C API: https://github.com/apache/incubator-tvm/blob/master/include/tvm/runtime/c_runtime_api.h
+.. _java: https://github.com/apache/incubator-tvm/tree/master/jvm
+.. _javascript: https://github.com/apache/incubator-tvm/tree/master/web
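
 As a small Python-side sketch of this calling convention (the function name ``demo.addone`` is just for illustration):

 .. code:: python

    import tvm

    # Register a Python callback as a global PackedFunc...
    @tvm.register_func("demo.addone")
    def addone(x):
        return x + 1

    # ...then fetch it and call it back through the same PackedFunc mechanism.
    f = tvm.get_global_func("demo.addone")
    assert f(10) == 11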
 
 
 One fun fact about PackedFunc is that we use it for both the compiler and the deployment stack.
@@ -141,7 +141,7 @@ One fun fact about PackedFunc is that we use it for both compiler and deployment
 - All TVM's compiler pass functions are exposed to the frontend as PackedFunc; see `here`_
 - The compiled module also returns the compiled function as PackedFunc
 
-.. _here: https://github.com/apache/incubator-tvm/tree/main/src/api
+.. _here: https://github.com/apache/incubator-tvm/tree/master/src/api
 
 To keep the runtime minimal, we isolated the IR Object support from the deployment runtime. The resulting runtime takes around 200K - 600K of binary size, depending on how many runtime driver modules (e.g., CUDA) get included.
 
@@ -162,7 +162,7 @@ TVM defines the compiled object as `Module`_.
 The user can get the compiled function from a Module as a PackedFunc.
 The generated code can dynamically get a function from a Module at runtime. It caches the function handle on the first call and reuses it in subsequent calls. We use this to link device code and to call back into any PackedFunc (e.g., Python) from generated code.
 
-.. _Module: https://github.com/apache/incubator-tvm/blob/main/include/tvm/runtime/module.h
+.. _Module: https://github.com/apache/incubator-tvm/blob/master/include/tvm/runtime/module.h
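
 A minimal sketch of that flow from Python (assuming an LLVM-enabled build; the names here are illustrative):

 .. code:: python

    import numpy as np
    import tvm
    from tvm import te

    n = 8
    A = te.placeholder((n,), name="A")
    B = te.compute((n,), lambda i: A[i] + 1.0, name="B")
    s = te.create_schedule(B.op)
    mod = tvm.build(s, [A, B], target="llvm", name="addone")

    addone = mod["addone"]  # a PackedFunc fetched from the Module
    a = tvm.nd.array(np.zeros(n, dtype="float32"))
    b = tvm.nd.array(np.zeros(n, dtype="float32"))
    addone(a, b)
    print(b)  # all ones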
 
 The ModuleNode is an abstract class that can be implemented by each type of device.
 So far we support modules for CUDA, Metal, OpenCL and loading dynamic shared libraries. This abstraction makes introduction
@@ -198,7 +198,7 @@ All the language object in the compiler stack is a subclass of ``Object``. Each
 the type of object. We choose a string instead of an int as the type key so new ``Object`` classes can be added in a decentralized fashion without
 adding the code back to the central repo. To speed up dispatching, we allocate an integer type_index at runtime for each type_key.
 
-.. _Object: https://github.com/apache/incubator-tvm/blob/main/include/tvm/runtime/object.h
+.. _Object: https://github.com/apache/incubator-tvm/blob/master/include/tvm/runtime/object.h
 
 Since an ``Object`` is usually referenced in multiple places in the language, we use a shared_ptr to keep
 track of references. We use the ``ObjectRef`` class to represent a reference to an ``Object``.
@@ -279,17 +279,17 @@ Each argument in PackedFunc contains a union value `TVMValue`_
 and a type code. This design allows dynamically typed languages to convert to the corresponding type directly, and statically typed languages to
 do runtime type checking during conversion.
 
-.. _TVMValue: https://github.com/apache/incubator-tvm/blob/main/include/tvm/runtime/c_runtime_api.h#L122
+.. _TVMValue: https://github.com/apache/incubator-tvm/blob/master/include/tvm/runtime/c_runtime_api.h#L122
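
 The effect of this design is easy to see from Python, where mixed argument types are converted automatically (a sketch; the function name is illustrative):

 .. code:: python

    import tvm

    @tvm.register_func("demo.describe")
    def describe(i, f, s):
        # Each argument arrived as a (TVMValue, type code) pair and was
        # converted to the matching Python type before this call.
        return "%d %.1f %s" % (i, f, s)

    g = tvm.get_global_func("demo.describe")
    assert g(1, 2.5, "hi") == "1 2.5 hi"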
 
 The relevant files are
 
 - `packed_func.h`_ for C++ API
 - `c_runtime_api.cc`_ for C API and how to provide callback.
 
-.. _packed_func.h: https://github.com/apache/incubator-tvm/blob/main/include/tvm/runtime/packed_func.h
-.. _c_runtime_api.cc: https://github.com/apache/incubator-tvm/blob/main/src/runtime/c_runtime_api.cc#L262
+.. _packed_func.h: https://github.com/apache/incubator-tvm/blob/master/include/tvm/runtime/packed_func.h
+.. _c_runtime_api.cc: https://github.com/apache/incubator-tvm/blob/master/src/runtime/c_runtime_api.cc#L262
 
 To support extension types, we use a registry system to register type-related information, like support of ``any``
 in C++; see `Extension types`_ for more details.
 
-.. _Extension types: https://github.com/apache/incubator-tvm/tree/main/apps/extension
+.. _Extension types: https://github.com/apache/incubator-tvm/tree/master/apps/extension
diff --git a/docs/_sources/dev/virtual_machine.rst.txt b/docs/_sources/dev/virtual_machine.rst.txt
index 0986328..ae6cac2 100644
--- a/docs/_sources/dev/virtual_machine.rst.txt
+++ b/docs/_sources/dev/virtual_machine.rst.txt
@@ -278,11 +278,11 @@ to represent tensor, tuple/list, and closure data, respectively. More details
 for each of them can be found at `include/tvm/runtime/ndarray.h`_,
 `include/tvm/runtime/vm/vm.h`_, and `include/tvm/runtime/container.h`_, respectively.
 
-.. _include/tvm/runtime/ndarray.h: https://github.com/apache/incubator-tvm/blob/main/include/tvm/runtime/ndarray.h
+.. _include/tvm/runtime/ndarray.h: https://github.com/apache/incubator-tvm/blob/master/include/tvm/runtime/ndarray.h
 
-.. _include/tvm/runtime/vm/vm.h: https://github.com/apache/incubator-tvm/blob/main/include/tvm/runtime/vm/vm.h
+.. _include/tvm/runtime/vm/vm.h: https://github.com/apache/incubator-tvm/blob/master/include/tvm/runtime/vm/vm.h
 
-.. _include/tvm/runtime/container.h: https://github.com/apache/incubator-tvm/blob/main/include/tvm/runtime/container.h
+.. _include/tvm/runtime/container.h: https://github.com/apache/incubator-tvm/blob/master/include/tvm/runtime/container.h
 
 Stack and State
 ~~~~~~~~~~~~~~~
@@ -326,7 +326,7 @@ The functions contain metadata about the function as well as its compiled byteco
 object then can be loaded and run by a ``tvm::relay::vm::VirtualMachine`` object. For full definitions of the
 data structures, please see `include/tvm/runtime/vm/executable.h`_ and `include/tvm/runtime/vm/vm.h`_.
 
-.. _include/tvm/runtime/vm/executable.h: https://github.com/apache/incubator-tvm/blob/main/include/tvm/runtime/vm/executable.h
+.. _include/tvm/runtime/vm/executable.h: https://github.com/apache/incubator-tvm/blob/master/include/tvm/runtime/vm/executable.h
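
 An end-to-end sketch of this flow from Python (hedged: it assumes an LLVM-enabled build and the ``relay.vm`` / ``tvm.runtime.vm`` APIs):

 .. code:: python

    import numpy as np
    import tvm
    from tvm import relay
    from tvm.runtime.vm import VirtualMachine

    x = relay.var("x", shape=(2, 2))
    mod = tvm.IRModule.from_expr(relay.Function([x], x + x))

    exe = relay.vm.compile(mod, target="llvm")  # an Executable
    vm = VirtualMachine(exe, tvm.cpu())         # loaded into a VM
    out = vm.invoke("main", np.ones((2, 2), dtype="float32"))
    print(out)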
 
 Optimizations
 ~~~~~~~~~~~~~
@@ -343,11 +343,11 @@ Optimizations marked with `TODO` are not implemented yet.
 - Tail Call Optimization (TODO)
 - Liveness Analysis (TODO)
 
-.. _src/relay/vm/lambda_lift.cc: https://github.com/apache/incubator-tvm/blob/main/src/relay/backend/vm/lambda_lift.cc
+.. _src/relay/vm/lambda_lift.cc: https://github.com/apache/incubator-tvm/blob/master/src/relay/backend/vm/lambda_lift.cc
 
-.. _src/relay/vm/inline_primitives.cc: https://github.com/apache/incubator-tvm/blob/main/src/relay/backend/vm/inline_primitives.cc
+.. _src/relay/vm/inline_primitives.cc: https://github.com/apache/incubator-tvm/blob/master/src/relay/backend/vm/inline_primitives.cc
 
-.. _src/relay/backend/vm/compiler.cc: https://github.com/apache/incubator-tvm/blob/main/src/relay/backend/vm/compiler.cc
+.. _src/relay/backend/vm/compiler.cc: https://github.com/apache/incubator-tvm/blob/master/src/relay/backend/vm/compiler.cc
 
 Serialization
 ~~~~~~~~~~~~~
@@ -386,7 +386,7 @@ load the serialized kernel binary and executable related binary code, which will
 instantiate a VM object. Please refer to the `test_vm_serialization.py`_ file for more
 examples.
 
-.. _test_vm_serialization.py: https://github.com/apache/incubator-tvm/blob/main/tests/python/relay/test_vm_serialization.py
+.. _test_vm_serialization.py: https://github.com/apache/incubator-tvm/blob/master/tests/python/relay/test_vm_serialization.py
 
 Unresolved Questions
 ~~~~~~~~~~~~~~~~~~~~
@@ -406,4 +406,4 @@ How do we support heterogenous execution?
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 Heterogeneous execution should work out of the box, assuming we have annotated the appropriate device copies.
-In order to do this properly we need to run the device annotation and copying passes.
+In order to do this properly we need to run the device annotation and copying passes. 
diff --git a/docs/_sources/install/docker.rst.txt b/docs/_sources/install/docker.rst.txt
index 243e438..b77e122 100644
--- a/docs/_sources/install/docker.rst.txt
+++ b/docs/_sources/install/docker.rst.txt
@@ -67,7 +67,7 @@ with ``localhost`` when pasting it into browser.
 
 Docker Source
 -------------
-Check out `The docker source <https://github.com/apache/incubator-tvm/tree/main/docker>`_ if you are interested in
+Check out `The docker source <https://github.com/apache/incubator-tvm/tree/master/docker>`_ if you are interested in
 building your own docker images.
 
 
diff --git a/docs/_sources/install/from_source.rst.txt b/docs/_sources/install/from_source.rst.txt
index 2bb6e55..7759916 100644
--- a/docs/_sources/install/from_source.rst.txt
+++ b/docs/_sources/install/from_source.rst.txt
@@ -173,8 +173,8 @@ Method 1
    This method is **recommended for developers** who may change the code.
 
    Set the environment variable `PYTHONPATH` to tell python where to find
-   the library. For example, assume we cloned `tvm` on the directory
-   `/path/to/tvm` then we can add the following line in `~/.bashrc`.
+   the library. For example, assuming we cloned `tvm` in the home directory
+   `~`, we can add the following line to `~/.bashrc`.
    The changes will be immediately reflected once you pull the code and rebuild the project (no need to call ``setup`` again).
 
    .. code:: bash
@@ -242,7 +242,7 @@ tests in TVM. The easiest way to install GTest is from source.
        cd build
        cmake ..
        make
-       sudo make install
+       make install
 
 
 After installing GTest, the C++ tests can be built and started with ``./tests/scripts/task_cpp_unittest.sh`` or just built with ``make cpptest``.
diff --git a/docs/_sources/langref/hybrid_script.rst.txt b/docs/_sources/langref/hybrid_script.rst.txt
index 984e83c..5fae67d 100644
--- a/docs/_sources/langref/hybrid_script.rst.txt
+++ b/docs/_sources/langref/hybrid_script.rst.txt
@@ -38,12 +38,12 @@ you need to use ``tvm.te.hybrid.script`` decorator to indicate this is a hybrid
 .. code-block:: python
 
     @tvm.te.hybrid.script
-    def outer_product(a, b):
+    def outer_product(a, b):
         c = output_tensor((100, 99), 'float32')
         for i in range(a.shape[0]):
             for j in range(b.shape[0]):
                 c[i, j] = a[i] * b[j]
-        return c
+        return c
     a = numpy.random.randn(100)
     b = numpy.random.randn(99)
     c = outer_product(a, b)
@@ -76,7 +76,7 @@ or ``tvm.container.Array``, to this function, it returns a op node:
 
    a = tvm.te.placeholder((100, ), name='a')
    b = tvm.te.placeholder((99, ), name='b')
-   c = outer_product(a, b) # return the output tensor(s) of the operator
+   c = outer_product(a, b) # return the output tensor(s) of the operator
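
 From here, a minimal sketch of scheduling and compiling the returned op (assuming the LLVM backend is available):

 .. code-block:: python

    sch = tvm.te.create_schedule(c.op)
    module = tvm.build(sch, [a, b, c], target='llvm')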
 
 You can use any methods that can be applied to a TVM ``OpNode``, like ``create_schedule``, although
 so far the functionality of scheduling is as limited as for ``ExternOpNode``. At least, it can be built
@@ -230,8 +230,5 @@ Assert statement is supported, you can simply use it as it is in standard Python
 
 Keywords
 ~~~~~~~~
-- For keywords: ``serial``, ``range``, ``unroll``, ``parallel``, ``vectorize``, ``bind``, ``const_range``
-- Math keywords: ``log``, ``exp``, ``sqrt``, ``rsqrt``, ``sigmoid``, ``tanh``, ``power``, ``popcount``, ``round``, ``ceil_div``
-- Allocate keywords: ``allocate``, ``output_tensor``
-- Data type keywords: ``uint8``, ``uint16``, ``uint32``, ``uint64``, ``int8``, ``int16``, ``int32``, ``int64``, ``float16``, ``float32``, ``float64``
-- Others: ``max_num_threads``
+- For keywords: ``serial``, ``range``, ``unroll``, ``parallel``, ``vectorize``, ``bind``, ``const_expr``
+- Math keywords: ``log``, ``exp``, ``sigmoid``, ``tanh``, ``power``, ``popcount``
diff --git a/docs/_sources/langref/relay_pattern.rst.txt b/docs/_sources/langref/relay_pattern.rst.txt
index 17282e1..6cacff2 100644
--- a/docs/_sources/langref/relay_pattern.rst.txt
+++ b/docs/_sources/langref/relay_pattern.rst.txt
@@ -20,9 +20,9 @@
 Pattern Matching in Relay
 =========================
 
-There are many places in TVM where we identify pure data-flow sub-graphs of the Relay program and attempt to transform them in some way example passes include fusion, quantization, external code generation, and device specific optimizations such as bitpacking, and layer slicing used by VTA.
+There are many places in TVM where we identify pure data-flow sub-graphs of the Relay program and attempt to transform them in some way. Example passes include fusion, quantization, external code generation, and device-specific optimizations such as bitpacking and layer slicing used by VTA.
 
-Many of these passes today require a lots of boring boilerplate code in order to implement as well as requiring users to think in terms of visitors and AST matching. Many of these transformations can easily be described in terms of graph rewrites. In order to build a rewriter or other advanced machinery we first need a language of patterns to describe what we can match.
+Many of these passes today require a lot of boilerplate code to implement, and they require users to think in terms of visitors and AST matching. Many of these transformations can easily be described in terms of graph rewrites. In order to build a rewriter or other advanced machinery, we first need a language of patterns to describe what we can match.
 
 Such a language is not just useful for building a rewriter, but also for providing extension points for existing passes. For example, the fusion pass could be parameterized by a set of fusion patterns that describe the capabilities of your hardware, and the quantization pass could take a set of patterns that describe which operators can be quantized on a given platform.
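
 As a first taste of the pattern language (a minimal sketch using ``tvm.relay.dataflow_pattern``):

 .. code-block:: python

    from tvm import relay
    from tvm.relay.dataflow_pattern import is_op, wildcard

    # Match any add, regardless of its operands.
    pattern = is_op("add")(wildcard(), wildcard())

    x = relay.var("x")
    y = relay.var("y")
    assert pattern.match(x + y)
    assert not pattern.match(x * y)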
 
@@ -35,7 +35,7 @@ There are quite a few properties of operators that are worth matching. Below we
 demonstrates how to write patterns. It is recommended to check `tests/python/relay/test_dataflow_pattern.py`_
 for more use cases.
 
-.. _tests/python/relay/test_dataflow_pattern.py: https://github.com/apache/incubator-tvm/blob/main/tests/python/relay/test_dataflow_pattern.py
+.. _tests/python/relay/test_dataflow_pattern.py: https://github.com/apache/incubator-tvm/blob/master/tests/python/relay/test_dataflow_pattern.py
 
 .. note::
 
@@ -200,7 +200,7 @@ use ``is_expr``. This could be useful for algebraic simplify.
     def test_match_plus_zero():
         zero = (is_expr(relay.const(0)) | is_expr(relay.const(0.0)))
         pattern = wildcard() + zero
-
+        
         x = relay.Var('x')
         y = x + relay.const(0)
         assert pattern.match(y)
@@ -356,7 +356,7 @@ with a single batch_norm op:
             self.beta = wildcard()
             self.gamma = wildcard()
             self.eps = wildcard()
-
+            
             self.pattern = self.gamma * (self.x - self.mean)/is_op("sqrt")(self.var + self.eps) + self.beta
 
         def callback(self, pre, post, node_map):
diff --git a/docs/_sources/tutorials/auto_scheduler/sg_execution_times.rst.txt b/docs/_sources/tutorials/auto_scheduler/sg_execution_times.rst.txt
index 31b5288..1cea4be 100644
--- a/docs/_sources/tutorials/auto_scheduler/sg_execution_times.rst.txt
+++ b/docs/_sources/tutorials/auto_scheduler/sg_execution_times.rst.txt
@@ -5,7 +5,7 @@
 
 Computation times
 =================
-**04:25.488** total execution time for **tutorials_auto_scheduler** files:
+**04:38.356** total execution time for **tutorials_auto_scheduler** files:
 
-- **02:40.668**: :ref:`sphx_glr_tutorials_auto_scheduler_tune_conv2d_layer_cuda.py` (``tune_conv2d_layer_cuda.py``)
-- **01:44.820**: :ref:`sphx_glr_tutorials_auto_scheduler_tune_matmul_x86.py` (``tune_matmul_x86.py``)
+- **02:51.937**: :ref:`sphx_glr_tutorials_auto_scheduler_tune_conv2d_layer_cuda.py` (``tune_conv2d_layer_cuda.py``)
+- **01:46.419**: :ref:`sphx_glr_tutorials_auto_scheduler_tune_matmul_x86.py` (``tune_matmul_x86.py``)
diff --git a/docs/_sources/tutorials/auto_scheduler/tune_conv2d_layer_cuda.rst.txt b/docs/_sources/tutorials/auto_scheduler/tune_conv2d_layer_cuda.rst.txt
index 692fbba..45d0b6e 100644
--- a/docs/_sources/tutorials/auto_scheduler/tune_conv2d_layer_cuda.rst.txt
+++ b/docs/_sources/tutorials/auto_scheduler/tune_conv2d_layer_cuda.rst.txt
@@ -16,25 +16,20 @@ Auto-scheduling a convolution layer for GPU
 
 Different from the existing :ref:`autotvm <tutorials-autotvm-sec>` which relies on 
 manual templates to define the search space, the auto-scheduler does not require any templates.
-Users only need to write the computation declaration without any schedule commands or templates.
-The auto-scheduler can automatically generate a large search space and
-find a good schedule in the space.
+The auto-scheduler is template-free, so users only need to write the computation declaration without
+any schedule commands or templates.
+The auto-scheduler can automatically generate a large
+search space and find a good schedule in the space.
 
 We use a convolution layer as an example in this tutorial.
 
-Note that this tutorial will not run on Windows or recent versions of macOS. To
-get it to run, you will need to wrap the body of this tutorial in a :code:`if
-__name__ == "__main__":` block.
-
 
 .. code-block:: default
 
 
-    import os
-
     import numpy as np
     import tvm
-    from tvm import te, auto_scheduler, topi
+    from tvm import te, testing, auto_scheduler, topi
     from tvm.topi.testing import conv2d_nchw_python
 
 
@@ -80,7 +75,7 @@ We then create a search task for the last convolution layer in the resnet.
 
     target = tvm.target.Target("cuda")
 
-    # Use the last layer in ResNet-50
+    # the last layer in resnet
     N, H, W, CO, CI, KH, KW, strides, padding = 1, 7, 7, 512, 512, 3, 3, (1, 1), (1, 1)
     task = auto_scheduler.create_task(conv2d_layer, (N, H, W, CO, CI, KH, KW, strides, padding), target)
 
@@ -130,12 +125,11 @@ mainly specify how we do the measurement during the search and auto-tuning.
 .. code-block:: default
 
 
-    log_file = "conv2d.json"
     measure_ctx = auto_scheduler.LocalRPCMeasureContext(min_repeat_ms=300)
     tune_option = auto_scheduler.TuningOptions(
         num_measure_trials=10,
         runner=measure_ctx.runner,
-        measure_callbacks=[auto_scheduler.RecordToFile(log_file)],
+        measure_callbacks=[auto_scheduler.RecordToFile("conv2d.json")],
     )
 
 
@@ -164,9 +158,6 @@ After some measurement trials, it will return the best schedule it found.
 
     sch, args = auto_scheduler.auto_schedule(task, tuning_options=tune_option)
 
-    # Kill the process for measurement
-    del measure_ctx
-
 
 
 
@@ -201,22 +192,21 @@ cooperative fetching, unrolling and operator fusion.
 
  .. code-block:: none
 
-    #[version = "0.0.5"]
     primfn(data_1: handle, kernel_1: handle, bias_1: handle, compute_1: handle) -> ()
       attr = {"global_symbol": "main", "tir.noalias": True}
       buffers = {compute: Buffer(compute_2: Pointer(float32), float32, [1, 512, 7, 7], []),
-                 kernel: Buffer(kernel_2: Pointer(float32), float32, [512, 512, 3, 3], []),
                  bias: Buffer(bias_2: Pointer(float32), float32, [1, 512, 1, 1], []),
+                 kernel: Buffer(kernel_2: Pointer(float32), float32, [512, 512, 3, 3], []),
                  data: Buffer(data_2: Pointer(float32), float32, [1, 512, 7, 7], [])}
       buffer_map = {data_1: data, kernel_1: kernel, bias_1: bias, compute_1: compute} {
-      attr [IterVar(blockIdx.x: int32, (nullptr), "ThreadIndex", "blockIdx.x")] "thread_extent" = 64;
+      attr [IterVar(blockIdx.x: int32, (nullptr), "ThreadIndex", "blockIdx.x")] "thread_extent" = 16;
       attr [compute_3: Pointer(float32)] "storage_scope" = "local";
-      allocate(compute_3, float32, [7]);
+      allocate(compute_3, float32, [14]);
       attr [pad_temp.shared: Pointer(float32)] "storage_scope" = "shared";
-      allocate(pad_temp.shared, float32, [1296]);
+      allocate(pad_temp.shared, float32, [162]);
       attr [kernel.shared: Pointer(float32)] "storage_scope" = "shared";
-      allocate(kernel.shared, float32, [1152]);
-      attr [IterVar(threadIdx.x: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56 {
+      allocate(kernel.shared, float32, [576]);
+      attr [IterVar(threadIdx.x: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 112 {
         compute_3[0] = 0f32
         compute_3[1] = 0f32
         compute_3[2] = 0f32
@@ -224,1145 +214,304 @@ cooperative fetching, unrolling and operator fusion.
         compute_3[4] = 0f32
         compute_3[5] = 0f32
         compute_3[6] = 0f32
-        for (rc.outer.outer: int32, 0, 32) {
-          attr [IterVar(threadIdx.x_1: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56 {
-            pad_temp.shared[(threadIdx.x_1*4)] = @tir.if_then_else(((((9 <= floormod((threadIdx.x_1*4), 81)) && (floormod((threadIdx.x_1*4), 81) < 72)) && (1 <= floormod((threadIdx.x_1*4), 9))) && (floormod((threadIdx.x_1*4), 9) < 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv((threadIdx.x_1*4), 81)*49)) + (floordiv(floormod((threadIdx.x_1*4), 81), 9)*7)) + floormod((threadIdx.x_1*4), 9)) - 8)], 0f32, dtype=float32)
-            pad_temp.shared[((threadIdx.x_1*4) + 1)] = @tir.if_then_else(((((9 <= floormod(((threadIdx.x_1*4) + 1), 81)) && (floormod(((threadIdx.x_1*4) + 1), 81) < 72)) && (1 <= floormod(((threadIdx.x_1*4) + 1), 9))) && (floormod(((threadIdx.x_1*4) + 1), 9) < 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 1), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 1), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 1), 9)) - 8)], 0f32, dtype=float32)
-            pad_temp.shared[((threadIdx.x_1*4) + 2)] = @tir.if_then_else(((((9 <= floormod(((threadIdx.x_1*4) + 2), 81)) && (floormod(((threadIdx.x_1*4) + 2), 81) < 72)) && (1 <= floormod(((threadIdx.x_1*4) + 2), 9))) && (floormod(((threadIdx.x_1*4) + 2), 9) < 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 2), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 2), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 2), 9)) - 8)], 0f32, dtype=float32)
-            pad_temp.shared[((threadIdx.x_1*4) + 3)] = @tir.if_then_else(((((9 <= floormod(((threadIdx.x_1*4) + 3), 81)) && (floormod(((threadIdx.x_1*4) + 3), 81) < 72)) && (1 <= floormod(((threadIdx.x_1*4) + 3), 9))) && (floormod(((threadIdx.x_1*4) + 3), 9) < 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 3), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 3), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 3), 9)) - 8)], 0f32, dtype=float32)
-          }
-          attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56 {
-            pad_temp.shared[((threadIdx.x_1*4) + 224)] = @tir.if_then_else(((((9 <= floormod(((threadIdx.x_1*4) + 62), 81)) && (floormod(((threadIdx.x_1*4) + 62), 81) < 72)) && (1 <= floormod(((threadIdx.x_1*4) + 8), 9))) && (floormod(((threadIdx.x_1*4) + 8), 9) < 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 224), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 62), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 8), 9)) - 8)], 0f32, dtype=float32)
-            pad_temp.shared[((threadIdx.x_1*4) + 225)] = @tir.if_then_else(((((9 <= floormod(((threadIdx.x_1*4) + 63), 81)) && (floormod(((threadIdx.x_1*4) + 63), 81) < 72)) && (1 <= floormod((threadIdx.x_1*4), 9))) && (floormod((threadIdx.x_1*4), 9) < 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 225), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 63), 81), 9)*7)) + floormod((threadIdx.x_1*4), 9)) - 8)], 0f32, dtype=float32)
-            pad_temp.shared[((threadIdx.x_1*4) + 226)] = @tir.if_then_else(((((9 <= floormod(((threadIdx.x_1*4) + 64), 81)) && (floormod(((threadIdx.x_1*4) + 64), 81) < 72)) && (1 <= floormod(((threadIdx.x_1*4) + 1), 9))) && (floormod(((threadIdx.x_1*4) + 1), 9) < 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 226), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 64), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 1), 9)) - 8)], 0f32, dtype=float32)
-            pad_temp.shared[((threadIdx.x_1*4) + 227)] = @tir.if_then_else(((((9 <= floormod(((threadIdx.x_1*4) + 65), 81)) && (floormod(((threadIdx.x_1*4) + 65), 81) < 72)) && (1 <= floormod(((threadIdx.x_1*4) + 2), 9))) && (floormod(((threadIdx.x_1*4) + 2), 9) < 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 227), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 65), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 2), 9)) - 8)], 0f32, dtype=float32)
-          }
-          attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56 {
-            pad_temp.shared[((threadIdx.x_1*4) + 448)] = @tir.if_then_else(((((9 <= floormod(((threadIdx.x_1*4) + 43), 81)) && (floormod(((threadIdx.x_1*4) + 43), 81) < 72)) && (1 <= floormod(((threadIdx.x_1*4) + 7), 9))) && (floormod(((threadIdx.x_1*4) + 7), 9) < 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 448), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 43), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 7), 9)) - 8)], 0f32, dtype=float32)
-            pad_temp.shared[((threadIdx.x_1*4) + 449)] = @tir.if_then_else(((((9 <= floormod(((threadIdx.x_1*4) + 44), 81)) && (floormod(((threadIdx.x_1*4) + 44), 81) < 72)) && (1 <= floormod(((threadIdx.x_1*4) + 8), 9))) && (floormod(((threadIdx.x_1*4) + 8), 9) < 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 449), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 44), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 8), 9)) - 8)], 0f32, dtype=float32)
-            pad_temp.shared[((threadIdx.x_1*4) + 450)] = @tir.if_then_else(((((9 <= floormod(((threadIdx.x_1*4) + 45), 81)) && (floormod(((threadIdx.x_1*4) + 45), 81) < 72)) && (1 <= floormod((threadIdx.x_1*4), 9))) && (floormod((threadIdx.x_1*4), 9) < 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 450), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 45), 81), 9)*7)) + floormod((threadIdx.x_1*4), 9)) - 8)], 0f32, dtype=float32)
-            pad_temp.shared[((threadIdx.x_1*4) + 451)] = @tir.if_then_else(((((9 <= floormod(((threadIdx.x_1*4) + 46), 81)) && (floormod(((threadIdx.x_1*4) + 46), 81) < 72)) && (1 <= floormod(((threadIdx.x_1*4) + 1), 9))) && (floormod(((threadIdx.x_1*4) + 1), 9) < 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 451), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 46), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 1), 9)) - 8)], 0f32, dtype=float32)
-          }
-          attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56 {
-            pad_temp.shared[((threadIdx.x_1*4) + 672)] = @tir.if_then_else(((((9 <= floormod(((threadIdx.x_1*4) + 24), 81)) && (floormod(((threadIdx.x_1*4) + 24), 81) < 72)) && (1 <= floormod(((threadIdx.x_1*4) + 6), 9))) && (floormod(((threadIdx.x_1*4) + 6), 9) < 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 672), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 24), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 6), 9)) - 8)], 0f32, dtype=float32)
-            pad_temp.shared[((threadIdx.x_1*4) + 673)] = @tir.if_then_else(((((9 <= floormod(((threadIdx.x_1*4) + 25), 81)) && (floormod(((threadIdx.x_1*4) + 25), 81) < 72)) && (1 <= floormod(((threadIdx.x_1*4) + 7), 9))) && (floormod(((threadIdx.x_1*4) + 7), 9) < 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 673), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 25), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 7), 9)) - 8)], 0f32, dtype=float32)
-            pad_temp.shared[((threadIdx.x_1*4) + 674)] = @tir.if_then_else(((((9 <= floormod(((threadIdx.x_1*4) + 26), 81)) && (floormod(((threadIdx.x_1*4) + 26), 81) < 72)) && (1 <= floormod(((threadIdx.x_1*4) + 8), 9))) && (floormod(((threadIdx.x_1*4) + 8), 9) < 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 674), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 26), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 8), 9)) - 8)], 0f32, dtype=float32)
-            pad_temp.shared[((threadIdx.x_1*4) + 675)] = @tir.if_then_else(((((9 <= floormod(((threadIdx.x_1*4) + 27), 81)) && (floormod(((threadIdx.x_1*4) + 27), 81) < 72)) && (1 <= floormod((threadIdx.x_1*4), 9))) && (floormod((threadIdx.x_1*4), 9) < 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 675), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 27), 81), 9)*7)) + floormod((threadIdx.x_1*4), 9)) - 8)], 0f32, dtype=float32)
-          }
-          attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56 {
-            pad_temp.shared[((threadIdx.x_1*4) + 896)] = @tir.if_then_else(((((9 <= floormod(((threadIdx.x_1*4) + 5), 81)) && (floormod(((threadIdx.x_1*4) + 5), 81) < 72)) && (1 <= floormod(((threadIdx.x_1*4) + 5), 9))) && (floormod(((threadIdx.x_1*4) + 5), 9) < 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 896), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 5), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 5), 9)) - 8)], 0f32, dtype=float32)
-            pad_temp.shared[((threadIdx.x_1*4) + 897)] = @tir.if_then_else(((((9 <= floormod(((threadIdx.x_1*4) + 6), 81)) && (floormod(((threadIdx.x_1*4) + 6), 81) < 72)) && (1 <= floormod(((threadIdx.x_1*4) + 6), 9))) && (floormod(((threadIdx.x_1*4) + 6), 9) < 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 897), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 6), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 6), 9)) - 8)], 0f32, dtype=float32)
-            pad_temp.shared[((threadIdx.x_1*4) + 898)] = @tir.if_then_else(((((9 <= floormod(((threadIdx.x_1*4) + 7), 81)) && (floormod(((threadIdx.x_1*4) + 7), 81) < 72)) && (1 <= floormod(((threadIdx.x_1*4) + 7), 9))) && (floormod(((threadIdx.x_1*4) + 7), 9) < 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 898), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 7), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 7), 9)) - 8)], 0f32, dtype=float32)
-            pad_temp.shared[((threadIdx.x_1*4) + 899)] = @tir.if_then_else(((((9 <= floormod(((threadIdx.x_1*4) + 8), 81)) && (floormod(((threadIdx.x_1*4) + 8), 81) < 72)) && (1 <= floormod(((threadIdx.x_1*4) + 8), 9))) && (floormod(((threadIdx.x_1*4) + 8), 9) < 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 899), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 8), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 8), 9)) - 8)], 0f32, dtype=float32)
-          }
-          attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56 {
-            if @tir.likely((threadIdx.x_1 < 44), dtype=bool) {
-              pad_temp.shared[((threadIdx.x_1*4) + 1120)] = @tir.if_then_else(((((9 <= floormod(((threadIdx.x_1*4) + 67), 81)) && (floormod(((threadIdx.x_1*4) + 67), 81) < 72)) && (1 <= floormod(((threadIdx.x_1*4) + 4), 9))) && (floormod(((threadIdx.x_1*4) + 4), 9) < 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 1120), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 67), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 4), 9)) - 8)], 0f32, dtype=float32)
+        compute_3[7] = 0f32
+        compute_3[8] = 0f32
+        compute_3[9] = 0f32
+        compute_3[10] = 0f32
+        compute_3[11] = 0f32
+        compute_3[12] = 0f32
+        compute_3[13] = 0f32
+        for (rc.outer.outer: int32, 0, 256) {
+          attr [IterVar(threadIdx.x_1: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 112 {
+            if @tir.likely((threadIdx.x_1 < 41), dtype=bool) {
+              pad_temp.shared[(threadIdx.x_1*4)] = @tir.if_then_else(((((9 <= floormod((threadIdx.x_1*4), 81)) && (floormod((threadIdx.x_1*4), 81) < 72)) && (1 <= floormod((threadIdx.x_1*4), 9))) && (floormod((threadIdx.x_1*4), 9) < 8)), (float32*)data_2[(((((rc.outer.outer*98) + (floordiv((threadIdx.x_1*4), 81)*49)) + (floordiv(floormod((threadIdx.x_1*4), 81), 9)*7)) + floormod((threadIdx.x_1*4), 9)) - 8)], 0f32, dtype=float32)
             }
-            if @tir.likely(((threadIdx.x_1*4) < 175), dtype=bool) {
-              if @tir.likely((threadIdx.x_1 < 44), dtype=bool) {
-                pad_temp.shared[((threadIdx.x_1*4) + 1121)] = @tir.if_then_else(((((9 <= floormod(((threadIdx.x_1*4) + 68), 81)) && (floormod(((threadIdx.x_1*4) + 68), 81) < 72)) && (1 <= floormod(((threadIdx.x_1*4) + 5), 9))) && (floormod(((threadIdx.x_1*4) + 5), 9) < 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 1121), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 68), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 5), 9)) - 8)], 0f32, dtype=float32)
-              }
+            if @tir.likely((threadIdx.x_1 < 41), dtype=bool) {
+              pad_temp.shared[((threadIdx.x_1*4) + 1)] = @tir.if_then_else(((((9 <= floormod(((threadIdx.x_1*4) + 1), 81)) && (floormod(((threadIdx.x_1*4) + 1), 81) < 72)) && (1 <= floormod(((threadIdx.x_1*4) + 1), 9))) && (floormod(((threadIdx.x_1*4) + 1), 9) < 8)), (float32*)data_2[(((((rc.outer.outer*98) + (floordiv(((threadIdx.x_1*4) + 1), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 1), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 1), 9)) - 8)], 0f32, dtype=float32)
             }
-            if @tir.likely(((threadIdx.x_1*4) < 174), dtype=bool) {
-              if @tir.likely((threadIdx.x_1 < 44), dtype=bool) {
-                pad_temp.shared[((threadIdx.x_1*4) + 1122)] = @tir.if_then_else(((((9 <= floormod(((threadIdx.x_1*4) + 69), 81)) && (floormod(((threadIdx.x_1*4) + 69), 81) < 72)) && (1 <= floormod(((threadIdx.x_1*4) + 6), 9))) && (floormod(((threadIdx.x_1*4) + 6), 9) < 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 1122), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 69), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 6), 9)) - 8)], 0f32, dtype=float32)
-              }
+            if @tir.likely((threadIdx.x_1 < 40), dtype=bool) {
+              pad_temp.shared[((threadIdx.x_1*4) + 2)] = @tir.if_then_else(((((9 <= floormod(((threadIdx.x_1*4) + 2), 81)) && (floormod(((threadIdx.x_1*4) + 2), 81) < 72)) && (1 <= floormod(((threadIdx.x_1*4) + 2), 9))) && (floormod(((threadIdx.x_1*4) + 2), 9) < 8)), (float32*)data_2[(((((rc.outer.outer*98) + (floordiv(((threadIdx.x_1*4) + 2), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 2), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 2), 9)) - 8)], 0f32, dtype=float32)
             }
-            if @tir.likely(((threadIdx.x_1*4) < 173), dtype=bool) {
-              if @tir.likely((threadIdx.x_1 < 44), dtype=bool) {
-                pad_temp.shared[((threadIdx.x_1*4) + 1123)] = @tir.if_then_else(((((9 <= floormod(((threadIdx.x_1*4) + 70), 81)) && (floormod(((threadIdx.x_1*4) + 70), 81) < 72)) && (1 <= floormod(((threadIdx.x_1*4) + 7), 9))) && (floormod(((threadIdx.x_1*4) + 7), 9) < 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 1123), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 70), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 7), 9)) - 8)], 0f32, dtype=float32)
-              }
+            if @tir.likely((threadIdx.x_1 < 40), dtype=bool) {
+              pad_temp.shared[((threadIdx.x_1*4) + 3)] = @tir.if_then_else(((((9 <= floormod(((threadIdx.x_1*4) + 3), 81)) && (floormod(((threadIdx.x_1*4) + 3), 81) < 72)) && (1 <= floormod(((threadIdx.x_1*4) + 3), 9))) && (floormod(((threadIdx.x_1*4) + 3), 9) < 8)), (float32*)data_2[(((((rc.outer.outer*98) + (floordiv(((threadIdx.x_1*4) + 3), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 3), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 3), 9)) - 8)], 0f32, dtype=float32)
             }
           }
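
The four guarded stores above form the cooperative, 4-wide fill of
pad_temp.shared in the updated schedule: rc.outer.outer now steps two
input channels at a time (hence rc.outer.outer*98 = 2 * 7*7 in the data
index), so the buffer holds 2 * 9*9 = 162 padded-input floats, and the
@tir.likely bounds (41, 41, 40, 40) let threads 0..40 write exactly
indices 0..161 once each. A minimal Python sketch of that guard
arithmetic, assuming only that the threadIdx.x_1 extent printed earlier
in the dump is at least 41:

    # Hypothetical coverage check (illustration, not tutorial output):
    # each thread writes up to four consecutive pad_temp.shared slots,
    # one store per "if @tir.likely((threadIdx.x_1 < bound))" guard.
    covered = set()
    for tid in range(112):                       # assumed thread extent
        for off, bound in ((0, 41), (1, 41), (2, 40), (3, 40)):
            if tid < bound:
                covered.add(tid * 4 + off)
    assert covered == set(range(162))            # 2 channels * 9*9 padded
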
-          attr [IterVar(threadIdx.x_2: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
-          kernel.shared[threadIdx.x_2] = (float32*)kernel_2[(((blockIdx.x*36864) + (rc.outer.outer*144)) + threadIdx.x_2)]
-          attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
-          kernel.shared[(threadIdx.x_2 + 56)] = (float32*)kernel_2[(((blockIdx.x*36864) + (rc.outer.outer*144)) + (threadIdx.x_2 + 56))]
-          attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
-          kernel.shared[(threadIdx.x_2 + 112)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 112), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 112), 144))]
-          attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
-          kernel.shared[(threadIdx.x_2 + 168)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 168), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 24), 144))]
-          attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
-          kernel.shared[(threadIdx.x_2 + 224)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 224), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 80), 144))]
-          attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
-          kernel.shared[(threadIdx.x_2 + 280)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 280), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 136), 144))]
-          attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
-          kernel.shared[(threadIdx.x_2 + 336)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 336), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 48), 144))]
-          attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
-          kernel.shared[(threadIdx.x_2 + 392)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 392), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 104), 144))]
-          attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
-          kernel.shared[(threadIdx.x_2 + 448)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 448), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 16), 144))]
-          attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
-          kernel.shared[(threadIdx.x_2 + 504)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 504), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 72), 144))]
-          attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
-          kernel.shared[(threadIdx.x_2 + 560)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 560), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 128), 144))]
-          attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
-          kernel.shared[(threadIdx.x_2 + 616)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 616), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 40), 144))]
-          attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
-          kernel.shared[(threadIdx.x_2 + 672)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 672), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 96), 144))]
-          attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
-          kernel.shared[(threadIdx.x_2 + 728)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 728), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 8), 144))]
-          attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
-          kernel.shared[(threadIdx.x_2 + 784)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 784), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 64), 144))]
-          attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
-          kernel.shared[(threadIdx.x_2 + 840)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 840), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 120), 144))]
-          attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
-          kernel.shared[(threadIdx.x_2 + 896)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 896), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 32), 144))]
-          attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
-          kernel.shared[(threadIdx.x_2 + 952)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 952), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 88), 144))]
-          attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
-          kernel.shared[(threadIdx.x_2 + 1008)] = (float32*)kernel_2[((((blockIdx.x*36864) + (rc.outer.outer*144)) + threadIdx.x_2) + 32256)]
-          attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
-          kernel.shared[(threadIdx.x_2 + 1064)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 1064), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 56), 144))]
-          attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
-          if @tir.likely((threadIdx.x_2 < 32), dtype=bool) {
-            kernel.shared[(threadIdx.x_2 + 1120)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 1120), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 112), 144))]
+          attr [IterVar(threadIdx.x_2: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 112;
+          kernel.shared[threadIdx.x_2] = (float32*)kernel_2[((((blockIdx.x*147456) + (floordiv(threadIdx.x_2, 18)*4608)) + (rc.outer.outer*18)) + floormod(threadIdx.x_2, 18))]
+          attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 112;
+          kernel.shared[(threadIdx.x_2 + 112)] = (float32*)kernel_2[((((blockIdx.x*147456) + (floordiv((threadIdx.x_2 + 112), 18)*4608)) + (rc.outer.outer*18)) + floormod((threadIdx.x_2 + 4), 18))]
+          attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 112;
+          kernel.shared[(threadIdx.x_2 + 224)] = (float32*)kernel_2[((((blockIdx.x*147456) + (floordiv((threadIdx.x_2 + 224), 18)*4608)) + (rc.outer.outer*18)) + floormod((threadIdx.x_2 + 8), 18))]
+          attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 112;
+          kernel.shared[(threadIdx.x_2 + 336)] = (float32*)kernel_2[((((blockIdx.x*147456) + (floordiv((threadIdx.x_2 + 336), 18)*4608)) + (rc.outer.outer*18)) + floormod((threadIdx.x_2 + 12), 18))]
+          attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 112;
+          kernel.shared[(threadIdx.x_2 + 448)] = (float32*)kernel_2[((((blockIdx.x*147456) + (floordiv((threadIdx.x_2 + 448), 18)*4608)) + (rc.outer.outer*18)) + floormod((threadIdx.x_2 + 16), 18))]
+          attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 112;
+          if @tir.likely((threadIdx.x_2 < 16), dtype=bool) {
+            kernel.shared[(threadIdx.x_2 + 560)] = (float32*)kernel_2[((((blockIdx.x*147456) + (floordiv((threadIdx.x_2 + 560), 18)*4608)) + (rc.outer.outer*18)) + floormod((threadIdx.x_2 + 2), 18))]
           }
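
The stores above are the matching cooperative load of kernel.shared:
112 threads per block stage 576 = 32 output channels * (2 input
channels * 3*3) weights, in five full rounds of 112 plus a 16-thread
tail guarded by @tir.likely. The printed offsets are simplified
floordiv/floormod-by-18 forms (for example floormod(threadIdx.x_2 +
112, 18) = floormod(threadIdx.x_2 + 4, 18), because 112 mod 18 = 4),
4608 is the per-output-channel stride in kernel_2 (512 input channels
* 3*3), and 147456 = 32 * 4608 is the per-block stride. A small Python
sketch reconstructed from the indices visible in the dump, not taken
from the tutorial itself:

    # Hypothetical reconstruction of the kernel.shared addressing.
    def kernel_src(block_x, rc_outer, i):
        # flat shared-memory index i -> global offset into kernel_2
        return block_x * 147456 + (i // 18) * 4608 + rc_outer * 18 + i % 18

    written = set()
    for rnd in range(6):                 # five full rounds plus a tail
        for tid in range(112):
            if rnd < 5 or tid < 16:      # "if @tir.likely((threadIdx.x_2 < 16))"
                written.add(rnd * 112 + tid)
    assert written == set(range(576))    # 32 out-channels * 18 weights
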
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[(floormod(threadIdx.x, 7)*9)]*(float32*)kernel.shared[(floordiv(threadIdx.x, 7)*144)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 1)]*(float32*)kernel.shared[(floordiv(threadIdx.x, 7)*144)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 2)]*(float32*)kernel.shared[(floordiv(threadIdx.x, 7)*144)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 3)]*(float32*)kernel.shared[(floordiv(threadIdx.x, 7)*144)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 4)]*(float32*)kernel.shared[(floordiv(threadIdx.x, 7)*144)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 5)]*(float32*)kernel.shared[(floordiv(threadIdx.x, 7)*144)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 6)]*(float32*)kernel.shared[(floordiv(threadIdx.x, 7)*144)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 9)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 3)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 10)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 3)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 11)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 3)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 12)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 3)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 13)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 3)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 14)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 3)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 15)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 3)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 18)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 6)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 19)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 6)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 20)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 6)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 21)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 6)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 22)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 6)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 23)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 6)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 24)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 6)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 1)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 1)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 2)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 1)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 3)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 1)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 4)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 1)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 5)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 1)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 6)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 1)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 7)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 1)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 10)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 4)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 11)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 4)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 12)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 4)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 13)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 4)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 14)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 4)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 15)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 4)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 16)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 4)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 19)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 7)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 20)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 7)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 21)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 7)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 22)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 7)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 23)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 7)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 24)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 7)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 25)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 7)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 2)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 2)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 3)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 2)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 4)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 2)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 5)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 2)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 6)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 2)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 7)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 2)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 8)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 2)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 11)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 5)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 12)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 5)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 13)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 5)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 14)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 5)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 15)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 5)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 16)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 5)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 17)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 5)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 20)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 8)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 21)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 8)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 22)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 8)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 23)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 8)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 24)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 8)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 25)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 8)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 26)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 8)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 81)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 9)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 82)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 9)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 83)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 9)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 84)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 9)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 85)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 9)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 86)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 9)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 87)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 9)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 90)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 12)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 91)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 12)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 92)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 12)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 93)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 12)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 94)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 12)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 95)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 12)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 96)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 12)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 99)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 15)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 100)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 15)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 101)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 15)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 102)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 15)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 103)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 15)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 104)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 15)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 105)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 15)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 82)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 10)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 83)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 10)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 84)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 10)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 85)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 10)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 86)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 10)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 87)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 10)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 88)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 10)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 91)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 13)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 92)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 13)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 93)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 13)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 94)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 13)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 95)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 13)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 96)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 13)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 97)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 13)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 100)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 16)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 101)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 16)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 102)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 16)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 103)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 16)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 104)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 16)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 105)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 16)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 106)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 16)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 83)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 11)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 84)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 11)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 85)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 11)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 86)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 11)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 87)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 11)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 88)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 11)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 89)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 11)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 92)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 14)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 93)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 14)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 94)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 14)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 95)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 14)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 96)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 14)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 97)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 14)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 98)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 14)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 101)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 17)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 102)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 17)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 103)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 17)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 104)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 17)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 105)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 17)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 106)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 17)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 107)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 17)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 162)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 18)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 163)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 18)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 164)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 18)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 165)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 18)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 166)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 18)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 167)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 18)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 168)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 18)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 171)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 21)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 172)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 21)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 173)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 21)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 174)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 21)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 175)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 21)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 176)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 21)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 177)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 21)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 180)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 24)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 181)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 24)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 182)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 24)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 183)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 24)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 184)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 24)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 185)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 24)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 186)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 24)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 163)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 19)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 164)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 19)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 165)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 19)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 166)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 19)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 167)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 19)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 168)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 19)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 169)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 19)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 172)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 22)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 173)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 22)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 174)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 22)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 175)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 22)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 176)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 22)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 177)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 22)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 178)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 22)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 181)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 25)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 182)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 25)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 183)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 25)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 184)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 25)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 185)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 25)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 186)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 25)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 187)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 25)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 164)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 20)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 165)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 20)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 166)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 20)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 167)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 20)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 168)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 20)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 169)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 20)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 170)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 20)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 173)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 23)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 174)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 23)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 175)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 23)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 176)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 23)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 177)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 23)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 178)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 23)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 179)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 23)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 182)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 26)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 183)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 26)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 184)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 26)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 185)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 26)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 186)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 26)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 187)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 26)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 188)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 26)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 243)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 27)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 244)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 27)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 245)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 27)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 246)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 27)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 247)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 27)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 248)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 27)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 249)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 27)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 252)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 30)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 253)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 30)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 254)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 30)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 255)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 30)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 256)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 30)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 257)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 30)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 258)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 30)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 261)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 33)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 262)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 33)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 263)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 33)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 264)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 33)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 265)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 33)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 266)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 33)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 267)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 33)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 244)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 28)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 245)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 28)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 246)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 28)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 247)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 28)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 248)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 28)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 249)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 28)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 250)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 28)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 253)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 31)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 254)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 31)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 255)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 31)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 256)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 31)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 257)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 31)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 258)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 31)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 259)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 31)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 262)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 34)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 263)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 34)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 264)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 34)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 265)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 34)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 266)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 34)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 267)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 34)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 268)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 34)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 245)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 29)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 246)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 29)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 247)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 29)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 248)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 29)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 249)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 29)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 250)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 29)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 251)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 29)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 254)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 32)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 255)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 32)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 256)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 32)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 257)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 32)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 258)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 32)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 259)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 32)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 260)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 32)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 263)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 35)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 264)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 35)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 265)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 35)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 266)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 35)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 267)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 35)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 268)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 35)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 269)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 35)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 324)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 36)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 325)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 36)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 326)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 36)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 327)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 36)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 328)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 36)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 329)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 36)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 330)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 36)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 333)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 39)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 334)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 39)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 335)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 39)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 336)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 39)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 337)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 39)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 338)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 39)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 339)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 39)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 342)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 42)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 343)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 42)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 344)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 42)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 345)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 42)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 346)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 42)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 347)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 42)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 348)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 42)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 325)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 37)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 326)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 37)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 327)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 37)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 328)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 37)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 329)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 37)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 330)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 37)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 331)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 37)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 334)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 40)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 335)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 40)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 336)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 40)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 337)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 40)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 338)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 40)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 339)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 40)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 340)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 40)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 343)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 43)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 344)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 43)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 345)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 43)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 346)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 43)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 347)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 43)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 348)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 43)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 349)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 43)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 326)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 38)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 327)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 38)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 328)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 38)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 329)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 38)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 330)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 38)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 331)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 38)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 332)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 38)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 335)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 41)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 336)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 41)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 337)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 41)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 338)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 41)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 339)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 41)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 340)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 41)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 341)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 41)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 344)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 44)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 345)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 44)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 346)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 44)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 347)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 44)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 348)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 44)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 349)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 44)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 350)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 44)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 405)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 45)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 406)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 45)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 407)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 45)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 408)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 45)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 409)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 45)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 410)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 45)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 411)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 45)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 414)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 48)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 415)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 48)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 416)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 48)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 417)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 48)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 418)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 48)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 419)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 48)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 420)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 48)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 423)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 51)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 424)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 51)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 425)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 51)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 426)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 51)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 427)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 51)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 428)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 51)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 429)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 51)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 406)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 46)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 407)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 46)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 408)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 46)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 409)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 46)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 410)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 46)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 411)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 46)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 412)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 46)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 415)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 49)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 416)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 49)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 417)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 49)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 418)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 49)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 419)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 49)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 420)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 49)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 421)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 49)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 424)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 52)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 425)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 52)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 426)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 52)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 427)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 52)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 428)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 52)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 429)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 52)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 430)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 52)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 407)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 47)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 408)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 47)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 409)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 47)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 410)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 47)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 411)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 47)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 412)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 47)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 413)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 47)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 416)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 50)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 417)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 50)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 418)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 50)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 419)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 50)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 420)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 50)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 421)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 50)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 422)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 50)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 425)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 53)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 426)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 53)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 427)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 53)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 428)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 53)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 429)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 53)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 430)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 53)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 431)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 53)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 486)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 54)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 487)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 54)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 488)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 54)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 489)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 54)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 490)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 54)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 491)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 54)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 492)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 54)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 495)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 57)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 496)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 57)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 497)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 57)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 498)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 57)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 499)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 57)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 500)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 57)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 501)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 57)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 504)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 60)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 505)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 60)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 506)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 60)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 507)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 60)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 508)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 60)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 509)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 60)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 510)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 60)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 487)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 55)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 488)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 55)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 489)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 55)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 490)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 55)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 491)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 55)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 492)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 55)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 493)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 55)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 496)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 58)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 497)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 58)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 498)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 58)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 499)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 58)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 500)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 58)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 501)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 58)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 502)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 58)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 505)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 61)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 506)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 61)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 507)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 61)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 508)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 61)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 509)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 61)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 510)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 61)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 511)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 61)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 488)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 56)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 489)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 56)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 490)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 56)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 491)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 56)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 492)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 56)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 493)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 56)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 494)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 56)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 497)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 59)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 498)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 59)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 499)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 59)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 500)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 59)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 501)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 59)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 502)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 59)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 503)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 59)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 506)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 62)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 507)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 62)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 508)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 62)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 509)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 62)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 510)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 62)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 511)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 62)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 512)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 62)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 567)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 63)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 568)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 63)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 569)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 63)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 570)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 63)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 571)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 63)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 572)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 63)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 573)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 63)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 576)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 66)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 577)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 66)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 578)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 66)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 579)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 66)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 580)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 66)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 581)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 66)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 582)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 66)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 585)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 69)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 586)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 69)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 587)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 69)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 588)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 69)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 589)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 69)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 590)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 69)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 591)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 69)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 568)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 64)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 569)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 64)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 570)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 64)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 571)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 64)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 572)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 64)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 573)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 64)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 574)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 64)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 577)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 67)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 578)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 67)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 579)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 67)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 580)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 67)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 581)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 67)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 582)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 67)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 583)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 67)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 586)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 70)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 587)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 70)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 588)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 70)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 589)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 70)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 590)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 70)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 591)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 70)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 592)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 70)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 569)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 65)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 570)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 65)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 571)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 65)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 572)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 65)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 573)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 65)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 574)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 65)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 575)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 65)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 578)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 68)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 579)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 68)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 580)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 68)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 581)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 68)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 582)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 68)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 583)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 68)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 584)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 68)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 587)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 71)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 588)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 71)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 589)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 71)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 590)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 71)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 591)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 71)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 592)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 71)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 593)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 71)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 648)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 72)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 649)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 72)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 650)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 72)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 651)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 72)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 652)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 72)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 653)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 72)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 654)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 72)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 657)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 75)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 658)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 75)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 659)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 75)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 660)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 75)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 661)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 75)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 662)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 75)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 663)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 75)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 666)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 78)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 667)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 78)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 668)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 78)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 669)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 78)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 670)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 78)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 671)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 78)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 672)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 78)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 649)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 73)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 650)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 73)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 651)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 73)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 652)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 73)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 653)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 73)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 654)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 73)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 655)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 73)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 658)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 76)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 659)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 76)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 660)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 76)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 661)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 76)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 662)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 76)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 663)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 76)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 664)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 76)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 667)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 79)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 668)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 79)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 669)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 79)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 670)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 79)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 671)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 79)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 672)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 79)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 673)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 79)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 650)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 74)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 651)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 74)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 652)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 74)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 653)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 74)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 654)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 74)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 655)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 74)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 656)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 74)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 659)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 77)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 660)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 77)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 661)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 77)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 662)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 77)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 663)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 77)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 664)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 77)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 665)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 77)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 668)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 80)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 669)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 80)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 670)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 80)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 671)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 80)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 672)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 80)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 673)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 80)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 674)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 80)]))
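(The statements above and below are one mechanically unrolled reduction. A minimal, self-contained Python sketch of the rolled-up form — with hypothetical stand-in names, and loop bounds inferred from the printed indices rather than taken from the schedule itself — would be:)

    # Sketch of the loop nest that the unrolled compute_3[...] statements
    # instantiate. Assumed layout, inferred from the printed indices:
    # pad_temp.shared holds 9x9 padded rows per input channel (81 floats
    # per channel), kernel.shared holds 144 weights (16 channels x 3x3)
    # per output channel, and each thread owns 7 adjacent outputs.
    tx = 0                                  # stand-in for threadIdx.x
    pad_temp_shared = [0.0] * (16 * 81)     # stand-in for pad_temp.shared
    kernel_shared = [0.0] * (8 * 144)       # stand-in for kernel.shared
    compute_3 = [0.0] * 7

    for rc in range(16):                    # input channel
        for rx in range(3):                 # kernel column (outer, as printed)
            for ry in range(3):             # kernel row
                w = kernel_shared[(tx // 7) * 144 + rc * 9 + ry * 3 + rx]
                for j in range(7):          # the outputs compute_3[0..6]
                    p = (tx % 7) * 9 + rc * 81 + ry * 9 + rx + j
                    compute_3[j] += pad_temp_shared[p] * w

(Each thread accumulates 7 horizontally adjacent outputs, which is why every kernel.shared load above is reused across compute_3[0] through compute_3[6] with the pad_temp.shared offset advancing by one.)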
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 729)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 81)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 730)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 81)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 731)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 81)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 732)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 81)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 733)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 81)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 734)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 81)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 735)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 81)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 738)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 84)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 739)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 84)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 740)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 84)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 741)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 84)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 742)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 84)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 743)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 84)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 744)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 84)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 747)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 87)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 748)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 87)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 749)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 87)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 750)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 87)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 751)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 87)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 752)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 87)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 753)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 87)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 730)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 82)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 731)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 82)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 732)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 82)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 733)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 82)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 734)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 82)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 735)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 82)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 736)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 82)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 739)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 85)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 740)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 85)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 741)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 85)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 742)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 85)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 743)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 85)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 744)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 85)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 745)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 85)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 748)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 88)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 749)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 88)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 750)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 88)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 751)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 88)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 752)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 88)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 753)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 88)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 754)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 88)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 731)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 83)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 732)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 83)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 733)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 83)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 734)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 83)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 735)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 83)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 736)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 83)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 737)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 83)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 740)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 86)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 741)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 86)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 742)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 86)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 743)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 86)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 744)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 86)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 745)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 86)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 746)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 86)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 749)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 89)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 750)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 89)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 751)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 89)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 752)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 89)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 753)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 89)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 754)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 89)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 755)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 89)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 810)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 90)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 811)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 90)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 812)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 90)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 813)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 90)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 814)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 90)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 815)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 90)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 816)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 90)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 819)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 93)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 820)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 93)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 821)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 93)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 822)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 93)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 823)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 93)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 824)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 93)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 825)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 93)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 828)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 96)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 829)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 96)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 830)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 96)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 831)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 96)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 832)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 96)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 833)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 96)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 834)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 96)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 811)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 91)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 812)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 91)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 813)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 91)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 814)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 91)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 815)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 91)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 816)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 91)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 817)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 91)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 820)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 94)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 821)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 94)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 822)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 94)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 823)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 94)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 824)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 94)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 825)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 94)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 826)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 94)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 829)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 97)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 830)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 97)]))
-          compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 831)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 97)]))
-          compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 832)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 97)]))
-          compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 833)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 97)]))
-          compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 834)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 97)]))
-          compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 835)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 97)]))
-          compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 812)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 92)]))
-          compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((floormod(threadIdx.x, 7)*9) + 813)]*(float32*)kernel.shared[((floordiv(threadIdx.x, 7)*144) + 92)]))
... 579385 lines suppressed ...
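The removed lines above are machine-generated TVM IR, so the repetition is expected: each statement is one step of a fully unrolled multiply-accumulate loop in which a CUDA thread keeps a 7-wide tile of outputs in registers (compute_3[0..6]) and reads activations and weights from shared memory (pad_temp.shared, kernel.shared), reusing one weight across all seven outputs. Below is a minimal NumPy sketch of that access pattern; the names thread_body, TILE, ROW_STRIDE, and K_STRIDE, and all buffer sizes, are illustrative assumptions, not values recovered from this diff:

    import numpy as np

    TILE = 7          # outputs kept in registers per thread (compute_3[0..6]); assumed
    ROW_STRIDE = 9    # stride of one padded input row in shared memory; assumed
    K_STRIDE = 144    # per-thread stride into the shared kernel tile; assumed

    def thread_body(pad_temp_shared, kernel_shared, thread_idx, base, k_off):
        """One unrolled step per (base, k_off) pair, i.e.
        compute_3[j] += pad_temp.shared[row + base + j] * kernel.shared[ch + k_off]
        """
        acc = np.zeros(TILE, dtype=np.float32)
        row = (thread_idx % 7) * ROW_STRIDE   # floormod(threadIdx.x, 7) * 9
        ch = (thread_idx // 7) * K_STRIDE     # floordiv(threadIdx.x, 7) * 144
        w = kernel_shared[ch + k_off]         # one weight, reused by all 7 outputs
        for j in range(TILE):                 # the generated IR unrolls this loop
            acc[j] += pad_temp_shared[row + base + j] * w
        return acc

    # Example with hypothetical shared-memory contents; base=668, k_off=80
    # mirrors one of the removed statement groups above.
    pad = np.zeros(2048, dtype=np.float32)
    ker = np.zeros(4608, dtype=np.float32)
    out = thread_body(pad, ker, thread_idx=10, base=668, k_off=80)

Fixing base and k_off at compile time and unrolling the j loop is what produces the long runs of nearly identical statements seen in this diff; the payoff is that every output in the tile becomes a single fused multiply-add with no loop overhead.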