Posted to commits@tvm.apache.org by tq...@apache.org on 2021/04/06 22:37:07 UTC

[tvm-site] branch asf-site updated: Docs build at Tue Apr 6 18:32:09 EDT 2021

This is an automated email from the ASF dual-hosted git repository.

tqchen pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/tvm-site.git
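
[Editor's note: a minimal sketch of how one could fetch and inspect this commit locally with standard git commands; the repository URL and commit hash are taken from this message, and the clone may be large since this repository hosts rendered docs.]

    # clone the tvm-site repository and check out this exact docs build
    git clone https://gitbox.apache.org/repos/asf/tvm-site.git
    cd tvm-site
    git checkout 581c59705042861d7380f579b14be19490eb8440
    # reproduce the file-change summary listed below
    git show --stat 581c597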


The following commit(s) were added to refs/heads/asf-site by this push:
     new 581c597  Docs build at Tue Apr  6 18:32:09 EDT 2021
581c597 is described below

commit 581c59705042861d7380f579b14be19490eb8440
Author: tqchen <ti...@gmail.com>
AuthorDate: Tue Apr 6 18:32:10 2021 -0400

    Docs build at Tue Apr  6 18:32:09 EDT 2021
---
 .../from_tflite.py                                 |    2 +-
 .../tune_simple_template.py                        |  336 --
 .../from_tflite.ipynb                              |    2 +-
 .../tune_network_mali.ipynb                        |   12 +-
 .../tune_simple_template.ipynb                     |  190 -
 .../auto_tuning_with_python.py                     |  477 ++
 .../opt_matmul_auto_tensorcore.py                  |   12 +-
 .../143c743c62f58570eabd77fd3395ca8c/scan.py       |    6 +-
 .../tune_sparse_x86.py                             |  316 +
 .../deploy_prequantized_tflite.ipynb               |    4 +-
 .../tvmc_command_line_driver.py                    |  552 +-
 .../tune_conv2d_cuda.ipynb                         |    2 +-
 .../relay_quick_start.ipynb                        |    8 +-
 .../tune_relay_cuda.py                             |    8 +-
 .../tune_network_cuda.py                           |   12 +-
 .../tune_relay_mobile_gpu.ipynb                    |    6 +-
 .../tune_conv2d_cuda.py                            |   10 +-
 .../autotvm_matmul.py                              |  376 ++
 .../from_keras.ipynb                               |    2 +-
 .../from_coreml.ipynb                              |    2 +-
 .../from_caffe2.ipynb                              |    2 +-
 .../deploy_sparse.py                               |   26 +-
 .../tensor_expr_get_started.py                     |  975 ++-
 .../deploy_model_on_android.ipynb                  |   10 +-
 .../deploy_model_on_android.py                     |   30 +-
 .../tune_matmul_x86.py                             |  214 +
 .../opt_gemm.ipynb                                 |   14 +-
 .../using_external_lib.py                          |   10 +-
 .../tune_relay_vta.ipynb                           |    4 +-
 .../4c010cd30d80efae3f2dfe5743a098a5/install.ipynb |   57 +
 .../cross_compilation_and_rpc.ipynb                |    6 +-
 .../4e9540fc014621d8d3bd14869c1ab227/scan.ipynb    |    2 +-
 .../deploy_quantized.ipynb                         |    4 +-
 .../from_tensorflow.py                             |   16 +-
 .../autotvm_matmul.ipynb                           |  222 +
 .../intro_topi.ipynb                               |    2 +-
 .../deploy_quantized.py                            |    4 +-
 .../5bd1bb9c6505ea40407fa19f01579414/reduction.py  |    6 +-
 .../deploy_prequantized_tflite.py                  |    8 +-
 .../tune_relay_vta.py                              |    6 +-
 .../tensorize.ipynb                                |    4 +-
 .../opt_conv_cuda.ipynb                            |    2 +-
 .../tune_conv2d_layer_cuda.py                      |   12 +-
 .../relay_quick_start.py                           |   10 +-
 .../using_external_lib.ipynb                       |    6 +-
 .../from_pytorch.ipynb                             |    4 +-
 .../tensor_expr_get_started.ipynb                  |  361 +-
 .../70d345c5409f99cb5de9dc44f147ff6f/build_gcn.py  |    8 +-
 .../from_caffe2.py                                 |    6 +-
 .../tune_relay_cuda.ipynb                          |    4 +-
 .../tune_network_mali.py                           |   33 +-
 .../deploy_prequantized.ipynb                      |    4 +-
 .../7ece74acc230c7d55086182cc8884b09/extern_op.py  |   12 +-
 .../deploy_ssd_gluoncv.ipynb                       |    4 +-
 .../from_darknet.ipynb                             |    4 +-
 .../836dc3852acf09662e9eb37c4c5e1e1b/opt_gemm.py   |   34 +-
 .../deploy_model_on_rasp.py                        |    6 +-
 .../tune_relay_x86.py                              |    8 +-
 .../extern_op.ipynb                                |    4 +-
 .../opt_matmul_auto_tensorcore.ipynb               |    2 +-
 .../deploy_sparse.ipynb                            |   12 +-
 .../deploy_prequantized.py                         |    6 +-
 .../tune_matmul_x86.py                             |  190 -
 .../95f64205f29091c5f31071f9072f3236/install.py    |   49 +
 .../tune_matmul_x86.ipynb                          |  205 +
 .../opt_conv_tensorcore.ipynb                      |    2 +-
 .../9b0365fd5723f7c4d4e996637ab9a487/intro_topi.py |    8 +-
 .../introduction.py                                |  132 +
 .../from_darknet.py                                |   11 +-
 .../tune_network_x86.ipynb                         |   12 +-
 .../tune_network_x86.py                            |   35 +-
 .../opt_conv_cuda.py                               |   10 +-
 .../tune_network_arm.ipynb                         |   12 +-
 .../tune_relay_x86.ipynb                           |    4 +-
 .../auto_tuning_with_python.ipynb                  |  305 +
 .../baa4de13ce6d932de43e0eb5c4cb8f16/tensorize.py  |   10 +-
 .../tune_relay_arm.py                              |    8 +-
 .../tune_conv2d_layer_cuda.ipynb                   |    2 +-
 .../deploy_model_on_rasp.ipynb                     |    4 +-
 .../build_gcn.ipynb                                |    4 +-
 .../deploy_object_detection_pytorch.ipynb          |    2 +-
 .../deploy_classification.ipynb                    |    6 +-
 .../deploy_ssd_gluoncv.py                          |   14 +-
 .../micro_tflite.ipynb                             |    8 +-
 .../cross_compilation_and_rpc.py                   |   18 +-
 .../tune_network_cuda.ipynb                        |    6 +-
 .../tvmc_command_line_driver.ipynb                 |   45 +-
 .../from_tensorflow.ipynb                          |    6 +-
 .../opt_conv_tensorcore.py                         |   12 +-
 .../tune_relay_mobile_gpu.py                       |   18 +-
 .../from_coreml.py                                 |    6 +-
 .../ea0c81cab71096d16b825a33fd276c58/from_mxnet.py |    6 +-
 .../reduction.ipynb                                |    2 +-
 .../introduction.ipynb                             |   50 +
 .../deploy_object_detection_pytorch.py             |    4 +-
 .../deploy_classification.py                       |   12 +-
 .../tune_sparse_x86.ipynb                          |  205 +
 .../tune_matmul_x86.ipynb                          |  205 -
 .../from_mxnet.ipynb                               |    2 +-
 .../f59fd8b968f7dcde34ed872c8527c192/from_keras.py |    4 +-
 .../from_pytorch.py                                |   11 +-
 .../tune_relay_arm.ipynb                           |    4 +-
 .../tune_network_arm.py                            |   46 +-
 .../micro_tflite.py                                |   13 +-
 ... => sphx_glr_auto_tuning_with_python_thumb.png} |  Bin
 ...thumb.png => sphx_glr_autotvm_matmul_thumb.png} |  Bin
 docs/_images/sphx_glr_from_darknet_001.png         |  Bin 339980 -> 341231 bytes
 docs/_images/sphx_glr_from_darknet_thumb.png       |  Bin 132099 -> 132462 bytes
 ...mplate_thumb.png => sphx_glr_install_thumb.png} |  Bin
 ...e_thumb.png => sphx_glr_introduction_thumb.png} |  Bin
 ...humb.png => sphx_glr_tune_sparse_x86_thumb.png} |  Bin
 docs/_sources/api/python/graph_executor.rst.txt    |   21 +
 docs/_sources/api/python/graph_runtime.rst.txt     |   21 -
 docs/_sources/api/python/index.rst.txt             |    2 +-
 docs/_sources/api/python/relay/backend.rst.txt     |    2 +-
 docs/_sources/contribute/code_guide.rst.txt        |    2 +-
 docs/_sources/deploy/android.rst.txt               |    2 +-
 docs/_sources/deploy/arm_compute_lib.rst.txt       |   14 +-
 docs/_sources/deploy/bnns.rst.txt                  |  183 +
 docs/_sources/deploy/hls.rst.txt                   |    8 +-
 docs/_sources/deploy/index.rst.txt                 |    1 +
 docs/_sources/deploy/tensorrt.rst.txt              |    4 +-
 docs/_sources/deploy/vitis_ai.rst.txt              |   16 +-
 docs/_sources/dev/codebase_walkthrough.rst.txt     |    8 +-
 docs/_sources/dev/debugger.rst.txt                 |   14 +-
 docs/_sources/dev/index.rst.txt                    |    4 +-
 docs/_sources/dev/microtvm_design.rst.txt          |   22 +-
 .../dev/relay_bring_your_own_codegen.rst.txt       |    8 +-
 docs/_sources/dev/virtual_machine.rst.txt          |   10 +-
 docs/_sources/index.rst.txt                        |    2 +-
 docs/_sources/install/from_source.rst.txt          |    2 +-
 docs/_sources/langref/relay_pattern.rst.txt        |   16 +
 docs/_sources/microtvm/index.rst.txt               |    2 +-
 .../auto_scheduler/sg_execution_times.rst.txt      |   16 +-
 .../auto_scheduler/tune_conv2d_layer_cuda.rst.txt  | 1337 +---
 .../auto_scheduler/tune_matmul_x86.rst.txt         |  409 --
 .../auto_scheduler/tune_network_arm.rst.txt        |   45 +-
 .../auto_scheduler/tune_network_cuda.rst.txt       |   14 +-
 .../auto_scheduler/tune_network_mali.rst.txt       |   35 +-
 .../auto_scheduler/tune_network_x86.rst.txt        |   37 +-
 .../auto_scheduler/tune_sparse_x86.rst.txt         |  519 ++
 .../tutorials/autotvm/sg_execution_times.rst.txt   |   15 +-
 .../tutorials/autotvm/tune_conv2d_cuda.rst.txt     |   54 +-
 .../tutorials/autotvm/tune_relay_arm.rst.txt       |    8 +-
 .../tutorials/autotvm/tune_relay_cuda.rst.txt      |    8 +-
 .../autotvm/tune_relay_mobile_gpu.rst.txt          |   18 +-
 .../tutorials/autotvm/tune_relay_x86.rst.txt       |    8 +-
 .../tutorials/autotvm/tune_simple_template.rst.txt |  443 --
 .../tutorials/dev/bring_your_own_datatypes.rst.txt |    6 +-
 .../tutorials/dev/sg_execution_times.rst.txt       |    6 +-
 docs/_sources/tutorials/frontend/build_gcn.rst.txt |    8 +-
 .../frontend/deploy_model_on_android.rst.txt       |   32 +-
 .../frontend/deploy_model_on_rasp.rst.txt          |    6 +-
 .../deploy_object_detection_pytorch.rst.txt        |    6 +-
 .../tutorials/frontend/deploy_prequantized.rst.txt |    8 +-
 .../frontend/deploy_prequantized_tflite.rst.txt    |   12 +-
 .../tutorials/frontend/deploy_quantized.rst.txt    |    4 +-
 .../tutorials/frontend/deploy_sparse.rst.txt       |   26 +-
 .../tutorials/frontend/deploy_ssd_gluoncv.rst.txt  |   16 +-
 .../tutorials/frontend/from_caffe2.rst.txt         |    6 +-
 .../tutorials/frontend/from_coreml.rst.txt         |    6 +-
 .../tutorials/frontend/from_darknet.rst.txt        |   17 +-
 .../_sources/tutorials/frontend/from_keras.rst.txt |    4 +-
 .../_sources/tutorials/frontend/from_mxnet.rst.txt |   14 +-
 docs/_sources/tutorials/frontend/from_onnx.rst.txt |    2 +-
 .../tutorials/frontend/from_pytorch.rst.txt        |   21 +-
 .../tutorials/frontend/from_tensorflow.rst.txt     |   22 +-
 .../tutorials/frontend/from_tflite.rst.txt         |    2 +-
 .../tutorials/frontend/sg_execution_times.rst.txt  |   40 +-
 .../tutorials/frontend/using_external_lib.rst.txt  |   10 +-
 .../get_started/auto_tuning_with_python.rst.txt    |  725 +++
 .../tutorials/get_started/autotvm_matmul.rst.txt   |  473 ++
 .../get_started/cross_compilation_and_rpc.rst.txt  |   20 +-
 .../_sources/tutorials/get_started/install.rst.txt |   66 +
 .../tutorials/get_started/introduction.rst.txt     |  150 +
 .../get_started/relay_quick_start.rst.txt          |   13 +-
 .../get_started/sg_execution_times.rst.txt         |   15 +-
 .../get_started/tensor_expr_get_started.rst.txt    | 1432 ++++-
 .../tutorials/get_started/tune_matmul_x86.rst.txt  |  433 ++
 .../get_started/tvmc_command_line_driver.rst.txt   |  431 +-
 docs/_sources/tutorials/index.rst.txt              |  182 +-
 docs/_sources/tutorials/language/extern_op.rst.txt |   12 +-
 .../tutorials/language/intrin_math.rst.txt         |   16 +-
 docs/_sources/tutorials/language/reduction.rst.txt |   10 +-
 docs/_sources/tutorials/language/scan.rst.txt      |    6 +-
 .../tutorials/language/schedule_primitives.rst.txt |    8 +-
 .../tutorials/language/sg_execution_times.rst.txt  |   18 +-
 docs/_sources/tutorials/language/tensorize.rst.txt |   22 +-
 .../tutorials/language/tuple_inputs.rst.txt        |    8 +-
 docs/_sources/tutorials/micro/micro_tflite.rst.txt |   13 +-
 .../tutorials/micro/sg_execution_times.rst.txt     |    6 +-
 .../tutorials/optimize/opt_conv_cuda.rst.txt       |   12 +-
 .../tutorials/optimize/opt_conv_tensorcore.rst.txt |   14 +-
 docs/_sources/tutorials/optimize/opt_gemm.rst.txt  |   54 +-
 .../optimize/opt_matmul_auto_tensorcore.rst.txt    |   12 +-
 .../tutorials/optimize/sg_execution_times.rst.txt  |   10 +-
 docs/_sources/tutorials/topi/intro_topi.rst.txt    |   10 +-
 .../tutorials/topi/sg_execution_times.rst.txt      |    4 +-
 .../tutorials/autotvm/sg_execution_times.rst.txt   |    4 +-
 .../vta/tutorials/autotvm/tune_relay_vta.rst.txt   |    8 +-
 .../frontend/deploy_classification.rst.txt         |   16 +-
 .../tutorials/frontend/sg_execution_times.rst.txt  |    4 +-
 .../_sources/vta/tutorials/matrix_multiply.rst.txt |    4 +-
 .../vta/tutorials/optimize/convolution_opt.rst.txt |    4 +-
 .../tutorials/optimize/sg_execution_times.rst.txt  |    6 +-
 .../vta/tutorials/sg_execution_times.rst.txt       |    6 +-
 .../_sources/vta/tutorials/vta_get_started.rst.txt |    8 +-
 docs/api/doxygen/algorithm_8h.html                 |    2 +-
 docs/api/doxygen/algorithm_8h__incl.svg            | 2331 ++++---
 docs/api/doxygen/analyzer_8h.html                  |    2 +-
 docs/api/doxygen/analyzer_8h__incl.svg             | 1690 +++---
 docs/api/doxygen/analyzer_8h_source.html           |    5 +-
 docs/api/doxygen/annotated.html                    |  817 +--
 docs/api/doxygen/annotation_8h.html                |    2 +-
 docs/api/doxygen/annotation_8h__incl.svg           | 1493 +++--
 docs/api/doxygen/array__utils_8h.html              |    2 +-
 docs/api/doxygen/array__utils_8h__incl.svg         | 1955 +++---
 docs/api/doxygen/attr__registry__map_8h.html       |    4 +-
 docs/api/doxygen/attr__registry__map_8h__incl.svg  |  647 +-
 .../api/doxygen/attr__registry__map_8h_source.html |    6 +-
 docs/api/doxygen/auto__schedule_8h.html            |    2 +-
 docs/api/doxygen/auto__schedule_8h__incl.svg       | 1756 +++---
 docs/api/doxygen/auto__schedule_8h_source.html     |    2 +-
 docs/api/doxygen/auto__scheduler_2feature_8h.html  |    2 +-
 .../doxygen/auto__scheduler_2feature_8h__incl.svg  | 1887 +++---
 docs/api/doxygen/autodiff_8h.html                  |    2 +-
 docs/api/doxygen/autodiff_8h__incl.svg             | 2006 +++---
 docs/api/doxygen/base_8h.html                      |    2 +-
 docs/api/doxygen/base_8h__incl.svg                 | 1631 +++--
 docs/api/doxygen/base_8h_source.html               |    6 +-
 docs/api/doxygen/bias__add_8h.html                 |    2 +-
 docs/api/doxygen/bias__add_8h__incl.svg            | 1656 +++--
 docs/api/doxygen/bitserial_8h.html                 |    2 +-
 docs/api/doxygen/bitserial_8h__incl.svg            | 1755 +++---
 docs/api/doxygen/bound_8h.html                     |    4 +-
 docs/api/doxygen/bound_8h__incl.svg                | 1701 +++---
 docs/api/doxygen/bound_8h_source.html              |    4 +-
 docs/api/doxygen/broadcast_8h.html                 |    2 +-
 docs/api/doxygen/broadcast_8h__incl.svg            | 2163 ++++---
 docs/api/doxygen/broadcast_8h_source.html          |    4 +-
 docs/api/doxygen/buffer_8h.html                    |    4 +-
 docs/api/doxygen/buffer_8h__incl.svg               | 1449 +++--
 docs/api/doxygen/buffer_8h_source.html             |   14 +-
 docs/api/doxygen/builtin_8h.html                   |    2 +-
 docs/api/doxygen/builtin_8h__incl.svg              | 1933 +++---
 docs/api/doxygen/bytecode_8h.html                  |    2 +-
 docs/api/doxygen/bytecode_8h__incl.svg             |    2 +-
 docs/api/doxygen/bytecode_8h_source.html           |    5 +-
 docs/api/doxygen/c__backend__api_8h.html           |    2 +-
 docs/api/doxygen/c__backend__api_8h__dep__incl.svg |   14 +-
 docs/api/doxygen/c__backend__api_8h_source.html    |    8 +-
 docs/api/doxygen/c__runtime__api_8h.html           |   93 +-
 docs/api/doxygen/c__runtime__api_8h__dep__incl.svg | 1276 ++--
 docs/api/doxygen/c__runtime__api_8h_source.html    |   53 +-
 docs/api/doxygen/classes.html                      |  349 +-
 docs/api/doxygen/classtvm_1_1BaseAttrsNode.html    |   10 +-
 docs/api/doxygen/classtvm_1_1BaseFuncNode.html     |    2 +-
 .../doxygen/classtvm_1_1CompileError-members.html  |  110 +
 docs/api/doxygen/classtvm_1_1CompileError.html     |  287 +
 .../classtvm_1_1CompileError__coll__graph.svg      |   74 +
 .../classtvm_1_1CompileError__inherit__graph.svg   |   39 +
 .../doxygen/classtvm_1_1DenseMapNode-members.html  |  130 -
 docs/api/doxygen/classtvm_1_1DenseMapNode.html     |  574 --
 .../classtvm_1_1DenseMapNode__coll__graph.svg      |   96 -
 .../classtvm_1_1DenseMapNode__inherit__graph.svg   |   78 -
 docs/api/doxygen/classtvm_1_1Diagnostic.html       |    4 +-
 .../api/doxygen/classtvm_1_1DiagnosticBuilder.html |    8 +-
 docs/api/doxygen/classtvm_1_1DiagnosticNode.html   |    4 +-
 docs/api/doxygen/classtvm_1_1DictAttrs.html        |    4 +-
 docs/api/doxygen/classtvm_1_1DictAttrsNode.html    |    6 +-
 .../classtvm_1_1DictAttrsNode__coll__graph.svg     |   81 +-
 docs/api/doxygen/classtvm_1_1Error-members.html    |  110 -
 docs/api/doxygen/classtvm_1_1Error.html            |  287 -
 .../doxygen/classtvm_1_1ErrorReporter-members.html |    4 +-
 docs/api/doxygen/classtvm_1_1ErrorReporter.html    |   22 +-
 .../api/doxygen/classtvm_1_1Error__coll__graph.svg |   74 -
 .../doxygen/classtvm_1_1Error__inherit__graph.svg  |   39 -
 docs/api/doxygen/classtvm_1_1FloatImmNode.html     |    2 +-
 .../classtvm_1_1FloatImmNode__coll__graph.svg      |  129 +-
 docs/api/doxygen/classtvm_1_1IRModule.html         |   14 +-
 docs/api/doxygen/classtvm_1_1IRModuleNode.html     |   10 +-
 .../classtvm_1_1IRModuleNode__coll__graph.svg      |  204 +-
 docs/api/doxygen/classtvm_1_1IntImmNode.html       |    2 +-
 .../classtvm_1_1IntImmNode__coll__graph.svg        |  129 +-
 docs/api/doxygen/classtvm_1_1Map-members.html      |  128 -
 docs/api/doxygen/classtvm_1_1Map.html              |  910 ---
 docs/api/doxygen/classtvm_1_1MapNode-members.html  |  127 -
 docs/api/doxygen/classtvm_1_1MapNode.html          |  825 ---
 .../classtvm_1_1MapNode_1_1iterator-members.html   |  124 -
 .../doxygen/classtvm_1_1MapNode_1_1iterator.html   |  600 --
 ...lasstvm_1_1MapNode_1_1iterator__coll__graph.svg |   79 -
 .../doxygen/classtvm_1_1MapNode__coll__graph.svg   |   52 -
 .../classtvm_1_1MapNode__inherit__graph.svg        |  102 -
 .../classtvm_1_1Map_1_1iterator-members.html       |  118 -
 docs/api/doxygen/classtvm_1_1Map_1_1iterator.html  |  457 --
 .../classtvm_1_1Map_1_1iterator__coll__graph.svg   |   29 -
 docs/api/doxygen/classtvm_1_1Map__coll__graph.svg  |   47 -
 .../doxygen/classtvm_1_1Map__inherit__graph.svg    |   47 -
 docs/api/doxygen/classtvm_1_1PrimExprNode.html     |    2 +-
 .../classtvm_1_1PrimExprNode__coll__graph.svg      |  129 +-
 docs/api/doxygen/classtvm_1_1PrimTypeNode.html     |    2 +-
 .../classtvm_1_1PrimTypeNode__coll__graph.svg      |  129 +-
 docs/api/doxygen/classtvm_1_1ReflectionVTable.html |    4 +-
 .../doxygen/classtvm_1_1SmallMapNode-members.html  |  135 -
 docs/api/doxygen/classtvm_1_1SmallMapNode.html     |  573 --
 .../classtvm_1_1SmallMapNode__coll__graph.svg      |  100 -
 .../classtvm_1_1SmallMapNode__inherit__graph.svg   |  100 -
 docs/api/doxygen/classtvm_1_1Target-members.html   |    1 +
 docs/api/doxygen/classtvm_1_1Target.html           |   55 +-
 .../doxygen/classtvm_1_1TargetNode-members.html    |   21 +-
 docs/api/doxygen/classtvm_1_1TargetNode.html       |   30 +-
 .../classtvm_1_1TargetNode__coll__graph.svg        |  309 +-
 .../classtvm_1_1TargetNode__inherit__graph.svg     |   49 +-
 docs/api/doxygen/classtvm_1_1TargetTag.html        |    8 +-
 docs/api/doxygen/classtvm_1_1TargetTagNode.html    |    6 +-
 .../classtvm_1_1TargetTagNode__coll__graph.svg     |  217 +-
 .../api/doxygen/classtvm_1_1TargetTagRegEntry.html |    4 +-
 .../doxygen/classtvm_1_1Target__coll__graph.svg    |   43 +-
 .../doxygen/classtvm_1_1Target__inherit__graph.svg |   43 +-
 docs/api/doxygen/classtvm_1_1TensorTypeNode.html   |    2 +-
 .../classtvm_1_1TensorTypeNode__coll__graph.svg    |  163 +-
 .../classtvm_1_1arith_1_1Analyzer-members.html     |   17 +-
 .../api/doxygen/classtvm_1_1arith_1_1Analyzer.html |   50 +-
 .../classtvm_1_1arith_1_1Analyzer__coll__graph.svg |  135 +-
 .../classtvm_1_1arith_1_1IntConstraints.html       |    4 +-
 .../classtvm_1_1arith_1_1IntConstraintsNode.html   |    6 +-
 ...1_1arith_1_1IntConstraintsNode__coll__graph.svg |  132 +-
 ...asstvm_1_1arith_1_1IntConstraintsTransform.html |    6 +-
 ...vm_1_1arith_1_1IntConstraintsTransformNode.html |   10 +-
 ...1_1IntConstraintsTransformNode__coll__graph.svg |   99 +-
 .../classtvm_1_1arith_1_1IntGroupBounds.html       |    8 +-
 .../classtvm_1_1arith_1_1IntSetAnalyzer.html       |    4 +-
 .../classtvm_1_1arith_1_1IterMapExprNode.html      |    2 +-
 ...vm_1_1arith_1_1IterMapExprNode__coll__graph.svg |  129 +-
 .../classtvm_1_1arith_1_1IterMark-members.html     |    3 +-
 .../api/doxygen/classtvm_1_1arith_1_1IterMark.html |   22 +-
 .../classtvm_1_1arith_1_1IterMark__coll__graph.svg |   40 +-
 ...asstvm_1_1arith_1_1IterMark__inherit__graph.svg |   40 +-
 .../classtvm_1_1arith_1_1IterSplitExprNode.html    |    2 +-
 ..._1_1arith_1_1IterSplitExprNode__coll__graph.svg |  175 +-
 .../classtvm_1_1arith_1_1IterSumExprNode.html      |    2 +-
 ...vm_1_1arith_1_1IterSumExprNode__coll__graph.svg |  151 +-
 ...m_1_1auto__scheduler_1_1AccessAnalyzerNode.html |    8 +-
 ...asstvm_1_1auto__scheduler_1_1AttachMapNode.html |    8 +-
 ...m_1_1auto__scheduler_1_1SearchTask-members.html |    2 +-
 .../classtvm_1_1auto__scheduler_1_1SearchTask.html |   24 +-
 ...1auto__scheduler_1_1SearchTaskNode-members.html |   10 +-
 ...sstvm_1_1auto__scheduler_1_1SearchTaskNode.html |   38 +-
 ...o__scheduler_1_1SearchTaskNode__coll__graph.svg |  260 +-
 ...scheduler_1_1SearchTaskNode__inherit__graph.svg |   46 +-
 .../doxygen/classtvm_1_1parser_1_1SourceMap.html   |    4 +-
 .../classtvm_1_1parser_1_1SourceMapNode.html       |    6 +-
 ...tvm_1_1parser_1_1SourceMapNode__coll__graph.svg |   71 +-
 .../doxygen/classtvm_1_1relay_1_1AltPattern.html   |    2 +-
 .../doxygen/classtvm_1_1relay_1_1AttrPattern.html  |    2 +-
 .../doxygen/classtvm_1_1relay_1_1CallPattern.html  |    2 +-
 .../classtvm_1_1relay_1_1ConstantPattern.html      |    2 +-
 .../doxygen/classtvm_1_1relay_1_1DFPattern.html    |    4 +-
 .../classtvm_1_1relay_1_1DataTypePattern.html      |    2 +-
 .../classtvm_1_1relay_1_1DataTypePatternNode.html  |    2 +-
 ..._1relay_1_1DataTypePatternNode__coll__graph.svg |  135 +-
 .../classtvm_1_1relay_1_1DominatorPattern.html     |    2 +-
 .../doxygen/classtvm_1_1relay_1_1ExprPattern.html  |    2 +-
 .../classtvm_1_1relay_1_1FunctionPattern.html      |    2 +-
 .../doxygen/classtvm_1_1relay_1_1IfPattern.html    |    2 +-
 .../classtvm_1_1relay_1_1InterpreterClosure.html   |    4 +-
 ..._1_1relay_1_1InterpreterClosureObj-members.html |    3 +-
 ...classtvm_1_1relay_1_1InterpreterClosureObj.html |   10 +-
 ...relay_1_1InterpreterClosureObj__coll__graph.svg |  262 +-
 ...ay_1_1InterpreterClosureObj__inherit__graph.svg |   59 +-
 .../doxygen/classtvm_1_1relay_1_1LetPattern.html   |    2 +-
 .../doxygen/classtvm_1_1relay_1_1ShapePattern.html |    2 +-
 .../classtvm_1_1relay_1_1TupleGetItemPattern.html  |    2 +-
 .../doxygen/classtvm_1_1relay_1_1TuplePattern.html |    2 +-
 .../doxygen/classtvm_1_1relay_1_1TypePattern.html  |    2 +-
 .../doxygen/classtvm_1_1relay_1_1VarPattern.html   |    2 +-
 .../classtvm_1_1relay_1_1WildcardPattern.html      |    2 +-
 docs/api/doxygen/classtvm_1_1runtime_1_1ADT.html   |    4 +-
 .../classtvm_1_1runtime_1_1ADTObj-members.html     |    3 +-
 .../api/doxygen/classtvm_1_1runtime_1_1ADTObj.html |   10 +-
 .../classtvm_1_1runtime_1_1ADTObj__coll__graph.svg |   91 +-
 ...asstvm_1_1runtime_1_1ADTObj__inherit__graph.svg |   87 +-
 docs/api/doxygen/classtvm_1_1runtime_1_1Array.html |    4 +-
 .../classtvm_1_1runtime_1_1ArrayNode-members.html  |    3 +-
 .../doxygen/classtvm_1_1runtime_1_1ArrayNode.html  |   10 +-
 ...asstvm_1_1runtime_1_1ArrayNode__coll__graph.svg |   91 +-
 ...tvm_1_1runtime_1_1ArrayNode__inherit__graph.svg |   87 +-
 .../doxygen/classtvm_1_1runtime_1_1Closure.html    |    4 +-
 .../classtvm_1_1runtime_1_1ClosureObj-members.html |    1 +
 .../doxygen/classtvm_1_1runtime_1_1ClosureObj.html |   10 +-
 ...sstvm_1_1runtime_1_1ClosureObj__coll__graph.svg |   63 +-
 ...vm_1_1runtime_1_1ClosureObj__inherit__graph.svg |   59 +-
 .../classtvm_1_1runtime_1_1DataType-members.html   |   77 +-
 .../doxygen/classtvm_1_1runtime_1_1DataType.html   |   53 +-
 ...lasstvm_1_1runtime_1_1DataType__coll__graph.svg |   43 +-
 ...lasstvm_1_1runtime_1_1DenseMapNode-members.html |  159 +
 .../classtvm_1_1runtime_1_1DenseMapNode.html       |  654 ++
 ...tvm_1_1runtime_1_1DenseMapNode__coll__graph.svg |  135 +
 ..._1_1runtime_1_1DenseMapNode__inherit__graph.svg |  112 +
 .../classtvm_1_1runtime_1_1DeviceAPI-members.html  |   30 +-
 .../doxygen/classtvm_1_1runtime_1_1DeviceAPI.html  |  222 +-
 ...asstvm_1_1runtime_1_1DeviceAPI__coll__graph.svg |   18 +-
 .../classtvm_1_1runtime_1_1InplaceArrayBase.html   |   10 +-
 ...runtime_1_1InplaceArrayBase__inherit__graph.svg |   54 +
 .../classtvm_1_1runtime_1_1IterAdapter.html        |    4 +-
 .../classtvm_1_1runtime_1_1Map-members.html        |  146 +
 docs/api/doxygen/classtvm_1_1runtime_1_1Map.html   |  972 +++
 .../classtvm_1_1runtime_1_1MapNode-members.html    |  156 +
 .../doxygen/classtvm_1_1runtime_1_1MapNode.html    |  908 +++
 ..._1_1runtime_1_1MapNode_1_1iterator-members.html |  124 +
 ...classtvm_1_1runtime_1_1MapNode_1_1iterator.html |  600 ++
 ...runtime_1_1MapNode_1_1iterator__coll__graph.svg |  119 +
 ...classtvm_1_1runtime_1_1MapNode__coll__graph.svg |   91 +
 ...sstvm_1_1runtime_1_1MapNode__inherit__graph.svg |  136 +
 ...stvm_1_1runtime_1_1Map_1_1iterator-members.html |  118 +
 .../classtvm_1_1runtime_1_1Map_1_1iterator.html    |  457 ++
 ..._1_1runtime_1_1Map_1_1iterator__coll__graph.svg |   30 +
 .../classtvm_1_1runtime_1_1Map__coll__graph.svg    |   94 +
 .../classtvm_1_1runtime_1_1Map__inherit__graph.svg |   66 +
 .../classtvm_1_1runtime_1_1ModuleNode-members.html |    3 +-
 .../doxygen/classtvm_1_1runtime_1_1ModuleNode.html |   12 +-
 ...sstvm_1_1runtime_1_1ModuleNode__coll__graph.svg |   63 +-
 ...vm_1_1runtime_1_1ModuleNode__inherit__graph.svg |   99 +-
 .../classtvm_1_1runtime_1_1NDArray-members.html    |    4 +-
 .../doxygen/classtvm_1_1runtime_1_1NDArray.html    |   32 +-
 ...1_1runtime_1_1NDArray_1_1Container-members.html |    3 +-
 ...lasstvm_1_1runtime_1_1NDArray_1_1Container.html |   16 +-
 ...untime_1_1NDArray_1_1Container__coll__graph.svg |   85 +-
 ...ime_1_1NDArray_1_1Container__inherit__graph.svg |   81 +-
 .../classtvm_1_1runtime_1_1Object-members.html     |    1 +
 .../api/doxygen/classtvm_1_1runtime_1_1Object.html |   33 +-
 .../doxygen/classtvm_1_1runtime_1_1ObjectPtr.html  |   10 +-
 .../doxygen/classtvm_1_1runtime_1_1ObjectRef.html  |    4 +-
 ...tvm_1_1runtime_1_1ObjectRef__inherit__graph.svg |  293 +-
 .../classtvm_1_1runtime_1_1Object__coll__graph.svg |   59 +-
 ...asstvm_1_1runtime_1_1Object__inherit__graph.svg |  655 +-
 .../doxygen/classtvm_1_1runtime_1_1Optional.html   |    4 +-
 .../classtvm_1_1runtime_1_1ReverseIterAdapter.html |    4 +-
 ...lasstvm_1_1runtime_1_1SmallMapNode-members.html |  164 +
 .../classtvm_1_1runtime_1_1SmallMapNode.html       |  654 ++
 ...tvm_1_1runtime_1_1SmallMapNode__coll__graph.svg |  139 +
 ..._1_1runtime_1_1SmallMapNode__inherit__graph.svg |  134 +
 .../api/doxygen/classtvm_1_1runtime_1_1String.html |    4 +-
 .../classtvm_1_1runtime_1_1StringObj-members.html  |    1 +
 .../doxygen/classtvm_1_1runtime_1_1StringObj.html  |   10 +-
 ...1_1runtime_1_1StringObj_1_1FromStd-members.html |    1 +
 ...lasstvm_1_1runtime_1_1StringObj_1_1FromStd.html |   10 +-
 ...untime_1_1StringObj_1_1FromStd__coll__graph.svg |   63 +-
 ...ime_1_1StringObj_1_1FromStd__inherit__graph.svg |   59 +-
 ...asstvm_1_1runtime_1_1StringObj__coll__graph.svg |   63 +-
 ...tvm_1_1runtime_1_1StringObj__inherit__graph.svg |   59 +-
 ...classtvm_1_1runtime_1_1TVMArgValue-members.html |   20 +-
 .../classtvm_1_1runtime_1_1TVMArgValue.html        |    4 +-
 ...stvm_1_1runtime_1_1TVMArgValue__coll__graph.svg |    4 +-
 ...m_1_1runtime_1_1TVMArgValue__inherit__graph.svg |    2 +-
 ...asstvm_1_1runtime_1_1TVMArgsSetter-members.html |   34 +-
 .../classtvm_1_1runtime_1_1TVMArgsSetter.html      |  146 +-
 ...classtvm_1_1runtime_1_1TVMArgs__coll__graph.svg |    2 +-
 ...1_1runtime_1_1TVMMovableArgValue__-members.html |   22 +-
 ...lasstvm_1_1runtime_1_1TVMMovableArgValue__.html |    4 +-
 ...untime_1_1TVMMovableArgValue____coll__graph.svg |    4 +-
 ...ime_1_1TVMMovableArgValue____inherit__graph.svg |    2 +-
 ...asstvm_1_1runtime_1_1TVMPODValue__-members.html |   14 +-
 .../classtvm_1_1runtime_1_1TVMPODValue__.html      |   32 +-
 ...vm_1_1runtime_1_1TVMPODValue____coll__graph.svg |    4 +-
 ...1_1runtime_1_1TVMPODValue____inherit__graph.svg |    2 +-
 ...classtvm_1_1runtime_1_1TVMRetValue-members.html |   22 +-
 .../classtvm_1_1runtime_1_1TVMRetValue.html        |   16 +-
 ...stvm_1_1runtime_1_1TVMRetValue__coll__graph.svg |    4 +-
 ...m_1_1runtime_1_1TVMRetValue__inherit__graph.svg |    2 +-
 .../classtvm_1_1runtime_1_1Timer-members.html      |  126 +
 docs/api/doxygen/classtvm_1_1runtime_1_1Timer.html |  273 +
 .../classtvm_1_1runtime_1_1TimerNode-members.html  |  141 +
 .../doxygen/classtvm_1_1runtime_1_1TimerNode.html  |  386 ++
 ...asstvm_1_1runtime_1_1TimerNode__coll__graph.svg |   79 +
 ...tvm_1_1runtime_1_1TimerNode__inherit__graph.svg |   74 +
 .../classtvm_1_1runtime_1_1Timer__coll__graph.svg  |   85 +
 ...lasstvm_1_1runtime_1_1Timer__inherit__graph.svg |   57 +
 ...PackedFunc_3_01R_07Args_8_8_8_08_4-members.html |    2 +-
 ...1_1TypedPackedFunc_3_01R_07Args_8_8_8_08_4.html |   18 +-
 ...1runtime_1_1profiling_1_1CountNode-members.html |  139 +
 ...sstvm_1_1runtime_1_1profiling_1_1CountNode.html |  310 +
 ...time_1_1profiling_1_1CountNode__coll__graph.svg |   78 +
 ...e_1_1profiling_1_1CountNode__inherit__graph.svg |   73 +
 ...ntime_1_1profiling_1_1DurationNode-members.html |  139 +
 ...vm_1_1runtime_1_1profiling_1_1DurationNode.html |  310 +
 ...e_1_1profiling_1_1DurationNode__coll__graph.svg |   78 +
 ..._1profiling_1_1DurationNode__inherit__graph.svg |   73 +
 ...untime_1_1profiling_1_1PercentNode-members.html |  139 +
 ...tvm_1_1runtime_1_1profiling_1_1PercentNode.html |  310 +
 ...me_1_1profiling_1_1PercentNode__coll__graph.svg |   78 +
 ...1_1profiling_1_1PercentNode__inherit__graph.svg |   73 +
 ..._1runtime_1_1profiling_1_1Profiler-members.html |  111 +
 ...asstvm_1_1runtime_1_1profiling_1_1Profiler.html |  313 +
 ...ntime_1_1profiling_1_1Profiler__coll__graph.svg |   28 +
 ...stvm_1_1runtime_1_1vm_1_1Allocator-members.html |    2 +-
 .../classtvm_1_1runtime_1_1vm_1_1Allocator.html    |   14 +-
 ...tvm_1_1runtime_1_1vm_1_1Executable-members.html |   35 +-
 .../classtvm_1_1runtime_1_1vm_1_1Executable.html   |  153 +-
 ...1_1runtime_1_1vm_1_1Executable__coll__graph.svg |  255 +-
 ...runtime_1_1vm_1_1Executable__inherit__graph.svg |  155 +-
 ..._1_1runtime_1_1vm_1_1MemoryManager-members.html |    4 +-
 ...classtvm_1_1runtime_1_1vm_1_1MemoryManager.html |   28 +-
 ...tvm_1_1runtime_1_1vm_1_1StorageObj-members.html |    3 +-
 .../classtvm_1_1runtime_1_1vm_1_1StorageObj.html   |    6 +-
 ...1_1runtime_1_1vm_1_1StorageObj__coll__graph.svg |   85 +-
 ...runtime_1_1vm_1_1StorageObj__inherit__graph.svg |   59 +-
 ...m_1_1runtime_1_1vm_1_1VMClosureObj-members.html |    1 +
 .../classtvm_1_1runtime_1_1vm_1_1VMClosureObj.html |    6 +-
 ...1runtime_1_1vm_1_1VMClosureObj__coll__graph.svg |   63 +-
 ...ntime_1_1vm_1_1VMClosureObj__inherit__graph.svg |   59 +-
 ...1_1runtime_1_1vm_1_1VirtualMachine-members.html |   19 +-
 ...lasstvm_1_1runtime_1_1vm_1_1VirtualMachine.html |   48 +-
 ...untime_1_1vm_1_1VirtualMachine__coll__graph.svg |  437 +-
 ...ime_1_1vm_1_1VirtualMachine__inherit__graph.svg |   63 +-
 .../classtvm_1_1te_1_1BaseComputeOpNode.html       |    4 +-
 ...tvm_1_1te_1_1BaseComputeOpNode__coll__graph.svg |   75 +-
 docs/api/doxygen/classtvm_1_1te_1_1ComputeOp.html  |    4 +-
 .../doxygen/classtvm_1_1te_1_1ComputeOpNode.html   |    4 +-
 ...lasstvm_1_1te_1_1ComputeOpNode__coll__graph.svg |   83 +-
 docs/api/doxygen/classtvm_1_1te_1_1ExternOp.html   |    4 +-
 .../doxygen/classtvm_1_1te_1_1ExternOpNode.html    |    4 +-
 ...classtvm_1_1te_1_1ExternOpNode__coll__graph.svg |  200 +-
 docs/api/doxygen/classtvm_1_1te_1_1HybridOp.html   |    4 +-
 .../doxygen/classtvm_1_1te_1_1HybridOpNode.html    |    4 +-
 ...classtvm_1_1te_1_1HybridOpNode__coll__graph.svg |  200 +-
 .../doxygen/classtvm_1_1te_1_1OperationNode.html   |    6 +-
 ...lasstvm_1_1te_1_1OperationNode__coll__graph.svg |   77 +-
 .../classtvm_1_1te_1_1PlaceholderOpNode.html       |    4 +-
 ...tvm_1_1te_1_1PlaceholderOpNode__coll__graph.svg |  182 +-
 docs/api/doxygen/classtvm_1_1te_1_1ScanOp.html     |    4 +-
 docs/api/doxygen/classtvm_1_1te_1_1ScanOpNode.html |    4 +-
 .../classtvm_1_1te_1_1ScanOpNode__coll__graph.svg  |  204 +-
 .../doxygen/classtvm_1_1te_1_1ScheduleNode.html    |    6 +-
 ...classtvm_1_1te_1_1ScheduleNode__coll__graph.svg |  138 +-
 docs/api/doxygen/classtvm_1_1te_1_1StageNode.html  |    6 +-
 .../classtvm_1_1te_1_1StageNode__coll__graph.svg   |  301 +-
 .../classtvm_1_1te_1_1TensorComputeOpNode.html     |    4 +-
 ...m_1_1te_1_1TensorComputeOpNode__coll__graph.svg |  304 +-
 docs/api/doxygen/classtvm_1_1te_1_1TensorNode.html |    2 +-
 .../classtvm_1_1te_1_1TensorNode__coll__graph.svg  |  155 +-
 docs/api/doxygen/classtvm_1_1tir_1_1AddNode.html   |    2 +-
 .../classtvm_1_1tir_1_1AddNode__coll__graph.svg    |  151 +-
 .../doxygen/classtvm_1_1tir_1_1AllocateNode.html   |    2 +-
 ...lasstvm_1_1tir_1_1AllocateNode__coll__graph.svg |  245 +-
 docs/api/doxygen/classtvm_1_1tir_1_1AndNode.html   |    2 +-
 .../classtvm_1_1tir_1_1AndNode__coll__graph.svg    |  151 +-
 docs/api/doxygen/classtvm_1_1tir_1_1AnyNode.html   |    2 +-
 .../classtvm_1_1tir_1_1AnyNode__coll__graph.svg    |  129 +-
 .../doxygen/classtvm_1_1tir_1_1AssertStmtNode.html |    4 +-
 .../doxygen/classtvm_1_1tir_1_1BinaryOpNode.html   |    2 +-
 ...lasstvm_1_1tir_1_1BinaryOpNode__coll__graph.svg |  151 +-
 .../doxygen/classtvm_1_1tir_1_1Block-members.html  |  109 +
 docs/api/doxygen/classtvm_1_1tir_1_1Block.html     |  273 +
 .../classtvm_1_1tir_1_1BlockNode-members.html      |  126 +
 docs/api/doxygen/classtvm_1_1tir_1_1BlockNode.html |  453 ++
 .../classtvm_1_1tir_1_1BlockNode__coll__graph.svg  |  307 +
 ...lasstvm_1_1tir_1_1BlockNode__inherit__graph.svg |   73 +
 .../classtvm_1_1tir_1_1BlockRealize-members.html   |  109 +
 .../doxygen/classtvm_1_1tir_1_1BlockRealize.html   |  237 +
 ...lasstvm_1_1tir_1_1BlockRealizeNode-members.html |  120 +
 .../classtvm_1_1tir_1_1BlockRealizeNode.html       |  350 ++
 ...tvm_1_1tir_1_1BlockRealizeNode__coll__graph.svg |  203 +
 ..._1_1tir_1_1BlockRealizeNode__inherit__graph.svg |   67 +
 ...lasstvm_1_1tir_1_1BlockRealize__coll__graph.svg |   58 +
 ...stvm_1_1tir_1_1BlockRealize__inherit__graph.svg |   58 +
 .../classtvm_1_1tir_1_1Block__coll__graph.svg      |   58 +
 .../classtvm_1_1tir_1_1Block__inherit__graph.svg   |   58 +
 .../doxygen/classtvm_1_1tir_1_1BroadcastNode.html  |    2 +-
 ...asstvm_1_1tir_1_1BroadcastNode__coll__graph.svg |  151 +-
 .../doxygen/classtvm_1_1tir_1_1BufferLoadNode.html |    2 +-
 ...sstvm_1_1tir_1_1BufferLoadNode__coll__graph.svg |  165 +-
 .../api/doxygen/classtvm_1_1tir_1_1BufferNode.html |    2 +-
 .../classtvm_1_1tir_1_1BufferNode__coll__graph.svg |  307 +-
 .../classtvm_1_1tir_1_1BufferRegion-members.html   |  108 +
 .../doxygen/classtvm_1_1tir_1_1BufferRegion.html   |  244 +
 ...lasstvm_1_1tir_1_1BufferRegionNode-members.html |  114 +
 .../classtvm_1_1tir_1_1BufferRegionNode.html       |  357 ++
 ...tvm_1_1tir_1_1BufferRegionNode__coll__graph.svg |   97 +
 ..._1_1tir_1_1BufferRegionNode__inherit__graph.svg |   45 +
 ...lasstvm_1_1tir_1_1BufferRegion__coll__graph.svg |   39 +
 ...stvm_1_1tir_1_1BufferRegion__inherit__graph.svg |   39 +
 docs/api/doxygen/classtvm_1_1tir_1_1CallNode.html  |    2 +-
 .../classtvm_1_1tir_1_1CallNode__coll__graph.svg   |  151 +-
 docs/api/doxygen/classtvm_1_1tir_1_1CastNode.html  |    2 +-
 .../classtvm_1_1tir_1_1CastNode__coll__graph.svg   |  151 +-
 docs/api/doxygen/classtvm_1_1tir_1_1CmpOpNode.html |    2 +-
 .../classtvm_1_1tir_1_1CmpOpNode__coll__graph.svg  |  151 +-
 docs/api/doxygen/classtvm_1_1tir_1_1DivNode.html   |    2 +-
 .../classtvm_1_1tir_1_1DivNode__coll__graph.svg    |  151 +-
 docs/api/doxygen/classtvm_1_1tir_1_1EQNode.html    |    2 +-
 .../classtvm_1_1tir_1_1EQNode__coll__graph.svg     |  151 +-
 .../doxygen/classtvm_1_1tir_1_1FloorDivNode.html   |    2 +-
 ...lasstvm_1_1tir_1_1FloorDivNode__coll__graph.svg |  151 +-
 .../doxygen/classtvm_1_1tir_1_1FloorModNode.html   |    2 +-
 ...lasstvm_1_1tir_1_1FloorModNode__coll__graph.svg |  151 +-
 docs/api/doxygen/classtvm_1_1tir_1_1For.html       |    6 +-
 docs/api/doxygen/classtvm_1_1tir_1_1ForNode.html   |    6 +-
 .../classtvm_1_1tir_1_1ForNode__coll__graph.svg    |  230 +-
 docs/api/doxygen/classtvm_1_1tir_1_1GENode.html    |    2 +-
 .../classtvm_1_1tir_1_1GENode__coll__graph.svg     |  151 +-
 docs/api/doxygen/classtvm_1_1tir_1_1GTNode.html    |    2 +-
 .../classtvm_1_1tir_1_1GTNode__coll__graph.svg     |  151 +-
 docs/api/doxygen/classtvm_1_1tir_1_1LTNode.html    |    2 +-
 .../classtvm_1_1tir_1_1LTNode__coll__graph.svg     |  151 +-
 docs/api/doxygen/classtvm_1_1tir_1_1LetNode.html   |    2 +-
 .../classtvm_1_1tir_1_1LetNode__coll__graph.svg    |  177 +-
 docs/api/doxygen/classtvm_1_1tir_1_1LoadNode.html  |    2 +-
 .../classtvm_1_1tir_1_1LoadNode__coll__graph.svg   |  177 +-
 ...asstvm_1_1tir_1_1MatchBufferRegion-members.html |  107 +
 .../classtvm_1_1tir_1_1MatchBufferRegion.html      |  204 +
 ...vm_1_1tir_1_1MatchBufferRegionNode-members.html |  114 +
 .../classtvm_1_1tir_1_1MatchBufferRegionNode.html  |  358 ++
 ..._1tir_1_1MatchBufferRegionNode__coll__graph.svg |  108 +
 ...ir_1_1MatchBufferRegionNode__inherit__graph.svg |   45 +
 ...vm_1_1tir_1_1MatchBufferRegion__coll__graph.svg |   38 +
 ...1_1tir_1_1MatchBufferRegion__inherit__graph.svg |   38 +
 docs/api/doxygen/classtvm_1_1tir_1_1MaxNode.html   |    2 +-
 .../classtvm_1_1tir_1_1MaxNode__coll__graph.svg    |  151 +-
 docs/api/doxygen/classtvm_1_1tir_1_1MinNode.html   |    2 +-
 .../classtvm_1_1tir_1_1MinNode__coll__graph.svg    |  151 +-
 docs/api/doxygen/classtvm_1_1tir_1_1ModNode.html   |    2 +-
 .../classtvm_1_1tir_1_1ModNode__coll__graph.svg    |  151 +-
 docs/api/doxygen/classtvm_1_1tir_1_1MulNode.html   |    2 +-
 .../classtvm_1_1tir_1_1MulNode__coll__graph.svg    |  151 +-
 docs/api/doxygen/classtvm_1_1tir_1_1NENode.html    |    2 +-
 .../classtvm_1_1tir_1_1NENode__coll__graph.svg     |  151 +-
 docs/api/doxygen/classtvm_1_1tir_1_1NotNode.html   |    2 +-
 .../classtvm_1_1tir_1_1NotNode__coll__graph.svg    |  151 +-
 docs/api/doxygen/classtvm_1_1tir_1_1OrNode.html    |    2 +-
 .../classtvm_1_1tir_1_1OrNode__coll__graph.svg     |  151 +-
 docs/api/doxygen/classtvm_1_1tir_1_1Prefetch.html  |    6 +-
 .../doxygen/classtvm_1_1tir_1_1PrefetchNode.html   |    4 +-
 docs/api/doxygen/classtvm_1_1tir_1_1PrimFunc.html  |    6 +-
 .../doxygen/classtvm_1_1tir_1_1PrimFuncNode.html   |    8 +-
 ...lasstvm_1_1tir_1_1PrimFuncNode__coll__graph.svg |  323 +-
 .../classtvm_1_1tir_1_1ProducerLoadNode.html       |    2 +-
 ...tvm_1_1tir_1_1ProducerLoadNode__coll__graph.svg |  151 +-
 docs/api/doxygen/classtvm_1_1tir_1_1RampNode.html  |    2 +-
 .../classtvm_1_1tir_1_1RampNode__coll__graph.svg   |  151 +-
 .../api/doxygen/classtvm_1_1tir_1_1ReduceNode.html |    2 +-
 .../classtvm_1_1tir_1_1ReduceNode__coll__graph.svg |  175 +-
 .../api/doxygen/classtvm_1_1tir_1_1SelectNode.html |    2 +-
 .../classtvm_1_1tir_1_1SelectNode__coll__graph.svg |  151 +-
 .../doxygen/classtvm_1_1tir_1_1ShuffleNode.html    |    2 +-
 ...classtvm_1_1tir_1_1ShuffleNode__coll__graph.svg |  129 +-
 .../doxygen/classtvm_1_1tir_1_1SizeVarNode.html    |    2 +-
 ...classtvm_1_1tir_1_1SizeVarNode__coll__graph.svg |  225 +-
 docs/api/doxygen/classtvm_1_1tir_1_1Stmt.html      |    2 +-
 ...classtvm_1_1tir_1_1StmtExprMutator-members.html |   23 +-
 .../classtvm_1_1tir_1_1StmtExprMutator.html        |    6 +
 ...stvm_1_1tir_1_1StmtExprMutator__coll__graph.svg |    2 +-
 ...m_1_1tir_1_1StmtExprMutator__inherit__graph.svg |    2 +-
 ...classtvm_1_1tir_1_1StmtExprVisitor-members.html |   23 +-
 .../classtvm_1_1tir_1_1StmtExprVisitor.html        |   10 +-
 ...stvm_1_1tir_1_1StmtExprVisitor__coll__graph.svg |   86 +-
 ...m_1_1tir_1_1StmtExprVisitor__inherit__graph.svg |   86 +-
 ...tmt_01_6n_00_01Args_8_8_8args_08_4-members.html |   27 +-
 ...onst_01Stmt_01_6n_00_01Args_8_8_8args_08_4.html |  114 +
 ...01_6n_00_01Args_8_8_8args_08_4__coll__graph.svg |    2 +-
 .../classtvm_1_1tir_1_1StmtMutator-members.html    |   23 +-
 .../doxygen/classtvm_1_1tir_1_1StmtMutator.html    |   78 +
 ...classtvm_1_1tir_1_1StmtMutator__coll__graph.svg |    2 +-
 ...sstvm_1_1tir_1_1StmtMutator__inherit__graph.svg |    2 +-
 docs/api/doxygen/classtvm_1_1tir_1_1StmtNode.html  |    2 +-
 ...classtvm_1_1tir_1_1StmtNode__inherit__graph.svg |  564 +-
 .../classtvm_1_1tir_1_1StmtVisitor-members.html    |   23 +-
 .../doxygen/classtvm_1_1tir_1_1StmtVisitor.html    |   82 +-
 ...classtvm_1_1tir_1_1StmtVisitor__coll__graph.svg |   44 +-
 ...sstvm_1_1tir_1_1StmtVisitor__inherit__graph.svg |   48 +-
 .../classtvm_1_1tir_1_1Stmt__inherit__graph.svg    |  323 +-
 .../doxygen/classtvm_1_1tir_1_1StringImmNode.html  |    2 +-
 ...asstvm_1_1tir_1_1StringImmNode__coll__graph.svg |  205 +-
 docs/api/doxygen/classtvm_1_1tir_1_1SubNode.html   |    2 +-
 .../classtvm_1_1tir_1_1SubNode__coll__graph.svg    |  151 +-
 docs/api/doxygen/classtvm_1_1tir_1_1VarNode.html   |    2 +-
 .../classtvm_1_1tir_1_1VarNode__coll__graph.svg    |  225 +-
 .../doxygen/classtvm_1_1tir_1_1While-members.html  |  108 +
 docs/api/doxygen/classtvm_1_1tir_1_1While.html     |  205 +
 .../classtvm_1_1tir_1_1WhileNode-members.html      |  119 +
 docs/api/doxygen/classtvm_1_1tir_1_1WhileNode.html |  333 +
 .../classtvm_1_1tir_1_1WhileNode__coll__graph.svg  |  167 +
 ...lasstvm_1_1tir_1_1WhileNode__inherit__graph.svg |   66 +
 .../classtvm_1_1tir_1_1While__coll__graph.svg      |   56 +
 .../classtvm_1_1tir_1_1While__inherit__graph.svg   |   56 +
 .../classtvm_1_1transform_1_1Pass-members.html     |    4 +-
 .../api/doxygen/classtvm_1_1transform_1_1Pass.html |   16 -
 .../classtvm_1_1transform_1_1PassContextNode.html  |    8 +-
 ..._1transform_1_1PassContextNode__coll__graph.svg |  135 +-
 ...lasstvm_1_1transform_1_1Sequential-members.html |    4 +-
 docs/api/doxygen/codegen_8h.html                   |    2 +-
 docs/api/doxygen/codegen_8h__incl.svg              | 1820 +++---
 docs/api/doxygen/codegen_8h_source.html            |    6 +-
 docs/api/doxygen/compute__dag_8h.html              |    2 +-
 docs/api/doxygen/compute__dag_8h__incl.svg         | 1793 +++---
 docs/api/doxygen/compute__dag_8h_source.html       |   10 +-
 docs/api/doxygen/constant__utils_8h.html           |    2 +-
 docs/api/doxygen/constant__utils_8h__incl.svg      | 1928 +++---
 docs/api/doxygen/constant__utils_8h_source.html    |    2 +-
 docs/api/doxygen/container_8h.html                 |  429 ++
 docs/api/doxygen/container_8h__dep__incl.svg       | 1111 ++++
 docs/api/doxygen/container_8h__incl.svg            |  259 +
 docs/api/doxygen/container_8h_source.html          |  359 ++
 docs/api/doxygen/cost__model_8h.html               |    2 +-
 docs/api/doxygen/cost__model_8h__incl.svg          | 1608 ++---
 docs/api/doxygen/cost__model_8h_source.html        |   10 +-
 docs/api/doxygen/crt_2packed__func_8h.html         |    2 +-
 .../doxygen/crt_2packed__func_8h__dep__incl.svg    |   22 +-
 docs/api/doxygen/crt_2packed__func_8h_source.html  |    8 +-
 docs/api/doxygen/crt_8h.html                       |    6 +-
 docs/api/doxygen/crt_8h_source.html                |    2 +-
 docs/api/doxygen/cublas_8h.html                    |    2 +-
 docs/api/doxygen/cublas_8h__incl.svg               | 1924 +++---
 docs/api/doxygen/cublas_8h_source.html             |    2 +-
 docs/api/doxygen/cuda_2dense_8h.html               |    2 +-
 docs/api/doxygen/cuda_2dense_8h__incl.svg          | 1773 +++---
 docs/api/doxygen/cuda_2dense_8h_source.html        |    6 +-
 docs/api/doxygen/cuda_2injective_8h.html           |    2 +-
 docs/api/doxygen/cuda_2injective_8h__incl.svg      | 2139 +++----
 docs/api/doxygen/cuda_2injective_8h_source.html    |    6 +-
 docs/api/doxygen/cuda_2normalization_8h.html       |    2 +-
 docs/api/doxygen/cuda_2normalization_8h__incl.svg  | 2131 +++----
 .../api/doxygen/cuda_2normalization_8h_source.html |    4 +-
 docs/api/doxygen/cuda_2pooling_8h.html             |    2 +-
 docs/api/doxygen/cuda_2pooling_8h__incl.svg        | 2127 +++----
 docs/api/doxygen/cuda_2pooling_8h_source.html      |    6 +-
 docs/api/doxygen/cuda_2reduction_8h.html           |    2 +-
 docs/api/doxygen/cuda_2reduction_8h__incl.svg      | 2139 +++----
 docs/api/doxygen/cuda_2reduction_8h_source.html    |    8 +-
 docs/api/doxygen/cuda_2softmax_8h.html             |    2 +-
 docs/api/doxygen/cuda_2softmax_8h__incl.svg        | 2139 +++----
 docs/api/doxygen/cuda_2softmax_8h_source.html      |    6 +-
 docs/api/doxygen/data__layout_8h.html              |    2 +-
 docs/api/doxygen/data__layout_8h__incl.svg         | 1963 +++---
 docs/api/doxygen/data__layout_8h_source.html       |    8 +-
 docs/api/doxygen/data__type_8h.html                |    4 +-
 docs/api/doxygen/data__type_8h__dep__incl.svg      | 1189 ++--
 docs/api/doxygen/data__type_8h__incl.svg           |    2 +-
 docs/api/doxygen/data__type_8h_source.html         |   27 +-
 docs/api/doxygen/dataflow__matcher_8h.html         |    2 +-
 docs/api/doxygen/dataflow__matcher_8h__incl.svg    | 1891 +++---
 docs/api/doxygen/dataflow__matcher_8h_source.html  |    5 +-
 docs/api/doxygen/dataflow__pattern_8h.html         |    2 +-
 docs/api/doxygen/dataflow__pattern_8h__incl.svg    | 2208 ++++---
 docs/api/doxygen/dataflow__pattern_8h_source.html  |   13 +-
 .../api/doxygen/dataflow__pattern__functor_8h.html |    2 +-
 .../dataflow__pattern__functor_8h__incl.svg        | 1889 +++---
 docs/api/doxygen/debug_8h.html                     |    2 +-
 docs/api/doxygen/debug_8h__incl.svg                | 1531 +++--
 docs/api/doxygen/detail_2broadcast_8h.html         |    2 +-
 docs/api/doxygen/detail_2broadcast_8h__incl.svg    | 2040 +++----
 docs/api/doxygen/detail_2broadcast_8h_source.html  |    2 +-
 docs/api/doxygen/detail_2extern_8h.html            |    2 +-
 docs/api/doxygen/detail_2extern_8h__incl.svg       | 1981 +++---
 docs/api/doxygen/detail_2extern_8h_source.html     |    9 +-
 docs/api/doxygen/device__api_8h.html               |   37 +-
 docs/api/doxygen/device__api_8h__dep__incl.svg     |   33 +
 docs/api/doxygen/device__api_8h__incl.svg          |  786 +--
 docs/api/doxygen/device__api_8h_source.html        |   64 +-
 docs/api/doxygen/device__copy_8h.html              |    2 +-
 docs/api/doxygen/device__copy_8h__incl.svg         | 1493 +++--
 docs/api/doxygen/diagnostic_8h.html                |   17 +-
 docs/api/doxygen/diagnostic_8h__incl.svg           | 1693 +++---
 docs/api/doxygen/diagnostic_8h_source.html         |   74 +-
 docs/api/doxygen/dilate_8h.html                    |    2 +-
 docs/api/doxygen/dilate_8h__incl.svg               | 1922 +++---
 docs/api/doxygen/dilate_8h_source.html             |    6 +-
 docs/api/doxygen/dir_000002_000009.html            |   90 +
 docs/api/doxygen/dir_000002_000013.html            |   90 -
 docs/api/doxygen/dir_000003_000009.html            |    2 +-
 docs/api/doxygen/dir_000004_000009.html            |    2 +-
 docs/api/doxygen/dir_000004_000013.html            |    2 +-
 docs/api/doxygen/dir_000006_000009.html            |    2 +-
 docs/api/doxygen/dir_000006_000013.html            |    2 +-
 docs/api/doxygen/dir_000008_000009.html            |    2 +-
 docs/api/doxygen/dir_000008_000013.html            |    2 +-
 docs/api/doxygen/dir_000011_000009.html            |    2 +-
 docs/api/doxygen/dir_000013_000009.html            |    2 +-
 docs/api/doxygen/dir_000020_000006.html            |    2 +-
 docs/api/doxygen/dir_000021_000009.html            |    2 +-
 docs/api/doxygen/dir_000021_000013.html            |   90 -
 .../dir_02be2c9d68e402f80df60bd528724ee5_dep.svg   |   12 +-
 .../dir_194ecda214f05a38134392ac6a69b970.html      |    2 +-
 .../dir_194ecda214f05a38134392ac6a69b970_dep.svg   |  113 +-
 .../dir_404558507ed35459f0d7a6d81d8c508d.html      |    9 +-
 .../dir_519be2d4a83a987dbf989f1de527b870_dep.svg   |    4 +-
 .../dir_5da96592f3a7c442b838b075c58254c2_dep.svg   |   24 +-
 .../dir_63946bee875c6d52bce55e72a67a86ad.html      |    2 +-
 .../dir_63946bee875c6d52bce55e72a67a86ad_dep.svg   |  227 +-
 .../dir_6cd4295f6ad5aa17e5b568d0e5b190e5.html      |    3 -
 .../dir_6cd4295f6ad5aa17e5b568d0e5b190e5_dep.svg   |    4 +-
 .../dir_72c2f11201cd7636dc7624de0754daa5_dep.svg   |   20 +-
 .../dir_8e4e25e66b8623d88c5b5dd2040bca97_dep.svg   |  493 +-
 .../dir_a2900df4deca8dd2bcded616f0fe650a.html      |    6 +-
 .../dir_a98464176f1216e334ac3bbacd433085_dep.svg   |   47 +-
 .../dir_b4c7d8e826c599ba55146c099a14beb5_dep.svg   |  503 +-
 .../dir_d523279167051dc3aad9a40981221f4d_dep.svg   |    4 +-
 .../dir_dc867ff9a37cad1764f1670dc7eba6c1_dep.svg   |   20 +-
 .../dir_f97d855a3173728370e632aa77170e34.html      |    2 +-
 .../dir_f97d855a3173728370e632aa77170e34_dep.svg   |  159 +-
 .../dir_fafc18f54a755f417c55c769623cbfef_dep.svg   |    4 +-
 docs/api/doxygen/driver__api_8h.html               |    2 +-
 docs/api/doxygen/driver__api_8h__incl.svg          | 1857 +++---
 docs/api/doxygen/einsum_8h.html                    |    2 +-
 docs/api/doxygen/einsum_8h__incl.svg               | 1745 +++---
 docs/api/doxygen/einsum_8h_source.html             |   14 +-
 docs/api/doxygen/elemwise_8h.html                  |    2 +-
 docs/api/doxygen/elemwise_8h__incl.svg             | 1667 +++--
 docs/api/doxygen/elemwise_8h_source.html           |    8 +-
 docs/api/doxygen/env__func_8h.html                 |    2 +-
 docs/api/doxygen/env__func_8h__incl.svg            | 1160 ++--
 docs/api/doxygen/env__func_8h_source.html          |    6 +-
 docs/api/doxygen/error_8h.html                     |    6 +-
 docs/api/doxygen/error_8h__incl.svg                | 1669 +++--
 docs/api/doxygen/error_8h_source.html              |   19 +-
 docs/api/doxygen/error__codes_8h.html              |   12 +-
 docs/api/doxygen/error__codes_8h__dep__incl.svg    |  143 +-
 docs/api/doxygen/error__codes_8h_source.html       |    4 +-
 docs/api/doxygen/executable_8h.html                |    2 +-
 docs/api/doxygen/executable_8h__incl.svg           |  627 +-
 docs/api/doxygen/executable_8h_source.html         |   30 +-
 docs/api/doxygen/files.html                        |   28 +-
 docs/api/doxygen/flatten_8h.html                   |    2 +-
 docs/api/doxygen/flatten_8h__incl.svg              | 2048 +++----
 docs/api/doxygen/flatten_8h_source.html            |    2 +-
 docs/api/doxygen/func__registry_8h.html            |    2 +-
 docs/api/doxygen/func__registry_8h__dep__incl.svg  |   42 +-
 docs/api/doxygen/functions.html                    |   19 +-
 docs/api/doxygen/functions_0x7e.html               |    7 +-
 docs/api/doxygen/functions_a.html                  |   29 +-
 docs/api/doxygen/functions_b.html                  |   31 +-
 docs/api/doxygen/functions_c.html                  |   45 +-
 docs/api/doxygen/functions_d.html                  |   35 +-
 docs/api/doxygen/functions_e.html                  |   41 +-
 docs/api/doxygen/functions_f.html                  |   25 +-
 docs/api/doxygen/functions_func_0x7e.html          |    7 +-
 docs/api/doxygen/functions_func_a.html             |   14 +-
 docs/api/doxygen/functions_func_b.html             |   24 +-
 docs/api/doxygen/functions_func_c.html             |   33 +-
 docs/api/doxygen/functions_func_d.html             |    5 +-
 docs/api/doxygen/functions_func_e.html             |   33 +-
 docs/api/doxygen/functions_func_f.html             |   21 +-
 docs/api/doxygen/functions_func_g.html             |   17 +-
 docs/api/doxygen/functions_func_i.html             |   17 +-
 docs/api/doxygen/functions_func_m.html             |    7 +-
 docs/api/doxygen/functions_func_n.html             |    4 +-
 docs/api/doxygen/functions_func_o.html             |   64 +-
 docs/api/doxygen/functions_func_p.html             |    5 +-
 docs/api/doxygen/functions_func_r.html             |   13 +-
 docs/api/doxygen/functions_func_s.html             |   65 +-
 docs/api/doxygen/functions_func_t.html             |   37 +-
 docs/api/doxygen/functions_func_u.html             |    5 +-
 docs/api/doxygen/functions_func_v.html             |   29 +-
 docs/api/doxygen/functions_func_w.html             |   10 +-
 docs/api/doxygen/functions_g.html                  |   17 +-
 docs/api/doxygen/functions_i.html                  |   40 +-
 docs/api/doxygen/functions_k.html                  |    8 +-
 docs/api/doxygen/functions_l.html                  |    7 +-
 docs/api/doxygen/functions_m.html                  |   23 +-
 docs/api/doxygen/functions_n.html                  |    8 +-
 docs/api/doxygen/functions_o.html                  |   54 +-
 docs/api/doxygen/functions_p.html                  |   17 +-
 docs/api/doxygen/functions_r.html                  |   27 +-
 docs/api/doxygen/functions_rela.html               |   29 +-
 docs/api/doxygen/functions_s.html                  |   90 +-
 docs/api/doxygen/functions_t.html                  |   41 +-
 docs/api/doxygen/functions_type.html               |   42 +-
 docs/api/doxygen/functions_u.html                  |    5 +-
 docs/api/doxygen/functions_v.html                  |   44 +-
 docs/api/doxygen/functions_vars.html               |   19 +-
 docs/api/doxygen/functions_vars_a.html             |   11 +-
 docs/api/doxygen/functions_vars_b.html             |    7 +
 docs/api/doxygen/functions_vars_c.html             |    7 +-
 docs/api/doxygen/functions_vars_d.html             |   22 +-
 docs/api/doxygen/functions_vars_e.html             |    5 +-
 docs/api/doxygen/functions_vars_f.html             |    4 +-
 docs/api/doxygen/functions_vars_i.html             |   11 +-
 docs/api/doxygen/functions_vars_k.html             |    2 +-
 docs/api/doxygen/functions_vars_l.html             |    3 -
 docs/api/doxygen/functions_vars_m.html             |    6 +
 docs/api/doxygen/functions_vars_n.html             |    4 +-
 docs/api/doxygen/functions_vars_p.html             |    6 +-
 docs/api/doxygen/functions_vars_r.html             |    6 +
 docs/api/doxygen/functions_vars_s.html             |   17 +-
 docs/api/doxygen/functions_vars_t.html             |    6 +
 docs/api/doxygen/functions_vars_v.html             |    5 +-
 docs/api/doxygen/functions_vars_w.html             |    3 +
 docs/api/doxygen/functions_w.html                  |   13 +-
 docs/api/doxygen/functor_8h.html                   |    2 +-
 docs/api/doxygen/functor_8h__dep__incl.svg         | 1119 ++--
 docs/api/doxygen/functor_8h__incl.svg              |    2 +-
 docs/api/doxygen/fuse_8h.html                      |    2 +-
 docs/api/doxygen/fuse_8h__incl.svg                 | 1955 +++---
 docs/api/doxygen/generic_2default_8h.html          |    2 +-
 docs/api/doxygen/generic_2default_8h__incl.svg     | 2139 +++----
 docs/api/doxygen/generic_2default_8h_source.html   |    6 +-
 docs/api/doxygen/generic_2extern_8h.html           |    2 +-
 docs/api/doxygen/generic_2extern_8h__incl.svg      | 2149 +++----
 docs/api/doxygen/generic_2extern_8h_source.html    |    6 +-
 docs/api/doxygen/generic_2injective_8h.html        |    2 +-
 docs/api/doxygen/generic_2injective_8h__incl.svg   | 2139 +++----
 docs/api/doxygen/generic_2injective_8h_source.html |    6 +-
 docs/api/doxygen/generic__func_8h.html             |    2 +-
 docs/api/doxygen/generic__func_8h__incl.svg        | 1629 ++---
 docs/api/doxygen/generic__func_8h_source.html      |   12 +-
 docs/api/doxygen/globals_defs.html                 |   13 +-
 docs/api/doxygen/globals_eval.html                 |   10 +-
 docs/api/doxygen/globals_func.html                 |   50 +-
 docs/api/doxygen/globals_k.html                    |   10 +-
 docs/api/doxygen/globals_t.html                    |   74 +-
 docs/api/doxygen/globals_type.html                 |   13 +-
 docs/api/doxygen/globals_u.html                    |    2 +-
 docs/api/doxygen/graph__executor_8h.html           |  497 ++
 docs/api/doxygen/graph__executor_8h__incl.svg      |  230 +
 docs/api/doxygen/graph__executor_8h_source.html    |  130 +
 docs/api/doxygen/graph__executor__module_8h.html   |  136 +
 .../doxygen/graph__executor__module_8h__incl.svg   |   34 +
 .../doxygen/graph__executor__module_8h_source.html |  103 +
 docs/api/doxygen/graph__runtime_8h.html            |  497 --
 docs/api/doxygen/graph__runtime_8h__incl.svg       |  230 -
 docs/api/doxygen/graph__runtime_8h_source.html     |  131 -
 docs/api/doxygen/graph__runtime__module_8h.html    |  136 -
 .../doxygen/graph__runtime__module_8h__incl.svg    |   33 -
 .../doxygen/graph__runtime__module_8h_source.html  |  103 -
 docs/api/doxygen/hierarchy.html                    | 1837 +++---
 docs/api/doxygen/image_8h.html                     |    2 +-
 docs/api/doxygen/image_8h__incl.svg                | 1755 +++---
 docs/api/doxygen/image_8h_source.html              |    2 +-
 docs/api/doxygen/inherit_graph_0.svg               |    4 +-
 docs/api/doxygen/inherit_graph_1.svg               |    4 +-
 docs/api/doxygen/inherit_graph_10.svg              |   37 +-
 docs/api/doxygen/inherit_graph_106.svg             |  131 +-
 docs/api/doxygen/inherit_graph_11.svg              |   15 +-
 docs/api/doxygen/inherit_graph_117.svg             |   15 +-
 docs/api/doxygen/inherit_graph_118.svg             |   14 +-
 docs/api/doxygen/inherit_graph_119.svg             |   16 +-
 docs/api/doxygen/inherit_graph_12.svg              |   39 +-
 docs/api/doxygen/inherit_graph_120.svg             |   14 +-
 docs/api/doxygen/inherit_graph_121.svg             |   17 +-
 docs/api/doxygen/inherit_graph_122.svg             |   15 +-
 docs/api/doxygen/inherit_graph_123.svg             |   15 +-
 docs/api/doxygen/inherit_graph_124.svg             |   15 +-
 docs/api/doxygen/inherit_graph_125.svg             |   54 +-
 docs/api/doxygen/inherit_graph_126.svg             |   19 +-
 docs/api/doxygen/inherit_graph_127.svg             |   55 +-
 docs/api/doxygen/inherit_graph_128.svg             |   19 +-
 docs/api/doxygen/inherit_graph_129.svg             |   19 +-
 docs/api/doxygen/inherit_graph_13.svg              |   26 +-
 docs/api/doxygen/inherit_graph_130.svg             |   18 +-
 docs/api/doxygen/inherit_graph_131.svg             |   19 +-
 docs/api/doxygen/inherit_graph_132.svg             |   17 +-
 docs/api/doxygen/inherit_graph_133.svg             |   15 +-
 docs/api/doxygen/inherit_graph_134.svg             |   12 +-
 docs/api/doxygen/inherit_graph_135.svg             |   12 +-
 docs/api/doxygen/inherit_graph_136.svg             |   15 +-
 docs/api/doxygen/inherit_graph_137.svg             |   12 +-
 docs/api/doxygen/inherit_graph_138.svg             |   15 +-
 docs/api/doxygen/inherit_graph_139.svg             |   15 +-
 docs/api/doxygen/inherit_graph_14.svg              |   27 +-
 docs/api/doxygen/inherit_graph_140.svg             |   12 +-
 docs/api/doxygen/inherit_graph_141.svg             |   14 +-
 docs/api/doxygen/inherit_graph_142.svg             |   12 +-
 docs/api/doxygen/inherit_graph_143.svg             |   15 +-
 docs/api/doxygen/inherit_graph_144.svg             |   12 +-
 docs/api/doxygen/inherit_graph_145.svg             |   12 +-
 docs/api/doxygen/inherit_graph_146.svg             |   12 +-
 docs/api/doxygen/inherit_graph_147.svg             |   15 +-
 docs/api/doxygen/inherit_graph_148.svg             |   16 +-
 docs/api/doxygen/inherit_graph_149.svg             |   15 +-
 docs/api/doxygen/inherit_graph_15.svg              |   28 +-
 docs/api/doxygen/inherit_graph_150.svg             |   17 +-
 docs/api/doxygen/inherit_graph_151.svg             |   15 +-
 docs/api/doxygen/inherit_graph_152.svg             |   14 +-
 docs/api/doxygen/inherit_graph_153.svg             |   69 +-
 docs/api/doxygen/inherit_graph_154.svg             |   69 +-
 docs/api/doxygen/inherit_graph_155.svg             |   72 +-
 docs/api/doxygen/inherit_graph_156.svg             |   69 +-
 docs/api/doxygen/inherit_graph_157.svg             |   18 +-
 docs/api/doxygen/inherit_graph_158.svg             |   14 +-
 docs/api/doxygen/inherit_graph_159.svg             |   28 +-
 docs/api/doxygen/inherit_graph_16.svg              |    4 +-
 docs/api/doxygen/inherit_graph_160.svg             |   27 +-
 docs/api/doxygen/inherit_graph_161.svg             |   28 +-
 docs/api/doxygen/inherit_graph_162.svg             |   28 +-
 docs/api/doxygen/inherit_graph_163.svg             |   12 +-
 docs/api/doxygen/inherit_graph_164.svg             |   12 +-
 docs/api/doxygen/inherit_graph_165.svg             |   12 +-
 docs/api/doxygen/inherit_graph_166.svg             |   12 +-
 docs/api/doxygen/inherit_graph_167.svg             |   12 +-
 docs/api/doxygen/inherit_graph_168.svg             |   12 +-
 docs/api/doxygen/inherit_graph_169.svg             |   12 +-
 docs/api/doxygen/inherit_graph_17.svg              |   15 +-
 docs/api/doxygen/inherit_graph_170.svg             |   12 +-
 docs/api/doxygen/inherit_graph_171.svg             |   12 +-
 ...inherit_graph_170.svg => inherit_graph_172.svg} |    0
 ...inherit_graph_171.svg => inherit_graph_173.svg} |    0
 docs/api/doxygen/inherit_graph_18.svg              |   14 +-
 docs/api/doxygen/inherit_graph_19.svg              |   15 +-
 docs/api/doxygen/inherit_graph_20.svg              |   12 +-
 docs/api/doxygen/inherit_graph_21.svg              |   41 +-
 docs/api/doxygen/inherit_graph_22.svg              |   28 +-
 docs/api/doxygen/inherit_graph_23.svg              |   40 +-
 docs/api/doxygen/inherit_graph_24.svg              |   25 +-
 docs/api/doxygen/inherit_graph_25.svg              |   12 +-
 docs/api/doxygen/inherit_graph_26.svg              |   12 +-
 docs/api/doxygen/inherit_graph_27.svg              |   15 +-
 docs/api/doxygen/inherit_graph_28.svg              |   15 +-
 docs/api/doxygen/inherit_graph_29.svg              |   15 +-
 docs/api/doxygen/inherit_graph_30.svg              |   15 +-
 docs/api/doxygen/inherit_graph_31.svg              |   15 +-
 docs/api/doxygen/inherit_graph_32.svg              |   12 +-
 docs/api/doxygen/inherit_graph_33.svg              |   14 +-
 docs/api/doxygen/inherit_graph_34.svg              |   15 +-
 docs/api/doxygen/inherit_graph_35.svg              |   14 +-
 docs/api/doxygen/inherit_graph_36.svg              |   14 +-
 docs/api/doxygen/inherit_graph_37.svg              |   14 +-
 docs/api/doxygen/inherit_graph_38.svg              |   54 +-
 docs/api/doxygen/inherit_graph_39.svg              |   14 +-
 docs/api/doxygen/inherit_graph_40.svg              |   54 +-
 docs/api/doxygen/inherit_graph_41.svg              |   27 +-
 docs/api/doxygen/inherit_graph_42.svg              |   24 +-
 docs/api/doxygen/inherit_graph_43.svg              |   27 +-
 docs/api/doxygen/inherit_graph_44.svg              |   24 +-
 docs/api/doxygen/inherit_graph_45.svg              |   17 +-
 docs/api/doxygen/inherit_graph_46.svg              |   17 +-
 docs/api/doxygen/inherit_graph_47.svg              |   17 +-
 docs/api/doxygen/inherit_graph_48.svg              |   17 +-
 docs/api/doxygen/inherit_graph_49.svg              |   14 +-
 docs/api/doxygen/inherit_graph_5.svg               | 3009 ++++++++-
 docs/api/doxygen/inherit_graph_50.svg              |   14 +-
 docs/api/doxygen/inherit_graph_51.svg              |    4 +-
 docs/api/doxygen/inherit_graph_52.svg              |    4 +-
 docs/api/doxygen/inherit_graph_53.svg              |    4 +-
 docs/api/doxygen/inherit_graph_54.svg              |   15 +-
 docs/api/doxygen/inherit_graph_55.svg              |    4 +-
 docs/api/doxygen/inherit_graph_56.svg              |   15 +-
 docs/api/doxygen/inherit_graph_57.svg              |   17 +-
 docs/api/doxygen/inherit_graph_58.svg              |   15 +-
 docs/api/doxygen/inherit_graph_59.svg              |   16 +-
 docs/api/doxygen/inherit_graph_6.svg               |   15 +-
 docs/api/doxygen/inherit_graph_60.svg              |   12 +-
 docs/api/doxygen/inherit_graph_61.svg              |   15 +-
 docs/api/doxygen/inherit_graph_62.svg              | 3128 +---------
 docs/api/doxygen/inherit_graph_63.svg              |   12 +-
 docs/api/doxygen/inherit_graph_64.svg              |   16 +-
 docs/api/doxygen/inherit_graph_65.svg              |   16 +-
 docs/api/doxygen/inherit_graph_66.svg              |   12 +-
 docs/api/doxygen/inherit_graph_67.svg              |   15 +-
 docs/api/doxygen/inherit_graph_68.svg              |   14 +-
 docs/api/doxygen/inherit_graph_69.svg              |   12 +-
 docs/api/doxygen/inherit_graph_7.svg               | 6360 +++++++++++++++++++-
 docs/api/doxygen/inherit_graph_70.svg              |   25 +-
 docs/api/doxygen/inherit_graph_71.svg              |   37 +-
 docs/api/doxygen/inherit_graph_72.svg              |   36 +-
 docs/api/doxygen/inherit_graph_73.svg              |   12 +-
 docs/api/doxygen/inherit_graph_74.svg              |   38 +-
 docs/api/doxygen/inherit_graph_75.svg              |   41 +-
 docs/api/doxygen/inherit_graph_76.svg              |   12 +-
 docs/api/doxygen/inherit_graph_77.svg              |   15 +-
 docs/api/doxygen/inherit_graph_78.svg              |   25 +-
 docs/api/doxygen/inherit_graph_79.svg              |   25 +-
 docs/api/doxygen/inherit_graph_8.svg               |   12 +-
 docs/api/doxygen/inherit_graph_80.svg              |   25 +-
 docs/api/doxygen/inherit_graph_81.svg              |   28 +-
 docs/api/doxygen/inherit_graph_82.svg              |   15 +-
 docs/api/doxygen/inherit_graph_83.svg              |   15 +-
 docs/api/doxygen/inherit_graph_84.svg              |   12 +-
 docs/api/doxygen/inherit_graph_85.svg              |   16 +-
 docs/api/doxygen/inherit_graph_86.svg              |   14 +-
 docs/api/doxygen/inherit_graph_87.svg              | 6323 +------------------
 docs/api/doxygen/inherit_graph_88.svg              |   14 +-
 docs/api/doxygen/inherit_graph_9.svg               |   12 +-
 docs/api/doxygen/inherit_graph_99.svg              |  360 +-
 docs/api/doxygen/inherits.html                     |  276 +-
 docs/api/doxygen/int__set_8h.html                  |    4 +-
 docs/api/doxygen/int__set_8h__incl.svg             | 1605 +++--
 docs/api/doxygen/int__set_8h_source.html           |    5 +-
 docs/api/doxygen/int__solver_8h.html               |    2 +-
 docs/api/doxygen/int__solver_8h__incl.svg          | 1929 +++---
 docs/api/doxygen/int__solver_8h_source.html        |    5 +-
 docs/api/doxygen/interpreter_8h.html               |   10 +-
 docs/api/doxygen/interpreter_8h__incl.svg          | 1748 +++---
 docs/api/doxygen/interpreter_8h_source.html        |   16 +-
 docs/api/doxygen/ir_2adt_8h.html                   |    6 +-
 docs/api/doxygen/ir_2adt_8h__dep__incl.svg         | 1345 ++---
 docs/api/doxygen/ir_2adt_8h__incl.svg              | 1419 +++--
 docs/api/doxygen/ir_2adt_8h_source.html            |    8 +-
 docs/api/doxygen/ir_2attrs_8h.html                 |    8 +-
 docs/api/doxygen/ir_2attrs_8h__dep__incl.svg       |  829 +--
 docs/api/doxygen/ir_2attrs_8h__incl.svg            | 1477 +++--
 docs/api/doxygen/ir_2attrs_8h_source.html          |   27 +-
 docs/api/doxygen/ir_2expr_8h.html                  |    6 +-
 docs/api/doxygen/ir_2expr_8h__dep__incl.svg        |  903 +--
 docs/api/doxygen/ir_2expr_8h__incl.svg             | 1371 ++---
 docs/api/doxygen/ir_2expr_8h_source.html           |   24 +-
 docs/api/doxygen/ir_2function_8h.html              |    6 +-
 docs/api/doxygen/ir_2function_8h__dep__incl.svg    | 1269 ++--
 docs/api/doxygen/ir_2function_8h__incl.svg         | 1519 +++--
 docs/api/doxygen/ir_2function_8h_source.html       |    9 +-
 docs/api/doxygen/ir_2module_8h.html                |    6 +-
 docs/api/doxygen/ir_2module_8h__dep__incl.svg      | 1366 ++---
 docs/api/doxygen/ir_2module_8h__incl.svg           | 1799 +++---
 docs/api/doxygen/ir_2module_8h_source.html         |   11 +-
 docs/api/doxygen/ir_2op_8h.html                    |    4 +-
 docs/api/doxygen/ir_2op_8h__incl.svg               | 1745 +++---
 docs/api/doxygen/ir_2op_8h_source.html             |    6 +-
 docs/api/doxygen/ir_2transform_8h.html             |    9 +-
 docs/api/doxygen/ir_2transform_8h__incl.svg        | 1774 +++---
 docs/api/doxygen/ir_2transform_8h_source.html      |   76 +-
 docs/api/doxygen/ir_2type_8h.html                  |    6 +-
 docs/api/doxygen/ir_2type_8h__dep__incl.svg        | 1230 ++--
 docs/api/doxygen/ir_2type_8h__incl.svg             | 1287 ++--
 docs/api/doxygen/ir_2type_8h_source.html           |   10 +-
 docs/api/doxygen/iter__affine__map_8h.html         |   11 +-
 docs/api/doxygen/iter__affine__map_8h__incl.svg    | 1716 +++---
 docs/api/doxygen/iter__affine__map_8h_source.html  |   44 +-
 docs/api/doxygen/local__response__norm_8h.html     |    2 +-
 .../api/doxygen/local__response__norm_8h__incl.svg | 1935 +++---
 .../doxygen/local__response__norm_8h_source.html   |    4 +-
 docs/api/doxygen/loop__state_8h.html               |    4 +-
 docs/api/doxygen/loop__state_8h__incl.svg          | 1779 +++---
 docs/api/doxygen/loop__state_8h_source.html        |   14 +-
 docs/api/doxygen/mapping_8h.html                   |    2 +-
 docs/api/doxygen/mapping_8h__incl.svg              | 1935 +++---
 docs/api/doxygen/measure_8h.html                   |    2 +-
 docs/api/doxygen/measure_8h__incl.svg              | 1763 +++---
 docs/api/doxygen/measure_8h_source.html            |   12 +-
 docs/api/doxygen/measure__record_8h.html           |    4 +-
 docs/api/doxygen/measure__record_8h__incl.svg      | 1695 +++---
 docs/api/doxygen/measure__record_8h_source.html    |    6 +-
 docs/api/doxygen/memory__manager_8h.html           |    2 +-
 docs/api/doxygen/memory__manager_8h__incl.svg      |  471 +-
 docs/api/doxygen/memory__manager_8h_source.html    |   45 +-
 docs/api/doxygen/namespacedmlc_1_1serializer.html  |    4 +-
 docs/api/doxygen/namespacemembers.html             |    5 +-
 docs/api/doxygen/namespacemembers_c.html           |   12 +-
 docs/api/doxygen/namespacemembers_d.html           |   21 +-
 docs/api/doxygen/namespacemembers_enum.html        |    3 +
 docs/api/doxygen/namespacemembers_func.html        |    5 +-
 docs/api/doxygen/namespacemembers_func_c.html      |   12 +-
 docs/api/doxygen/namespacemembers_func_d.html      |    9 +-
 docs/api/doxygen/namespacemembers_func_g.html      |   13 +-
 docs/api/doxygen/namespacemembers_func_i.html      |    4 +-
 docs/api/doxygen/namespacemembers_func_m.html      |    4 +-
 docs/api/doxygen/namespacemembers_func_p.html      |    5 +-
 docs/api/doxygen/namespacemembers_func_r.html      |    2 +-
 docs/api/doxygen/namespacemembers_func_s.html      |   10 +-
 docs/api/doxygen/namespacemembers_func_v.html      |    3 +
 docs/api/doxygen/namespacemembers_g.html           |   13 +-
 docs/api/doxygen/namespacemembers_h.html           |    3 +
 docs/api/doxygen/namespacemembers_i.html           |    4 +-
 docs/api/doxygen/namespacemembers_m.html           |    4 +-
 docs/api/doxygen/namespacemembers_p.html           |    5 +-
 docs/api/doxygen/namespacemembers_r.html           |    2 +-
 docs/api/doxygen/namespacemembers_s.html           |   18 +-
 docs/api/doxygen/namespacemembers_type.html        |    3 +
 docs/api/doxygen/namespacemembers_v.html           |    3 +
 docs/api/doxygen/namespacemembers_vars.html        |   18 +-
 docs/api/doxygen/namespaces.html                   |   37 +-
 docs/api/doxygen/namespacetvm.html                 |  597 +-
 docs/api/doxygen/namespacetvm_1_1arith.html        |  128 +-
 .../doxygen/namespacetvm_1_1auto__scheduler.html   |    8 +-
 docs/api/doxygen/namespacetvm_1_1relay.html        |   58 +-
 docs/api/doxygen/namespacetvm_1_1relay_1_1qnn.html |    2 +
 .../namespacetvm_1_1relay_1_1transform.html        |   10 +-
 docs/api/doxygen/namespacetvm_1_1runtime.html      |  207 +-
 .../namespacetvm_1_1runtime_1_1profiling.html      |  149 +
 docs/api/doxygen/namespacetvm_1_1te.html           |  157 +-
 docs/api/doxygen/namespacetvm_1_1tir.html          |  210 +-
 docs/api/doxygen/namespacetvm_1_1tir_1_1attr.html  |   47 +-
 .../doxygen/namespacetvm_1_1tir_1_1transform.html  |   10 +-
 docs/api/doxygen/ndarray_8h.html                   |   11 +-
 docs/api/doxygen/ndarray_8h__dep__incl.svg         | 1405 ++---
 docs/api/doxygen/ndarray_8h__incl.svg              |  416 +-
 docs/api/doxygen/ndarray_8h_source.html            |   90 +-
 docs/api/doxygen/nn_2bnn_8h.html                   |    2 +-
 docs/api/doxygen/nn_2bnn_8h__incl.svg              | 1923 +++---
 docs/api/doxygen/nn_2bnn_8h_source.html            |    6 +-
 docs/api/doxygen/nn_2dense_8h.html                 |    2 +-
 docs/api/doxygen/nn_2dense_8h__incl.svg            | 1935 +++---
 docs/api/doxygen/nn_2dense_8h_source.html          |    2 +-
 docs/api/doxygen/nn_2pooling_8h.html               |    2 +-
 docs/api/doxygen/nn_2pooling_8h__incl.svg          | 1953 +++---
 docs/api/doxygen/nn_2pooling_8h_source.html        |   12 +-
 docs/api/doxygen/nn_2softmax_8h.html               |    2 +-
 docs/api/doxygen/nn_2softmax_8h__incl.svg          | 1884 +++---
 docs/api/doxygen/nn_2softmax_8h_source.html        |    8 +-
 docs/api/doxygen/node_2container_8h.html           |  259 -
 docs/api/doxygen/node_2container_8h__dep__incl.svg | 1123 ----
 docs/api/doxygen/node_2container_8h__incl.svg      |  557 --
 docs/api/doxygen/node_2container_8h_source.html    |  222 -
 docs/api/doxygen/node_8h.html                      |    9 +-
 docs/api/doxygen/node_8h__dep__incl.svg            | 1344 +++--
 docs/api/doxygen/node_8h__incl.svg                 | 1207 ++--
 docs/api/doxygen/node_8h_source.html               |    9 +-
 docs/api/doxygen/object_8h.html                    |   28 +-
 docs/api/doxygen/object_8h__dep__incl.svg          | 1621 +++--
 docs/api/doxygen/object_8h__incl.svg               |    2 +-
 docs/api/doxygen/object_8h_source.html             |  123 +-
 docs/api/doxygen/op__strategy_8h.html              |    2 +-
 docs/api/doxygen/op__strategy_8h__incl.svg         | 1850 +++---
 docs/api/doxygen/op__strategy_8h_source.html       |    8 +-
 docs/api/doxygen/operation_8h.html                 |    2 +-
 docs/api/doxygen/operation_8h__incl.svg            | 1845 +++---
 docs/api/doxygen/operation_8h_source.html          |   13 +-
 docs/api/doxygen/packed__func_8h.html              |   40 +-
 docs/api/doxygen/packed__func_8h__dep__incl.svg    | 1100 ++--
 docs/api/doxygen/packed__func_8h__incl.svg         |  657 +-
 docs/api/doxygen/packed__func_8h_source.html       |  286 +-
 docs/api/doxygen/pad__utils_8h.html                |    2 +-
 docs/api/doxygen/pad__utils_8h__incl.svg           | 1806 +++---
 docs/api/doxygen/parser_8h.html                    |    2 +-
 docs/api/doxygen/parser_8h__incl.svg               |  717 +--
 docs/api/doxygen/pattern_8h.html                   |    4 +-
 docs/api/doxygen/pattern_8h__incl.svg              | 1599 +++--
 docs/api/doxygen/pattern_8h_source.html            |    4 +-
 docs/api/doxygen/pattern__functor_8h.html          |    2 +-
 docs/api/doxygen/pattern__functor_8h__incl.svg     | 2052 +++----
 docs/api/doxygen/platform_8h.html                  |   30 +-
 docs/api/doxygen/platform_8h__dep__incl.svg        |   32 +-
 docs/api/doxygen/platform_8h_source.html           |    6 +-
 docs/api/doxygen/profiling_8h.html                 |  165 +
 docs/api/doxygen/profiling_8h__incl.svg            |  650 ++
 docs/api/doxygen/profiling_8h_source.html          |  138 +
 docs/api/doxygen/random_8h.html                    |    2 +-
 docs/api/doxygen/random_8h__incl.svg               | 1487 +++--
 docs/api/doxygen/ravel__unravel_8h.html            |    2 +-
 docs/api/doxygen/ravel__unravel_8h__incl.svg       | 1963 +++---
 docs/api/doxygen/ravel__unravel_8h_source.html     |    2 +-
 docs/api/doxygen/reduce_8h.html                    |    2 +-
 docs/api/doxygen/reduce_8h__incl.svg               | 1493 +++--
 docs/api/doxygen/reduction_8h.html                 |    2 +-
 docs/api/doxygen/reduction_8h__incl.svg            | 1752 +++---
 docs/api/doxygen/reduction_8h_source.html          |   10 +-
 docs/api/doxygen/reflection_8h.html                |    8 +-
 docs/api/doxygen/reflection_8h__dep__incl.svg      | 1300 ++--
 docs/api/doxygen/reflection_8h__incl.svg           | 1140 ++--
 docs/api/doxygen/reflection_8h_source.html         |   11 +-
 docs/api/doxygen/registry_8h.html                  |    4 +-
 docs/api/doxygen/registry_8h__dep__incl.svg        |  965 +--
 docs/api/doxygen/registry_8h__incl.svg             |  685 +--
 docs/api/doxygen/registry_8h_source.html           |    6 +-
 docs/api/doxygen/relay_2adt_8h.html                |    2 +-
 docs/api/doxygen/relay_2adt_8h__incl.svg           | 2254 ++++---
 docs/api/doxygen/relay_2adt_8h_source.html         |    4 +-
 docs/api/doxygen/relay_2analysis_8h.html           |   10 +-
 docs/api/doxygen/relay_2analysis_8h__incl.svg      | 2193 ++++---
 docs/api/doxygen/relay_2analysis_8h_source.html    |    6 +-
 docs/api/doxygen/relay_2attrs_2memory_8h.html      |    2 +-
 docs/api/doxygen/relay_2attrs_2memory_8h__incl.svg | 1938 +++---
 .../doxygen/relay_2attrs_2memory_8h_source.html    |    2 +-
 docs/api/doxygen/relay_2attrs_2nn_8h.html          |    4 +-
 docs/api/doxygen/relay_2attrs_2nn_8h__incl.svg     | 1769 +++---
 docs/api/doxygen/relay_2attrs_2nn_8h_source.html   |    6 +-
 docs/api/doxygen/relay_2attrs_2transform_8h.html   |    6 +-
 .../doxygen/relay_2attrs_2transform_8h__incl.svg   | 2331 ++++---
 .../doxygen/relay_2attrs_2transform_8h_source.html |   20 +-
 docs/api/doxygen/relay_2attrs_2vm_8h.html          |    2 +-
 docs/api/doxygen/relay_2attrs_2vm_8h__incl.svg     | 1485 +++--
 docs/api/doxygen/relay_2expr_8h.html               |    2 +-
 docs/api/doxygen/relay_2expr_8h__incl.svg          | 2081 ++++---
 docs/api/doxygen/relay_2expr_8h_source.html        |    6 +-
 docs/api/doxygen/relay_2expr__functor_8h.html      |    2 +-
 docs/api/doxygen/relay_2expr__functor_8h__incl.svg | 2014 +++----
 .../doxygen/relay_2expr__functor_8h_source.html    |   78 +-
 docs/api/doxygen/relay_2feature_8h.html            |    4 +-
 docs/api/doxygen/relay_2feature_8h__incl.svg       | 2091 ++++---
 docs/api/doxygen/relay_2feature_8h_source.html     |    4 +-
 docs/api/doxygen/relay_2function_8h.html           |    2 +-
 docs/api/doxygen/relay_2function_8h__incl.svg      | 2017 +++----
 docs/api/doxygen/relay_2function_8h_source.html    |    4 +-
 docs/api/doxygen/relay_2op_8h.html                 |    2 +-
 docs/api/doxygen/relay_2op_8h__incl.svg            | 2192 ++++---
 docs/api/doxygen/relay_2op__attr__types_8h.html    |    2 +-
 .../doxygen/relay_2op__attr__types_8h__incl.svg    | 1908 +++---
 .../doxygen/relay_2op__attr__types_8h_source.html  |    2 +-
 docs/api/doxygen/relay_2qnn_2attrs_8h.html         |    4 +-
 docs/api/doxygen/relay_2qnn_2attrs_8h__incl.svg    | 1493 +++--
 docs/api/doxygen/relay_2qnn_2attrs_8h_source.html  |   11 +-
 docs/api/doxygen/relay_2qnn_2transform_8h.html     |    2 +-
 .../api/doxygen/relay_2qnn_2transform_8h__incl.svg | 1475 +++--
 docs/api/doxygen/relay_2transform_8h.html          |    6 +-
 docs/api/doxygen/relay_2transform_8h__incl.svg     | 1895 +++---
 docs/api/doxygen/relay_2transform_8h_source.html   |   27 +-
 docs/api/doxygen/relay_2type_8h.html               |    2 +-
 docs/api/doxygen/relay_2type_8h__incl.svg          | 2115 ++++---
 docs/api/doxygen/reorg_8h.html                     |    2 +-
 docs/api/doxygen/reorg_8h__incl.svg                | 1810 +++---
 docs/api/doxygen/reorg_8h_source.html              |    2 +-
 docs/api/doxygen/repr__printer_8h.html             |    2 +-
 docs/api/doxygen/repr__printer_8h__dep__incl.svg   | 1324 ++--
 docs/api/doxygen/repr__printer_8h__incl.svg        |    2 +-
 docs/api/doxygen/repr__printer_8h_source.html      |    2 +-
 docs/api/doxygen/rocblas_8h.html                   |    2 +-
 docs/api/doxygen/rocblas_8h__incl.svg              | 1924 +++---
 docs/api/doxygen/rocblas_8h_source.html            |    2 +-
 docs/api/doxygen/rocm_2dense_8h.html               |    2 +-
 docs/api/doxygen/rocm_2dense_8h__incl.svg          | 1680 +++---
 docs/api/doxygen/rocm_2dense_8h_source.html        |    4 +-
 docs/api/doxygen/rocm_2injective_8h.html           |    2 +-
 docs/api/doxygen/rocm_2injective_8h__incl.svg      | 1978 +++---
 docs/api/doxygen/rocm_2injective_8h_source.html    |    4 +-
 docs/api/doxygen/rocm_2normalization_8h.html       |    2 +-
 docs/api/doxygen/rocm_2normalization_8h__incl.svg  | 1956 +++---
 .../api/doxygen/rocm_2normalization_8h_source.html |    2 +-
 docs/api/doxygen/rocm_2pooling_8h.html             |    2 +-
 docs/api/doxygen/rocm_2pooling_8h__incl.svg        | 1968 +++---
 docs/api/doxygen/rocm_2pooling_8h_source.html      |    4 +-
 docs/api/doxygen/rocm_2reduction_8h.html           |    2 +-
 docs/api/doxygen/rocm_2reduction_8h__incl.svg      | 1978 +++---
 docs/api/doxygen/rocm_2reduction_8h_source.html    |    4 +-
 docs/api/doxygen/rocm_2softmax_8h.html             |    2 +-
 docs/api/doxygen/rocm_2softmax_8h__incl.svg        | 1978 +++---
 docs/api/doxygen/rocm_2softmax_8h_source.html      |    4 +-
 docs/api/doxygen/runtime_2container_8h.html        |  319 -
 .../doxygen/runtime_2container_8h__dep__incl.svg   | 1064 ----
 docs/api/doxygen/runtime_2container_8h__incl.svg   |  249 -
 docs/api/doxygen/runtime_2container_8h_source.html |  271 -
 .../doxygen/runtime_2crt_2memory_8h_source.html    |    6 +-
 docs/api/doxygen/runtime_2crt_2module_8h.html      |    2 +-
 .../doxygen/runtime_2crt_2module_8h__dep__incl.svg |   32 +-
 .../doxygen/runtime_2crt_2module_8h_source.html    |    2 +-
 docs/api/doxygen/runtime_2memory_8h.html           |    2 +-
 docs/api/doxygen/runtime_2memory_8h__dep__incl.svg | 1652 +++--
 docs/api/doxygen/runtime_2memory_8h__incl.svg      |    2 +-
 docs/api/doxygen/runtime_2memory_8h_source.html    |    6 +-
 docs/api/doxygen/runtime_2module_8h.html           |    4 +-
 docs/api/doxygen/runtime_2module_8h__dep__incl.svg | 1086 ++--
 docs/api/doxygen/runtime_2module_8h__incl.svg      |  551 +-
 docs/api/doxygen/runtime_2module_8h_source.html    |   12 +-
 docs/api/doxygen/runtime_2vm_2vm_8h.html           |    4 +-
 docs/api/doxygen/runtime_2vm_2vm_8h__incl.svg      |  869 +--
 docs/api/doxygen/runtime_2vm_2vm_8h_source.html    |   21 +-
 docs/api/doxygen/schedule_8h.html                  |    2 +-
 docs/api/doxygen/schedule_8h__incl.svg             | 1978 +++---
 docs/api/doxygen/schedule_8h_source.html           |    9 +-
 docs/api/doxygen/schedule__pass_8h.html            |    5 +-
 docs/api/doxygen/schedule__pass_8h__incl.svg       | 1946 +++---
 docs/api/doxygen/schedule__pass_8h_source.html     |    3 +-
 docs/api/doxygen/search/all_0.js                   |    8 +-
 docs/api/doxygen/search/all_1.js                   |   31 +-
 docs/api/doxygen/search/all_10.js                  |   24 +-
 docs/api/doxygen/search/all_12.js                  |   19 +-
 docs/api/doxygen/search/all_13.js                  |   86 +-
 docs/api/doxygen/search/all_14.js                  |   91 +-
 docs/api/doxygen/search/all_15.js                  |    6 +-
 docs/api/doxygen/search/all_16.js                  |   19 +-
 docs/api/doxygen/search/all_17.js                  |    9 +-
 docs/api/doxygen/search/all_19.js                  |    5 +-
 docs/api/doxygen/search/all_2.js                   |   30 +-
 docs/api/doxygen/search/all_3.js                   |   50 +-
 docs/api/doxygen/search/all_4.js                   |   40 +-
 docs/api/doxygen/search/all_5.js                   |   30 +-
 docs/api/doxygen/search/all_6.js                   |   27 +-
 docs/api/doxygen/search/all_7.js                   |   22 +-
 docs/api/doxygen/search/all_8.js                   |    5 +-
 docs/api/doxygen/search/all_9.js                   |   33 +-
 docs/api/doxygen/search/all_b.js                   |   15 +-
 docs/api/doxygen/search/all_c.js                   |    3 +-
 docs/api/doxygen/search/all_d.js                   |   47 +-
 docs/api/doxygen/search/all_e.js                   |    8 +-
 docs/api/doxygen/search/all_f.js                   |   22 +-
 docs/api/doxygen/search/classes_0.js               |    3 +-
 docs/api/doxygen/search/classes_1.js               |    6 +
 docs/api/doxygen/search/classes_10.js              |    6 +-
 docs/api/doxygen/search/classes_11.js              |    4 +-
 docs/api/doxygen/search/classes_13.js              |    2 +-
 docs/api/doxygen/search/classes_14.js              |    2 +
 docs/api/doxygen/search/classes_2.js               |    8 +-
 docs/api/doxygen/search/classes_3.js               |    5 +-
 docs/api/doxygen/search/classes_4.js               |    3 +-
 docs/api/doxygen/search/classes_7.js               |    2 +-
 docs/api/doxygen/search/classes_8.js               |    4 +-
 docs/api/doxygen/search/classes_a.js               |   18 +-
 docs/api/doxygen/search/classes_d.js               |    2 +
 docs/api/doxygen/search/classes_f.js               |    2 +-
 docs/api/doxygen/search/defines_8.js               |    9 +-
 docs/api/doxygen/search/defines_9.js               |    2 +-
 docs/api/doxygen/search/enums_3.js                 |    1 +
 docs/api/doxygen/search/enumvalues_5.js            |    9 +-
 docs/api/doxygen/search/files_2.js                 |    3 +-
 docs/api/doxygen/search/files_6.js                 |    4 +-
 docs/api/doxygen/search/files_c.js                 |    3 +-
 docs/api/doxygen/search/functions_1.js             |    9 +-
 docs/api/doxygen/search/functions_10.js            |    2 +
 docs/api/doxygen/search/functions_12.js            |    8 +-
 docs/api/doxygen/search/functions_13.js            |   32 +-
 docs/api/doxygen/search/functions_14.js            |   44 +-
 docs/api/doxygen/search/functions_15.js            |    2 +-
 docs/api/doxygen/search/functions_16.js            |    5 +-
 docs/api/doxygen/search/functions_17.js            |    4 +-
 docs/api/doxygen/search/functions_19.js            |    5 +-
 docs/api/doxygen/search/functions_2.js             |    6 +-
 docs/api/doxygen/search/functions_3.js             |   29 +-
 docs/api/doxygen/search/functions_4.js             |    4 +-
 docs/api/doxygen/search/functions_5.js             |    8 +-
 docs/api/doxygen/search/functions_6.js             |    9 +-
 docs/api/doxygen/search/functions_7.js             |   14 +-
 docs/api/doxygen/search/functions_9.js             |    9 +-
 docs/api/doxygen/search/functions_d.js             |    7 +-
 docs/api/doxygen/search/functions_e.js             |    2 +-
 docs/api/doxygen/search/functions_f.js             |   22 +-
 docs/api/doxygen/search/namespaces_2.js            |    3 +-
 docs/api/doxygen/search/related_1.js               |    1 +
 docs/api/doxygen/search/related_2.js               |    2 +-
 docs/api/doxygen/search/related_3.js               |    3 +-
 docs/api/doxygen/search/related_4.js               |    5 +-
 docs/api/doxygen/search/related_5.js               |   10 +-
 docs/api/doxygen/search/related_6.js               |    8 +-
 docs/api/doxygen/search/related_7.js               |   11 +-
 docs/api/doxygen/search/related_8.js               |   13 +-
 docs/api/doxygen/search/related_9.js               |    5 +-
 docs/api/doxygen/search/related_a.js               |   13 +-
 docs/api/doxygen/search/related_b.js               |   15 +-
 docs/api/doxygen/search/related_c.html             |   26 -
 docs/api/doxygen/search/related_c.js               |    7 -
 docs/api/doxygen/search/searchdata.js              |    2 +-
 docs/api/doxygen/search/typedefs_10.js             |    2 +-
 docs/api/doxygen/search/typedefs_2.js              |    2 +-
 docs/api/doxygen/search/typedefs_3.js              |    3 +-
 docs/api/doxygen/search/typedefs_7.js              |    2 +-
 docs/api/doxygen/search/typedefs_8.js              |    4 +-
 docs/api/doxygen/search/typedefs_9.js              |    2 +-
 docs/api/doxygen/search/typedefs_b.js              |    2 +-
 docs/api/doxygen/search/typedefs_c.js              |    2 +-
 docs/api/doxygen/search/typedefs_e.js              |    7 +-
 docs/api/doxygen/search/variables_0.js             |    8 +-
 docs/api/doxygen/search/variables_1.js             |    7 +-
 docs/api/doxygen/search/variables_10.js            |    2 +
 docs/api/doxygen/search/variables_11.js            |   18 +-
 docs/api/doxygen/search/variables_12.js            |    2 +
 docs/api/doxygen/search/variables_14.js            |    4 +-
 docs/api/doxygen/search/variables_15.js            |    3 +-
 docs/api/doxygen/search/variables_2.js             |    5 +-
 docs/api/doxygen/search/variables_3.js             |    4 +-
 docs/api/doxygen/search/variables_4.js             |   20 +-
 docs/api/doxygen/search/variables_5.js             |    3 +-
 docs/api/doxygen/search/variables_6.js             |    4 +-
 docs/api/doxygen/search/variables_8.js             |    1 +
 docs/api/doxygen/search/variables_9.js             |    6 +-
 docs/api/doxygen/search/variables_a.js             |    2 +-
 docs/api/doxygen/search/variables_b.js             |    1 -
 docs/api/doxygen/search/variables_c.js             |    2 +
 docs/api/doxygen/search/variables_d.js             |    6 +-
 docs/api/doxygen/search/variables_f.js             |    3 +-
 docs/api/doxygen/search__policy_8h.html            |    2 +-
 docs/api/doxygen/search__policy_8h__incl.svg       | 1784 +++---
 docs/api/doxygen/search__policy_8h_source.html     |    8 +-
 docs/api/doxygen/search__task_8h.html              |    3 +-
 docs/api/doxygen/search__task_8h__incl.svg         | 1929 +++---
 docs/api/doxygen/search__task_8h_source.html       |   55 +-
 docs/api/doxygen/serialization_8h__incl.svg        |    2 +-
 docs/api/doxygen/serializer_8h.html                |   10 +-
 docs/api/doxygen/serializer_8h__dep__incl.svg      | 1403 ++---
 docs/api/doxygen/serializer_8h__incl.svg           |  410 +-
 docs/api/doxygen/serializer_8h_source.html         |    5 +-
 docs/api/doxygen/source__map_8h.html               |    4 +-
 docs/api/doxygen/source__map_8h__dep__incl.svg     | 1351 ++---
 docs/api/doxygen/source__map_8h__incl.svg          | 1337 ++--
 docs/api/doxygen/source__map_8h_source.html        |    7 +-
 docs/api/doxygen/span_8h.html                      |    4 +-
 docs/api/doxygen/span_8h__dep__incl.svg            |  996 +--
 docs/api/doxygen/span_8h__incl.svg                 | 1221 ++--
 docs/api/doxygen/span_8h_source.html               |    8 +-
 docs/api/doxygen/stmt_8h.html                      |   51 +-
 docs/api/doxygen/stmt_8h__incl.svg                 | 1595 +++--
 docs/api/doxygen/stmt_8h_source.html               |  146 +-
 docs/api/doxygen/stmt__functor_8h.html             |    9 +-
 docs/api/doxygen/stmt__functor_8h__incl.svg        | 1883 +++---
 docs/api/doxygen/stmt__functor_8h_source.html      |   39 +-
 .../structMemoryManagerInterface-members.html      |    4 +-
 docs/api/doxygen/structMemoryManagerInterface.html |   24 +-
 docs/api/doxygen/structTVMArgs__coll__graph.svg    |    2 +-
 .../structTVMGraphExecutorGraphAttr-members.html   |  109 +
 .../doxygen/structTVMGraphExecutorGraphAttr.html   |  229 +
 ...tructTVMGraphExecutorGraphAttr__coll__graph.svg |   29 +
 .../structTVMGraphRuntimeGraphAttr-members.html    |  109 -
 .../doxygen/structTVMGraphRuntimeGraphAttr.html    |  229 -
 ...structTVMGraphRuntimeGraphAttr__coll__graph.svg |   29 -
 docs/api/doxygen/structTVMOpParam.html             |    4 +-
 .../doxygen/structTVMPackedFunc__coll__graph.svg   |    2 +-
 ...izer_1_1Handler_3_01DLContext_01_4-members.html |  107 -
 ..._1serializer_1_1Handler_3_01DLContext_01_4.html |  193 -
 ..._1_1Handler_3_01DLContext_01_4__coll__graph.svg |   24 -
 ...lizer_1_1Handler_3_01DLDevice_01_4-members.html |  107 +
 ...1_1serializer_1_1Handler_3_01DLDevice_01_4.html |  193 +
 ...r_1_1Handler_3_01DLDevice_01_4__coll__graph.svg |   24 +
 docs/api/doxygen/structtvm_1_1AttrError.html       |    4 +-
 .../structtvm_1_1AttrError__coll__graph.svg        |    8 +-
 .../structtvm_1_1AttrError__inherit__graph.svg     |    8 +-
 .../doxygen/structtvm_1_1ErrorBuilder-members.html |    2 +-
 docs/api/doxygen/structtvm_1_1ErrorBuilder.html    |   12 +-
 .../structtvm_1_1relay_1_1AllocStorageAttrs.html   |    2 +-
 ..._1_1relay_1_1AllocStorageAttrs__coll__graph.svg |  131 +-
 .../structtvm_1_1relay_1_1AllocTensorAttrs.html    |    2 +-
 ...m_1_1relay_1_1AllocTensorAttrs__coll__graph.svg |  219 +-
 .../doxygen/structtvm_1_1relay_1_1ArangeAttrs.html |    2 +-
 ...ucttvm_1_1relay_1_1ArangeAttrs__coll__graph.svg |  179 +-
 .../structtvm_1_1relay_1_1ArgsortAttrs.html        |    2 +-
 ...cttvm_1_1relay_1_1ArgsortAttrs__coll__graph.svg |  127 +-
 .../structtvm_1_1relay_1_1BinaryConv2DAttrs.html   |    2 +-
 ..._1_1relay_1_1BinaryConv2DAttrs__coll__graph.svg |  205 +-
 .../structtvm_1_1relay_1_1BinaryDenseAttrs.html    |    2 +-
 ...m_1_1relay_1_1BinaryDenseAttrs__coll__graph.svg |  187 +-
 .../structtvm_1_1relay_1_1BitPackAttrs.html        |    2 +-
 ...cttvm_1_1relay_1_1BitPackAttrs__coll__graph.svg |  127 +-
 .../doxygen/structtvm_1_1relay_1_1CastAttrs.html   |    2 +-
 ...tructtvm_1_1relay_1_1CastAttrs__coll__graph.svg |  127 +-
 .../structtvm_1_1relay_1_1CastHintAttrs.html       |    2 +-
 ...ttvm_1_1relay_1_1CastHintAttrs__coll__graph.svg |  129 +-
 .../doxygen/structtvm_1_1relay_1_1Conv1DAttrs.html |    2 +-
 ...ucttvm_1_1relay_1_1Conv1DAttrs__coll__graph.svg |  203 +-
 ...structtvm_1_1relay_1_1Conv1DTransposeAttrs.html |    2 +-
 ...1relay_1_1Conv1DTransposeAttrs__coll__graph.svg |  201 +-
 .../doxygen/structtvm_1_1relay_1_1Conv2DAttrs.html |    2 +-
 ...ucttvm_1_1relay_1_1Conv2DAttrs__coll__graph.svg |  327 +-
 ...structtvm_1_1relay_1_1Conv2DTransposeAttrs.html |    2 +-
 ...1relay_1_1Conv2DTransposeAttrs__coll__graph.svg |  201 +-
 .../structtvm_1_1relay_1_1Conv2DWinogradAttrs.html |    2 +-
 ..._1relay_1_1Conv2DWinogradAttrs__coll__graph.svg |  329 +-
 ..._1Conv2DWinogradNNPACKWeightTransformAttrs.html |    2 +-
 ...gradNNPACKWeightTransformAttrs__coll__graph.svg |  129 +-
 .../doxygen/structtvm_1_1relay_1_1Conv3DAttrs.html |    2 +-
 ...ucttvm_1_1relay_1_1Conv3DAttrs__coll__graph.svg |  327 +-
 ...structtvm_1_1relay_1_1Conv3DTransposeAttrs.html |    2 +-
 ...1relay_1_1Conv3DTransposeAttrs__coll__graph.svg |  201 +-
 .../structtvm_1_1relay_1_1Conv3DWinogradAttrs.html |    2 +-
 ..._1relay_1_1Conv3DWinogradAttrs__coll__graph.svg |  203 +-
 .../structtvm_1_1relay_1_1CropAndResizeAttrs.html  |    2 +-
 ...1_1relay_1_1CropAndResizeAttrs__coll__graph.svg |  145 +-
 .../structtvm_1_1relay_1_1CumsumAttrs-members.html |  124 -
 .../doxygen/structtvm_1_1relay_1_1CumsumAttrs.html |  259 -
 ...ucttvm_1_1relay_1_1CumsumAttrs__coll__graph.svg |  222 -
 ...tvm_1_1relay_1_1CumsumAttrs__inherit__graph.svg |   88 -
 ...tructtvm_1_1relay_1_1DeformableConv2DAttrs.html |    2 +-
 ...relay_1_1DeformableConv2DAttrs__coll__graph.svg |  203 +-
 .../doxygen/structtvm_1_1relay_1_1DenseAttrs.html  |    2 +-
 ...ructtvm_1_1relay_1_1DenseAttrs__coll__graph.svg |  301 +-
 .../structtvm_1_1relay_1_1Dilation2DAttrs.html     |    2 +-
 ...vm_1_1relay_1_1Dilation2DAttrs__coll__graph.svg |  145 +-
 .../doxygen/structtvm_1_1relay_1_1InitOpAttrs.html |    2 +-
 ...ucttvm_1_1relay_1_1InitOpAttrs__coll__graph.svg |  145 +-
 .../structtvm_1_1relay_1_1NdarraySizeAttrs.html    |    2 +-
 ...m_1_1relay_1_1NdarraySizeAttrs__coll__graph.svg |  129 +-
 .../doxygen/structtvm_1_1relay_1_1OneHotAttrs.html |    2 +-
 ...ucttvm_1_1relay_1_1OneHotAttrs__coll__graph.svg |  127 +-
 .../structtvm_1_1relay_1_1Resize3dAttrs.html       |    2 +-
 ...ttvm_1_1relay_1_1Resize3dAttrs__coll__graph.svg |  261 +-
 .../doxygen/structtvm_1_1relay_1_1ResizeAttrs.html |    2 +-
 ...ucttvm_1_1relay_1_1ResizeAttrs__coll__graph.svg |  143 +-
 .../structtvm_1_1relay_1_1ScanopAttrs-members.html |  124 +
 .../doxygen/structtvm_1_1relay_1_1ScanopAttrs.html |  259 +
 ...ucttvm_1_1relay_1_1ScanopAttrs__coll__graph.svg |  249 +
 ...tvm_1_1relay_1_1ScanopAttrs__inherit__graph.svg |   88 +
 .../structtvm_1_1relay_1_1ShapeOfAttrs.html        |    2 +-
 ...cttvm_1_1relay_1_1ShapeOfAttrs__coll__graph.svg |  127 +-
 .../doxygen/structtvm_1_1relay_1_1TopKAttrs.html   |    2 +-
 ...tructtvm_1_1relay_1_1TopKAttrs__coll__graph.svg |  143 +-
 ...structtvm_1_1relay_1_1qnn_1_1QuantizeAttrs.html |    2 +-
 ...1relay_1_1qnn_1_1QuantizeAttrs__coll__graph.svg |  129 +-
 ...ructtvm_1_1relay_1_1qnn_1_1RequantizeAttrs.html |    2 +-
 ...elay_1_1qnn_1_1RequantizeAttrs__coll__graph.svg |  131 +-
 ...y_1_1qnn_1_1SimulatedQuantizeAttrs-members.html |  122 +
 ..._1_1relay_1_1qnn_1_1SimulatedQuantizeAttrs.html |  226 +
 ...1qnn_1_1SimulatedQuantizeAttrs__coll__graph.svg |   88 +
 ...n_1_1SimulatedQuantizeAttrs__inherit__graph.svg |   88 +
 ...ttvm_1_1runtime_1_1Array_1_1ValueConverter.html |    4 +-
 .../structtvm_1_1runtime_1_1NullOptType.html       |    4 +-
 .../structtvm_1_1runtime_1_1ObjectEqual.html       |    4 +-
 .../structtvm_1_1runtime_1_1ObjectHash.html        |    4 +-
 ...TypeChecker_3_01Map_3_01K_00_01V_01_4_01_4.html |   10 +-
 .../doxygen/structtvm_1_1runtime_1_1TypeIndex.html |    2 +-
 ...1runtime_1_1profiling_1_1CallFrame-members.html |  109 +
 ...cttvm_1_1runtime_1_1profiling_1_1CallFrame.html |  183 +
 ...time_1_1profiling_1_1CallFrame__coll__graph.svg |  140 +
 ...ructtvm_1_1runtime_1_1vm_1_1Buffer-members.html |    4 +-
 .../structtvm_1_1runtime_1_1vm_1_1Buffer.html      |   18 +-
 ...tvm_1_1runtime_1_1vm_1_1Buffer__coll__graph.svg |    2 +-
 docs/api/doxygen/structtvm_1_1tir_1_1LENode.html   |    2 +-
 .../structtvm_1_1tir_1_1LENode__coll__graph.svg    |  151 +-
 docs/api/doxygen/structural__equal_8h.html         |    8 +-
 .../doxygen/structural__equal_8h__dep__incl.svg    | 1186 ++--
 docs/api/doxygen/structural__equal_8h__incl.svg    |  793 +--
 docs/api/doxygen/structural__equal_8h_source.html  |    4 +-
 docs/api/doxygen/structural__hash_8h.html          |    8 +-
 .../api/doxygen/structural__hash_8h__dep__incl.svg | 1186 ++--
 docs/api/doxygen/structural__hash_8h__incl.svg     |  806 +--
 docs/api/doxygen/structural__hash_8h_source.html   |    4 +-
 docs/api/doxygen/tag_8h.html                       |    4 +-
 docs/api/doxygen/tag_8h__incl.svg                  | 1788 +++---
 docs/api/doxygen/tag_8h_source.html                |    9 +-
 docs/api/doxygen/target_8h.html                    |   21 +-
 docs/api/doxygen/target_8h__incl.svg               | 1808 +++---
 docs/api/doxygen/target_8h_source.html             |   37 +-
 docs/api/doxygen/target__info_8h.html              |    2 +-
 docs/api/doxygen/target__info_8h__incl.svg         | 1387 +++--
 docs/api/doxygen/target__info_8h_source.html       |    2 +-
 docs/api/doxygen/target__kind_8h.html              |    4 +-
 docs/api/doxygen/target__kind_8h__incl.svg         | 1301 ++--
 docs/api/doxygen/target__kind_8h_source.html       |   13 +-
 docs/api/doxygen/tensor_8h.html                    |    4 +-
 docs/api/doxygen/tensor_8h__incl.svg               | 2032 +++----
 docs/api/doxygen/tensor_8h_source.html             |   12 +-
 docs/api/doxygen/tensor__intrin_8h.html            |    2 +-
 docs/api/doxygen/tensor__intrin_8h__incl.svg       | 2022 ++++---
 docs/api/doxygen/tensor__intrin_8h_source.html     |    2 +-
 docs/api/doxygen/tensor__type_8h.html              |    2 +-
 docs/api/doxygen/tensor__type_8h__incl.svg         | 1387 +++--
 docs/api/doxygen/tensor__type_8h_source.html       |    6 +-
 docs/api/doxygen/tensor__utils_8h.html             |    2 +-
 docs/api/doxygen/tensor__utils_8h__incl.svg        | 1963 +++---
 docs/api/doxygen/tir_2analysis_8h.html             |   12 +-
 docs/api/doxygen/tir_2analysis_8h__incl.svg        | 1902 +++---
 docs/api/doxygen/tir_2analysis_8h_source.html      |   14 +-
 docs/api/doxygen/tir_2expr_8h.html                 |    4 +-
 docs/api/doxygen/tir_2expr_8h__incl.svg            | 1561 +++--
 docs/api/doxygen/tir_2expr_8h_source.html          |   13 +-
 docs/api/doxygen/tir_2expr__functor_8h.html        |    2 +-
 docs/api/doxygen/tir_2expr__functor_8h__incl.svg   | 1461 +++--
 docs/api/doxygen/tir_2function_8h.html             |    2 +-
 docs/api/doxygen/tir_2function_8h__incl.svg        | 1831 +++---
 docs/api/doxygen/tir_2function_8h_source.html      |    9 +-
 docs/api/doxygen/tir_2op_8h.html                   |    2 +-
 docs/api/doxygen/tir_2op_8h__incl.svg              | 1726 +++---
 docs/api/doxygen/tir_2op_8h_source.html            |    2 +-
 docs/api/doxygen/tir_2op__attr__types_8h.html      |    4 +-
 docs/api/doxygen/tir_2op__attr__types_8h__incl.svg |  314 +-
 .../doxygen/tir_2op__attr__types_8h_source.html    |    6 +-
 docs/api/doxygen/tir_2transform_8h.html            |    2 +-
 docs/api/doxygen/tir_2transform_8h__incl.svg       | 1794 +++---
 docs/api/doxygen/tir_2transform_8h_source.html     |    2 +-
 docs/api/doxygen/topi_2nn_8h.html                  |    2 +-
 docs/api/doxygen/topi_2nn_8h__incl.svg             | 1879 +++---
 docs/api/doxygen/topi_2nn_8h_source.html           |    8 +-
 docs/api/doxygen/topi_2transform_8h.html           |    2 +-
 docs/api/doxygen/topi_2transform_8h__incl.svg      | 1691 +++---
 docs/api/doxygen/topi_2transform_8h_source.html    |   48 +-
 docs/api/doxygen/transform__step_8h.html           |    2 +-
 docs/api/doxygen/transform__step_8h__incl.svg      | 1850 +++---
 docs/api/doxygen/transform__step_8h_source.html    |   11 +-
 docs/api/doxygen/type__functor_8h.html             |    2 +-
 docs/api/doxygen/type__functor_8h__incl.svg        | 1704 +++---
 docs/api/doxygen/type__functor_8h_source.html      |    3 +-
 docs/api/doxygen/type__relation_8h.html            |    4 +-
 docs/api/doxygen/type__relation_8h__incl.svg       | 1695 +++---
 docs/api/doxygen/type__relation_8h_source.html     |    6 +-
 docs/api/doxygen/unionTVMValue-members.html        |    2 +-
 docs/api/doxygen/unionTVMValue.html                |    8 +-
 docs/api/doxygen/unionTVMValue__coll__graph.svg    |    2 +-
 docs/api/doxygen/utils_8h.html                     |    2 +-
 docs/api/doxygen/utils_8h__incl.svg                | 1385 +++--
 docs/api/doxygen/utils_8h_source.html              |    8 +-
 docs/api/doxygen/var_8h.html                       |    2 +-
 docs/api/doxygen/var_8h__incl.svg                  | 1403 +++--
 docs/api/doxygen/var_8h_source.html                |    6 +-
 docs/api/doxygen/vision_8h.html                    |    2 +-
 docs/api/doxygen/vision_8h__incl.svg               | 1755 +++---
 docs/api/doxygen/with_8h.html                      |    4 +-
 docs/api/doxygen/with_8h__incl.svg                 |   28 +-
 docs/api/doxygen/with_8h_source.html               |    2 +-
 docs/api/doxygen/x86_2bnn_8h.html                  |    2 +-
 docs/api/doxygen/x86_2bnn_8h__incl.svg             | 1822 +++---
 docs/api/doxygen/x86_2bnn_8h_source.html           |    6 +-
 docs/api/doxygen/x86_2default_8h.html              |    2 +-
 docs/api/doxygen/x86_2default_8h__incl.svg         | 2139 +++----
 docs/api/doxygen/x86_2default_8h_source.html       |    6 +-
 docs/api/doxygen/x86_2injective_8h.html            |    2 +-
 docs/api/doxygen/x86_2injective_8h__incl.svg       | 1822 +++---
 docs/api/doxygen/x86_2injective_8h_source.html     |    6 +-
 docs/api/javadoc/allclasses-frame.html             |    6 +-
 docs/api/javadoc/allclasses-noframe.html           |    6 +-
 docs/api/javadoc/index-all.html                    |  148 +-
 docs/api/javadoc/org/apache/tvm/ArgTypeCode.html   |   30 +-
 docs/api/javadoc/org/apache/tvm/Device.html        |  700 +++
 docs/api/javadoc/org/apache/tvm/Function.html      |    4 +-
 docs/api/javadoc/org/apache/tvm/NDArray.html       |   36 +-
 docs/api/javadoc/org/apache/tvm/NDArrayBase.html   |    4 +-
 docs/api/javadoc/org/apache/tvm/TVMContext.html    |  700 ---
 docs/api/javadoc/org/apache/tvm/TVMType.html       |    4 +-
 .../javadoc/org/apache/tvm/class-use/Device.html   |  404 ++
 .../javadoc/org/apache/tvm/class-use/Function.html |   12 +-
 .../javadoc/org/apache/tvm/class-use/Module.html   |    4 +-
 .../javadoc/org/apache/tvm/class-use/NDArray.html  |    8 +-
 .../org/apache/tvm/class-use/NDArrayBase.html      |    4 +-
 .../org/apache/tvm/class-use/TVMContext.html       |  404 --
 .../javadoc/org/apache/tvm/class-use/TVMType.html  |    4 +-
 .../org/apache/tvm/contrib/GraphExecutor.html      |  287 +
 .../org/apache/tvm/contrib/GraphModule.html        |    8 +-
 .../org/apache/tvm/contrib/GraphRuntime.html       |  287 -
 .../tvm/contrib/class-use/GraphExecutor.html       |  125 +
 .../apache/tvm/contrib/class-use/GraphModule.html  |    4 +-
 .../apache/tvm/contrib/class-use/GraphRuntime.html |  125 -
 .../org/apache/tvm/contrib/package-frame.html      |    2 +-
 .../org/apache/tvm/contrib/package-summary.html    |    8 +-
 .../org/apache/tvm/contrib/package-tree.html       |    2 +-
 docs/api/javadoc/org/apache/tvm/package-frame.html |    2 +-
 .../javadoc/org/apache/tvm/package-summary.html    |   14 +-
 docs/api/javadoc/org/apache/tvm/package-tree.html  |    2 +-
 docs/api/javadoc/org/apache/tvm/package-use.html   |   32 +-
 .../api/javadoc/org/apache/tvm/rpc/RPCSession.html |  140 +-
 .../apache/tvm/rpc/StandaloneServerProcessor.html  |    4 +-
 .../org/apache/tvm/rpc/TVMRemoteContext.html       |  259 -
 .../org/apache/tvm/rpc/TVMRemoteDevice.html        |  259 +
 .../org/apache/tvm/rpc/class-use/RPCSession.html   |    2 +-
 .../apache/tvm/rpc/class-use/TVMRemoteContext.html |  125 -
 .../apache/tvm/rpc/class-use/TVMRemoteDevice.html  |  125 +
 .../javadoc/org/apache/tvm/rpc/package-frame.html  |    2 +-
 .../org/apache/tvm/rpc/package-summary.html        |    2 +-
 .../javadoc/org/apache/tvm/rpc/package-tree.html   |   10 +-
 docs/api/javadoc/overview-tree.html                |   12 +-
 docs/api/links.html                                |    2 +-
 docs/api/python/auto_scheduler.html                |   69 +-
 docs/api/python/autotvm.html                       |    4 +-
 docs/api/python/contrib.html                       |   48 +-
 docs/api/python/driver.html                        |    4 +-
 docs/api/python/error.html                         |    4 +-
 docs/api/python/graph_executor.html                |  637 ++
 docs/api/python/graph_runtime.html                 |  637 --
 docs/api/python/index.html                         |    6 +-
 docs/api/python/ir.html                            |   44 +-
 docs/api/python/micro.html                         |   69 +-
 docs/api/python/ndarray.html                       |   24 +-
 docs/api/python/relay/analysis.html                |   16 +-
 docs/api/python/relay/backend.html                 |   40 +-
 docs/api/python/relay/dataflow_pattern.html        |    4 +-
 docs/api/python/relay/frontend.html                |    4 +-
 docs/api/python/relay/image.html                   |    4 +-
 docs/api/python/relay/index.html                   |  253 +-
 docs/api/python/relay/nn.html                      |   42 +-
 docs/api/python/relay/testing.html                 |    4 +-
 docs/api/python/relay/transform.html               |   46 +-
 docs/api/python/relay/vision.html                  |    4 +-
 docs/api/python/rpc.html                           |   22 +-
 docs/api/python/runtime.html                       |  253 +-
 docs/api/python/target.html                        |   32 +-
 docs/api/python/te.html                            |    4 +-
 docs/api/python/tir.html                           |  179 +-
 docs/api/python/topi.html                          |  379 +-
 docs/api/python/vta/index.html                     |    4 +-
 docs/api/rust/.lock                                |    0
 docs/api/rust/COPYRIGHT.txt                        |   45 -
 docs/api/rust/FiraSans-LICENSE.txt                 |   94 -
 docs/api/rust/FiraSans-Medium.woff                 |  Bin 186824 -> 0 bytes
 docs/api/rust/FiraSans-Regular.woff                |  Bin 183268 -> 0 bytes
 docs/api/rust/LICENSE-APACHE.txt                   |  201 -
 docs/api/rust/LICENSE-MIT.txt                      |   23 -
 docs/api/rust/SourceCodePro-LICENSE.txt            |   93 -
 docs/api/rust/SourceCodePro-Regular.woff           |  Bin 55472 -> 0 bytes
 docs/api/rust/SourceCodePro-Semibold.woff          |  Bin 55360 -> 0 bytes
 docs/api/rust/SourceSerifPro-Bold.ttf.woff         |  Bin 93248 -> 0 bytes
 docs/api/rust/SourceSerifPro-It.ttf.woff           |  Bin 36200 -> 0 bytes
 docs/api/rust/SourceSerifPro-LICENSE.md            |   93 -
 docs/api/rust/SourceSerifPro-Regular.ttf.woff      |  Bin 88596 -> 0 bytes
 docs/api/rust/array/all.html                       |    4 -
 docs/api/rust/array/fn.main.html                   |    2 -
 docs/api/rust/array/index.html                     |    3 -
 docs/api/rust/array/sidebar-items.js               |    1 -
 docs/api/rust/ayu.css                              |    1 -
 docs/api/rust/basics/all.html                      |    4 -
 docs/api/rust/basics/fn.main.html                  |    2 -
 docs/api/rust/basics/index.html                    |    3 -
 docs/api/rust/basics/sidebar-items.js              |    1 -
 docs/api/rust/brush.svg                            |    1 -
 docs/api/rust/compiler_ext/all.html                |    4 -
 docs/api/rust/compiler_ext/fn.tvm_export.html      |    2 -
 docs/api/rust/compiler_ext/index.html              |    3 -
 docs/api/rust/compiler_ext/sidebar-items.js        |    1 -
 docs/api/rust/dark.css                             |    1 -
 docs/api/rust/down-arrow.svg                       |    1 -
 docs/api/rust/error/all.html                       |    4 -
 docs/api/rust/error/fn.main.html                   |    2 -
 docs/api/rust/error/index.html                     |    3 -
 docs/api/rust/error/sidebar-items.js               |    1 -
 docs/api/rust/favicon.ico                          |  Bin 23229 -> 0 bytes
 docs/api/rust/float/all.html                       |    4 -
 docs/api/rust/float/fn.main.html                   |    2 -
 docs/api/rust/float/index.html                     |    3 -
 docs/api/rust/float/sidebar-items.js               |    1 -
 .../rust/implementors/core/clone/trait.Clone.js    |    5 -
 docs/api/rust/implementors/core/cmp/trait.Eq.js    |    5 -
 docs/api/rust/implementors/core/cmp/trait.Ord.js   |    4 -
 .../rust/implementors/core/cmp/trait.PartialEq.js  |    6 -
 .../rust/implementors/core/cmp/trait.PartialOrd.js |    4 -
 .../rust/implementors/core/convert/trait.AsRef.js  |    4 -
 .../rust/implementors/core/convert/trait.From.js   |    6 -
 .../implementors/core/convert/trait.TryFrom.js     |    6 -
 .../implementors/core/default/trait.Default.js     |    4 -
 docs/api/rust/implementors/core/fmt/trait.Debug.js |    7 -
 .../rust/implementors/core/fmt/trait.Display.js    |    6 -
 docs/api/rust/implementors/core/hash/trait.Hash.js |    5 -
 .../core/iter/traits/collect/trait.FromIterator.js |    3 -
 .../core/iter/traits/collect/trait.IntoIterator.js |    3 -
 .../core/iter/traits/iterator/trait.Iterator.js    |    3 -
 .../rust/implementors/core/marker/trait.Copy.js    |    3 -
 .../rust/implementors/core/marker/trait.Freeze.js  |    7 -
 .../rust/implementors/core/marker/trait.Send.js    |    7 -
 .../implementors/core/marker/trait.StructuralEq.js |    4 -
 .../core/marker/trait.StructuralPartialEq.js       |    5 -
 .../rust/implementors/core/marker/trait.Sync.js    |    7 -
 .../rust/implementors/core/marker/trait.Unpin.js   |    7 -
 .../implementors/core/ops/deref/trait.Deref.js     |    4 -
 .../rust/implementors/core/ops/drop/trait.Drop.js  |    4 -
 .../rust/implementors/core/str/trait.FromStr.js    |    3 -
 .../implementors/serde/de/trait.Deserialize.js     |    3 -
 .../rust/implementors/serde/ser/trait.Serialize.js |    3 -
 .../api/rust/implementors/std/error/trait.Error.js |    6 -
 .../implementors/std/panic/trait.RefUnwindSafe.js  |    7 -
 .../implementors/std/panic/trait.UnwindSafe.js     |    7 -
 .../rust/implementors/structopt/trait.StructOpt.js |    3 -
 .../structopt/trait.StructOptInternal.js           |    3 -
 .../tvm/runtime/function/trait.ToFunction.js       |    3 -
 .../tvm/runtime/function/trait.Typed.js            |    3 -
 .../tvm/runtime/ndarray/trait.Num32.js             |    3 -
 .../implementors/tvm/runtime/trait.IsObject.js     |    3 -
 .../implementors/tvm/runtime/trait.IsObjectRef.js  |    3 -
 .../rust/implementors/tvm_graph_rt/trait.Module.js |    3 -
 .../implementors/tvm_graph_rt/trait.PackedFunc.js  |    3 -
 .../tvm_rt/function/trait.ToFunction.js            |    3 -
 .../implementors/tvm_rt/function/trait.Typed.js    |    3 -
 .../implementors/tvm_rt/ndarray/trait.Num32.js     |    3 -
 .../implementors/tvm_rt/object/trait.IsObject.js   |    3 -
 .../tvm_rt/object/trait.IsObjectRef.js             |    3 -
 .../tvm_sys/packed_func/trait.PackedFunc.js        |    3 -
 docs/api/rust/int/all.html                         |    4 -
 docs/api/rust/int/fn.main.html                     |    2 -
 docs/api/rust/int/index.html                       |    3 -
 docs/api/rust/int/sidebar-items.js                 |    1 -
 docs/api/rust/light.css                            |    1 -
 docs/api/rust/main.js                              |    7 -
 docs/api/rust/normalize.css                        |    2 -
 docs/api/rust/noscript.css                         |    1 -
 docs/api/rust/resnet/all.html                      |    4 -
 docs/api/rust/resnet/fn.main.html                  |    2 -
 docs/api/rust/resnet/index.html                    |    3 -
 docs/api/rust/resnet/sidebar-items.js              |    1 -
 docs/api/rust/rust-logo.png                        |  Bin 5758 -> 0 bytes
 docs/api/rust/rustdoc.css                          |    1 -
 docs/api/rust/search-index.js                      |   20 -
 docs/api/rust/settings.css                         |    1 -
 docs/api/rust/settings.html                        |    3 -
 docs/api/rust/settings.js                          |    1 -
 docs/api/rust/source-files.js                      |   19 -
 docs/api/rust/source-script.js                     |    1 -
 docs/api/rust/src/array/array.rs.html              |  130 -
 docs/api/rust/src/basics/main.rs.html              |  106 -
 docs/api/rust/src/compiler_ext/lib.rs.html         |   74 -
 docs/api/rust/src/error/error.rs.html              |   94 -
 docs/api/rust/src/float/float.rs.html              |  104 -
 docs/api/rust/src/int/int.rs.html                  |   94 -
 docs/api/rust/src/resnet/main.rs.html              |  250 -
 docs/api/rust/src/string/string.rs.html            |  122 -
 docs/api/rust/src/test_rt_nn/main.rs.html          |  202 -
 docs/api/rust/src/test_rt_tvm_basic/main.rs.html   |   98 -
 docs/api/rust/src/test_rt_tvm_dso/main.rs.html     |   82 -
 docs/api/rust/src/tvm/ir/arith.rs.html             |   96 -
 docs/api/rust/src/tvm/ir/attrs.rs.html             |   62 -
 .../rust/src/tvm/ir/diagnostics/codespan.rs.html   |  436 --
 docs/api/rust/src/tvm/ir/diagnostics/mod.rs.html   |  496 --
 docs/api/rust/src/tvm/ir/expr.rs.html              |  208 -
 docs/api/rust/src/tvm/ir/function.rs.html          |   96 -
 docs/api/rust/src/tvm/ir/mod.rs.html               |   72 -
 docs/api/rust/src/tvm/ir/module.rs.html            |  774 ---
 docs/api/rust/src/tvm/ir/op.rs.html                |   90 -
 docs/api/rust/src/tvm/ir/relay/attrs/mod.rs.html   |   46 -
 docs/api/rust/src/tvm/ir/relay/attrs/nn.rs.html    |  292 -
 .../rust/src/tvm/ir/relay/attrs/transform.rs.html  |  170 -
 docs/api/rust/src/tvm/ir/relay/mod.rs.html         | 1180 ----
 docs/api/rust/src/tvm/ir/source_map.rs.html        |  118 -
 docs/api/rust/src/tvm/ir/span.rs.html              |  146 -
 docs/api/rust/src/tvm/ir/tir.rs.html               |  202 -
 docs/api/rust/src/tvm/ir/ty.rs.html                |  598 --
 docs/api/rust/src/tvm/lib.rs.html                  |  152 -
 docs/api/rust/src/tvm/python.rs.html               |  124 -
 docs/api/rust/src/tvm/runtime/graph_rt.rs.html     |  198 -
 docs/api/rust/src/tvm/runtime/mod.rs.html          |   48 -
 docs/api/rust/src/tvm/transform.rs.html            |  230 -
 docs/api/rust/src/tvm_graph_rt/allocator.rs.html   |  150 -
 docs/api/rust/src/tvm_graph_rt/array.rs.html       |  806 ---
 docs/api/rust/src/tvm_graph_rt/errors.rs.html      |  114 -
 docs/api/rust/src/tvm_graph_rt/graph.rs.html       | 1050 ----
 docs/api/rust/src/tvm_graph_rt/lib.rs.html         |  156 -
 docs/api/rust/src/tvm_graph_rt/module/dso.rs.html  |  300 -
 docs/api/rust/src/tvm_graph_rt/module/mod.rs.html  |  132 -
 .../rust/src/tvm_graph_rt/module/syslib.rs.html    |  150 -
 docs/api/rust/src/tvm_graph_rt/threading.rs.html   |  530 --
 docs/api/rust/src/tvm_graph_rt/workspace.rs.html   |  282 -
 docs/api/rust/src/tvm_macros/external.rs.html      |  400 --
 docs/api/rust/src/tvm_macros/import_module.rs.html |  270 -
 docs/api/rust/src/tvm_macros/lib.rs.html           |   92 -
 docs/api/rust/src/tvm_macros/object.rs.html        |  442 --
 docs/api/rust/src/tvm_macros/util.rs.html          |  100 -
 docs/api/rust/src/tvm_rt/array.rs.html             |  436 --
 docs/api/rust/src/tvm_rt/context.rs.html           |  198 -
 docs/api/rust/src/tvm_rt/errors.rs.html            |  198 -
 docs/api/rust/src/tvm_rt/function.rs.html          |  732 ---
 docs/api/rust/src/tvm_rt/lib.rs.html               |  264 -
 docs/api/rust/src/tvm_rt/map.rs.html               |  530 --
 docs/api/rust/src/tvm_rt/module.rs.html            |  264 -
 docs/api/rust/src/tvm_rt/ndarray.rs.html           | 1012 ----
 docs/api/rust/src/tvm_rt/object/mod.rs.html        |  198 -
 docs/api/rust/src/tvm_rt/object/object_ptr.rs.html |  980 ---
 docs/api/rust/src/tvm_rt/string.rs.html            |  288 -
 docs/api/rust/src/tvm_rt/to_function.rs.html       |  586 --
 docs/api/rust/src/tvm_rt/value.rs.html             |  216 -
 docs/api/rust/src/tvm_sys/array.rs.html            |  130 -
 docs/api/rust/src/tvm_sys/byte_array.rs.html       |  262 -
 docs/api/rust/src/tvm_sys/context.rs.html          |  596 --
 docs/api/rust/src/tvm_sys/datatype.rs.html         |  432 --
 docs/api/rust/src/tvm_sys/errors.rs.html           |   96 -
 docs/api/rust/src/tvm_sys/lib.rs.html              |  146 -
 docs/api/rust/src/tvm_sys/packed_func.rs.html      |  858 ---
 docs/api/rust/src/tvm_sys/value.rs.html            |  194 -
 .../rust/tvm-sys/src/c_runtime_api.rs.html         |    9 -
 docs/api/rust/src/tyck/tyck.rs.html                |  102 -
 docs/api/rust/storage.js                           |    1 -
 docs/api/rust/string/all.html                      |    4 -
 docs/api/rust/string/fn.main.html                  |    2 -
 docs/api/rust/string/index.html                    |    3 -
 docs/api/rust/string/sidebar-items.js              |    1 -
 docs/api/rust/test_rt_nn/all.html                  |    4 -
 docs/api/rust/test_rt_nn/constant.BATCH_SIZE.html  |    2 -
 docs/api/rust/test_rt_nn/constant.IN_DIM.html      |    2 -
 docs/api/rust/test_rt_nn/fn.main.html              |    2 -
 docs/api/rust/test_rt_nn/index.html                |    4 -
 docs/api/rust/test_rt_nn/sidebar-items.js          |    1 -
 docs/api/rust/test_rt_tvm_basic/all.html           |    4 -
 docs/api/rust/test_rt_tvm_basic/fn.main.html       |    2 -
 docs/api/rust/test_rt_tvm_basic/index.html         |    4 -
 docs/api/rust/test_rt_tvm_basic/sidebar-items.js   |    1 -
 .../tvm_mod/ext/fn.__tvm_main__.html               |    2 -
 .../tvm_mod/ext/fn.__tvm_module_ctx.html           |    2 -
 .../tvm_mod/ext/fn.__tvm_module_startup.html       |    2 -
 .../tvm_mod/ext/fn.default_function.html           |    2 -
 .../rust/test_rt_tvm_basic/tvm_mod/ext/index.html  |    3 -
 .../test_rt_tvm_basic/tvm_mod/ext/sidebar-items.js |    1 -
 .../test_rt_tvm_basic/tvm_mod/fn.__tvm_main__.html |    2 -
 .../tvm_mod/fn.__tvm_module_ctx.html               |    2 -
 .../tvm_mod/fn.__tvm_module_startup.html           |    2 -
 .../tvm_mod/fn.default_function.html               |    2 -
 docs/api/rust/test_rt_tvm_basic/tvm_mod/index.html |    4 -
 .../test_rt_tvm_basic/tvm_mod/sidebar-items.js     |    1 -
 docs/api/rust/test_rt_tvm_dso/all.html             |    4 -
 docs/api/rust/test_rt_tvm_dso/fn.main.html         |    2 -
 docs/api/rust/test_rt_tvm_dso/index.html           |    3 -
 docs/api/rust/test_rt_tvm_dso/sidebar-items.js     |    1 -
 docs/api/rust/theme.js                             |    1 -
 docs/api/rust/tvm/all.html                         |    4 -
 docs/api/rust/tvm/context/enum.DeviceType.html     |   49 -
 docs/api/rust/tvm/context/index.html               |    6 -
 docs/api/rust/tvm/context/sidebar-items.js         |    1 -
 docs/api/rust/tvm/context/struct.Context.html      |   43 -
 .../tvm/context/struct.UnsupportedDeviceError.html |   21 -
 docs/api/rust/tvm/enum.DeviceType.html             |   49 -
 docs/api/rust/tvm/enum.Error.html                  |   42 -
 docs/api/rust/tvm/enum.NDArrayError.html           |   34 -
 docs/api/rust/tvm/errors/enum.Error.html           |   42 -
 docs/api/rust/tvm/errors/enum.NDArrayError.html    |   34 -
 docs/api/rust/tvm/errors/index.html                |    4 -
 docs/api/rust/tvm/errors/sidebar-items.js          |    1 -
 .../tvm/errors/struct.FunctionNotFoundError.html   |   21 -
 .../rust/tvm/errors/struct.TypeMismatchError.html  |   25 -
 docs/api/rust/tvm/fn.version.html                  |    3 -
 docs/api/rust/tvm/function/enum.ArgValue.html      |  486 --
 docs/api/rust/tvm/function/enum.RetValue.html      |  383 --
 .../ffi/constant.DLDataTypeCode_kDLBfloat.html     |    2 -
 .../ffi/constant.DLDataTypeCode_kDLFloat.html      |    2 -
 .../ffi/constant.DLDataTypeCode_kDLInt.html        |    2 -
 .../ffi/constant.DLDataTypeCode_kDLUInt.html       |    2 -
 .../function/ffi/constant.DLDeviceType_kDLCPU.html |    3 -
 .../ffi/constant.DLDeviceType_kDLCPUPinned.html    |    4 -
 .../ffi/constant.DLDeviceType_kDLExtDev.html       |    5 -
 .../function/ffi/constant.DLDeviceType_kDLGPU.html |    3 -
 .../ffi/constant.DLDeviceType_kDLMetal.html        |    3 -
 .../ffi/constant.DLDeviceType_kDLOpenCL.html       |    3 -
 .../ffi/constant.DLDeviceType_kDLROCM.html         |    3 -
 .../function/ffi/constant.DLDeviceType_kDLVPI.html |    3 -
 .../ffi/constant.DLDeviceType_kDLVulkan.html       |    3 -
 .../tvm/function/ffi/constant.DLPACK_VERSION.html  |    2 -
 .../rust/tvm/function/ffi/constant.INT16_MAX.html  |    2 -
 .../rust/tvm/function/ffi/constant.INT16_MIN.html  |    2 -
 .../rust/tvm/function/ffi/constant.INT32_MAX.html  |    2 -
 .../rust/tvm/function/ffi/constant.INT32_MIN.html  |    2 -
 .../rust/tvm/function/ffi/constant.INT8_MAX.html   |    2 -
 .../rust/tvm/function/ffi/constant.INT8_MIN.html   |    2 -
 .../rust/tvm/function/ffi/constant.INTPTR_MAX.html |    2 -
 .../rust/tvm/function/ffi/constant.INTPTR_MIN.html |    2 -
 .../tvm/function/ffi/constant.INT_FAST16_MAX.html  |    2 -
 .../tvm/function/ffi/constant.INT_FAST16_MIN.html  |    2 -
 .../tvm/function/ffi/constant.INT_FAST32_MAX.html  |    2 -
 .../tvm/function/ffi/constant.INT_FAST32_MIN.html  |    2 -
 .../tvm/function/ffi/constant.INT_FAST8_MAX.html   |    2 -
 .../tvm/function/ffi/constant.INT_FAST8_MIN.html   |    2 -
 .../tvm/function/ffi/constant.INT_LEAST16_MAX.html |    2 -
 .../tvm/function/ffi/constant.INT_LEAST16_MIN.html |    2 -
 .../tvm/function/ffi/constant.INT_LEAST32_MAX.html |    2 -
 .../tvm/function/ffi/constant.INT_LEAST32_MIN.html |    2 -
 .../tvm/function/ffi/constant.INT_LEAST8_MAX.html  |    2 -
 .../tvm/function/ffi/constant.INT_LEAST8_MIN.html  |    2 -
 .../tvm/function/ffi/constant.PTRDIFF_MAX.html     |    2 -
 .../tvm/function/ffi/constant.PTRDIFF_MIN.html     |    2 -
 .../tvm/function/ffi/constant.SIG_ATOMIC_MAX.html  |    2 -
 .../tvm/function/ffi/constant.SIG_ATOMIC_MIN.html  |    2 -
 .../rust/tvm/function/ffi/constant.SIZE_MAX.html   |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMArgFloat.html  |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMArgInt.html    |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMBytes.html     |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMContext.html   |    2 -
 ...constant.TVMArgTypeCode_kTVMDLTensorHandle.html |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMDataType.html  |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMExtBegin.html  |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMExtEnd.html    |    2 -
 .../constant.TVMArgTypeCode_kTVMExtReserveEnd.html |    2 -
 .../constant.TVMArgTypeCode_kTVMModuleHandle.html  |    2 -
 .../constant.TVMArgTypeCode_kTVMNDArrayHandle.html |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMNNVMFirst.html |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMNNVMLast.html  |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMNullptr.html   |    2 -
 .../constant.TVMArgTypeCode_kTVMObjectHandle.html  |    2 -
 ...tant.TVMArgTypeCode_kTVMObjectRValueRefArg.html |    2 -
 .../constant.TVMArgTypeCode_kTVMOpaqueHandle.html  |    2 -
 ...nstant.TVMArgTypeCode_kTVMPackedFuncHandle.html |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMStr.html       |    2 -
 .../ffi/constant.TVMDeviceExtType_kDLAOCL.html     |    2 -
 .../ffi/constant.TVMDeviceExtType_kDLHexagon.html  |    2 -
 .../ffi/constant.TVMDeviceExtType_kDLMicroDev.html |    2 -
 .../ffi/constant.TVMDeviceExtType_kDLSDAccel.html  |    2 -
 .../ffi/constant.TVMDeviceExtType_kDLWebGPU.html   |    2 -
 .../ffi/constant.TVMDeviceExtType_kOpenGL.html     |    2 -
 .../tvm/function/ffi/constant.TVM_VERSION.html     |    2 -
 .../rust/tvm/function/ffi/constant.UINT16_MAX.html |    2 -
 .../rust/tvm/function/ffi/constant.UINT32_MAX.html |    2 -
 .../rust/tvm/function/ffi/constant.UINT8_MAX.html  |    2 -
 .../tvm/function/ffi/constant.UINTPTR_MAX.html     |    2 -
 .../tvm/function/ffi/constant.UINT_FAST16_MAX.html |    2 -
 .../tvm/function/ffi/constant.UINT_FAST32_MAX.html |    2 -
 .../tvm/function/ffi/constant.UINT_FAST8_MAX.html  |    2 -
 .../function/ffi/constant.UINT_LEAST16_MAX.html    |    2 -
 .../function/ffi/constant.UINT_LEAST32_MAX.html    |    2 -
 .../tvm/function/ffi/constant.UINT_LEAST8_MAX.html |    2 -
 .../rust/tvm/function/ffi/constant.WINT_MAX.html   |    2 -
 .../rust/tvm/function/ffi/constant.WINT_MIN.html   |    2 -
 .../tvm/function/ffi/constant._ATFILE_SOURCE.html  |    2 -
 .../tvm/function/ffi/constant._BITS_WCHAR_H.html   |    2 -
 .../tvm/function/ffi/constant._DEFAULT_SOURCE.html |    2 -
 .../tvm/function/ffi/constant._FEATURES_H.html     |    2 -
 .../tvm/function/ffi/constant._POSIX_C_SOURCE.html |    2 -
 .../tvm/function/ffi/constant._POSIX_SOURCE.html   |    2 -
 .../tvm/function/ffi/constant._STDC_PREDEF_H.html  |    2 -
 .../rust/tvm/function/ffi/constant._STDINT_H.html  |    2 -
 .../tvm/function/ffi/constant._SYS_CDEFS_H.html    |    2 -
 .../tvm/function/ffi/constant.__GLIBC_MINOR__.html |    2 -
 .../rust/tvm/function/ffi/constant.__GLIBC__.html  |    2 -
 .../tvm/function/ffi/constant.__GNU_LIBRARY__.html |    2 -
 .../ffi/constant.__STDC_IEC_559_COMPLEX__.html     |    2 -
 .../function/ffi/constant.__STDC_IEC_559__.html    |    2 -
 .../function/ffi/constant.__STDC_ISO_10646__.html  |    2 -
 .../function/ffi/constant.__STDC_NO_THREADS__.html |    2 -
 .../function/ffi/constant.__SYSCALL_WORDSIZE.html  |    2 -
 .../tvm/function/ffi/constant.__USE_ATFILE.html    |    2 -
 .../function/ffi/constant.__USE_FORTIFY_LEVEL.html |    2 -
 .../tvm/function/ffi/constant.__USE_ISOC11.html    |    2 -
 .../tvm/function/ffi/constant.__USE_ISOC95.html    |    2 -
 .../tvm/function/ffi/constant.__USE_ISOC99.html    |    2 -
 .../rust/tvm/function/ffi/constant.__USE_MISC.html |    2 -
 .../tvm/function/ffi/constant.__USE_POSIX.html     |    2 -
 .../function/ffi/constant.__USE_POSIX199309.html   |    2 -
 .../function/ffi/constant.__USE_POSIX199506.html   |    2 -
 .../tvm/function/ffi/constant.__USE_POSIX2.html    |    2 -
 .../ffi/constant.__USE_POSIX_IMPLICITLY.html       |    2 -
 .../tvm/function/ffi/constant.__USE_XOPEN2K.html   |    2 -
 .../tvm/function/ffi/constant.__USE_XOPEN2K8.html  |    2 -
 .../rust/tvm/function/ffi/constant.__WORDSIZE.html |    2 -
 .../ffi/constant.__WORDSIZE_TIME64_COMPAT32.html   |    2 -
 .../tvm/function/ffi/fn.TVMAPISetLastError.html    |    5 -
 .../rust/tvm/function/ffi/fn.TVMArrayAlloc.html    |   13 -
 .../tvm/function/ffi/fn.TVMArrayCopyFromBytes.html |    7 -
 .../tvm/function/ffi/fn.TVMArrayCopyFromTo.html    |    7 -
 .../tvm/function/ffi/fn.TVMArrayCopyToBytes.html   |    7 -
 .../api/rust/tvm/function/ffi/fn.TVMArrayFree.html |    5 -
 .../tvm/function/ffi/fn.TVMArrayFromDLPack.html    |    7 -
 .../rust/tvm/function/ffi/fn.TVMArrayToDLPack.html |    7 -
 .../function/ffi/fn.TVMBackendAllocWorkspace.html  |   12 -
 .../function/ffi/fn.TVMBackendFreeWorkspace.html   |    8 -
 .../function/ffi/fn.TVMBackendGetFuncFromEnv.html  |    9 -
 .../function/ffi/fn.TVMBackendParallelBarrier.html |    6 -
 .../function/ffi/fn.TVMBackendParallelLaunch.html  |    8 -
 .../ffi/fn.TVMBackendRegisterSystemLibSymbol.html  |    6 -
 .../tvm/function/ffi/fn.TVMBackendRunOnce.html     |   10 -
 .../tvm/function/ffi/fn.TVMCFuncSetReturn.html     |    9 -
 .../rust/tvm/function/ffi/fn.TVMCbArgToReturn.html |    8 -
 .../ffi/fn.TVMDLManagedTensorCallDeleter.html      |    4 -
 .../function/ffi/fn.TVMDeviceAllocDataSpace.html   |   10 -
 .../function/ffi/fn.TVMDeviceCopyDataFromTo.html   |   14 -
 .../function/ffi/fn.TVMDeviceFreeDataSpace.html    |    6 -
 docs/api/rust/tvm/function/ffi/fn.TVMFuncCall.html |   16 -
 .../function/ffi/fn.TVMFuncCreateFromCFunc.html    |    9 -
 docs/api/rust/tvm/function/ffi/fn.TVMFuncFree.html |    5 -
 .../rust/tvm/function/ffi/fn.TVMFuncGetGlobal.html |    7 -
 .../function/ffi/fn.TVMFuncListGlobalNames.html    |    6 -
 .../tvm/function/ffi/fn.TVMFuncRegisterGlobal.html |    7 -
 .../tvm/function/ffi/fn.TVMFuncRemoveGlobal.html   |    4 -
 .../rust/tvm/function/ffi/fn.TVMGetLastError.html  |    8 -
 docs/api/rust/tvm/function/ffi/fn.TVMModFree.html  |    9 -
 .../tvm/function/ffi/fn.TVMModGetFunction.html     |    8 -
 .../api/rust/tvm/function/ffi/fn.TVMModImport.html |    7 -
 .../tvm/function/ffi/fn.TVMModLoadFromFile.html    |    9 -
 .../tvm/function/ffi/fn.TVMObjectDerivedFrom.html  |    7 -
 .../rust/tvm/function/ffi/fn.TVMObjectFree.html    |    7 -
 .../tvm/function/ffi/fn.TVMObjectGetTypeIndex.html |    6 -
 .../rust/tvm/function/ffi/fn.TVMObjectRetain.html  |    6 -
 .../function/ffi/fn.TVMObjectTypeKey2Index.html    |    6 -
 .../api/rust/tvm/function/ffi/fn.TVMSetStream.html |   10 -
 .../rust/tvm/function/ffi/fn.TVMStreamCreate.html  |    7 -
 .../rust/tvm/function/ffi/fn.TVMStreamFree.html    |    7 -
 .../ffi/fn.TVMStreamStreamSynchronize.html         |    8 -
 .../rust/tvm/function/ffi/fn.TVMSynchronize.html   |    7 -
 docs/api/rust/tvm/function/ffi/index.html          |  182 -
 docs/api/rust/tvm/function/ffi/sidebar-items.js    |    1 -
 .../rust/tvm/function/ffi/struct.DLContext.html    |   47 -
 .../rust/tvm/function/ffi/struct.DLDataType.html   |   54 -
 .../tvm/function/ffi/struct.DLManagedTensor.html   |   40 -
 .../api/rust/tvm/function/ffi/struct.DLTensor.html |   63 -
 .../rust/tvm/function/ffi/struct.TVMByteArray.html |   33 -
 .../function/ffi/struct.TVMParallelGroupEnv.html   |   30 -
 .../tvm/function/ffi/type.BackendPackedCFunc.html  |    2 -
 .../rust/tvm/function/ffi/type.DLDataTypeCode.html |    3 -
 .../rust/tvm/function/ffi/type.DLDeviceType.html   |    3 -
 .../tvm/function/ffi/type.FTVMParallelLambda.html  |    6 -
 .../rust/tvm/function/ffi/type.TVMArgTypeCode.html |   14 -
 .../rust/tvm/function/ffi/type.TVMArrayHandle.html |    3 -
 .../function/ffi/type.TVMBackendPackedCFunc.html   |   10 -
 .../api/rust/tvm/function/ffi/type.TVMContext.html |    3 -
 .../tvm/function/ffi/type.TVMDeviceExtType.html    |    3 -
 .../ffi/type.TVMExtensionFuncDeclarer.html         |    7 -
 .../tvm/function/ffi/type.TVMFunctionHandle.html   |    3 -
 .../tvm/function/ffi/type.TVMModuleHandle.html     |    3 -
 .../tvm/function/ffi/type.TVMObjectHandle.html     |    3 -
 .../rust/tvm/function/ffi/type.TVMPackedCFunc.html |   10 -
 .../function/ffi/type.TVMPackedCFuncFinalizer.html |    4 -
 .../tvm/function/ffi/type.TVMRetValueHandle.html   |    3 -
 .../tvm/function/ffi/type.TVMStreamHandle.html     |    4 -
 .../rust/tvm/function/ffi/type.int_fast16_t.html   |    2 -
 .../rust/tvm/function/ffi/type.int_fast32_t.html   |    2 -
 .../rust/tvm/function/ffi/type.int_fast64_t.html   |    2 -
 .../rust/tvm/function/ffi/type.int_fast8_t.html    |    2 -
 .../rust/tvm/function/ffi/type.int_least16_t.html  |    2 -
 .../rust/tvm/function/ffi/type.int_least32_t.html  |    2 -
 .../rust/tvm/function/ffi/type.int_least64_t.html  |    2 -
 .../rust/tvm/function/ffi/type.int_least8_t.html   |    2 -
 docs/api/rust/tvm/function/ffi/type.intmax_t.html  |    2 -
 .../rust/tvm/function/ffi/type.tvm_index_t.html    |    3 -
 .../rust/tvm/function/ffi/type.uint_fast16_t.html  |    2 -
 .../rust/tvm/function/ffi/type.uint_fast32_t.html  |    2 -
 .../rust/tvm/function/ffi/type.uint_fast64_t.html  |    2 -
 .../rust/tvm/function/ffi/type.uint_fast8_t.html   |    2 -
 .../rust/tvm/function/ffi/type.uint_least16_t.html |    2 -
 .../rust/tvm/function/ffi/type.uint_least32_t.html |    2 -
 .../rust/tvm/function/ffi/type.uint_least64_t.html |    2 -
 .../rust/tvm/function/ffi/type.uint_least8_t.html  |    2 -
 docs/api/rust/tvm/function/ffi/type.uintmax_t.html |    2 -
 docs/api/rust/tvm/function/ffi/type.wchar_t.html   |    2 -
 docs/api/rust/tvm/function/ffi/union.TVMValue.html |   47 -
 docs/api/rust/tvm/function/fn.register.html        |   20 -
 .../rust/tvm/function/fn.register_override.html    |    4 -
 .../api/rust/tvm/function/fn.register_untyped.html |    2 -
 docs/api/rust/tvm/function/index.html              |   27 -
 docs/api/rust/tvm/function/sidebar-items.js        |    1 -
 docs/api/rust/tvm/function/struct.Function.html    |   38 -
 docs/api/rust/tvm/function/trait.ToFunction.html   |   22 -
 docs/api/rust/tvm/function/trait.Typed.html        |   14 -
 docs/api/rust/tvm/function/type.Result.html        |    2 -
 docs/api/rust/tvm/index.html                       |   33 -
 docs/api/rust/tvm/ir/arith/index.html              |    3 -
 docs/api/rust/tvm/ir/arith/sidebar-items.js        |    1 -
 .../rust/tvm/ir/arith/struct.ConstIntBound.html    |   35 -
 .../tvm/ir/arith/struct.ConstIntBoundNode.html     |   22 -
 docs/api/rust/tvm/ir/attrs/index.html              |    3 -
 docs/api/rust/tvm/ir/attrs/sidebar-items.js        |    1 -
 docs/api/rust/tvm/ir/attrs/struct.Attrs.html       |   35 -
 .../rust/tvm/ir/attrs/struct.BaseAttrsNode.html    |   20 -
 .../rust/tvm/ir/diagnostics/codespan/fn.init.html  |    4 -
 .../rust/tvm/ir/diagnostics/codespan/index.html    |    8 -
 .../tvm/ir/diagnostics/codespan/sidebar-items.js   |    1 -
 .../tvm/ir/diagnostics/enum.DiagnosticLevel.html   |   25 -
 docs/api/rust/tvm/ir/diagnostics/index.html        |   12 -
 docs/api/rust/tvm/ir/diagnostics/sidebar-items.js  |    1 -
 .../rust/tvm/ir/diagnostics/struct.Diagnostic.html |   35 -
 .../ir/diagnostics/struct.DiagnosticBuilder.html   |   22 -
 .../ir/diagnostics/struct.DiagnosticContext.html   |   40 -
 .../diagnostics/struct.DiagnosticContextNode.html  |   27 -
 .../tvm/ir/diagnostics/struct.DiagnosticNode.html  |   27 -
 .../ir/diagnostics/struct.DiagnosticRenderer.html  |   36 -
 .../diagnostics/struct.DiagnosticRendererNode.html |   29 -
 docs/api/rust/tvm/ir/expr/fn.as_text.html          |    2 -
 docs/api/rust/tvm/ir/expr/index.html               |    4 -
 docs/api/rust/tvm/ir/expr/sidebar-items.js         |    1 -
 docs/api/rust/tvm/ir/expr/struct.BaseExpr.html     |   35 -
 docs/api/rust/tvm/ir/expr/struct.BaseExprNode.html |   20 -
 docs/api/rust/tvm/ir/expr/struct.GlobalVar.html    |   35 -
 .../api/rust/tvm/ir/expr/struct.GlobalVarNode.html |   21 -
 docs/api/rust/tvm/ir/expr/struct.PrimExpr.html     |   36 -
 docs/api/rust/tvm/ir/expr/struct.PrimExprNode.html |   21 -
 docs/api/rust/tvm/ir/function/index.html           |    4 -
 docs/api/rust/tvm/ir/function/sidebar-items.js     |    1 -
 docs/api/rust/tvm/ir/function/struct.BaseFunc.html |   35 -
 .../rust/tvm/ir/function/struct.BaseFuncNode.html  |   21 -
 docs/api/rust/tvm/ir/function/type.DictAttrs.html  |    2 -
 docs/api/rust/tvm/ir/index.html                    |    4 -
 docs/api/rust/tvm/ir/module/enum.Error.html        |   28 -
 docs/api/rust/tvm/ir/module/index.html             |    4 -
 docs/api/rust/tvm/ir/module/sidebar-items.js       |    1 -
 docs/api/rust/tvm/ir/module/struct.IRModule.html   |   35 -
 .../rust/tvm/ir/module/struct.IRModuleNode.html    |   23 -
 docs/api/rust/tvm/ir/op/index.html                 |    3 -
 docs/api/rust/tvm/ir/op/sidebar-items.js           |    1 -
 docs/api/rust/tvm/ir/op/struct.Op.html             |   35 -
 docs/api/rust/tvm/ir/op/struct.OpNode.html         |   28 -
 docs/api/rust/tvm/ir/relay/attrs/index.html        |    3 -
 docs/api/rust/tvm/ir/relay/attrs/nn/index.html     |    3 -
 .../rust/tvm/ir/relay/attrs/nn/sidebar-items.js    |    1 -
 .../ir/relay/attrs/nn/struct.AvgPool2DAttrs.html   |   35 -
 .../relay/attrs/nn/struct.AvgPool2DAttrsNode.html  |   26 -
 .../ir/relay/attrs/nn/struct.BatchNormAttrs.html   |   35 -
 .../relay/attrs/nn/struct.BatchNormAttrsNode.html  |   24 -
 .../tvm/ir/relay/attrs/nn/struct.BiasAddAttrs.html |   35 -
 .../ir/relay/attrs/nn/struct.BiasAddAttrsNode.html |   21 -
 .../tvm/ir/relay/attrs/nn/struct.Conv2DAttrs.html  |   35 -
 .../ir/relay/attrs/nn/struct.Conv2DAttrsNode.html  |   30 -
 .../tvm/ir/relay/attrs/nn/struct.DenseAttrs.html   |   35 -
 .../ir/relay/attrs/nn/struct.DenseAttrsNode.html   |   22 -
 .../relay/attrs/nn/struct.GlobalPool2DAttrs.html   |   35 -
 .../attrs/nn/struct.GlobalPool2DAttrsNode.html     |   21 -
 .../ir/relay/attrs/nn/struct.LeakyReluAttrs.html   |   35 -
 .../relay/attrs/nn/struct.LeakyReluAttrsNode.html  |   21 -
 .../ir/relay/attrs/nn/struct.MaxPool2DAttrs.html   |   35 -
 .../relay/attrs/nn/struct.MaxPool2DAttrsNode.html  |   25 -
 .../tvm/ir/relay/attrs/nn/struct.SoftmaxAttrs.html |   35 -
 .../ir/relay/attrs/nn/struct.SoftmaxAttrsNode.html |   21 -
 .../ir/relay/attrs/nn/struct.UpSamplingAttrs.html  |   35 -
 .../relay/attrs/nn/struct.UpSamplingAttrsNode.html |   25 -
 docs/api/rust/tvm/ir/relay/attrs/sidebar-items.js  |    1 -
 .../rust/tvm/ir/relay/attrs/transform/index.html   |    3 -
 .../tvm/ir/relay/attrs/transform/sidebar-items.js  |    1 -
 .../attrs/transform/struct.ConcatenateAttrs.html   |   35 -
 .../transform/struct.ConcatenateAttrsNode.html     |   21 -
 .../attrs/transform/struct.ExpandDimsAttrs.html    |   35 -
 .../transform/struct.ExpandDimsAttrsNode.html      |   22 -
 .../relay/attrs/transform/struct.ReshapeAttrs.html |   35 -
 .../attrs/transform/struct.ReshapeAttrsNode.html   |   22 -
 .../relay/attrs/transform/struct.SplitAttrs.html   |   35 -
 .../attrs/transform/struct.SplitAttrsNode.html     |   22 -
 .../relay/attrs/transform/struct.SqueezeAttrs.html |   35 -
 .../attrs/transform/struct.SqueezeAttrsNode.html   |   21 -
 .../attrs/transform/struct.TransposeAttrs.html     |   35 -
 .../attrs/transform/struct.TransposeAttrsNode.html |   21 -
 docs/api/rust/tvm/ir/relay/index.html              |    5 -
 docs/api/rust/tvm/ir/relay/sidebar-items.js        |    1 -
 docs/api/rust/tvm/ir/relay/struct.Call.html        |   35 -
 docs/api/rust/tvm/ir/relay/struct.CallNode.html    |   24 -
 docs/api/rust/tvm/ir/relay/struct.Clause.html      |   35 -
 docs/api/rust/tvm/ir/relay/struct.ClauseNode.html  |   22 -
 docs/api/rust/tvm/ir/relay/struct.Constant.html    |   35 -
 .../api/rust/tvm/ir/relay/struct.ConstantNode.html |   21 -
 docs/api/rust/tvm/ir/relay/struct.Constructor.html |   35 -
 .../rust/tvm/ir/relay/struct.ConstructorNode.html  |   23 -
 docs/api/rust/tvm/ir/relay/struct.DataType.html    |   47 -
 docs/api/rust/tvm/ir/relay/struct.Expr.html        |   35 -
 docs/api/rust/tvm/ir/relay/struct.ExprNode.html    |   22 -
 docs/api/rust/tvm/ir/relay/struct.Function.html    |   35 -
 .../api/rust/tvm/ir/relay/struct.FunctionNode.html |   24 -
 docs/api/rust/tvm/ir/relay/struct.Id.html          |   35 -
 docs/api/rust/tvm/ir/relay/struct.IdNode.html      |   21 -
 docs/api/rust/tvm/ir/relay/struct.If.html          |   35 -
 docs/api/rust/tvm/ir/relay/struct.IfNode.html      |   23 -
 docs/api/rust/tvm/ir/relay/struct.Let.html         |   35 -
 docs/api/rust/tvm/ir/relay/struct.LetNode.html     |   23 -
 docs/api/rust/tvm/ir/relay/struct.Match.html       |   35 -
 docs/api/rust/tvm/ir/relay/struct.MatchNode.html   |   23 -
 docs/api/rust/tvm/ir/relay/struct.Pattern.html     |   35 -
 .../tvm/ir/relay/struct.PatternConstructor.html    |   35 -
 .../ir/relay/struct.PatternConstructorNode.html    |   22 -
 docs/api/rust/tvm/ir/relay/struct.PatternNode.html |   21 -
 .../api/rust/tvm/ir/relay/struct.PatternTuple.html |   35 -
 .../rust/tvm/ir/relay/struct.PatternTupleNode.html |   21 -
 docs/api/rust/tvm/ir/relay/struct.PatternVar.html  |   35 -
 .../rust/tvm/ir/relay/struct.PatternVarNode.html   |   21 -
 .../rust/tvm/ir/relay/struct.PatternWildcard.html  |   35 -
 .../tvm/ir/relay/struct.PatternWildcardNode.html   |   20 -
 docs/api/rust/tvm/ir/relay/struct.RefCreate.html   |   35 -
 .../rust/tvm/ir/relay/struct.RefCreateNode.html    |   21 -
 docs/api/rust/tvm/ir/relay/struct.RefRead.html     |   35 -
 docs/api/rust/tvm/ir/relay/struct.RefReadNode.html |   21 -
 docs/api/rust/tvm/ir/relay/struct.RefWrite.html    |   35 -
 .../api/rust/tvm/ir/relay/struct.RefWriteNode.html |   22 -
 docs/api/rust/tvm/ir/relay/struct.Tuple.html       |   35 -
 .../api/rust/tvm/ir/relay/struct.TupleGetItem.html |   35 -
 .../rust/tvm/ir/relay/struct.TupleGetItemNode.html |   22 -
 docs/api/rust/tvm/ir/relay/struct.TupleNode.html   |   21 -
 docs/api/rust/tvm/ir/relay/struct.Var.html         |   35 -
 docs/api/rust/tvm/ir/relay/struct.VarNode.html     |   22 -
 docs/api/rust/tvm/ir/sidebar-items.js              |    1 -
 docs/api/rust/tvm/ir/source_map/index.html         |    5 -
 docs/api/rust/tvm/ir/source_map/sidebar-items.js   |    1 -
 docs/api/rust/tvm/ir/source_map/struct.Source.html |   35 -
 .../rust/tvm/ir/source_map/struct.SourceMap.html   |   35 -
 .../tvm/ir/source_map/struct.SourceMapNode.html    |   24 -
 .../rust/tvm/ir/source_map/struct.SourceNode.html  |   26 -
 docs/api/rust/tvm/ir/span/index.html               |    5 -
 docs/api/rust/tvm/ir/span/sidebar-items.js         |    1 -
 docs/api/rust/tvm/ir/span/struct.SourceName.html   |   35 -
 .../rust/tvm/ir/span/struct.SourceNameNode.html    |   22 -
 docs/api/rust/tvm/ir/span/struct.Span.html         |   35 -
 docs/api/rust/tvm/ir/span/struct.SpanNode.html     |   31 -
 docs/api/rust/tvm/ir/tir/index.html                |    3 -
 docs/api/rust/tvm/ir/tir/sidebar-items.js          |    1 -
 docs/api/rust/tvm/ir/tir/struct.Add.html           |   35 -
 docs/api/rust/tvm/ir/tir/struct.AddNode.html       |   22 -
 docs/api/rust/tvm/ir/tir/struct.And.html           |   35 -
 docs/api/rust/tvm/ir/tir/struct.AndNode.html       |   22 -
 docs/api/rust/tvm/ir/tir/struct.Cast.html          |   35 -
 docs/api/rust/tvm/ir/tir/struct.CastNode.html      |   21 -
 docs/api/rust/tvm/ir/tir/struct.Div.html           |   35 -
 docs/api/rust/tvm/ir/tir/struct.DivNode.html       |   22 -
 docs/api/rust/tvm/ir/tir/struct.Eq.html            |   35 -
 docs/api/rust/tvm/ir/tir/struct.EqNode.html        |   22 -
 docs/api/rust/tvm/ir/tir/struct.FloorDiv.html      |   35 -
 docs/api/rust/tvm/ir/tir/struct.FloorDivNode.html  |   22 -
 docs/api/rust/tvm/ir/tir/struct.FloorMod.html      |   35 -
 docs/api/rust/tvm/ir/tir/struct.FloorModNode.html  |   22 -
 docs/api/rust/tvm/ir/tir/struct.Ge.html            |   35 -
 docs/api/rust/tvm/ir/tir/struct.GeNode.html        |   22 -
 docs/api/rust/tvm/ir/tir/struct.Gt.html            |   35 -
 docs/api/rust/tvm/ir/tir/struct.GtNode.html        |   22 -
 docs/api/rust/tvm/ir/tir/struct.IntImm.html        |   36 -
 docs/api/rust/tvm/ir/tir/struct.IntImmNode.html    |   21 -
 docs/api/rust/tvm/ir/tir/struct.Le.html            |   35 -
 docs/api/rust/tvm/ir/tir/struct.LeNode.html        |   22 -
 docs/api/rust/tvm/ir/tir/struct.Let.html           |   35 -
 docs/api/rust/tvm/ir/tir/struct.LetNode.html       |   23 -
 docs/api/rust/tvm/ir/tir/struct.Lt.html            |   35 -
 docs/api/rust/tvm/ir/tir/struct.LtNode.html        |   22 -
 docs/api/rust/tvm/ir/tir/struct.Max.html           |   35 -
 docs/api/rust/tvm/ir/tir/struct.MaxNode.html       |   22 -
 docs/api/rust/tvm/ir/tir/struct.Min.html           |   35 -
 docs/api/rust/tvm/ir/tir/struct.MinNode.html       |   22 -
 docs/api/rust/tvm/ir/tir/struct.Mod.html           |   35 -
 docs/api/rust/tvm/ir/tir/struct.ModNode.html       |   22 -
 docs/api/rust/tvm/ir/tir/struct.Mul.html           |   35 -
 docs/api/rust/tvm/ir/tir/struct.MulNode.html       |   22 -
 docs/api/rust/tvm/ir/tir/struct.Ne.html            |   35 -
 docs/api/rust/tvm/ir/tir/struct.NeNode.html        |   22 -
 docs/api/rust/tvm/ir/tir/struct.Not.html           |   35 -
 docs/api/rust/tvm/ir/tir/struct.NotNode.html       |   21 -
 docs/api/rust/tvm/ir/tir/struct.Or.html            |   35 -
 docs/api/rust/tvm/ir/tir/struct.OrNode.html        |   22 -
 docs/api/rust/tvm/ir/tir/struct.Ramp.html          |   35 -
 docs/api/rust/tvm/ir/tir/struct.RampNode.html      |   23 -
 docs/api/rust/tvm/ir/tir/struct.Select.html        |   35 -
 docs/api/rust/tvm/ir/tir/struct.SelectNode.html    |   23 -
 docs/api/rust/tvm/ir/tir/struct.Sub.html           |   35 -
 docs/api/rust/tvm/ir/tir/struct.SubNode.html       |   22 -
 docs/api/rust/tvm/ir/tir/struct.Var.html           |   35 -
 docs/api/rust/tvm/ir/tir/struct.VarNode.html       |   21 -
 docs/api/rust/tvm/ir/ty/enum.TypeKind.html         |   26 -
 docs/api/rust/tvm/ir/ty/index.html                 |    8 -
 docs/api/rust/tvm/ir/ty/sidebar-items.js           |    1 -
 docs/api/rust/tvm/ir/ty/struct.BaseTensorType.html |   35 -
 .../rust/tvm/ir/ty/struct.BaseTensorTypeNode.html  |   20 -
 docs/api/rust/tvm/ir/ty/struct.FuncType.html       |   35 -
 docs/api/rust/tvm/ir/ty/struct.FuncTypeNode.html   |   30 -
 docs/api/rust/tvm/ir/ty/struct.GlobalTypeVar.html  |   35 -
 .../rust/tvm/ir/ty/struct.GlobalTypeVarNode.html   |   23 -
 docs/api/rust/tvm/ir/ty/struct.IncompleteType.html |   35 -
 .../rust/tvm/ir/ty/struct.IncompleteTypeNode.html  |   21 -
 docs/api/rust/tvm/ir/ty/struct.PointerType.html    |   35 -
 .../api/rust/tvm/ir/ty/struct.PointerTypeNode.html |   22 -
 docs/api/rust/tvm/ir/ty/struct.PrimType.html       |   35 -
 docs/api/rust/tvm/ir/ty/struct.PrimTypeNode.html   |   22 -
 docs/api/rust/tvm/ir/ty/struct.RefType.html        |   35 -
 .../rust/tvm/ir/ty/struct.RelayRefTypeNode.html    |   21 -
 docs/api/rust/tvm/ir/ty/struct.TensorType.html     |   35 -
 docs/api/rust/tvm/ir/ty/struct.TensorTypeNode.html |   22 -
 docs/api/rust/tvm/ir/ty/struct.TupleType.html      |   35 -
 docs/api/rust/tvm/ir/ty/struct.TupleTypeNode.html  |   21 -
 docs/api/rust/tvm/ir/ty/struct.Type.html           |   35 -
 docs/api/rust/tvm/ir/ty/struct.TypeConstraint.html |   35 -
 .../rust/tvm/ir/ty/struct.TypeConstraintNode.html  |   20 -
 docs/api/rust/tvm/ir/ty/struct.TypeData.html       |   35 -
 docs/api/rust/tvm/ir/ty/struct.TypeDataNode.html   |   29 -
 docs/api/rust/tvm/ir/ty/struct.TypeNode.html       |   21 -
 docs/api/rust/tvm/ir/ty/struct.TypeVar.html        |   35 -
 docs/api/rust/tvm/ir/ty/struct.TypeVarNode.html    |   28 -
 docs/api/rust/tvm/macro.export!.html               |   10 -
 docs/api/rust/tvm/macro.export.html                |    6 -
 docs/api/rust/tvm/macro.export_mod!.html           |   10 -
 docs/api/rust/tvm/macro.export_mod.html            |    6 -
 docs/api/rust/tvm/macro.export_pass!.html          |   10 -
 docs/api/rust/tvm/macro.export_pass.html           |    6 -
 docs/api/rust/tvm/macro.initialize!.html           |   10 -
 docs/api/rust/tvm/macro.initialize.html            |   11 -
 docs/api/rust/tvm/module/index.html                |    6 -
 docs/api/rust/tvm/module/sidebar-items.js          |    1 -
 docs/api/rust/tvm/module/struct.Module.html        |   37 -
 docs/api/rust/tvm/ndarray/index.html               |   23 -
 docs/api/rust/tvm/ndarray/sidebar-items.js         |    1 -
 docs/api/rust/tvm/ndarray/struct.NDArray.html      |   76 -
 .../rust/tvm/ndarray/struct.NDArrayContainer.html  |   18 -
 docs/api/rust/tvm/ndarray/trait.Num32.html         |    8 -
 docs/api/rust/tvm/python/fn.load.html              |    7 -
 docs/api/rust/tvm/python/index.html                |    4 -
 docs/api/rust/tvm/python/sidebar-items.js          |    1 -
 docs/api/rust/tvm/runtime/array/index.html         |    3 -
 docs/api/rust/tvm/runtime/array/sidebar-items.js   |    1 -
 docs/api/rust/tvm/runtime/array/struct.Array.html  |   34 -
 .../rust/tvm/runtime/array/struct.IntoIter.html    |  138 -
 .../rust/tvm/runtime/context/enum.DeviceType.html  |   49 -
 docs/api/rust/tvm/runtime/context/index.html       |    6 -
 docs/api/rust/tvm/runtime/context/sidebar-items.js |    1 -
 .../rust/tvm/runtime/context/struct.Context.html   |   43 -
 .../context/struct.UnsupportedDeviceError.html     |   21 -
 docs/api/rust/tvm/runtime/enum.ArgValue.html       |  486 --
 docs/api/rust/tvm/runtime/enum.DeviceType.html     |   49 -
 docs/api/rust/tvm/runtime/enum.Error.html          |   42 -
 docs/api/rust/tvm/runtime/enum.NDArrayError.html   |   34 -
 docs/api/rust/tvm/runtime/enum.RetValue.html       |  383 --
 docs/api/rust/tvm/runtime/errors/enum.Error.html   |   42 -
 .../rust/tvm/runtime/errors/enum.NDArrayError.html |   34 -
 docs/api/rust/tvm/runtime/errors/index.html        |    4 -
 docs/api/rust/tvm/runtime/errors/sidebar-items.js  |    1 -
 .../errors/struct.FunctionNotFoundError.html       |   21 -
 .../runtime/errors/struct.TypeMismatchError.html   |   25 -
 docs/api/rust/tvm/runtime/fn.debug_print.html      |    2 -
 docs/api/rust/tvm/runtime/fn.get_last_error.html   |    3 -
 docs/api/rust/tvm/runtime/fn.version.html          |    3 -
 .../rust/tvm/runtime/function/enum.ArgValue.html   |  486 --
 .../rust/tvm/runtime/function/enum.RetValue.html   |  383 --
 .../ffi/constant.DLDataTypeCode_kDLBfloat.html     |    2 -
 .../ffi/constant.DLDataTypeCode_kDLFloat.html      |    2 -
 .../ffi/constant.DLDataTypeCode_kDLInt.html        |    2 -
 .../ffi/constant.DLDataTypeCode_kDLUInt.html       |    2 -
 .../function/ffi/constant.DLDeviceType_kDLCPU.html |    3 -
 .../ffi/constant.DLDeviceType_kDLCPUPinned.html    |    4 -
 .../ffi/constant.DLDeviceType_kDLExtDev.html       |    5 -
 .../function/ffi/constant.DLDeviceType_kDLGPU.html |    3 -
 .../ffi/constant.DLDeviceType_kDLMetal.html        |    3 -
 .../ffi/constant.DLDeviceType_kDLOpenCL.html       |    3 -
 .../ffi/constant.DLDeviceType_kDLROCM.html         |    3 -
 .../function/ffi/constant.DLDeviceType_kDLVPI.html |    3 -
 .../ffi/constant.DLDeviceType_kDLVulkan.html       |    3 -
 .../function/ffi/constant.DLPACK_VERSION.html      |    2 -
 .../runtime/function/ffi/constant.INT16_MAX.html   |    2 -
 .../runtime/function/ffi/constant.INT16_MIN.html   |    2 -
 .../runtime/function/ffi/constant.INT32_MAX.html   |    2 -
 .../runtime/function/ffi/constant.INT32_MIN.html   |    2 -
 .../runtime/function/ffi/constant.INT8_MAX.html    |    2 -
 .../runtime/function/ffi/constant.INT8_MIN.html    |    2 -
 .../runtime/function/ffi/constant.INTPTR_MAX.html  |    2 -
 .../runtime/function/ffi/constant.INTPTR_MIN.html  |    2 -
 .../function/ffi/constant.INT_FAST16_MAX.html      |    2 -
 .../function/ffi/constant.INT_FAST16_MIN.html      |    2 -
 .../function/ffi/constant.INT_FAST32_MAX.html      |    2 -
 .../function/ffi/constant.INT_FAST32_MIN.html      |    2 -
 .../function/ffi/constant.INT_FAST8_MAX.html       |    2 -
 .../function/ffi/constant.INT_FAST8_MIN.html       |    2 -
 .../function/ffi/constant.INT_LEAST16_MAX.html     |    2 -
 .../function/ffi/constant.INT_LEAST16_MIN.html     |    2 -
 .../function/ffi/constant.INT_LEAST32_MAX.html     |    2 -
 .../function/ffi/constant.INT_LEAST32_MIN.html     |    2 -
 .../function/ffi/constant.INT_LEAST8_MAX.html      |    2 -
 .../function/ffi/constant.INT_LEAST8_MIN.html      |    2 -
 .../runtime/function/ffi/constant.PTRDIFF_MAX.html |    2 -
 .../runtime/function/ffi/constant.PTRDIFF_MIN.html |    2 -
 .../function/ffi/constant.SIG_ATOMIC_MAX.html      |    2 -
 .../function/ffi/constant.SIG_ATOMIC_MIN.html      |    2 -
 .../runtime/function/ffi/constant.SIZE_MAX.html    |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMArgFloat.html  |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMArgInt.html    |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMBytes.html     |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMContext.html   |    2 -
 ...constant.TVMArgTypeCode_kTVMDLTensorHandle.html |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMDataType.html  |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMExtBegin.html  |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMExtEnd.html    |    2 -
 .../constant.TVMArgTypeCode_kTVMExtReserveEnd.html |    2 -
 .../constant.TVMArgTypeCode_kTVMModuleHandle.html  |    2 -
 .../constant.TVMArgTypeCode_kTVMNDArrayHandle.html |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMNNVMFirst.html |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMNNVMLast.html  |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMNullptr.html   |    2 -
 .../constant.TVMArgTypeCode_kTVMObjectHandle.html  |    2 -
 ...tant.TVMArgTypeCode_kTVMObjectRValueRefArg.html |    2 -
 .../constant.TVMArgTypeCode_kTVMOpaqueHandle.html  |    2 -
 ...nstant.TVMArgTypeCode_kTVMPackedFuncHandle.html |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMStr.html       |    2 -
 .../ffi/constant.TVMDeviceExtType_kDLAOCL.html     |    2 -
 .../ffi/constant.TVMDeviceExtType_kDLHexagon.html  |    2 -
 .../ffi/constant.TVMDeviceExtType_kDLMicroDev.html |    2 -
 .../ffi/constant.TVMDeviceExtType_kDLSDAccel.html  |    2 -
 .../ffi/constant.TVMDeviceExtType_kDLWebGPU.html   |    2 -
 .../ffi/constant.TVMDeviceExtType_kOpenGL.html     |    2 -
 .../runtime/function/ffi/constant.TVM_VERSION.html |    2 -
 .../runtime/function/ffi/constant.UINT16_MAX.html  |    2 -
 .../runtime/function/ffi/constant.UINT32_MAX.html  |    2 -
 .../runtime/function/ffi/constant.UINT8_MAX.html   |    2 -
 .../runtime/function/ffi/constant.UINTPTR_MAX.html |    2 -
 .../function/ffi/constant.UINT_FAST16_MAX.html     |    2 -
 .../function/ffi/constant.UINT_FAST32_MAX.html     |    2 -
 .../function/ffi/constant.UINT_FAST8_MAX.html      |    2 -
 .../function/ffi/constant.UINT_LEAST16_MAX.html    |    2 -
 .../function/ffi/constant.UINT_LEAST32_MAX.html    |    2 -
 .../function/ffi/constant.UINT_LEAST8_MAX.html     |    2 -
 .../runtime/function/ffi/constant.WINT_MAX.html    |    2 -
 .../runtime/function/ffi/constant.WINT_MIN.html    |    2 -
 .../function/ffi/constant._ATFILE_SOURCE.html      |    2 -
 .../function/ffi/constant._BITS_WCHAR_H.html       |    2 -
 .../function/ffi/constant._DEFAULT_SOURCE.html     |    2 -
 .../runtime/function/ffi/constant._FEATURES_H.html |    2 -
 .../function/ffi/constant._POSIX_C_SOURCE.html     |    2 -
 .../function/ffi/constant._POSIX_SOURCE.html       |    2 -
 .../function/ffi/constant._STDC_PREDEF_H.html      |    2 -
 .../runtime/function/ffi/constant._STDINT_H.html   |    2 -
 .../function/ffi/constant._SYS_CDEFS_H.html        |    2 -
 .../function/ffi/constant.__GLIBC_MINOR__.html     |    2 -
 .../runtime/function/ffi/constant.__GLIBC__.html   |    2 -
 .../function/ffi/constant.__GNU_LIBRARY__.html     |    2 -
 .../ffi/constant.__STDC_IEC_559_COMPLEX__.html     |    2 -
 .../function/ffi/constant.__STDC_IEC_559__.html    |    2 -
 .../function/ffi/constant.__STDC_ISO_10646__.html  |    2 -
 .../function/ffi/constant.__STDC_NO_THREADS__.html |    2 -
 .../function/ffi/constant.__SYSCALL_WORDSIZE.html  |    2 -
 .../function/ffi/constant.__USE_ATFILE.html        |    2 -
 .../function/ffi/constant.__USE_FORTIFY_LEVEL.html |    2 -
 .../function/ffi/constant.__USE_ISOC11.html        |    2 -
 .../function/ffi/constant.__USE_ISOC95.html        |    2 -
 .../function/ffi/constant.__USE_ISOC99.html        |    2 -
 .../runtime/function/ffi/constant.__USE_MISC.html  |    2 -
 .../runtime/function/ffi/constant.__USE_POSIX.html |    2 -
 .../function/ffi/constant.__USE_POSIX199309.html   |    2 -
 .../function/ffi/constant.__USE_POSIX199506.html   |    2 -
 .../function/ffi/constant.__USE_POSIX2.html        |    2 -
 .../ffi/constant.__USE_POSIX_IMPLICITLY.html       |    2 -
 .../function/ffi/constant.__USE_XOPEN2K.html       |    2 -
 .../function/ffi/constant.__USE_XOPEN2K8.html      |    2 -
 .../runtime/function/ffi/constant.__WORDSIZE.html  |    2 -
 .../ffi/constant.__WORDSIZE_TIME64_COMPAT32.html   |    2 -
 .../function/ffi/fn.TVMAPISetLastError.html        |    5 -
 .../tvm/runtime/function/ffi/fn.TVMArrayAlloc.html |   13 -
 .../function/ffi/fn.TVMArrayCopyFromBytes.html     |    7 -
 .../function/ffi/fn.TVMArrayCopyFromTo.html        |    7 -
 .../function/ffi/fn.TVMArrayCopyToBytes.html       |    7 -
 .../tvm/runtime/function/ffi/fn.TVMArrayFree.html  |    5 -
 .../function/ffi/fn.TVMArrayFromDLPack.html        |    7 -
 .../runtime/function/ffi/fn.TVMArrayToDLPack.html  |    7 -
 .../function/ffi/fn.TVMBackendAllocWorkspace.html  |   12 -
 .../function/ffi/fn.TVMBackendFreeWorkspace.html   |    8 -
 .../function/ffi/fn.TVMBackendGetFuncFromEnv.html  |    9 -
 .../function/ffi/fn.TVMBackendParallelBarrier.html |    6 -
 .../function/ffi/fn.TVMBackendParallelLaunch.html  |    8 -
 .../ffi/fn.TVMBackendRegisterSystemLibSymbol.html  |    6 -
 .../runtime/function/ffi/fn.TVMBackendRunOnce.html |   10 -
 .../runtime/function/ffi/fn.TVMCFuncSetReturn.html |    9 -
 .../runtime/function/ffi/fn.TVMCbArgToReturn.html  |    8 -
 .../ffi/fn.TVMDLManagedTensorCallDeleter.html      |    4 -
 .../function/ffi/fn.TVMDeviceAllocDataSpace.html   |   10 -
 .../function/ffi/fn.TVMDeviceCopyDataFromTo.html   |   14 -
 .../function/ffi/fn.TVMDeviceFreeDataSpace.html    |    6 -
 .../tvm/runtime/function/ffi/fn.TVMFuncCall.html   |   16 -
 .../function/ffi/fn.TVMFuncCreateFromCFunc.html    |    9 -
 .../tvm/runtime/function/ffi/fn.TVMFuncFree.html   |    5 -
 .../runtime/function/ffi/fn.TVMFuncGetGlobal.html  |    7 -
 .../function/ffi/fn.TVMFuncListGlobalNames.html    |    6 -
 .../function/ffi/fn.TVMFuncRegisterGlobal.html     |    7 -
 .../function/ffi/fn.TVMFuncRemoveGlobal.html       |    4 -
 .../runtime/function/ffi/fn.TVMGetLastError.html   |    8 -
 .../tvm/runtime/function/ffi/fn.TVMModFree.html    |    9 -
 .../runtime/function/ffi/fn.TVMModGetFunction.html |    8 -
 .../tvm/runtime/function/ffi/fn.TVMModImport.html  |    7 -
 .../function/ffi/fn.TVMModLoadFromFile.html        |    9 -
 .../function/ffi/fn.TVMObjectDerivedFrom.html      |    7 -
 .../tvm/runtime/function/ffi/fn.TVMObjectFree.html |    7 -
 .../function/ffi/fn.TVMObjectGetTypeIndex.html     |    6 -
 .../runtime/function/ffi/fn.TVMObjectRetain.html   |    6 -
 .../function/ffi/fn.TVMObjectTypeKey2Index.html    |    6 -
 .../tvm/runtime/function/ffi/fn.TVMSetStream.html  |   10 -
 .../runtime/function/ffi/fn.TVMStreamCreate.html   |    7 -
 .../tvm/runtime/function/ffi/fn.TVMStreamFree.html |    7 -
 .../ffi/fn.TVMStreamStreamSynchronize.html         |    8 -
 .../runtime/function/ffi/fn.TVMSynchronize.html    |    7 -
 docs/api/rust/tvm/runtime/function/ffi/index.html  |  182 -
 .../rust/tvm/runtime/function/ffi/sidebar-items.js |    1 -
 .../tvm/runtime/function/ffi/struct.DLContext.html |   47 -
 .../runtime/function/ffi/struct.DLDataType.html    |   54 -
 .../function/ffi/struct.DLManagedTensor.html       |   40 -
 .../tvm/runtime/function/ffi/struct.DLTensor.html  |   63 -
 .../runtime/function/ffi/struct.TVMByteArray.html  |   33 -
 .../function/ffi/struct.TVMParallelGroupEnv.html   |   30 -
 .../function/ffi/type.BackendPackedCFunc.html      |    2 -
 .../runtime/function/ffi/type.DLDataTypeCode.html  |    3 -
 .../runtime/function/ffi/type.DLDeviceType.html    |    3 -
 .../function/ffi/type.FTVMParallelLambda.html      |    6 -
 .../runtime/function/ffi/type.TVMArgTypeCode.html  |   14 -
 .../runtime/function/ffi/type.TVMArrayHandle.html  |    3 -
 .../function/ffi/type.TVMBackendPackedCFunc.html   |   10 -
 .../tvm/runtime/function/ffi/type.TVMContext.html  |    3 -
 .../function/ffi/type.TVMDeviceExtType.html        |    3 -
 .../ffi/type.TVMExtensionFuncDeclarer.html         |    7 -
 .../function/ffi/type.TVMFunctionHandle.html       |    3 -
 .../runtime/function/ffi/type.TVMModuleHandle.html |    3 -
 .../runtime/function/ffi/type.TVMObjectHandle.html |    3 -
 .../runtime/function/ffi/type.TVMPackedCFunc.html  |   10 -
 .../function/ffi/type.TVMPackedCFuncFinalizer.html |    4 -
 .../function/ffi/type.TVMRetValueHandle.html       |    3 -
 .../runtime/function/ffi/type.TVMStreamHandle.html |    4 -
 .../runtime/function/ffi/type.int_fast16_t.html    |    2 -
 .../runtime/function/ffi/type.int_fast32_t.html    |    2 -
 .../runtime/function/ffi/type.int_fast64_t.html    |    2 -
 .../tvm/runtime/function/ffi/type.int_fast8_t.html |    2 -
 .../runtime/function/ffi/type.int_least16_t.html   |    2 -
 .../runtime/function/ffi/type.int_least32_t.html   |    2 -
 .../runtime/function/ffi/type.int_least64_t.html   |    2 -
 .../runtime/function/ffi/type.int_least8_t.html    |    2 -
 .../tvm/runtime/function/ffi/type.intmax_t.html    |    2 -
 .../tvm/runtime/function/ffi/type.tvm_index_t.html |    3 -
 .../runtime/function/ffi/type.uint_fast16_t.html   |    2 -
 .../runtime/function/ffi/type.uint_fast32_t.html   |    2 -
 .../runtime/function/ffi/type.uint_fast64_t.html   |    2 -
 .../runtime/function/ffi/type.uint_fast8_t.html    |    2 -
 .../runtime/function/ffi/type.uint_least16_t.html  |    2 -
 .../runtime/function/ffi/type.uint_least32_t.html  |    2 -
 .../runtime/function/ffi/type.uint_least64_t.html  |    2 -
 .../runtime/function/ffi/type.uint_least8_t.html   |    2 -
 .../tvm/runtime/function/ffi/type.uintmax_t.html   |    2 -
 .../tvm/runtime/function/ffi/type.wchar_t.html     |    2 -
 .../tvm/runtime/function/ffi/union.TVMValue.html   |   47 -
 .../api/rust/tvm/runtime/function/fn.register.html |   20 -
 .../tvm/runtime/function/fn.register_override.html |    4 -
 .../tvm/runtime/function/fn.register_untyped.html  |    2 -
 docs/api/rust/tvm/runtime/function/index.html      |   27 -
 .../api/rust/tvm/runtime/function/sidebar-items.js |    1 -
 .../rust/tvm/runtime/function/struct.Function.html |   38 -
 .../tvm/runtime/function/trait.ToFunction.html     |   22 -
 .../api/rust/tvm/runtime/function/trait.Typed.html |   14 -
 .../api/rust/tvm/runtime/function/type.Result.html |    2 -
 docs/api/rust/tvm/runtime/graph_rt/index.html      |    4 -
 .../api/rust/tvm/runtime/graph_rt/sidebar-items.js |    1 -
 .../rust/tvm/runtime/graph_rt/struct.GraphRt.html  |   21 -
 docs/api/rust/tvm/runtime/index.html               |   39 -
 docs/api/rust/tvm/runtime/macro.check_call!.html   |   10 -
 docs/api/rust/tvm/runtime/macro.check_call.html    |    6 -
 docs/api/rust/tvm/runtime/macro.external!.html     |   10 -
 docs/api/rust/tvm/runtime/macro.external.html      |    2 -
 docs/api/rust/tvm/runtime/macro.tvm_call!.html     |   10 -
 docs/api/rust/tvm/runtime/macro.tvm_call.html      |    6 -
 docs/api/rust/tvm/runtime/map/index.html           |    3 -
 docs/api/rust/tvm/runtime/map/sidebar-items.js     |    1 -
 docs/api/rust/tvm/runtime/map/struct.IntoIter.html |  138 -
 docs/api/rust/tvm/runtime/map/struct.Map.html      |   34 -
 docs/api/rust/tvm/runtime/module/index.html        |    6 -
 docs/api/rust/tvm/runtime/module/sidebar-items.js  |    1 -
 .../api/rust/tvm/runtime/module/struct.Module.html |   37 -
 docs/api/rust/tvm/runtime/ndarray/index.html       |   23 -
 docs/api/rust/tvm/runtime/ndarray/sidebar-items.js |    1 -
 .../rust/tvm/runtime/ndarray/struct.NDArray.html   |   76 -
 .../runtime/ndarray/struct.NDArrayContainer.html   |   18 -
 docs/api/rust/tvm/runtime/ndarray/trait.Num32.html |    8 -
 .../rust/tvm/runtime/object/fn.debug_print.html    |    2 -
 docs/api/rust/tvm/runtime/object/index.html        |   14 -
 docs/api/rust/tvm/runtime/object/sidebar-items.js  |    1 -
 .../api/rust/tvm/runtime/object/struct.Object.html |   25 -
 .../rust/tvm/runtime/object/struct.ObjectPtr.html  |  132 -
 .../rust/tvm/runtime/object/struct.ObjectRef.html  |   35 -
 .../rust/tvm/runtime/object/trait.IsObject.html    |   14 -
 .../rust/tvm/runtime/object/trait.IsObjectRef.html |   21 -
 docs/api/rust/tvm/runtime/sidebar-items.js         |    1 -
 docs/api/rust/tvm/runtime/string/index.html        |    3 -
 docs/api/rust/tvm/runtime/string/sidebar-items.js  |    1 -
 .../api/rust/tvm/runtime/string/struct.String.html |   49 -
 .../rust/tvm/runtime/string/struct.StringObj.html  |   17 -
 docs/api/rust/tvm/runtime/struct.ByteArray.html    |   30 -
 docs/api/rust/tvm/runtime/struct.Context.html      |   43 -
 docs/api/rust/tvm/runtime/struct.DataType.html     |   47 -
 docs/api/rust/tvm/runtime/struct.Function.html     |   38 -
 .../tvm/runtime/struct.FunctionNotFoundError.html  |   21 -
 docs/api/rust/tvm/runtime/struct.Module.html       |   37 -
 docs/api/rust/tvm/runtime/struct.NDArray.html      |   76 -
 docs/api/rust/tvm/runtime/struct.Object.html       |   25 -
 docs/api/rust/tvm/runtime/struct.ObjectPtr.html    |  132 -
 docs/api/rust/tvm/runtime/struct.ObjectRef.html    |   35 -
 docs/api/rust/tvm/runtime/struct.String.html       |   49 -
 docs/api/rust/tvm/runtime/struct.StringObj.html    |   17 -
 .../rust/tvm/runtime/struct.TypeMismatchError.html |   25 -
 docs/api/rust/tvm/runtime/trait.IsObject.html      |   14 -
 docs/api/rust/tvm/runtime/trait.IsObjectRef.html   |   21 -
 docs/api/rust/tvm/runtime/value/index.html         |    5 -
 docs/api/rust/tvm/runtime/value/sidebar-items.js   |    1 -
 docs/api/rust/tvm/sidebar-items.js                 |    1 -
 docs/api/rust/tvm/struct.Context.html              |   43 -
 docs/api/rust/tvm/struct.DataType.html             |   47 -
 docs/api/rust/tvm/struct.Function.html             |   38 -
 .../api/rust/tvm/struct.FunctionNotFoundError.html |   21 -
 docs/api/rust/tvm/struct.Module.html               |   37 -
 docs/api/rust/tvm/struct.NDArray.html              |   76 -
 docs/api/rust/tvm/struct.TypeMismatchError.html    |   25 -
 docs/api/rust/tvm/transform/fn.function_pass.html  |    2 -
 docs/api/rust/tvm/transform/index.html             |    5 -
 docs/api/rust/tvm/transform/sidebar-items.js       |    1 -
 docs/api/rust/tvm/transform/struct.PassInfo.html   |   35 -
 .../rust/tvm/transform/struct.PassInfoNode.html    |   23 -
 docs/api/rust/tvm/transform/type.IRModule.html     |    2 -
 docs/api/rust/tvm/transform/type.Pass.html         |    2 -
 docs/api/rust/tvm/transform/type.PassContext.html  |    2 -
 docs/api/rust/tvm/value/index.html                 |    5 -
 docs/api/rust/tvm/value/sidebar-items.js           |    1 -
 docs/api/rust/tvm_graph_rt/all.html                |    4 -
 .../tvm_graph_rt/array/constant.DTYPE_FLOAT32.html |   10 -
 .../tvm_graph_rt/array/constant.DTYPE_FLOAT64.html |   10 -
 .../tvm_graph_rt/array/constant.DTYPE_INT32.html   |   10 -
 .../tvm_graph_rt/array/constant.DTYPE_UINT32.html  |   10 -
 docs/api/rust/tvm_graph_rt/array/enum.Storage.html |   10 -
 .../api/rust/tvm_graph_rt/array/struct.Tensor.html |   10 -
 .../rust/tvm_graph_rt/constant.DTYPE_FLOAT32.html  |    2 -
 .../rust/tvm_graph_rt/constant.DTYPE_FLOAT64.html  |    2 -
 .../rust/tvm_graph_rt/constant.DTYPE_INT32.html    |    2 -
 .../rust/tvm_graph_rt/constant.DTYPE_UINT32.html   |    2 -
 docs/api/rust/tvm_graph_rt/enum.ArgValue.html      |   90 -
 docs/api/rust/tvm_graph_rt/enum.RetValue.html      |   79 -
 docs/api/rust/tvm_graph_rt/enum.Storage.html       |   26 -
 .../rust/tvm_graph_rt/errors/enum.ArrayError.html  |   24 -
 .../tvm_graph_rt/errors/enum.GraphFormatError.html |   29 -
 docs/api/rust/tvm_graph_rt/errors/index.html       |    4 -
 docs/api/rust/tvm_graph_rt/errors/sidebar-items.js |    1 -
 .../errors/struct.FunctionNotFound.html            |   19 -
 .../tvm_graph_rt/errors/struct.InvalidPointer.html |   19 -
 .../ffi/constant.DLDataTypeCode_kDLBfloat.html     |    2 -
 .../ffi/constant.DLDataTypeCode_kDLFloat.html      |    2 -
 .../ffi/constant.DLDataTypeCode_kDLInt.html        |    2 -
 .../ffi/constant.DLDataTypeCode_kDLUInt.html       |    2 -
 .../ffi/constant.DLDeviceType_kDLCPU.html          |    3 -
 .../ffi/constant.DLDeviceType_kDLCPUPinned.html    |    4 -
 .../ffi/constant.DLDeviceType_kDLExtDev.html       |    5 -
 .../ffi/constant.DLDeviceType_kDLGPU.html          |    3 -
 .../ffi/constant.DLDeviceType_kDLMetal.html        |    3 -
 .../ffi/constant.DLDeviceType_kDLOpenCL.html       |    3 -
 .../ffi/constant.DLDeviceType_kDLROCM.html         |    3 -
 .../ffi/constant.DLDeviceType_kDLVPI.html          |    3 -
 .../ffi/constant.DLDeviceType_kDLVulkan.html       |    3 -
 .../tvm_graph_rt/ffi/constant.DLPACK_VERSION.html  |    2 -
 .../rust/tvm_graph_rt/ffi/constant.INT16_MAX.html  |    2 -
 .../rust/tvm_graph_rt/ffi/constant.INT16_MIN.html  |    2 -
 .../rust/tvm_graph_rt/ffi/constant.INT32_MAX.html  |    2 -
 .../rust/tvm_graph_rt/ffi/constant.INT32_MIN.html  |    2 -
 .../rust/tvm_graph_rt/ffi/constant.INT8_MAX.html   |    2 -
 .../rust/tvm_graph_rt/ffi/constant.INT8_MIN.html   |    2 -
 .../rust/tvm_graph_rt/ffi/constant.INTPTR_MAX.html |    2 -
 .../rust/tvm_graph_rt/ffi/constant.INTPTR_MIN.html |    2 -
 .../tvm_graph_rt/ffi/constant.INT_FAST16_MAX.html  |    2 -
 .../tvm_graph_rt/ffi/constant.INT_FAST16_MIN.html  |    2 -
 .../tvm_graph_rt/ffi/constant.INT_FAST32_MAX.html  |    2 -
 .../tvm_graph_rt/ffi/constant.INT_FAST32_MIN.html  |    2 -
 .../tvm_graph_rt/ffi/constant.INT_FAST8_MAX.html   |    2 -
 .../tvm_graph_rt/ffi/constant.INT_FAST8_MIN.html   |    2 -
 .../tvm_graph_rt/ffi/constant.INT_LEAST16_MAX.html |    2 -
 .../tvm_graph_rt/ffi/constant.INT_LEAST16_MIN.html |    2 -
 .../tvm_graph_rt/ffi/constant.INT_LEAST32_MAX.html |    2 -
 .../tvm_graph_rt/ffi/constant.INT_LEAST32_MIN.html |    2 -
 .../tvm_graph_rt/ffi/constant.INT_LEAST8_MAX.html  |    2 -
 .../tvm_graph_rt/ffi/constant.INT_LEAST8_MIN.html  |    2 -
 .../tvm_graph_rt/ffi/constant.PTRDIFF_MAX.html     |    2 -
 .../tvm_graph_rt/ffi/constant.PTRDIFF_MIN.html     |    2 -
 .../tvm_graph_rt/ffi/constant.SIG_ATOMIC_MAX.html  |    2 -
 .../tvm_graph_rt/ffi/constant.SIG_ATOMIC_MIN.html  |    2 -
 .../rust/tvm_graph_rt/ffi/constant.SIZE_MAX.html   |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMArgFloat.html  |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMArgInt.html    |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMBytes.html     |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMContext.html   |    2 -
 ...constant.TVMArgTypeCode_kTVMDLTensorHandle.html |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMDataType.html  |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMExtBegin.html  |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMExtEnd.html    |    2 -
 .../constant.TVMArgTypeCode_kTVMExtReserveEnd.html |    2 -
 .../constant.TVMArgTypeCode_kTVMModuleHandle.html  |    2 -
 .../constant.TVMArgTypeCode_kTVMNDArrayHandle.html |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMNNVMFirst.html |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMNNVMLast.html  |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMNullptr.html   |    2 -
 .../constant.TVMArgTypeCode_kTVMObjectHandle.html  |    2 -
 ...tant.TVMArgTypeCode_kTVMObjectRValueRefArg.html |    2 -
 .../constant.TVMArgTypeCode_kTVMOpaqueHandle.html  |    2 -
 ...nstant.TVMArgTypeCode_kTVMPackedFuncHandle.html |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMStr.html       |    2 -
 .../ffi/constant.TVMDeviceExtType_kDLAOCL.html     |    2 -
 .../ffi/constant.TVMDeviceExtType_kDLHexagon.html  |    2 -
 .../ffi/constant.TVMDeviceExtType_kDLMicroDev.html |    2 -
 .../ffi/constant.TVMDeviceExtType_kDLSDAccel.html  |    2 -
 .../ffi/constant.TVMDeviceExtType_kDLWebGPU.html   |    2 -
 .../ffi/constant.TVMDeviceExtType_kOpenGL.html     |    2 -
 .../tvm_graph_rt/ffi/constant.TVM_VERSION.html     |    2 -
 .../rust/tvm_graph_rt/ffi/constant.UINT16_MAX.html |    2 -
 .../rust/tvm_graph_rt/ffi/constant.UINT32_MAX.html |    2 -
 .../rust/tvm_graph_rt/ffi/constant.UINT8_MAX.html  |    2 -
 .../tvm_graph_rt/ffi/constant.UINTPTR_MAX.html     |    2 -
 .../tvm_graph_rt/ffi/constant.UINT_FAST16_MAX.html |    2 -
 .../tvm_graph_rt/ffi/constant.UINT_FAST32_MAX.html |    2 -
 .../tvm_graph_rt/ffi/constant.UINT_FAST8_MAX.html  |    2 -
 .../ffi/constant.UINT_LEAST16_MAX.html             |    2 -
 .../ffi/constant.UINT_LEAST32_MAX.html             |    2 -
 .../tvm_graph_rt/ffi/constant.UINT_LEAST8_MAX.html |    2 -
 .../rust/tvm_graph_rt/ffi/constant.WINT_MAX.html   |    2 -
 .../rust/tvm_graph_rt/ffi/constant.WINT_MIN.html   |    2 -
 .../tvm_graph_rt/ffi/constant._ATFILE_SOURCE.html  |    2 -
 .../tvm_graph_rt/ffi/constant._BITS_WCHAR_H.html   |    2 -
 .../tvm_graph_rt/ffi/constant._DEFAULT_SOURCE.html |    2 -
 .../tvm_graph_rt/ffi/constant._FEATURES_H.html     |    2 -
 .../tvm_graph_rt/ffi/constant._POSIX_C_SOURCE.html |    2 -
 .../tvm_graph_rt/ffi/constant._POSIX_SOURCE.html   |    2 -
 .../tvm_graph_rt/ffi/constant._STDC_PREDEF_H.html  |    2 -
 .../rust/tvm_graph_rt/ffi/constant._STDINT_H.html  |    2 -
 .../tvm_graph_rt/ffi/constant._SYS_CDEFS_H.html    |    2 -
 .../tvm_graph_rt/ffi/constant.__GLIBC_MINOR__.html |    2 -
 .../rust/tvm_graph_rt/ffi/constant.__GLIBC__.html  |    2 -
 .../tvm_graph_rt/ffi/constant.__GNU_LIBRARY__.html |    2 -
 .../ffi/constant.__STDC_IEC_559_COMPLEX__.html     |    2 -
 .../ffi/constant.__STDC_IEC_559__.html             |    2 -
 .../ffi/constant.__STDC_ISO_10646__.html           |    2 -
 .../ffi/constant.__STDC_NO_THREADS__.html          |    2 -
 .../ffi/constant.__SYSCALL_WORDSIZE.html           |    2 -
 .../tvm_graph_rt/ffi/constant.__USE_ATFILE.html    |    2 -
 .../ffi/constant.__USE_FORTIFY_LEVEL.html          |    2 -
 .../tvm_graph_rt/ffi/constant.__USE_ISOC11.html    |    2 -
 .../tvm_graph_rt/ffi/constant.__USE_ISOC95.html    |    2 -
 .../tvm_graph_rt/ffi/constant.__USE_ISOC99.html    |    2 -
 .../rust/tvm_graph_rt/ffi/constant.__USE_MISC.html |    2 -
 .../tvm_graph_rt/ffi/constant.__USE_POSIX.html     |    2 -
 .../ffi/constant.__USE_POSIX199309.html            |    2 -
 .../ffi/constant.__USE_POSIX199506.html            |    2 -
 .../tvm_graph_rt/ffi/constant.__USE_POSIX2.html    |    2 -
 .../ffi/constant.__USE_POSIX_IMPLICITLY.html       |    2 -
 .../tvm_graph_rt/ffi/constant.__USE_XOPEN2K.html   |    2 -
 .../tvm_graph_rt/ffi/constant.__USE_XOPEN2K8.html  |    2 -
 .../rust/tvm_graph_rt/ffi/constant.__WORDSIZE.html |    2 -
 .../ffi/constant.__WORDSIZE_TIME64_COMPAT32.html   |    2 -
 .../tvm_graph_rt/ffi/fn.TVMAPISetLastError.html    |    5 -
 .../rust/tvm_graph_rt/ffi/fn.TVMArrayAlloc.html    |   13 -
 .../tvm_graph_rt/ffi/fn.TVMArrayCopyFromBytes.html |    7 -
 .../tvm_graph_rt/ffi/fn.TVMArrayCopyFromTo.html    |    7 -
 .../tvm_graph_rt/ffi/fn.TVMArrayCopyToBytes.html   |    7 -
 .../api/rust/tvm_graph_rt/ffi/fn.TVMArrayFree.html |    5 -
 .../tvm_graph_rt/ffi/fn.TVMArrayFromDLPack.html    |    7 -
 .../rust/tvm_graph_rt/ffi/fn.TVMArrayToDLPack.html |    7 -
 .../ffi/fn.TVMBackendAllocWorkspace.html           |   12 -
 .../ffi/fn.TVMBackendFreeWorkspace.html            |    8 -
 .../ffi/fn.TVMBackendGetFuncFromEnv.html           |    9 -
 .../ffi/fn.TVMBackendParallelBarrier.html          |    6 -
 .../ffi/fn.TVMBackendParallelLaunch.html           |    8 -
 .../ffi/fn.TVMBackendRegisterSystemLibSymbol.html  |    6 -
 .../tvm_graph_rt/ffi/fn.TVMBackendRunOnce.html     |   10 -
 .../tvm_graph_rt/ffi/fn.TVMCFuncSetReturn.html     |    9 -
 .../rust/tvm_graph_rt/ffi/fn.TVMCbArgToReturn.html |    8 -
 .../ffi/fn.TVMDLManagedTensorCallDeleter.html      |    4 -
 .../ffi/fn.TVMDeviceAllocDataSpace.html            |   10 -
 .../ffi/fn.TVMDeviceCopyDataFromTo.html            |   14 -
 .../ffi/fn.TVMDeviceFreeDataSpace.html             |    6 -
 docs/api/rust/tvm_graph_rt/ffi/fn.TVMFuncCall.html |   16 -
 .../ffi/fn.TVMFuncCreateFromCFunc.html             |    9 -
 docs/api/rust/tvm_graph_rt/ffi/fn.TVMFuncFree.html |    5 -
 .../rust/tvm_graph_rt/ffi/fn.TVMFuncGetGlobal.html |    7 -
 .../ffi/fn.TVMFuncListGlobalNames.html             |    6 -
 .../tvm_graph_rt/ffi/fn.TVMFuncRegisterGlobal.html |    7 -
 .../tvm_graph_rt/ffi/fn.TVMFuncRemoveGlobal.html   |    4 -
 .../rust/tvm_graph_rt/ffi/fn.TVMGetLastError.html  |    8 -
 docs/api/rust/tvm_graph_rt/ffi/fn.TVMModFree.html  |    9 -
 .../tvm_graph_rt/ffi/fn.TVMModGetFunction.html     |    8 -
 .../api/rust/tvm_graph_rt/ffi/fn.TVMModImport.html |    7 -
 .../tvm_graph_rt/ffi/fn.TVMModLoadFromFile.html    |    9 -
 .../tvm_graph_rt/ffi/fn.TVMObjectDerivedFrom.html  |    7 -
 .../rust/tvm_graph_rt/ffi/fn.TVMObjectFree.html    |    7 -
 .../tvm_graph_rt/ffi/fn.TVMObjectGetTypeIndex.html |    6 -
 .../rust/tvm_graph_rt/ffi/fn.TVMObjectRetain.html  |    6 -
 .../ffi/fn.TVMObjectTypeKey2Index.html             |    6 -
 .../api/rust/tvm_graph_rt/ffi/fn.TVMSetStream.html |   10 -
 .../rust/tvm_graph_rt/ffi/fn.TVMStreamCreate.html  |    7 -
 .../rust/tvm_graph_rt/ffi/fn.TVMStreamFree.html    |    7 -
 .../ffi/fn.TVMStreamStreamSynchronize.html         |    8 -
 .../rust/tvm_graph_rt/ffi/fn.TVMSynchronize.html   |    7 -
 docs/api/rust/tvm_graph_rt/ffi/index.html          |  182 -
 docs/api/rust/tvm_graph_rt/ffi/sidebar-items.js    |    1 -
 .../rust/tvm_graph_rt/ffi/struct.DLContext.html    |   44 -
 .../rust/tvm_graph_rt/ffi/struct.DLDataType.html   |   51 -
 .../tvm_graph_rt/ffi/struct.DLManagedTensor.html   |   38 -
 .../api/rust/tvm_graph_rt/ffi/struct.DLTensor.html |   64 -
 .../rust/tvm_graph_rt/ffi/struct.TVMByteArray.html |   31 -
 .../ffi/struct.TVMParallelGroupEnv.html            |   28 -
 .../tvm_graph_rt/ffi/type.BackendPackedCFunc.html  |    2 -
 .../rust/tvm_graph_rt/ffi/type.DLDataTypeCode.html |    3 -
 .../rust/tvm_graph_rt/ffi/type.DLDeviceType.html   |    3 -
 .../tvm_graph_rt/ffi/type.FTVMParallelLambda.html  |    6 -
 .../rust/tvm_graph_rt/ffi/type.TVMArgTypeCode.html |   14 -
 .../rust/tvm_graph_rt/ffi/type.TVMArrayHandle.html |    3 -
 .../ffi/type.TVMBackendPackedCFunc.html            |   10 -
 .../api/rust/tvm_graph_rt/ffi/type.TVMContext.html |    3 -
 .../tvm_graph_rt/ffi/type.TVMDeviceExtType.html    |    3 -
 .../ffi/type.TVMExtensionFuncDeclarer.html         |    7 -
 .../tvm_graph_rt/ffi/type.TVMFunctionHandle.html   |    3 -
 .../tvm_graph_rt/ffi/type.TVMModuleHandle.html     |    3 -
 .../tvm_graph_rt/ffi/type.TVMObjectHandle.html     |    3 -
 .../rust/tvm_graph_rt/ffi/type.TVMPackedCFunc.html |   10 -
 .../ffi/type.TVMPackedCFuncFinalizer.html          |    4 -
 .../tvm_graph_rt/ffi/type.TVMRetValueHandle.html   |    3 -
 .../tvm_graph_rt/ffi/type.TVMStreamHandle.html     |    4 -
 .../rust/tvm_graph_rt/ffi/type.int_fast16_t.html   |    2 -
 .../rust/tvm_graph_rt/ffi/type.int_fast32_t.html   |    2 -
 .../rust/tvm_graph_rt/ffi/type.int_fast64_t.html   |    2 -
 .../rust/tvm_graph_rt/ffi/type.int_fast8_t.html    |    2 -
 .../rust/tvm_graph_rt/ffi/type.int_least16_t.html  |    2 -
 .../rust/tvm_graph_rt/ffi/type.int_least32_t.html  |    2 -
 .../rust/tvm_graph_rt/ffi/type.int_least64_t.html  |    2 -
 .../rust/tvm_graph_rt/ffi/type.int_least8_t.html   |    2 -
 docs/api/rust/tvm_graph_rt/ffi/type.intmax_t.html  |    2 -
 .../rust/tvm_graph_rt/ffi/type.tvm_index_t.html    |    3 -
 .../rust/tvm_graph_rt/ffi/type.uint_fast16_t.html  |    2 -
 .../rust/tvm_graph_rt/ffi/type.uint_fast32_t.html  |    2 -
 .../rust/tvm_graph_rt/ffi/type.uint_fast64_t.html  |    2 -
 .../rust/tvm_graph_rt/ffi/type.uint_fast8_t.html   |    2 -
 .../rust/tvm_graph_rt/ffi/type.uint_least16_t.html |    2 -
 .../rust/tvm_graph_rt/ffi/type.uint_least32_t.html |    2 -
 .../rust/tvm_graph_rt/ffi/type.uint_least64_t.html |    2 -
 .../rust/tvm_graph_rt/ffi/type.uint_least8_t.html  |    2 -
 docs/api/rust/tvm_graph_rt/ffi/type.uintmax_t.html |    2 -
 docs/api/rust/tvm_graph_rt/ffi/type.wchar_t.html   |    2 -
 docs/api/rust/tvm_graph_rt/ffi/union.TVMValue.html |   45 -
 .../rust/tvm_graph_rt/fn.TVMAPISetLastError.html   |    2 -
 .../tvm_graph_rt/fn.TVMBackendAllocWorkspace.html  |    2 -
 .../tvm_graph_rt/fn.TVMBackendFreeWorkspace.html   |    2 -
 .../tvm_graph_rt/fn.TVMBackendParallelBarrier.html |    2 -
 .../tvm_graph_rt/fn.TVMBackendParallelLaunch.html  |    2 -
 docs/api/rust/tvm_graph_rt/fn.TVMGetLastError.html |    2 -
 docs/api/rust/tvm_graph_rt/fn.load_param_dict.html |    3 -
 docs/api/rust/tvm_graph_rt/fn.remove_item.html     |    2 -
 .../tvm_graph_rt/graph/fn.load_param_dict.html     |   10 -
 docs/api/rust/tvm_graph_rt/graph/struct.Entry.html |   10 -
 docs/api/rust/tvm_graph_rt/graph/struct.Graph.html |   10 -
 .../tvm_graph_rt/graph/struct.GraphExecutor.html   |   10 -
 docs/api/rust/tvm_graph_rt/graph/struct.Node.html  |   10 -
 docs/api/rust/tvm_graph_rt/index.html              |   36 -
 docs/api/rust/tvm_graph_rt/macro.call_packed!.html |   10 -
 docs/api/rust/tvm_graph_rt/macro.call_packed.html  |   10 -
 .../rust/tvm_graph_rt/macro.import_module!.html    |   10 -
 .../api/rust/tvm_graph_rt/macro.import_module.html |    2 -
 .../tvm_graph_rt/module/dso/struct.DsoModule.html  |   10 -
 .../module/syslib/struct.SystemLibModule.html      |   10 -
 .../api/rust/tvm_graph_rt/module/trait.Module.html |   10 -
 .../tvm_graph_rt/packed_func/enum.ArgValue.html    |   90 -
 .../tvm_graph_rt/packed_func/enum.RetValue.html    |   79 -
 docs/api/rust/tvm_graph_rt/packed_func/index.html  |   11 -
 .../rust/tvm_graph_rt/packed_func/sidebar-items.js |    1 -
 .../tvm_graph_rt/packed_func/trait.PackedFunc.html |    3 -
 .../tvm_graph_rt/packed_func/union.TVMValue.html   |   45 -
 docs/api/rust/tvm_graph_rt/sidebar-items.js        |    1 -
 docs/api/rust/tvm_graph_rt/struct.DLTensor.html    |   64 -
 docs/api/rust/tvm_graph_rt/struct.DsoModule.html   |   13 -
 docs/api/rust/tvm_graph_rt/struct.Entry.html       |   19 -
 .../rust/tvm_graph_rt/struct.FuncCallError.html    |   19 -
 docs/api/rust/tvm_graph_rt/struct.Graph.html       |   32 -
 .../rust/tvm_graph_rt/struct.GraphExecutor.html    |   37 -
 docs/api/rust/tvm_graph_rt/struct.Node.html        |   21 -
 .../rust/tvm_graph_rt/struct.SystemLibModule.html  |   12 -
 docs/api/rust/tvm_graph_rt/struct.Tensor.html      |   62 -
 .../tvm_graph_rt/struct.ValueDowncastError.html    |   23 -
 .../threading/fn.TVMBackendParallelBarrier.html    |   10 -
 .../threading/fn.TVMBackendParallelLaunch.html     |   10 -
 docs/api/rust/tvm_graph_rt/trait.Module.html       |    6 -
 docs/api/rust/tvm_graph_rt/trait.PackedFunc.html   |    3 -
 docs/api/rust/tvm_graph_rt/union.TVMValue.html     |   45 -
 .../workspace/fn.TVMBackendAllocWorkspace.html     |   10 -
 .../workspace/fn.TVMBackendFreeWorkspace.html      |   10 -
 .../tvm_graph_rt/workspace/fn.remove_item.html     |   10 -
 docs/api/rust/tvm_macros/all.html                  |    4 -
 docs/api/rust/tvm_macros/derive.Object.html        |    9 -
 docs/api/rust/tvm_macros/index.html                |    4 -
 docs/api/rust/tvm_macros/macro.external!.html      |   10 -
 docs/api/rust/tvm_macros/macro.external.html       |    2 -
 docs/api/rust/tvm_macros/macro.import_module!.html |   10 -
 docs/api/rust/tvm_macros/macro.import_module.html  |    2 -
 docs/api/rust/tvm_macros/sidebar-items.js          |    1 -
 docs/api/rust/tvm_rt/all.html                      |    4 -
 docs/api/rust/tvm_rt/array/index.html              |    3 -
 docs/api/rust/tvm_rt/array/sidebar-items.js        |    1 -
 docs/api/rust/tvm_rt/array/struct.Array.html       |   31 -
 docs/api/rust/tvm_rt/array/struct.IntoIter.html    |  135 -
 docs/api/rust/tvm_rt/context/enum.DeviceType.html  |   47 -
 docs/api/rust/tvm_rt/context/index.html            |    6 -
 docs/api/rust/tvm_rt/context/sidebar-items.js      |    1 -
 docs/api/rust/tvm_rt/context/struct.Context.html   |   41 -
 .../context/struct.UnsupportedDeviceError.html     |   19 -
 docs/api/rust/tvm_rt/enum.ArgValue.html            |  124 -
 docs/api/rust/tvm_rt/enum.DeviceType.html          |   47 -
 docs/api/rust/tvm_rt/enum.RetValue.html            |  111 -
 docs/api/rust/tvm_rt/errors/enum.Error.html        |   39 -
 docs/api/rust/tvm_rt/errors/enum.NDArrayError.html |   32 -
 docs/api/rust/tvm_rt/errors/index.html             |    4 -
 docs/api/rust/tvm_rt/errors/sidebar-items.js       |    1 -
 .../errors/struct.FunctionNotFoundError.html       |   19 -
 .../tvm_rt/errors/struct.TypeMismatchError.html    |   23 -
 docs/api/rust/tvm_rt/fn.get_last_error.html        |    3 -
 docs/api/rust/tvm_rt/fn.version.html               |    3 -
 docs/api/rust/tvm_rt/function/enum.ArgValue.html   |  124 -
 docs/api/rust/tvm_rt/function/enum.RetValue.html   |  111 -
 .../ffi/constant.DLDataTypeCode_kDLBfloat.html     |    2 -
 .../ffi/constant.DLDataTypeCode_kDLFloat.html      |    2 -
 .../ffi/constant.DLDataTypeCode_kDLInt.html        |    2 -
 .../ffi/constant.DLDataTypeCode_kDLUInt.html       |    2 -
 .../function/ffi/constant.DLDeviceType_kDLCPU.html |    3 -
 .../ffi/constant.DLDeviceType_kDLCPUPinned.html    |    4 -
 .../ffi/constant.DLDeviceType_kDLExtDev.html       |    5 -
 .../function/ffi/constant.DLDeviceType_kDLGPU.html |    3 -
 .../ffi/constant.DLDeviceType_kDLMetal.html        |    3 -
 .../ffi/constant.DLDeviceType_kDLOpenCL.html       |    3 -
 .../ffi/constant.DLDeviceType_kDLROCM.html         |    3 -
 .../function/ffi/constant.DLDeviceType_kDLVPI.html |    3 -
 .../ffi/constant.DLDeviceType_kDLVulkan.html       |    3 -
 .../function/ffi/constant.DLPACK_VERSION.html      |    2 -
 .../tvm_rt/function/ffi/constant.INT16_MAX.html    |    2 -
 .../tvm_rt/function/ffi/constant.INT16_MIN.html    |    2 -
 .../tvm_rt/function/ffi/constant.INT32_MAX.html    |    2 -
 .../tvm_rt/function/ffi/constant.INT32_MIN.html    |    2 -
 .../tvm_rt/function/ffi/constant.INT8_MAX.html     |    2 -
 .../tvm_rt/function/ffi/constant.INT8_MIN.html     |    2 -
 .../tvm_rt/function/ffi/constant.INTPTR_MAX.html   |    2 -
 .../tvm_rt/function/ffi/constant.INTPTR_MIN.html   |    2 -
 .../function/ffi/constant.INT_FAST16_MAX.html      |    2 -
 .../function/ffi/constant.INT_FAST16_MIN.html      |    2 -
 .../function/ffi/constant.INT_FAST32_MAX.html      |    2 -
 .../function/ffi/constant.INT_FAST32_MIN.html      |    2 -
 .../function/ffi/constant.INT_FAST8_MAX.html       |    2 -
 .../function/ffi/constant.INT_FAST8_MIN.html       |    2 -
 .../function/ffi/constant.INT_LEAST16_MAX.html     |    2 -
 .../function/ffi/constant.INT_LEAST16_MIN.html     |    2 -
 .../function/ffi/constant.INT_LEAST32_MAX.html     |    2 -
 .../function/ffi/constant.INT_LEAST32_MIN.html     |    2 -
 .../function/ffi/constant.INT_LEAST8_MAX.html      |    2 -
 .../function/ffi/constant.INT_LEAST8_MIN.html      |    2 -
 .../tvm_rt/function/ffi/constant.PTRDIFF_MAX.html  |    2 -
 .../tvm_rt/function/ffi/constant.PTRDIFF_MIN.html  |    2 -
 .../function/ffi/constant.SIG_ATOMIC_MAX.html      |    2 -
 .../function/ffi/constant.SIG_ATOMIC_MIN.html      |    2 -
 .../tvm_rt/function/ffi/constant.SIZE_MAX.html     |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMArgFloat.html  |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMArgInt.html    |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMBytes.html     |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMContext.html   |    2 -
 ...constant.TVMArgTypeCode_kTVMDLTensorHandle.html |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMDataType.html  |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMExtBegin.html  |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMExtEnd.html    |    2 -
 .../constant.TVMArgTypeCode_kTVMExtReserveEnd.html |    2 -
 .../constant.TVMArgTypeCode_kTVMModuleHandle.html  |    2 -
 .../constant.TVMArgTypeCode_kTVMNDArrayHandle.html |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMNNVMFirst.html |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMNNVMLast.html  |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMNullptr.html   |    2 -
 .../constant.TVMArgTypeCode_kTVMObjectHandle.html  |    2 -
 ...tant.TVMArgTypeCode_kTVMObjectRValueRefArg.html |    2 -
 .../constant.TVMArgTypeCode_kTVMOpaqueHandle.html  |    2 -
 ...nstant.TVMArgTypeCode_kTVMPackedFuncHandle.html |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMStr.html       |    2 -
 .../ffi/constant.TVMDeviceExtType_kDLAOCL.html     |    2 -
 .../ffi/constant.TVMDeviceExtType_kDLHexagon.html  |    2 -
 .../ffi/constant.TVMDeviceExtType_kDLMicroDev.html |    2 -
 .../ffi/constant.TVMDeviceExtType_kDLSDAccel.html  |    2 -
 .../ffi/constant.TVMDeviceExtType_kDLWebGPU.html   |    2 -
 .../ffi/constant.TVMDeviceExtType_kOpenGL.html     |    2 -
 .../tvm_rt/function/ffi/constant.TVM_VERSION.html  |    2 -
 .../tvm_rt/function/ffi/constant.UINT16_MAX.html   |    2 -
 .../tvm_rt/function/ffi/constant.UINT32_MAX.html   |    2 -
 .../tvm_rt/function/ffi/constant.UINT8_MAX.html    |    2 -
 .../tvm_rt/function/ffi/constant.UINTPTR_MAX.html  |    2 -
 .../function/ffi/constant.UINT_FAST16_MAX.html     |    2 -
 .../function/ffi/constant.UINT_FAST32_MAX.html     |    2 -
 .../function/ffi/constant.UINT_FAST8_MAX.html      |    2 -
 .../function/ffi/constant.UINT_LEAST16_MAX.html    |    2 -
 .../function/ffi/constant.UINT_LEAST32_MAX.html    |    2 -
 .../function/ffi/constant.UINT_LEAST8_MAX.html     |    2 -
 .../tvm_rt/function/ffi/constant.WINT_MAX.html     |    2 -
 .../tvm_rt/function/ffi/constant.WINT_MIN.html     |    2 -
 .../function/ffi/constant._ATFILE_SOURCE.html      |    2 -
 .../function/ffi/constant._BITS_WCHAR_H.html       |    2 -
 .../function/ffi/constant._DEFAULT_SOURCE.html     |    2 -
 .../tvm_rt/function/ffi/constant._FEATURES_H.html  |    2 -
 .../function/ffi/constant._POSIX_C_SOURCE.html     |    2 -
 .../function/ffi/constant._POSIX_SOURCE.html       |    2 -
 .../function/ffi/constant._STDC_PREDEF_H.html      |    2 -
 .../tvm_rt/function/ffi/constant._STDINT_H.html    |    2 -
 .../tvm_rt/function/ffi/constant._SYS_CDEFS_H.html |    2 -
 .../function/ffi/constant.__GLIBC_MINOR__.html     |    2 -
 .../tvm_rt/function/ffi/constant.__GLIBC__.html    |    2 -
 .../function/ffi/constant.__GNU_LIBRARY__.html     |    2 -
 .../ffi/constant.__STDC_IEC_559_COMPLEX__.html     |    2 -
 .../function/ffi/constant.__STDC_IEC_559__.html    |    2 -
 .../function/ffi/constant.__STDC_ISO_10646__.html  |    2 -
 .../function/ffi/constant.__STDC_NO_THREADS__.html |    2 -
 .../function/ffi/constant.__SYSCALL_WORDSIZE.html  |    2 -
 .../tvm_rt/function/ffi/constant.__USE_ATFILE.html |    2 -
 .../function/ffi/constant.__USE_FORTIFY_LEVEL.html |    2 -
 .../tvm_rt/function/ffi/constant.__USE_ISOC11.html |    2 -
 .../tvm_rt/function/ffi/constant.__USE_ISOC95.html |    2 -
 .../tvm_rt/function/ffi/constant.__USE_ISOC99.html |    2 -
 .../tvm_rt/function/ffi/constant.__USE_MISC.html   |    2 -
 .../tvm_rt/function/ffi/constant.__USE_POSIX.html  |    2 -
 .../function/ffi/constant.__USE_POSIX199309.html   |    2 -
 .../function/ffi/constant.__USE_POSIX199506.html   |    2 -
 .../tvm_rt/function/ffi/constant.__USE_POSIX2.html |    2 -
 .../ffi/constant.__USE_POSIX_IMPLICITLY.html       |    2 -
 .../function/ffi/constant.__USE_XOPEN2K.html       |    2 -
 .../function/ffi/constant.__USE_XOPEN2K8.html      |    2 -
 .../tvm_rt/function/ffi/constant.__WORDSIZE.html   |    2 -
 .../ffi/constant.__WORDSIZE_TIME64_COMPAT32.html   |    2 -
 .../tvm_rt/function/ffi/fn.TVMAPISetLastError.html |    5 -
 .../rust/tvm_rt/function/ffi/fn.TVMArrayAlloc.html |   13 -
 .../function/ffi/fn.TVMArrayCopyFromBytes.html     |    7 -
 .../tvm_rt/function/ffi/fn.TVMArrayCopyFromTo.html |    7 -
 .../function/ffi/fn.TVMArrayCopyToBytes.html       |    7 -
 .../rust/tvm_rt/function/ffi/fn.TVMArrayFree.html  |    5 -
 .../tvm_rt/function/ffi/fn.TVMArrayFromDLPack.html |    7 -
 .../tvm_rt/function/ffi/fn.TVMArrayToDLPack.html   |    7 -
 .../function/ffi/fn.TVMBackendAllocWorkspace.html  |   12 -
 .../function/ffi/fn.TVMBackendFreeWorkspace.html   |    8 -
 .../function/ffi/fn.TVMBackendGetFuncFromEnv.html  |    9 -
 .../function/ffi/fn.TVMBackendParallelBarrier.html |    6 -
 .../function/ffi/fn.TVMBackendParallelLaunch.html  |    8 -
 .../ffi/fn.TVMBackendRegisterSystemLibSymbol.html  |    6 -
 .../tvm_rt/function/ffi/fn.TVMBackendRunOnce.html  |   10 -
 .../tvm_rt/function/ffi/fn.TVMCFuncSetReturn.html  |    9 -
 .../tvm_rt/function/ffi/fn.TVMCbArgToReturn.html   |    8 -
 .../ffi/fn.TVMDLManagedTensorCallDeleter.html      |    4 -
 .../function/ffi/fn.TVMDeviceAllocDataSpace.html   |   10 -
 .../function/ffi/fn.TVMDeviceCopyDataFromTo.html   |   14 -
 .../function/ffi/fn.TVMDeviceFreeDataSpace.html    |    6 -
 .../rust/tvm_rt/function/ffi/fn.TVMFuncCall.html   |   16 -
 .../function/ffi/fn.TVMFuncCreateFromCFunc.html    |    9 -
 .../rust/tvm_rt/function/ffi/fn.TVMFuncFree.html   |    5 -
 .../tvm_rt/function/ffi/fn.TVMFuncGetGlobal.html   |    7 -
 .../function/ffi/fn.TVMFuncListGlobalNames.html    |    6 -
 .../function/ffi/fn.TVMFuncRegisterGlobal.html     |    7 -
 .../function/ffi/fn.TVMFuncRemoveGlobal.html       |    4 -
 .../tvm_rt/function/ffi/fn.TVMGetLastError.html    |    8 -
 .../rust/tvm_rt/function/ffi/fn.TVMModFree.html    |    9 -
 .../tvm_rt/function/ffi/fn.TVMModGetFunction.html  |    8 -
 .../rust/tvm_rt/function/ffi/fn.TVMModImport.html  |    7 -
 .../tvm_rt/function/ffi/fn.TVMModLoadFromFile.html |    9 -
 .../function/ffi/fn.TVMObjectDerivedFrom.html      |    7 -
 .../rust/tvm_rt/function/ffi/fn.TVMObjectFree.html |    7 -
 .../function/ffi/fn.TVMObjectGetTypeIndex.html     |    6 -
 .../tvm_rt/function/ffi/fn.TVMObjectRetain.html    |    6 -
 .../function/ffi/fn.TVMObjectTypeKey2Index.html    |    6 -
 .../rust/tvm_rt/function/ffi/fn.TVMSetStream.html  |   10 -
 .../tvm_rt/function/ffi/fn.TVMStreamCreate.html    |    7 -
 .../rust/tvm_rt/function/ffi/fn.TVMStreamFree.html |    7 -
 .../ffi/fn.TVMStreamStreamSynchronize.html         |    8 -
 .../tvm_rt/function/ffi/fn.TVMSynchronize.html     |    7 -
 docs/api/rust/tvm_rt/function/ffi/index.html       |  182 -
 docs/api/rust/tvm_rt/function/ffi/sidebar-items.js |    1 -
 .../rust/tvm_rt/function/ffi/struct.DLContext.html |   45 -
 .../tvm_rt/function/ffi/struct.DLDataType.html     |   52 -
 .../function/ffi/struct.DLManagedTensor.html       |   38 -
 .../rust/tvm_rt/function/ffi/struct.DLTensor.html  |   61 -
 .../tvm_rt/function/ffi/struct.TVMByteArray.html   |   31 -
 .../function/ffi/struct.TVMParallelGroupEnv.html   |   28 -
 .../function/ffi/type.BackendPackedCFunc.html      |    2 -
 .../tvm_rt/function/ffi/type.DLDataTypeCode.html   |    3 -
 .../tvm_rt/function/ffi/type.DLDeviceType.html     |    3 -
 .../function/ffi/type.FTVMParallelLambda.html      |    6 -
 .../tvm_rt/function/ffi/type.TVMArgTypeCode.html   |   14 -
 .../tvm_rt/function/ffi/type.TVMArrayHandle.html   |    3 -
 .../function/ffi/type.TVMBackendPackedCFunc.html   |   10 -
 .../rust/tvm_rt/function/ffi/type.TVMContext.html  |    3 -
 .../tvm_rt/function/ffi/type.TVMDeviceExtType.html |    3 -
 .../ffi/type.TVMExtensionFuncDeclarer.html         |    7 -
 .../function/ffi/type.TVMFunctionHandle.html       |    3 -
 .../tvm_rt/function/ffi/type.TVMModuleHandle.html  |    3 -
 .../tvm_rt/function/ffi/type.TVMObjectHandle.html  |    3 -
 .../tvm_rt/function/ffi/type.TVMPackedCFunc.html   |   10 -
 .../function/ffi/type.TVMPackedCFuncFinalizer.html |    4 -
 .../function/ffi/type.TVMRetValueHandle.html       |    3 -
 .../tvm_rt/function/ffi/type.TVMStreamHandle.html  |    4 -
 .../tvm_rt/function/ffi/type.int_fast16_t.html     |    2 -
 .../tvm_rt/function/ffi/type.int_fast32_t.html     |    2 -
 .../tvm_rt/function/ffi/type.int_fast64_t.html     |    2 -
 .../rust/tvm_rt/function/ffi/type.int_fast8_t.html |    2 -
 .../tvm_rt/function/ffi/type.int_least16_t.html    |    2 -
 .../tvm_rt/function/ffi/type.int_least32_t.html    |    2 -
 .../tvm_rt/function/ffi/type.int_least64_t.html    |    2 -
 .../tvm_rt/function/ffi/type.int_least8_t.html     |    2 -
 .../rust/tvm_rt/function/ffi/type.intmax_t.html    |    2 -
 .../rust/tvm_rt/function/ffi/type.tvm_index_t.html |    3 -
 .../tvm_rt/function/ffi/type.uint_fast16_t.html    |    2 -
 .../tvm_rt/function/ffi/type.uint_fast32_t.html    |    2 -
 .../tvm_rt/function/ffi/type.uint_fast64_t.html    |    2 -
 .../tvm_rt/function/ffi/type.uint_fast8_t.html     |    2 -
 .../tvm_rt/function/ffi/type.uint_least16_t.html   |    2 -
 .../tvm_rt/function/ffi/type.uint_least32_t.html   |    2 -
 .../tvm_rt/function/ffi/type.uint_least64_t.html   |    2 -
 .../tvm_rt/function/ffi/type.uint_least8_t.html    |    2 -
 .../rust/tvm_rt/function/ffi/type.uintmax_t.html   |    2 -
 .../api/rust/tvm_rt/function/ffi/type.wchar_t.html |    2 -
 .../rust/tvm_rt/function/ffi/union.TVMValue.html   |   45 -
 docs/api/rust/tvm_rt/function/fn.register.html     |   20 -
 .../rust/tvm_rt/function/fn.register_override.html |    4 -
 .../rust/tvm_rt/function/fn.register_untyped.html  |    2 -
 docs/api/rust/tvm_rt/function/index.html           |   27 -
 docs/api/rust/tvm_rt/function/sidebar-items.js     |    1 -
 docs/api/rust/tvm_rt/function/struct.Function.html |   43 -
 .../api/rust/tvm_rt/function/trait.ToFunction.html |   22 -
 docs/api/rust/tvm_rt/function/trait.Typed.html     |   14 -
 docs/api/rust/tvm_rt/function/type.Result.html     |    2 -
 docs/api/rust/tvm_rt/index.html                    |   30 -
 docs/api/rust/tvm_rt/macro.check_call!.html        |   10 -
 docs/api/rust/tvm_rt/macro.check_call.html         |    6 -
 docs/api/rust/tvm_rt/macro.external!.html          |   10 -
 docs/api/rust/tvm_rt/macro.external.html           |    2 -
 docs/api/rust/tvm_rt/macro.tvm_call!.html          |   10 -
 docs/api/rust/tvm_rt/macro.tvm_call.html           |    6 -
 docs/api/rust/tvm_rt/map/index.html                |    3 -
 docs/api/rust/tvm_rt/map/sidebar-items.js          |    1 -
 docs/api/rust/tvm_rt/map/struct.IntoIter.html      |  135 -
 docs/api/rust/tvm_rt/map/struct.Map.html           |   32 -
 docs/api/rust/tvm_rt/module/index.html             |    6 -
 docs/api/rust/tvm_rt/module/sidebar-items.js       |    1 -
 docs/api/rust/tvm_rt/module/struct.Module.html     |   35 -
 docs/api/rust/tvm_rt/ndarray/index.html            |   23 -
 docs/api/rust/tvm_rt/ndarray/sidebar-items.js      |    1 -
 docs/api/rust/tvm_rt/ndarray/struct.NDArray.html   |   86 -
 .../tvm_rt/ndarray/struct.NDArrayContainer.html    |   16 -
 docs/api/rust/tvm_rt/ndarray/trait.Num32.html      |    8 -
 docs/api/rust/tvm_rt/object/fn.debug_print.html    |    2 -
 docs/api/rust/tvm_rt/object/index.html             |   14 -
 .../tvm_rt/object/object_ptr/struct.Object.html    |   10 -
 .../tvm_rt/object/object_ptr/struct.ObjectPtr.html |   10 -
 .../tvm_rt/object/object_ptr/struct.ObjectRef.html |   10 -
 .../tvm_rt/object/object_ptr/trait.IsObject.html   |   10 -
 docs/api/rust/tvm_rt/object/sidebar-items.js       |    1 -
 docs/api/rust/tvm_rt/object/struct.Object.html     |   23 -
 docs/api/rust/tvm_rt/object/struct.ObjectPtr.html  |   40 -
 docs/api/rust/tvm_rt/object/struct.ObjectRef.html  |   33 -
 docs/api/rust/tvm_rt/object/trait.IsObject.html    |   14 -
 docs/api/rust/tvm_rt/object/trait.IsObjectRef.html |   21 -
 docs/api/rust/tvm_rt/sidebar-items.js              |    1 -
 docs/api/rust/tvm_rt/string/index.html             |    3 -
 docs/api/rust/tvm_rt/string/sidebar-items.js       |    1 -
 docs/api/rust/tvm_rt/string/struct.String.html     |   47 -
 docs/api/rust/tvm_rt/string/struct.StringObj.html  |   15 -
 docs/api/rust/tvm_rt/struct.ByteArray.html         |   28 -
 docs/api/rust/tvm_rt/struct.Context.html           |   41 -
 docs/api/rust/tvm_rt/struct.DataType.html          |   45 -
 .../api/rust/tvm_rt/to_function/enum.ArgValue.html |   10 -
 .../api/rust/tvm_rt/to_function/enum.RetValue.html |   10 -
 .../ffi/constant.DLDataTypeCode_kDLBfloat.html     |   10 -
 .../ffi/constant.DLDataTypeCode_kDLFloat.html      |   10 -
 .../ffi/constant.DLDataTypeCode_kDLInt.html        |   10 -
 .../ffi/constant.DLDataTypeCode_kDLUInt.html       |   10 -
 .../ffi/constant.DLDeviceType_kDLCPU.html          |   10 -
 .../ffi/constant.DLDeviceType_kDLCPUPinned.html    |   10 -
 .../ffi/constant.DLDeviceType_kDLExtDev.html       |   10 -
 .../ffi/constant.DLDeviceType_kDLGPU.html          |   10 -
 .../ffi/constant.DLDeviceType_kDLMetal.html        |   10 -
 .../ffi/constant.DLDeviceType_kDLOpenCL.html       |   10 -
 .../ffi/constant.DLDeviceType_kDLROCM.html         |   10 -
 .../ffi/constant.DLDeviceType_kDLVPI.html          |   10 -
 .../ffi/constant.DLDeviceType_kDLVulkan.html       |   10 -
 .../to_function/ffi/constant.DLPACK_VERSION.html   |   10 -
 .../tvm_rt/to_function/ffi/constant.INT16_MAX.html |   10 -
 .../tvm_rt/to_function/ffi/constant.INT16_MIN.html |   10 -
 .../tvm_rt/to_function/ffi/constant.INT32_MAX.html |   10 -
 .../tvm_rt/to_function/ffi/constant.INT32_MIN.html |   10 -
 .../tvm_rt/to_function/ffi/constant.INT8_MAX.html  |   10 -
 .../tvm_rt/to_function/ffi/constant.INT8_MIN.html  |   10 -
 .../to_function/ffi/constant.INTPTR_MAX.html       |   10 -
 .../to_function/ffi/constant.INTPTR_MIN.html       |   10 -
 .../to_function/ffi/constant.INT_FAST16_MAX.html   |   10 -
 .../to_function/ffi/constant.INT_FAST16_MIN.html   |   10 -
 .../to_function/ffi/constant.INT_FAST32_MAX.html   |   10 -
 .../to_function/ffi/constant.INT_FAST32_MIN.html   |   10 -
 .../to_function/ffi/constant.INT_FAST8_MAX.html    |   10 -
 .../to_function/ffi/constant.INT_FAST8_MIN.html    |   10 -
 .../to_function/ffi/constant.INT_LEAST16_MAX.html  |   10 -
 .../to_function/ffi/constant.INT_LEAST16_MIN.html  |   10 -
 .../to_function/ffi/constant.INT_LEAST32_MAX.html  |   10 -
 .../to_function/ffi/constant.INT_LEAST32_MIN.html  |   10 -
 .../to_function/ffi/constant.INT_LEAST8_MAX.html   |   10 -
 .../to_function/ffi/constant.INT_LEAST8_MIN.html   |   10 -
 .../to_function/ffi/constant.PTRDIFF_MAX.html      |   10 -
 .../to_function/ffi/constant.PTRDIFF_MIN.html      |   10 -
 .../to_function/ffi/constant.SIG_ATOMIC_MAX.html   |   10 -
 .../to_function/ffi/constant.SIG_ATOMIC_MIN.html   |   10 -
 .../tvm_rt/to_function/ffi/constant.SIZE_MAX.html  |   10 -
 .../ffi/constant.TVMArgTypeCode_kTVMArgFloat.html  |   10 -
 .../ffi/constant.TVMArgTypeCode_kTVMArgInt.html    |   10 -
 .../ffi/constant.TVMArgTypeCode_kTVMBytes.html     |   10 -
 .../ffi/constant.TVMArgTypeCode_kTVMContext.html   |   10 -
 ...constant.TVMArgTypeCode_kTVMDLTensorHandle.html |   10 -
 .../ffi/constant.TVMArgTypeCode_kTVMDataType.html  |   10 -
 .../ffi/constant.TVMArgTypeCode_kTVMExtBegin.html  |   10 -
 .../ffi/constant.TVMArgTypeCode_kTVMExtEnd.html    |   10 -
 .../constant.TVMArgTypeCode_kTVMExtReserveEnd.html |   10 -
 .../constant.TVMArgTypeCode_kTVMModuleHandle.html  |   10 -
 .../constant.TVMArgTypeCode_kTVMNDArrayHandle.html |   10 -
 .../ffi/constant.TVMArgTypeCode_kTVMNNVMFirst.html |   10 -
 .../ffi/constant.TVMArgTypeCode_kTVMNNVMLast.html  |   10 -
 .../ffi/constant.TVMArgTypeCode_kTVMNullptr.html   |   10 -
 .../constant.TVMArgTypeCode_kTVMObjectHandle.html  |   10 -
 ...tant.TVMArgTypeCode_kTVMObjectRValueRefArg.html |   10 -
 .../constant.TVMArgTypeCode_kTVMOpaqueHandle.html  |   10 -
 ...nstant.TVMArgTypeCode_kTVMPackedFuncHandle.html |   10 -
 .../ffi/constant.TVMArgTypeCode_kTVMStr.html       |   10 -
 .../ffi/constant.TVMDeviceExtType_kDLAOCL.html     |   10 -
 .../ffi/constant.TVMDeviceExtType_kDLHexagon.html  |   10 -
 .../ffi/constant.TVMDeviceExtType_kDLMicroDev.html |   10 -
 .../ffi/constant.TVMDeviceExtType_kDLSDAccel.html  |   10 -
 .../ffi/constant.TVMDeviceExtType_kDLWebGPU.html   |   10 -
 .../ffi/constant.TVMDeviceExtType_kOpenGL.html     |   10 -
 .../to_function/ffi/constant.TVM_VERSION.html      |   10 -
 .../to_function/ffi/constant.UINT16_MAX.html       |   10 -
 .../to_function/ffi/constant.UINT32_MAX.html       |   10 -
 .../tvm_rt/to_function/ffi/constant.UINT8_MAX.html |   10 -
 .../to_function/ffi/constant.UINTPTR_MAX.html      |   10 -
 .../to_function/ffi/constant.UINT_FAST16_MAX.html  |   10 -
 .../to_function/ffi/constant.UINT_FAST32_MAX.html  |   10 -
 .../to_function/ffi/constant.UINT_FAST8_MAX.html   |   10 -
 .../to_function/ffi/constant.UINT_LEAST16_MAX.html |   10 -
 .../to_function/ffi/constant.UINT_LEAST32_MAX.html |   10 -
 .../to_function/ffi/constant.UINT_LEAST8_MAX.html  |   10 -
 .../tvm_rt/to_function/ffi/constant.WINT_MAX.html  |   10 -
 .../tvm_rt/to_function/ffi/constant.WINT_MIN.html  |   10 -
 .../to_function/ffi/constant._ATFILE_SOURCE.html   |   10 -
 .../to_function/ffi/constant._BITS_WCHAR_H.html    |   10 -
 .../to_function/ffi/constant._DEFAULT_SOURCE.html  |   10 -
 .../to_function/ffi/constant._FEATURES_H.html      |   10 -
 .../to_function/ffi/constant._POSIX_C_SOURCE.html  |   10 -
 .../to_function/ffi/constant._POSIX_SOURCE.html    |   10 -
 .../to_function/ffi/constant._STDC_PREDEF_H.html   |   10 -
 .../tvm_rt/to_function/ffi/constant._STDINT_H.html |   10 -
 .../to_function/ffi/constant._SYS_CDEFS_H.html     |   10 -
 .../to_function/ffi/constant.__GLIBC_MINOR__.html  |   10 -
 .../tvm_rt/to_function/ffi/constant.__GLIBC__.html |   10 -
 .../to_function/ffi/constant.__GNU_LIBRARY__.html  |   10 -
 .../ffi/constant.__STDC_IEC_559_COMPLEX__.html     |   10 -
 .../to_function/ffi/constant.__STDC_IEC_559__.html |   10 -
 .../ffi/constant.__STDC_ISO_10646__.html           |   10 -
 .../ffi/constant.__STDC_NO_THREADS__.html          |   10 -
 .../ffi/constant.__SYSCALL_WORDSIZE.html           |   10 -
 .../to_function/ffi/constant.__USE_ATFILE.html     |   10 -
 .../ffi/constant.__USE_FORTIFY_LEVEL.html          |   10 -
 .../to_function/ffi/constant.__USE_ISOC11.html     |   10 -
 .../to_function/ffi/constant.__USE_ISOC95.html     |   10 -
 .../to_function/ffi/constant.__USE_ISOC99.html     |   10 -
 .../to_function/ffi/constant.__USE_MISC.html       |   10 -
 .../to_function/ffi/constant.__USE_POSIX.html      |   10 -
 .../ffi/constant.__USE_POSIX199309.html            |   10 -
 .../ffi/constant.__USE_POSIX199506.html            |   10 -
 .../to_function/ffi/constant.__USE_POSIX2.html     |   10 -
 .../ffi/constant.__USE_POSIX_IMPLICITLY.html       |   10 -
 .../to_function/ffi/constant.__USE_XOPEN2K.html    |   10 -
 .../to_function/ffi/constant.__USE_XOPEN2K8.html   |   10 -
 .../to_function/ffi/constant.__WORDSIZE.html       |   10 -
 .../ffi/constant.__WORDSIZE_TIME64_COMPAT32.html   |   10 -
 .../to_function/ffi/fn.TVMAPISetLastError.html     |   10 -
 .../tvm_rt/to_function/ffi/fn.TVMArrayAlloc.html   |   10 -
 .../to_function/ffi/fn.TVMArrayCopyFromBytes.html  |   10 -
 .../to_function/ffi/fn.TVMArrayCopyFromTo.html     |   10 -
 .../to_function/ffi/fn.TVMArrayCopyToBytes.html    |   10 -
 .../tvm_rt/to_function/ffi/fn.TVMArrayFree.html    |   10 -
 .../to_function/ffi/fn.TVMArrayFromDLPack.html     |   10 -
 .../to_function/ffi/fn.TVMArrayToDLPack.html       |   10 -
 .../ffi/fn.TVMBackendAllocWorkspace.html           |   10 -
 .../ffi/fn.TVMBackendFreeWorkspace.html            |   10 -
 .../ffi/fn.TVMBackendGetFuncFromEnv.html           |   10 -
 .../ffi/fn.TVMBackendParallelBarrier.html          |   10 -
 .../ffi/fn.TVMBackendParallelLaunch.html           |   10 -
 .../ffi/fn.TVMBackendRegisterSystemLibSymbol.html  |   10 -
 .../to_function/ffi/fn.TVMBackendRunOnce.html      |   10 -
 .../to_function/ffi/fn.TVMCFuncSetReturn.html      |   10 -
 .../to_function/ffi/fn.TVMCbArgToReturn.html       |   10 -
 .../ffi/fn.TVMDLManagedTensorCallDeleter.html      |   10 -
 .../ffi/fn.TVMDeviceAllocDataSpace.html            |   10 -
 .../ffi/fn.TVMDeviceCopyDataFromTo.html            |   10 -
 .../to_function/ffi/fn.TVMDeviceFreeDataSpace.html |   10 -
 .../tvm_rt/to_function/ffi/fn.TVMFuncCall.html     |   10 -
 .../to_function/ffi/fn.TVMFuncCreateFromCFunc.html |   10 -
 .../tvm_rt/to_function/ffi/fn.TVMFuncFree.html     |   10 -
 .../to_function/ffi/fn.TVMFuncGetGlobal.html       |   10 -
 .../to_function/ffi/fn.TVMFuncListGlobalNames.html |   10 -
 .../to_function/ffi/fn.TVMFuncRegisterGlobal.html  |   10 -
 .../to_function/ffi/fn.TVMFuncRemoveGlobal.html    |   10 -
 .../tvm_rt/to_function/ffi/fn.TVMGetLastError.html |   10 -
 .../rust/tvm_rt/to_function/ffi/fn.TVMModFree.html |   10 -
 .../to_function/ffi/fn.TVMModGetFunction.html      |   10 -
 .../tvm_rt/to_function/ffi/fn.TVMModImport.html    |   10 -
 .../to_function/ffi/fn.TVMModLoadFromFile.html     |   10 -
 .../to_function/ffi/fn.TVMObjectDerivedFrom.html   |   10 -
 .../tvm_rt/to_function/ffi/fn.TVMObjectFree.html   |   10 -
 .../to_function/ffi/fn.TVMObjectGetTypeIndex.html  |   10 -
 .../tvm_rt/to_function/ffi/fn.TVMObjectRetain.html |   10 -
 .../to_function/ffi/fn.TVMObjectTypeKey2Index.html |   10 -
 .../tvm_rt/to_function/ffi/fn.TVMSetStream.html    |   10 -
 .../tvm_rt/to_function/ffi/fn.TVMStreamCreate.html |   10 -
 .../tvm_rt/to_function/ffi/fn.TVMStreamFree.html   |   10 -
 .../ffi/fn.TVMStreamStreamSynchronize.html         |   10 -
 .../tvm_rt/to_function/ffi/fn.TVMSynchronize.html  |   10 -
 docs/api/rust/tvm_rt/to_function/ffi/index.html    |   10 -
 .../tvm_rt/to_function/ffi/struct.DLContext.html   |   10 -
 .../tvm_rt/to_function/ffi/struct.DLDataType.html  |   10 -
 .../to_function/ffi/struct.DLManagedTensor.html    |   10 -
 .../tvm_rt/to_function/ffi/struct.DLTensor.html    |   10 -
 .../to_function/ffi/struct.TVMByteArray.html       |   10 -
 .../ffi/struct.TVMParallelGroupEnv.html            |   10 -
 .../to_function/ffi/type.BackendPackedCFunc.html   |   10 -
 .../to_function/ffi/type.DLDataTypeCode.html       |   10 -
 .../tvm_rt/to_function/ffi/type.DLDeviceType.html  |   10 -
 .../to_function/ffi/type.FTVMParallelLambda.html   |   10 -
 .../to_function/ffi/type.TVMArgTypeCode.html       |   10 -
 .../to_function/ffi/type.TVMArrayHandle.html       |   10 -
 .../ffi/type.TVMBackendPackedCFunc.html            |   10 -
 .../tvm_rt/to_function/ffi/type.TVMContext.html    |   10 -
 .../to_function/ffi/type.TVMDeviceExtType.html     |   10 -
 .../ffi/type.TVMExtensionFuncDeclarer.html         |   10 -
 .../to_function/ffi/type.TVMFunctionHandle.html    |   10 -
 .../to_function/ffi/type.TVMModuleHandle.html      |   10 -
 .../to_function/ffi/type.TVMObjectHandle.html      |   10 -
 .../to_function/ffi/type.TVMPackedCFunc.html       |   10 -
 .../ffi/type.TVMPackedCFuncFinalizer.html          |   10 -
 .../to_function/ffi/type.TVMRetValueHandle.html    |   10 -
 .../to_function/ffi/type.TVMStreamHandle.html      |   10 -
 .../tvm_rt/to_function/ffi/type.int_fast16_t.html  |   10 -
 .../tvm_rt/to_function/ffi/type.int_fast32_t.html  |   10 -
 .../tvm_rt/to_function/ffi/type.int_fast64_t.html  |   10 -
 .../tvm_rt/to_function/ffi/type.int_fast8_t.html   |   10 -
 .../tvm_rt/to_function/ffi/type.int_least16_t.html |   10 -
 .../tvm_rt/to_function/ffi/type.int_least32_t.html |   10 -
 .../tvm_rt/to_function/ffi/type.int_least64_t.html |   10 -
 .../tvm_rt/to_function/ffi/type.int_least8_t.html  |   10 -
 .../rust/tvm_rt/to_function/ffi/type.intmax_t.html |   10 -
 .../tvm_rt/to_function/ffi/type.tvm_index_t.html   |   10 -
 .../tvm_rt/to_function/ffi/type.uint_fast16_t.html |   10 -
 .../tvm_rt/to_function/ffi/type.uint_fast32_t.html |   10 -
 .../tvm_rt/to_function/ffi/type.uint_fast64_t.html |   10 -
 .../tvm_rt/to_function/ffi/type.uint_fast8_t.html  |   10 -
 .../to_function/ffi/type.uint_least16_t.html       |   10 -
 .../to_function/ffi/type.uint_least32_t.html       |   10 -
 .../to_function/ffi/type.uint_least64_t.html       |   10 -
 .../tvm_rt/to_function/ffi/type.uint_least8_t.html |   10 -
 .../tvm_rt/to_function/ffi/type.uintmax_t.html     |   10 -
 .../rust/tvm_rt/to_function/ffi/type.wchar_t.html  |   10 -
 .../tvm_rt/to_function/ffi/union.TVMValue.html     |   10 -
 .../rust/tvm_rt/to_function/trait.ToFunction.html  |   10 -
 docs/api/rust/tvm_rt/to_function/trait.Typed.html  |   10 -
 docs/api/rust/tvm_rt/value/index.html              |    5 -
 docs/api/rust/tvm_rt/value/sidebar-items.js        |    1 -
 docs/api/rust/tvm_sys/all.html                     |    4 -
 docs/api/rust/tvm_sys/array/index.html             |    2 -
 docs/api/rust/tvm_sys/array/sidebar-items.js       |    1 -
 docs/api/rust/tvm_sys/byte_array/index.html        |    4 -
 docs/api/rust/tvm_sys/byte_array/sidebar-items.js  |    1 -
 .../rust/tvm_sys/byte_array/struct.ByteArray.html  |   28 -
 docs/api/rust/tvm_sys/context/enum.DeviceType.html |   48 -
 docs/api/rust/tvm_sys/context/index.html           |   20 -
 docs/api/rust/tvm_sys/context/sidebar-items.js     |    1 -
 docs/api/rust/tvm_sys/context/struct.Context.html  |   41 -
 .../context/struct.UnsupportedDeviceError.html     |   19 -
 .../tvm_sys/datatype/enum.ParseDataTypeError.html  |   25 -
 docs/api/rust/tvm_sys/datatype/index.html          |    4 -
 docs/api/rust/tvm_sys/datatype/sidebar-items.js    |    1 -
 .../api/rust/tvm_sys/datatype/struct.DataType.html |   45 -
 docs/api/rust/tvm_sys/errors/index.html            |    3 -
 docs/api/rust/tvm_sys/errors/sidebar-items.js      |    1 -
 .../rust/tvm_sys/errors/struct.FuncCallError.html  |   19 -
 .../tvm_sys/errors/struct.ValueDowncastError.html  |   23 -
 .../ffi/constant.DLDataTypeCode_kDLBfloat.html     |    2 -
 .../ffi/constant.DLDataTypeCode_kDLFloat.html      |    2 -
 .../ffi/constant.DLDataTypeCode_kDLInt.html        |    2 -
 .../ffi/constant.DLDataTypeCode_kDLUInt.html       |    2 -
 .../tvm_sys/ffi/constant.DLDeviceType_kDLCPU.html  |    3 -
 .../ffi/constant.DLDeviceType_kDLCPUPinned.html    |    4 -
 .../ffi/constant.DLDeviceType_kDLExtDev.html       |    5 -
 .../tvm_sys/ffi/constant.DLDeviceType_kDLGPU.html  |    3 -
 .../ffi/constant.DLDeviceType_kDLMetal.html        |    3 -
 .../ffi/constant.DLDeviceType_kDLOpenCL.html       |    3 -
 .../tvm_sys/ffi/constant.DLDeviceType_kDLROCM.html |    3 -
 .../tvm_sys/ffi/constant.DLDeviceType_kDLVPI.html  |    3 -
 .../ffi/constant.DLDeviceType_kDLVulkan.html       |    3 -
 .../rust/tvm_sys/ffi/constant.DLPACK_VERSION.html  |    2 -
 docs/api/rust/tvm_sys/ffi/constant.INT16_MAX.html  |    2 -
 docs/api/rust/tvm_sys/ffi/constant.INT16_MIN.html  |    2 -
 docs/api/rust/tvm_sys/ffi/constant.INT32_MAX.html  |    2 -
 docs/api/rust/tvm_sys/ffi/constant.INT32_MIN.html  |    2 -
 docs/api/rust/tvm_sys/ffi/constant.INT8_MAX.html   |    2 -
 docs/api/rust/tvm_sys/ffi/constant.INT8_MIN.html   |    2 -
 docs/api/rust/tvm_sys/ffi/constant.INTPTR_MAX.html |    2 -
 docs/api/rust/tvm_sys/ffi/constant.INTPTR_MIN.html |    2 -
 .../rust/tvm_sys/ffi/constant.INT_FAST16_MAX.html  |    2 -
 .../rust/tvm_sys/ffi/constant.INT_FAST16_MIN.html  |    2 -
 .../rust/tvm_sys/ffi/constant.INT_FAST32_MAX.html  |    2 -
 .../rust/tvm_sys/ffi/constant.INT_FAST32_MIN.html  |    2 -
 .../rust/tvm_sys/ffi/constant.INT_FAST8_MAX.html   |    2 -
 .../rust/tvm_sys/ffi/constant.INT_FAST8_MIN.html   |    2 -
 .../rust/tvm_sys/ffi/constant.INT_LEAST16_MAX.html |    2 -
 .../rust/tvm_sys/ffi/constant.INT_LEAST16_MIN.html |    2 -
 .../rust/tvm_sys/ffi/constant.INT_LEAST32_MAX.html |    2 -
 .../rust/tvm_sys/ffi/constant.INT_LEAST32_MIN.html |    2 -
 .../rust/tvm_sys/ffi/constant.INT_LEAST8_MAX.html  |    2 -
 .../rust/tvm_sys/ffi/constant.INT_LEAST8_MIN.html  |    2 -
 .../api/rust/tvm_sys/ffi/constant.PTRDIFF_MAX.html |    2 -
 .../api/rust/tvm_sys/ffi/constant.PTRDIFF_MIN.html |    2 -
 .../rust/tvm_sys/ffi/constant.SIG_ATOMIC_MAX.html  |    2 -
 .../rust/tvm_sys/ffi/constant.SIG_ATOMIC_MIN.html  |    2 -
 docs/api/rust/tvm_sys/ffi/constant.SIZE_MAX.html   |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMArgFloat.html  |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMArgInt.html    |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMBytes.html     |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMContext.html   |    2 -
 ...constant.TVMArgTypeCode_kTVMDLTensorHandle.html |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMDataType.html  |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMExtBegin.html  |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMExtEnd.html    |    2 -
 .../constant.TVMArgTypeCode_kTVMExtReserveEnd.html |    2 -
 .../constant.TVMArgTypeCode_kTVMModuleHandle.html  |    2 -
 .../constant.TVMArgTypeCode_kTVMNDArrayHandle.html |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMNNVMFirst.html |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMNNVMLast.html  |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMNullptr.html   |    2 -
 .../constant.TVMArgTypeCode_kTVMObjectHandle.html  |    2 -
 ...tant.TVMArgTypeCode_kTVMObjectRValueRefArg.html |    2 -
 .../constant.TVMArgTypeCode_kTVMOpaqueHandle.html  |    2 -
 ...nstant.TVMArgTypeCode_kTVMPackedFuncHandle.html |    2 -
 .../ffi/constant.TVMArgTypeCode_kTVMStr.html       |    2 -
 .../ffi/constant.TVMDeviceExtType_kDLAOCL.html     |    2 -
 .../ffi/constant.TVMDeviceExtType_kDLHexagon.html  |    2 -
 .../ffi/constant.TVMDeviceExtType_kDLMicroDev.html |    2 -
 .../ffi/constant.TVMDeviceExtType_kDLSDAccel.html  |    2 -
 .../ffi/constant.TVMDeviceExtType_kDLWebGPU.html   |    2 -
 .../ffi/constant.TVMDeviceExtType_kOpenGL.html     |    2 -
 .../api/rust/tvm_sys/ffi/constant.TVM_VERSION.html |    2 -
 docs/api/rust/tvm_sys/ffi/constant.UINT16_MAX.html |    2 -
 docs/api/rust/tvm_sys/ffi/constant.UINT32_MAX.html |    2 -
 docs/api/rust/tvm_sys/ffi/constant.UINT8_MAX.html  |    2 -
 .../api/rust/tvm_sys/ffi/constant.UINTPTR_MAX.html |    2 -
 .../rust/tvm_sys/ffi/constant.UINT_FAST16_MAX.html |    2 -
 .../rust/tvm_sys/ffi/constant.UINT_FAST32_MAX.html |    2 -
 .../rust/tvm_sys/ffi/constant.UINT_FAST8_MAX.html  |    2 -
 .../tvm_sys/ffi/constant.UINT_LEAST16_MAX.html     |    2 -
 .../tvm_sys/ffi/constant.UINT_LEAST32_MAX.html     |    2 -
 .../rust/tvm_sys/ffi/constant.UINT_LEAST8_MAX.html |    2 -
 docs/api/rust/tvm_sys/ffi/constant.WINT_MAX.html   |    2 -
 docs/api/rust/tvm_sys/ffi/constant.WINT_MIN.html   |    2 -
 .../rust/tvm_sys/ffi/constant._ATFILE_SOURCE.html  |    2 -
 .../rust/tvm_sys/ffi/constant._BITS_WCHAR_H.html   |    2 -
 .../rust/tvm_sys/ffi/constant._DEFAULT_SOURCE.html |    2 -
 .../api/rust/tvm_sys/ffi/constant._FEATURES_H.html |    2 -
 .../rust/tvm_sys/ffi/constant._POSIX_C_SOURCE.html |    2 -
 .../rust/tvm_sys/ffi/constant._POSIX_SOURCE.html   |    2 -
 .../rust/tvm_sys/ffi/constant._STDC_PREDEF_H.html  |    2 -
 docs/api/rust/tvm_sys/ffi/constant._STDINT_H.html  |    2 -
 .../rust/tvm_sys/ffi/constant._SYS_CDEFS_H.html    |    2 -
 .../rust/tvm_sys/ffi/constant.__GLIBC_MINOR__.html |    2 -
 docs/api/rust/tvm_sys/ffi/constant.__GLIBC__.html  |    2 -
 .../rust/tvm_sys/ffi/constant.__GNU_LIBRARY__.html |    2 -
 .../ffi/constant.__STDC_IEC_559_COMPLEX__.html     |    2 -
 .../tvm_sys/ffi/constant.__STDC_IEC_559__.html     |    2 -
 .../tvm_sys/ffi/constant.__STDC_ISO_10646__.html   |    2 -
 .../tvm_sys/ffi/constant.__STDC_NO_THREADS__.html  |    2 -
 .../tvm_sys/ffi/constant.__SYSCALL_WORDSIZE.html   |    2 -
 .../rust/tvm_sys/ffi/constant.__USE_ATFILE.html    |    2 -
 .../tvm_sys/ffi/constant.__USE_FORTIFY_LEVEL.html  |    2 -
 .../rust/tvm_sys/ffi/constant.__USE_ISOC11.html    |    2 -
 .../rust/tvm_sys/ffi/constant.__USE_ISOC95.html    |    2 -
 .../rust/tvm_sys/ffi/constant.__USE_ISOC99.html    |    2 -
 docs/api/rust/tvm_sys/ffi/constant.__USE_MISC.html |    2 -
 .../api/rust/tvm_sys/ffi/constant.__USE_POSIX.html |    2 -
 .../tvm_sys/ffi/constant.__USE_POSIX199309.html    |    2 -
 .../tvm_sys/ffi/constant.__USE_POSIX199506.html    |    2 -
 .../rust/tvm_sys/ffi/constant.__USE_POSIX2.html    |    2 -
 .../ffi/constant.__USE_POSIX_IMPLICITLY.html       |    2 -
 .../rust/tvm_sys/ffi/constant.__USE_XOPEN2K.html   |    2 -
 .../rust/tvm_sys/ffi/constant.__USE_XOPEN2K8.html  |    2 -
 docs/api/rust/tvm_sys/ffi/constant.__WORDSIZE.html |    2 -
 .../ffi/constant.__WORDSIZE_TIME64_COMPAT32.html   |    2 -
 .../rust/tvm_sys/ffi/fn.TVMAPISetLastError.html    |    5 -
 docs/api/rust/tvm_sys/ffi/fn.TVMArrayAlloc.html    |   13 -
 .../rust/tvm_sys/ffi/fn.TVMArrayCopyFromBytes.html |    7 -
 .../rust/tvm_sys/ffi/fn.TVMArrayCopyFromTo.html    |    7 -
 .../rust/tvm_sys/ffi/fn.TVMArrayCopyToBytes.html   |    7 -
 docs/api/rust/tvm_sys/ffi/fn.TVMArrayFree.html     |    5 -
 .../rust/tvm_sys/ffi/fn.TVMArrayFromDLPack.html    |    7 -
 docs/api/rust/tvm_sys/ffi/fn.TVMArrayToDLPack.html |    7 -
 .../tvm_sys/ffi/fn.TVMBackendAllocWorkspace.html   |   12 -
 .../tvm_sys/ffi/fn.TVMBackendFreeWorkspace.html    |    8 -
 .../tvm_sys/ffi/fn.TVMBackendGetFuncFromEnv.html   |    9 -
 .../tvm_sys/ffi/fn.TVMBackendParallelBarrier.html  |    6 -
 .../tvm_sys/ffi/fn.TVMBackendParallelLaunch.html   |    8 -
 .../ffi/fn.TVMBackendRegisterSystemLibSymbol.html  |    6 -
 .../api/rust/tvm_sys/ffi/fn.TVMBackendRunOnce.html |   10 -
 .../api/rust/tvm_sys/ffi/fn.TVMCFuncSetReturn.html |    9 -
 docs/api/rust/tvm_sys/ffi/fn.TVMCbArgToReturn.html |    8 -
 .../ffi/fn.TVMDLManagedTensorCallDeleter.html      |    4 -
 .../tvm_sys/ffi/fn.TVMDeviceAllocDataSpace.html    |   10 -
 .../tvm_sys/ffi/fn.TVMDeviceCopyDataFromTo.html    |   14 -
 .../tvm_sys/ffi/fn.TVMDeviceFreeDataSpace.html     |    6 -
 docs/api/rust/tvm_sys/ffi/fn.TVMFuncCall.html      |   16 -
 .../tvm_sys/ffi/fn.TVMFuncCreateFromCFunc.html     |    9 -
 docs/api/rust/tvm_sys/ffi/fn.TVMFuncFree.html      |    5 -
 docs/api/rust/tvm_sys/ffi/fn.TVMFuncGetGlobal.html |    7 -
 .../tvm_sys/ffi/fn.TVMFuncListGlobalNames.html     |    6 -
 .../rust/tvm_sys/ffi/fn.TVMFuncRegisterGlobal.html |    7 -
 .../rust/tvm_sys/ffi/fn.TVMFuncRemoveGlobal.html   |    4 -
 docs/api/rust/tvm_sys/ffi/fn.TVMGetLastError.html  |    8 -
 docs/api/rust/tvm_sys/ffi/fn.TVMModFree.html       |    9 -
 .../api/rust/tvm_sys/ffi/fn.TVMModGetFunction.html |    8 -
 docs/api/rust/tvm_sys/ffi/fn.TVMModImport.html     |    7 -
 .../rust/tvm_sys/ffi/fn.TVMModLoadFromFile.html    |    9 -
 .../rust/tvm_sys/ffi/fn.TVMObjectDerivedFrom.html  |    7 -
 docs/api/rust/tvm_sys/ffi/fn.TVMObjectFree.html    |    7 -
 .../rust/tvm_sys/ffi/fn.TVMObjectGetTypeIndex.html |    6 -
 docs/api/rust/tvm_sys/ffi/fn.TVMObjectRetain.html  |    6 -
 .../tvm_sys/ffi/fn.TVMObjectTypeKey2Index.html     |    6 -
 docs/api/rust/tvm_sys/ffi/fn.TVMSetStream.html     |   10 -
 docs/api/rust/tvm_sys/ffi/fn.TVMStreamCreate.html  |    7 -
 docs/api/rust/tvm_sys/ffi/fn.TVMStreamFree.html    |    7 -
 .../tvm_sys/ffi/fn.TVMStreamStreamSynchronize.html |    8 -
 docs/api/rust/tvm_sys/ffi/fn.TVMSynchronize.html   |    7 -
 docs/api/rust/tvm_sys/ffi/index.html               |  182 -
 docs/api/rust/tvm_sys/ffi/sidebar-items.js         |    1 -
 docs/api/rust/tvm_sys/ffi/struct.DLContext.html    |   35 -
 docs/api/rust/tvm_sys/ffi/struct.DLDataType.html   |   52 -
 .../rust/tvm_sys/ffi/struct.DLManagedTensor.html   |   38 -
 docs/api/rust/tvm_sys/ffi/struct.DLTensor.html     |   61 -
 docs/api/rust/tvm_sys/ffi/struct.TVMByteArray.html |   31 -
 .../tvm_sys/ffi/struct.TVMParallelGroupEnv.html    |   28 -
 .../rust/tvm_sys/ffi/type.BackendPackedCFunc.html  |    2 -
 docs/api/rust/tvm_sys/ffi/type.DLDataTypeCode.html |    3 -
 docs/api/rust/tvm_sys/ffi/type.DLDeviceType.html   |    4 -
 .../rust/tvm_sys/ffi/type.FTVMParallelLambda.html  |    6 -
 docs/api/rust/tvm_sys/ffi/type.TVMArgTypeCode.html |   14 -
 docs/api/rust/tvm_sys/ffi/type.TVMArrayHandle.html |    3 -
 .../tvm_sys/ffi/type.TVMBackendPackedCFunc.html    |   10 -
 docs/api/rust/tvm_sys/ffi/type.TVMContext.html     |   13 -
 .../rust/tvm_sys/ffi/type.TVMDeviceExtType.html    |    3 -
 .../tvm_sys/ffi/type.TVMExtensionFuncDeclarer.html |    7 -
 .../rust/tvm_sys/ffi/type.TVMFunctionHandle.html   |    3 -
 .../api/rust/tvm_sys/ffi/type.TVMModuleHandle.html |    3 -
 .../api/rust/tvm_sys/ffi/type.TVMObjectHandle.html |    3 -
 docs/api/rust/tvm_sys/ffi/type.TVMPackedCFunc.html |   10 -
 .../tvm_sys/ffi/type.TVMPackedCFuncFinalizer.html  |    4 -
 .../rust/tvm_sys/ffi/type.TVMRetValueHandle.html   |    3 -
 .../api/rust/tvm_sys/ffi/type.TVMStreamHandle.html |    4 -
 docs/api/rust/tvm_sys/ffi/type.int_fast16_t.html   |    2 -
 docs/api/rust/tvm_sys/ffi/type.int_fast32_t.html   |    2 -
 docs/api/rust/tvm_sys/ffi/type.int_fast64_t.html   |    2 -
 docs/api/rust/tvm_sys/ffi/type.int_fast8_t.html    |    2 -
 docs/api/rust/tvm_sys/ffi/type.int_least16_t.html  |    2 -
 docs/api/rust/tvm_sys/ffi/type.int_least32_t.html  |    2 -
 docs/api/rust/tvm_sys/ffi/type.int_least64_t.html  |    2 -
 docs/api/rust/tvm_sys/ffi/type.int_least8_t.html   |    2 -
 docs/api/rust/tvm_sys/ffi/type.intmax_t.html       |    2 -
 docs/api/rust/tvm_sys/ffi/type.tvm_index_t.html    |    3 -
 docs/api/rust/tvm_sys/ffi/type.uint_fast16_t.html  |    2 -
 docs/api/rust/tvm_sys/ffi/type.uint_fast32_t.html  |    2 -
 docs/api/rust/tvm_sys/ffi/type.uint_fast64_t.html  |    2 -
 docs/api/rust/tvm_sys/ffi/type.uint_fast8_t.html   |    2 -
 docs/api/rust/tvm_sys/ffi/type.uint_least16_t.html |    2 -
 docs/api/rust/tvm_sys/ffi/type.uint_least32_t.html |    2 -
 docs/api/rust/tvm_sys/ffi/type.uint_least64_t.html |    2 -
 docs/api/rust/tvm_sys/ffi/type.uint_least8_t.html  |    2 -
 docs/api/rust/tvm_sys/ffi/type.uintmax_t.html      |    2 -
 docs/api/rust/tvm_sys/ffi/type.wchar_t.html        |    2 -
 docs/api/rust/tvm_sys/ffi/union.TVMValue.html      |   57 -
 docs/api/rust/tvm_sys/index.html                   |   14 -
 docs/api/rust/tvm_sys/macro.call_packed!.html      |   10 -
 docs/api/rust/tvm_sys/macro.call_packed.html       |   10 -
 docs/api/rust/tvm_sys/macro.try_downcast!.html     |   10 -
 docs/api/rust/tvm_sys/macro.try_downcast.html      |    6 -
 .../rust/tvm_sys/packed_func/enum.ArgValue.html    |  146 -
 .../rust/tvm_sys/packed_func/enum.RetValue.html    |  117 -
 docs/api/rust/tvm_sys/packed_func/index.html       |    9 -
 docs/api/rust/tvm_sys/packed_func/sidebar-items.js |    1 -
 .../rust/tvm_sys/packed_func/trait.PackedFunc.html |    3 -
 docs/api/rust/tvm_sys/sidebar-items.js             |    1 -
 docs/api/rust/tvm_sys/value/index.html             |    3 -
 docs/api/rust/tvm_sys/value/sidebar-items.js       |    1 -
 .../value/struct.UnsupportedDeviceError.html       |   19 -
 docs/api/rust/tyck/all.html                        |    4 -
 docs/api/rust/tyck/fn.main.html                    |    2 -
 docs/api/rust/tyck/index.html                      |    4 -
 docs/api/rust/tyck/sidebar-items.js                |    1 -
 docs/api/rust/tyck/struct.Opt.html                 |   24 -
 docs/api/rust/wheel.svg                            |    1 -
 docs/api/typedoc/assets/js/search.json             |    2 +-
 docs/api/typedoc/classes/bytestreamreader.html     |   12 +-
 docs/api/typedoc/classes/cachedcallstack.html      |   34 +-
 docs/api/typedoc/classes/dlcontext.html            |  277 -
 docs/api/typedoc/classes/dldatatype.html           |   12 +-
 docs/api/typedoc/classes/dldevice.html             |  277 +
 docs/api/typedoc/classes/environment.html          |   12 +-
 docs/api/typedoc/classes/ffilibrary.html           |   20 +-
 docs/api/typedoc/classes/graphexecutor.html        |  433 ++
 docs/api/typedoc/classes/graphruntime.html         |  433 --
 docs/api/typedoc/classes/instance.html             |  130 +-
 docs/api/typedoc/classes/memory.html               |   34 +-
 docs/api/typedoc/classes/module.html               |   10 +-
 docs/api/typedoc/classes/ndarray.html              |   34 +-
 docs/api/typedoc/classes/packedfunccell.html       |    6 +-
 docs/api/typedoc/classes/rpcserver.html            |   14 +-
 docs/api/typedoc/classes/scalar.html               |    6 +-
 docs/api/typedoc/classes/webgpucontext.html        |   12 +-
 docs/api/typedoc/enums/argtypecode.html            |   56 +-
 docs/api/typedoc/enums/aynccallbackcode.html       |    4 +-
 docs/api/typedoc/enums/dldatatypecode.html         |    8 +-
 docs/api/typedoc/enums/rpcserverstate.html         |   12 +-
 docs/api/typedoc/enums/sizeof.html                 |   36 +-
 docs/api/typedoc/index.html                        |  124 +-
 docs/api/typedoc/interfaces/disposable.html        |    4 +-
 docs/api/typedoc/interfaces/functioninfo.html      |    6 +-
 docs/api/typedoc/interfaces/libraryprovider.html   |    4 +-
 docs/contribute/code_guide.html                    |    4 +-
 docs/contribute/code_review.html                   |    2 +-
 docs/contribute/committer_guide.html               |    2 +-
 docs/contribute/community.html                     |    2 +-
 docs/contribute/document.html                      |    2 +-
 docs/contribute/error_handling.html                |    2 +-
 docs/contribute/git_howto.html                     |    2 +-
 docs/contribute/index.html                         |    2 +-
 docs/contribute/pull_request.html                  |    2 +-
 docs/contribute/release_process.html               |    2 +-
 docs/deploy/android.html                           |    5 +-
 docs/deploy/arm_compute_lib.html                   |   17 +-
 docs/deploy/bnns.html                              |  516 ++
 docs/deploy/cpp_deploy.html                        |    3 +-
 docs/deploy/hls.html                               |   11 +-
 docs/deploy/index.html                             |   12 +-
 docs/deploy/integrate.html                         |    3 +-
 docs/deploy/tensorrt.html                          |    7 +-
 docs/deploy/vitis_ai.html                          |   23 +-
 docs/dev/benchmark.html                            |    2 +-
 docs/dev/codebase_walkthrough.html                 |   12 +-
 docs/dev/convert_layout.html                       |    2 +-
 docs/dev/debugger.html                             |   16 +-
 docs/dev/frontend/tensorflow.html                  |    2 +-
 docs/dev/how_to.html                               |    6 +-
 docs/dev/hybrid_script.html                        |    2 +-
 docs/dev/index.html                                |    6 +-
 docs/dev/inferbound.html                           |    2 +-
 docs/dev/introduction_to_module_serialization.html |    2 +-
 docs/dev/microtvm_design.html                      |   24 +-
 docs/dev/pass_infra.html                           |    2 +-
 docs/dev/relay_add_op.html                         |    2 +-
 docs/dev/relay_add_pass.html                       |    2 +-
 docs/dev/relay_bring_your_own_codegen.html         |   10 +-
 docs/dev/relay_intro.html                          |    2 +-
 docs/dev/relay_op_strategy.html                    |    2 +-
 docs/dev/runtime.html                              |    2 +-
 docs/dev/security.html                             |    2 +-
 docs/dev/virtual_machine.html                      |   12 +-
 docs/faq.html                                      |    2 +-
 docs/genindex.html                                 |  186 +-
 docs/index.html                                    |    4 +-
 docs/install/docker.html                           |    2 +-
 docs/install/from_source.html                      |    4 +-
 docs/install/index.html                            |    2 +-
 docs/install/nnpack.html                           |    2 +-
 docs/langref/hybrid_script.html                    |    2 +-
 docs/langref/index.html                            |    3 +-
 docs/langref/relay_adt.html                        |    2 +-
 docs/langref/relay_expr.html                       |    2 +-
 docs/langref/relay_op.html                         |    2 +-
 docs/langref/relay_pattern.html                    |   15 +-
 docs/langref/relay_type.html                       |    2 +-
 docs/microtvm/index.html                           |    8 +-
 docs/objects.inv                                   |  Bin 18131 -> 18639 bytes
 docs/py-modindex.html                              |    6 +-
 docs/search.html                                   |    2 +-
 docs/searchindex.js                                |    2 +-
 .../auto_scheduler/sg_execution_times.html         |   16 +-
 .../auto_scheduler/tune_conv2d_layer_cuda.html     | 1346 +----
 docs/tutorials/auto_scheduler/tune_matmul_x86.html |  666 --
 .../tutorials/auto_scheduler/tune_network_arm.html |   53 +-
 .../auto_scheduler/tune_network_cuda.html          |   20 +-
 .../auto_scheduler/tune_network_mali.html          |  121 +-
 .../tutorials/auto_scheduler/tune_network_x86.html |   45 +-
 docs/tutorials/auto_scheduler/tune_sparse_x86.html |  788 +++
 docs/tutorials/autotvm/sg_execution_times.html     |   15 +-
 docs/tutorials/autotvm/tune_conv2d_cuda.html       |   65 +-
 docs/tutorials/autotvm/tune_relay_arm.html         |   13 +-
 docs/tutorials/autotvm/tune_relay_cuda.html        |   13 +-
 docs/tutorials/autotvm/tune_relay_mobile_gpu.html  |   27 +-
 docs/tutorials/autotvm/tune_relay_x86.html         |   13 +-
 docs/tutorials/autotvm/tune_simple_template.html   |  729 ---
 docs/tutorials/dev/bring_your_own_datatypes.html   |   10 +-
 docs/tutorials/dev/low_level_custom_pass.html      |    8 +-
 docs/tutorials/dev/sg_execution_times.html         |    8 +-
 docs/tutorials/dev/use_pass_infra.html             |    4 +-
 docs/tutorials/frontend/build_gcn.html             |   12 +-
 .../frontend/deploy_model_on_android.html          |   36 +-
 docs/tutorials/frontend/deploy_model_on_rasp.html  |   10 +-
 .../frontend/deploy_object_detection_pytorch.html  |   10 +-
 docs/tutorials/frontend/deploy_prequantized.html   |   12 +-
 .../frontend/deploy_prequantized_tflite.html       |   16 +-
 docs/tutorials/frontend/deploy_quantized.html      |    8 +-
 docs/tutorials/frontend/deploy_sparse.html         |   30 +-
 docs/tutorials/frontend/deploy_ssd_gluoncv.html    |   20 +-
 docs/tutorials/frontend/from_caffe2.html           |   10 +-
 docs/tutorials/frontend/from_coreml.html           |   10 +-
 docs/tutorials/frontend/from_darknet.html          |   21 +-
 docs/tutorials/frontend/from_keras.html            |    8 +-
 docs/tutorials/frontend/from_mxnet.html            |   14 +-
 docs/tutorials/frontend/from_onnx.html             |    6 +-
 docs/tutorials/frontend/from_pytorch.html          |   27 +-
 docs/tutorials/frontend/from_tensorflow.html       |   26 +-
 docs/tutorials/frontend/from_tflite.html           |    8 +-
 docs/tutorials/frontend/sg_execution_times.html    |   42 +-
 docs/tutorials/frontend/using_external_lib.html    |   14 +-
 .../get_started/auto_tuning_with_python.html       | 1031 ++++
 docs/tutorials/get_started/autotvm_matmul.html     |  760 +++
 .../get_started/cross_compilation_and_rpc.html     |   44 +-
 docs/tutorials/get_started/install.html            |  442 ++
 docs/tutorials/get_started/introduction.html       |  518 ++
 docs/tutorials/get_started/relay_quick_start.html  |  155 +-
 docs/tutorials/get_started/sg_execution_times.html |   17 +-
 .../get_started/tensor_expr_get_started.html       | 1263 +++-
 docs/tutorials/get_started/tune_matmul_x86.html    |  697 +++
 .../get_started/tvmc_command_line_driver.html      |  503 +-
 docs/tutorials/index.html                          |  285 +-
 docs/tutorials/language/extern_op.html             |   16 +-
 docs/tutorials/language/intrin_math.html           |   20 +-
 docs/tutorials/language/reduction.html             |   14 +-
 docs/tutorials/language/scan.html                  |   10 +-
 docs/tutorials/language/schedule_primitives.html   |   12 +-
 docs/tutorials/language/sg_execution_times.html    |   20 +-
 docs/tutorials/language/tedd.html                  |    4 +-
 docs/tutorials/language/tensorize.html             |   26 +-
 docs/tutorials/language/tuple_inputs.html          |   12 +-
 docs/tutorials/micro/micro_reference_vm.html       |    4 +-
 docs/tutorials/micro/micro_tflite.html             |   22 +-
 docs/tutorials/micro/sg_execution_times.html       |    8 +-
 docs/tutorials/optimize/opt_conv_cuda.html         |   16 +-
 docs/tutorials/optimize/opt_conv_tensorcore.html   |   18 +-
 docs/tutorials/optimize/opt_gemm.html              |   58 +-
 .../optimize/opt_matmul_auto_tensorcore.html       |   20 +-
 docs/tutorials/optimize/sg_execution_times.html    |   12 +-
 docs/tutorials/topi/intro_topi.html                |   14 +-
 docs/tutorials/topi/sg_execution_times.html        |    6 +-
 docs/vta/dev/config.html                           |    2 +-
 docs/vta/dev/hardware.html                         |    2 +-
 docs/vta/dev/index.html                            |    2 +-
 docs/vta/index.html                                |    2 +-
 docs/vta/install.html                              |    2 +-
 docs/vta/tutorials/autotvm/sg_execution_times.html |    6 +-
 docs/vta/tutorials/autotvm/tune_relay_vta.html     |  194 +-
 .../tutorials/frontend/deploy_classification.html  |   34 +-
 .../vta/tutorials/frontend/sg_execution_times.html |    6 +-
 docs/vta/tutorials/index.html                      |    2 +-
 docs/vta/tutorials/matrix_multiply.html            |    6 +-
 docs/vta/tutorials/optimize/convolution_opt.html   |    6 +-
 .../tutorials/optimize/matrix_multiply_opt.html    |    2 +-
 .../vta/tutorials/optimize/sg_execution_times.html |    8 +-
 docs/vta/tutorials/sg_execution_times.html         |    8 +-
 docs/vta/tutorials/vta_get_started.html            |   10 +-
 3864 files changed, 214253 insertions(+), 242627 deletions(-)

diff --git a/docs/_downloads/02fc8627299fa0b05eb017773b471bfa/from_tflite.py b/docs/_downloads/02fc8627299fa0b05eb017773b471bfa/from_tflite.py
index f7e8422..a85cfce 100644
--- a/docs/_downloads/02fc8627299fa0b05eb017773b471bfa/from_tflite.py
+++ b/docs/_downloads/02fc8627299fa0b05eb017773b471bfa/from_tflite.py
@@ -148,7 +148,7 @@ with transform.PassContext(opt_level=3):
 # --------------
 import tvm
 from tvm import te
-from tvm.contrib import graph_runtime as runtime
+from tvm.contrib import graph_executor as runtime
 
 # Create a runtime executor module
 module = runtime.GraphModule(lib["default"](tvm.cpu()))
diff --git a/docs/_downloads/0bb862dbb3a4c434477f93fe2c147fbb/tune_simple_template.py b/docs/_downloads/0bb862dbb3a4c434477f93fe2c147fbb/tune_simple_template.py
deleted file mode 100644
index bd2dcf3..0000000
--- a/docs/_downloads/0bb862dbb3a4c434477f93fe2c147fbb/tune_simple_template.py
+++ /dev/null
@@ -1,336 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-Writing Tunable Templates and Using the Auto-tuner
-==================================================
-**Author**: `Lianmin Zheng <https://github.com/merrymercy>`_
-
-This is an introduction tutorial to the auto-tuning module in TVM.
-
-There are two steps in auto-tuning.
-The first step is defining a search space.
-The second step is running a search algorithm to explore through this space.
-In this tutorial, you can learn how to perform these two steps in TVM.
-The whole workflow is illustrated by a matrix multiplication example.
-
-Note that this tutorial will not run on Windows or recent versions of macOS. To
-get it to run, you will need to wrap the body of this tutorial in a :code:`if
-__name__ == "__main__":` block.
-"""
-
-######################################################################
-# Install dependencies
-# --------------------
-# To use autotvm package in TVM, we need to install some extra dependencies.
-# This step (installing xgboost) can be skipped as it doesn't need XGBoost
-# (change "3" to "2" if you use python2):
-#
-# .. code-block:: bash
-#
-#   pip3 install --user psutil xgboost cloudpickle
-#
-# To make TVM run faster in tuning, it is recommended to use cython
-# as FFI of TVM. In the root directory of TVM, execute
-# (change "3" to "2" if you use python2):
-#
-# .. code-block:: bash
-#
-#   pip3 install --user cython
-#   sudo make cython3
-#
-# Now return to python code. Import packages.
-
-import logging
-import sys
-
-import numpy as np
-import tvm
-from tvm import te
-import tvm.testing
-
-# the module is called `autotvm`
-from tvm import autotvm
-
-######################################################################
-# Step 1:  Define the search space
-# --------------------------------
-# In this section, we will rewrite a deterministic TVM schedule code to a
-# tunable schedule template. You can regard the process of search space definition
-# as the parameterization of our existing schedule code.
-#
-# To begin with, here is how we implement a blocked matrix multiplication in TVM.
-
-# Matmul V0: Constant tiling factor
-def matmul_v0(N, L, M, dtype):
-    A = te.placeholder((N, L), name="A", dtype=dtype)
-    B = te.placeholder((L, M), name="B", dtype=dtype)
-
-    k = te.reduce_axis((0, L), name="k")
-    C = te.compute((N, M), lambda i, j: te.sum(A[i, k] * B[k, j], axis=k), name="C")
-    s = te.create_schedule(C.op)
-
-    # schedule
-    y, x = s[C].op.axis
-    k = s[C].op.reduce_axis[0]
-
-    yo, yi = s[C].split(y, 8)
-    xo, xi = s[C].split(x, 8)
-
-    s[C].reorder(yo, xo, k, yi, xi)
-
-    return s, [A, B, C]
-
-
-#####################################################################
-# Parametrize the schedule
-# ^^^^^^^^^^^^^^^^^^^^^^^^
-# In the previous schedule code, we use a constant "8" as tiling factor.
-# However, it might not be the best one because the best tiling factor depends
-# on real hardware environment and input shape.
-#
-# If you want the schedule code to be portable across a wider range of input shapes
-# and target hardware, it is better to define a set of candidate values and
-# pick the best one according to the measurement results on target hardware.
-#
-# In autotvm, we can define a tunable parameter, or a "knob" for such kind of value.
-
-# Matmul V1: List candidate values
-@autotvm.template("tutorial/matmul_v1")  # 1. use a decorator
-def matmul_v1(N, L, M, dtype):
-    A = te.placeholder((N, L), name="A", dtype=dtype)
-    B = te.placeholder((L, M), name="B", dtype=dtype)
-
-    k = te.reduce_axis((0, L), name="k")
-    C = te.compute((N, M), lambda i, j: te.sum(A[i, k] * B[k, j], axis=k), name="C")
-    s = te.create_schedule(C.op)
-
-    # schedule
-    y, x = s[C].op.axis
-    k = s[C].op.reduce_axis[0]
-
-    # 2. get the config object
-    cfg = autotvm.get_config()
-
-    # 3. define search space
-    cfg.define_knob("tile_y", [1, 2, 4, 8, 16])
-    cfg.define_knob("tile_x", [1, 2, 4, 8, 16])
-
-    # 4. schedule according to config
-    yo, yi = s[C].split(y, cfg["tile_y"].val)
-    xo, xi = s[C].split(x, cfg["tile_x"].val)
-
-    s[C].reorder(yo, xo, k, yi, xi)
-
-    return s, [A, B, C]
-
-
-###############################################################################
-# Here we make four modifications to the previous schedule code and get
-# a tunable "template". We can explain the modifications one by one.
-#
-# 1. Use a decorator to mark this function as a simple template.
-# 2. Get a config object:
-#    You can regard this :code:`cfg` as an argument of this function but
-#    we obtain it in a different way. With this argument, this function is no longer
-#    a deterministic schedule code. Instead, we can pass different configurations to
-#    this function and get different schedules, so this function is a "template".
-#
-#    To make the template function more compact, we do two things in a single function.
-#    (1) define a search space and (2) schedule according to an entity in this space.
-#    To achieve this, we make :code:`cfg` be either
-#    a :any:`ConfigSpace` or a :any:`ConfigEntity` object.
-#
-#    When it is a :any:`ConfigSpace`, it will collect all tunable knobs in this function and
-#    build the search space.
-#    When it is a :any:`ConfigEntity`, it will ignore all space definition API
-#    (namely, :code:`cfg.define_XXXXX(...)`).   Instead, it stores deterministic values for
-#    all tunable knobs, and we schedule according to these values.
-#
-#    During auto-tuning, we will first call this template with a :any:`ConfigSpace`
-#    object to build the search space. Then we call this template with different :any:`ConfigEntity`
-#    in the built space to get different schedules. Finally we will measure the code generated by
-#    different schedules and pick the best one.
-#
-# 3. Define two tunable knobs. The first one is :code:`tile_y` with
-#    5 possible values. The second one is :code:`tile_x` with a same
-#    list of possible values. These two knobs are independent, so they
-#    span a search space with size = 5x5 = 25
-# 4. Schedule according to the deterministic values in :code:`cfg`
-#
-
-#####################################################################
-# Use better space definition API
-# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-# In the previous template, we manually list all possible values for a knob.
-# This is the lowest level API to define the space.
-# However, we also provide another set of API to make the space definition
-# easier and smarter. It is recommended to use this set of high level API.
-#
-# In the following example, we use :any:`ConfigSpace.define_split` to define a split
-# knob. It will enumerate all the possible ways to split an axis and construct
-# the space.
-#
-# We also have :any:`ConfigSpace.define_reorder` for reorder knob and
-# :any:`ConfigSpace.define_annotate` for annotation like unroll, vectorization,
-# thread binding.
-# When the high level API cannot meet your requirement, you can always fall
-# back to use low level API.
-
-
-@autotvm.template("tutorial/matmul")
-def matmul(N, L, M, dtype):
-    A = te.placeholder((N, L), name="A", dtype=dtype)
-    B = te.placeholder((L, M), name="B", dtype=dtype)
-
-    k = te.reduce_axis((0, L), name="k")
-    C = te.compute((N, M), lambda i, j: te.sum(A[i, k] * B[k, j], axis=k), name="C")
-    s = te.create_schedule(C.op)
-
-    # schedule
-    y, x = s[C].op.axis
-    k = s[C].op.reduce_axis[0]
-
-    ##### define space begin #####
-    cfg = autotvm.get_config()
-    cfg.define_split("tile_y", y, num_outputs=2)
-    cfg.define_split("tile_x", x, num_outputs=2)
-    ##### define space end #####
-
-    # schedule according to config
-    yo, yi = cfg["tile_y"].apply(s, C, y)
-    xo, xi = cfg["tile_x"].apply(s, C, x)
-
-    s[C].reorder(yo, xo, k, yi, xi)
-
-    return s, [A, B, C]
-
-
-######################################################################
-# .. note:: More Explanation on :code:`cfg.defile_split`
-#
-#  In this template, :code:`cfg.define_split("tile_y", y, num_outputs=2)` will enumerate
-#  all possible combinations that can split axis y into two axes with factors of the length of y.
-#  For example, if the length of y is 32 and we want to split it into two axes
-#  using factors of 32, then there are 6 possible values for
-#  (length of outer axis, length of inner axis) pair, namely
-#  (32, 1), (16, 2), (8, 4), (4, 8), (2, 16) or (1, 32).
-#  They are just the 6 possible values of `tile_y`.
-#
-#  During schedule, :code:`cfg["tile_y"]` is a :code:`SplitEntity` object.
-#  We stores the lengths of outer axes and inner axes in :code:`cfg['tile_y'].size`
-#  (a tuple with two elements).
-#  In this template, we apply it by using :code:`yo, yi = cfg['tile_y'].apply(s, C, y)`.
-#  Actually, this is equivalent to
-#  :code:`yo, yi = s[C].split(y, cfg["tile_y"].size[1])`
-#  or  :code:`yo, yi = s[C].split(y, nparts=cfg['tile_y"].size[0])`
-#
-#  The advantage of using cfg.apply API is that it makes multi-level split
-#  (when num_outputs >= 3) easier.
-
-######################################################################
-# Step 2:  Search through the space
-# ---------------------------------
-# In step 1, we build the search space by extending our old schedule code
-# into a template. The next step is to pick a tuner and explore in this space.
-#
-# Auto-tuners in TVM
-# ^^^^^^^^^^^^^^^^^^
-# The job for a tuner can be described by following pseudo code
-#
-#   .. code-block:: c
-#
-#    ct = 0
-#    while ct < max_number_of_trials:
-#        propose a batch of configs
-#        measure this batch of configs on real hardware and get results
-#        ct += batch_size
-#
-# When proposing the next batch of configs, the tuner can take different strategies. We
-# provide four tuners with different strategies in autotvm.
-#
-# * :any:`RandomTuner`: Enumerate the space in a random order
-# * :any:`GridSearchTuner`: Enumerate the space in a grid search order
-# * :any:`GATuner`: Using genetic algorithm to search through the space
-# * :any:`XGBTuner`: Uses a model based method. Train a XGBoost model to predict the speed of lowered IR and pick the next batch according to the prediction.
-#
-# You can choose the tuner according to the size of your space, your time budget and other factors.
-# For example, if your space is very small (less than 1000), a gridsearch tuner or a
-# random tuner is good enough. If your space is at the level of 10^9 (this is the space
-# size of a conv2d operator on CUDA GPU), XGBoostTuner can explore more efficiently
-# and find better configs.
-
-################################################################
-# Begin tuning
-# ^^^^^^^^^^^^
-# Here we continue our matrix multiplication example.
-# First we should create a tuning task.
-# We can also inspect the initialized search space.
-# In this case, for a 512x512 square matrix multiplication, the space size
-# is 10x10=100
-N, L, M = 512, 512, 512
-task = autotvm.task.create("tutorial/matmul", args=(N, L, M, "float32"), target="llvm")
-print(task.config_space)
-
-################################################################
-# Then we need to define how to measure the generated code and pick a tuner.
-# Since our space is small, a random tuner is just okay.
-#
-# We only make 10 trials in this tutorial for demonstration. In practice,
-# you can do more trials according to your time budget.
-# We will log the tuning results into a log file. This file can be
-# used to get the best config later.
-
-# logging config (for printing tuning log to the screen)
-logging.getLogger("autotvm").setLevel(logging.DEBUG)
-logging.getLogger("autotvm").addHandler(logging.StreamHandler(sys.stdout))
-
-# There are two steps for measuring a config: build and run.
-# By default, we use all CPU cores to compile program. Then measure them sequentially.
-# We measure 5 times and take average to reduce variance.
-measure_option = autotvm.measure_option(builder="local", runner=autotvm.LocalRunner(number=5))
-
-# Begin tuning with RandomTuner, log records to file `matmul.log`
-# You can use alternatives like XGBTuner.
-tuner = autotvm.tuner.RandomTuner(task)
-tuner.tune(
-    n_trial=10,
-    measure_option=measure_option,
-    callbacks=[autotvm.callback.log_to_file("matmul.log")],
-)
-
-#########################################################################
-# Finally we apply history best from the cache file and check its correctness.
-# We can call the function :code:`matmul` directly under the
-# :any:`autotvm.apply_history_best` context. When we call this function,
-# it will query the dispatch context with its argument and get the best config
-# with the same argument.
-
-# apply history best from log file
-with autotvm.apply_history_best("matmul.log"):
-    with tvm.target.Target("llvm"):
-        s, arg_bufs = matmul(N, L, M, "float32")
-        func = tvm.build(s, arg_bufs)
-
-# check correctness
-a_np = np.random.uniform(size=(N, L)).astype(np.float32)
-b_np = np.random.uniform(size=(L, M)).astype(np.float32)
-c_np = a_np.dot(b_np)
-
-c_tvm = tvm.nd.empty(c_np.shape)
-func(tvm.nd.array(a_np), tvm.nd.array(b_np), c_tvm)
-
-tvm.testing.assert_allclose(c_np, c_tvm.asnumpy(), rtol=1e-2)
diff --git a/docs/_downloads/0c30ce88b67b0e8d46494348ab36c9fb/from_tflite.ipynb b/docs/_downloads/0c30ce88b67b0e8d46494348ab36c9fb/from_tflite.ipynb
index a5e08f1..da3bb1e 100644
--- a/docs/_downloads/0c30ce88b67b0e8d46494348ab36c9fb/from_tflite.ipynb
+++ b/docs/_downloads/0c30ce88b67b0e8d46494348ab36c9fb/from_tflite.ipynb
@@ -105,7 +105,7 @@
       },
       "outputs": [],
       "source": [
-        "import tvm\nfrom tvm import te\nfrom tvm.contrib import graph_runtime as runtime\n\n# Create a runtime executor module\nmodule = runtime.GraphModule(lib[\"default\"](tvm.cpu()))\n\n# Feed input data\nmodule.set_input(input_tensor, tvm.nd.array(image_data))\n\n# Run\nmodule.run()\n\n# Get output\ntvm_output = module.get_output(0).asnumpy()"
+        "import tvm\nfrom tvm import te\nfrom tvm.contrib import graph_executor as runtime\n\n# Create a runtime executor module\nmodule = runtime.GraphModule(lib[\"default\"](tvm.cpu()))\n\n# Feed input data\nmodule.set_input(input_tensor, tvm.nd.array(image_data))\n\n# Run\nmodule.run()\n\n# Get output\ntvm_output = module.get_output(0).asnumpy()"
       ]
     },
     {
diff --git a/docs/_downloads/0c8b1cb0bb1d1dff7899c341215a0f35/tune_network_mali.ipynb b/docs/_downloads/0c8b1cb0bb1d1dff7899c341215a0f35/tune_network_mali.ipynb
index b0a4054..ab3faab 100644
--- a/docs/_downloads/0c8b1cb0bb1d1dff7899c341215a0f35/tune_network_mali.ipynb
+++ b/docs/_downloads/0c8b1cb0bb1d1dff7899c341215a0f35/tune_network_mali.ipynb
@@ -26,7 +26,7 @@
       },
       "outputs": [],
       "source": [
-        "import numpy as np\n\nimport tvm\nfrom tvm import relay, auto_scheduler\nimport tvm.relay.testing\nfrom tvm.contrib import graph_runtime\nimport os"
+        "import numpy as np\n\nimport tvm\nfrom tvm import relay, auto_scheduler\nimport tvm.relay.testing\nfrom tvm.contrib import graph_executor\nimport os"
       ]
     },
     {
@@ -44,7 +44,7 @@
       },
       "outputs": [],
       "source": [
-        "def get_network(name, batch_size, layout=\"NHWC\", dtype=\"float32\"):\n    \"\"\"Get the symbol definition and random weight of a network\"\"\"\n\n    # auto-scheduler prefers NHWC layout\n    if layout == \"NHWC\":\n        image_shape = (224, 224, 3)\n    elif layout == \"NCHW\":\n        image_shape = (3, 224, 224)\n    else:\n        raise ValueError(\"Invalid layout: \" + layout)\n\n    input_shape = (batch_size,) + image_shape\n    output_shape = (batch_size, 1000)\n\n    [...]
+        "def get_network(name, batch_size, layout=\"NHWC\", dtype=\"float32\"):\n    \"\"\"Get the symbol definition and random weight of a network\"\"\"\n\n    # auto-scheduler prefers NHWC layout\n    if layout == \"NHWC\":\n        image_shape = (224, 224, 3)\n    elif layout == \"NCHW\":\n        image_shape = (3, 224, 224)\n    else:\n        raise ValueError(\"Invalid layout: \" + layout)\n\n    input_shape = (batch_size,) + image_shape\n    output_shape = (batch_size, 1000)\n\n    [...]
       ]
     },
     {
@@ -80,14 +80,14 @@
       },
       "outputs": [],
       "source": [
-        "# Extract tasks from the network\nprint(\"Extract tasks...\")\nmod, params, input_shape, output_shape = get_network(network, batch_size, layout, dtype=dtype)\ntasks, task_weights = auto_scheduler.extract_tasks(mod[\"main\"], params, target, target_host)\n\nfor idx, task in enumerate(tasks):\n    print(\"========== Task %d  (workload key: %s) ==========\" % (idx, task.workload_key))\n    print(task.compute_dag)"
+        "# Extract tasks from the network\nprint(\"Extract tasks...\")\nmod, params, input_shape, output_shape = get_network(network, batch_size, layout, dtype=dtype)\ntasks, task_weights = auto_scheduler.extract_tasks(mod[\"main\"], params, target)\n\nfor idx, task in enumerate(tasks):\n    print(\"========== Task %d  (workload key: %s) ==========\" % (idx, task.workload_key))\n    print(task.compute_dag)"
       ]
     },
     {
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "<div class=\"alert alert-info\"><h4>Note</h4><p>How to get the hardware parameters from remote device\n\n  .. code-block:: python\n\n    from tvm.auto_scheduler.utils import request_remote\n    remote = request_remote(device_key, \"0.0.0.0\", 9190)\n    ctx = remote.cl()\n    max_shared_memory_per_block = ctx.max_shared_memory_per_block\n    # There is no explicit local memory limition\n    # so we can use INT32_MAX to disalbe the check on local_memory.\n    max_local_memory_per [...]
+        "<div class=\"alert alert-info\"><h4>Note</h4><p>How to get the hardware parameters from remote device\n\n  .. code-block:: python\n\n    from tvm.auto_scheduler.utils import request_remote\n    remote = request_remote(device_key, \"0.0.0.0\", 9190)\n    dev = remote.cl()\n    max_shared_memory_per_block = dev.max_shared_memory_per_block\n    # There is no explicit local memory limition\n    # so we can use INT32_MAX to disalbe the check on local_memory.\n    max_local_memory_per [...]
       ]
     },
     {
@@ -105,14 +105,14 @@
       },
       "outputs": [],
       "source": [
-        "def tune_and_evaluate():\n    print(\"Begin tuning...\")\n    tuner = auto_scheduler.TaskScheduler(tasks, task_weights)\n    tune_option = auto_scheduler.TuningOptions(\n        num_measure_trials=200,  # change this to 20000 to achieve the best performance\n        builder=auto_scheduler.LocalBuilder(build_func=\"ndk\" if use_ndk else \"default\"),\n        runner=auto_scheduler.RPCRunner(\n            device_key, host=\"0.0.0.0\", port=9190, repeat=3, timeout=50\n        ),\n  [...]
+        "def tune_and_evaluate():\n    print(\"Begin tuning...\")\n    tuner = auto_scheduler.TaskScheduler(tasks, task_weights)\n    tune_option = auto_scheduler.TuningOptions(\n        num_measure_trials=200,  # change this to 20000 to achieve the best performance\n        builder=auto_scheduler.LocalBuilder(build_func=\"ndk\" if use_ndk else \"default\"),\n        runner=auto_scheduler.RPCRunner(\n            device_key, host=\"0.0.0.0\", port=9190, repeat=3, timeout=50\n        ),\n  [...]
       ]
     },
     {
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "<div class=\"alert alert-info\"><h4>Note</h4><p>Explain the printed information during tuning\n\n  During the tuning, a lot of information will be printed on the console.\n  They are used for debugging purposes. The most important info is the output\n  of the task scheduler. The following table is a sample output.\n\n  .. code-block:: c\n\n    ----------------------------------------------------------------------\n    ------------------------------  [ Task Scheduler ]\n    ----- [...]
+        "<div class=\"alert alert-info\"><h4>Note</h4><p>Explain the printed information during tuning\n\n  During the tuning, a lot of information will be printed on the console.\n  They are used for debugging purposes. The most important info is the output\n  of the task scheduler. The following table is a sample output.\n\n  .. code-block:: c\n\n    ----------------------------------------------------------------------\n    ------------------------------  [ Task Scheduler ]\n    ----- [...]
       ]
     },
     {
diff --git a/docs/_downloads/0d95a85fc279fdff660608ef305b9107/tune_simple_template.ipynb b/docs/_downloads/0d95a85fc279fdff660608ef305b9107/tune_simple_template.ipynb
deleted file mode 100644
index 1c76bd0..0000000
--- a/docs/_downloads/0d95a85fc279fdff660608ef305b9107/tune_simple_template.ipynb
+++ /dev/null
@@ -1,190 +0,0 @@
-{
-  "cells": [
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "collapsed": false
-      },
-      "outputs": [],
-      "source": [
-        "%matplotlib inline"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "\nWriting Tunable Templates and Using the Auto-tuner\n==================================================\n**Author**: `Lianmin Zheng <https://github.com/merrymercy>`_\n\nThis is an introduction tutorial to the auto-tuning module in TVM.\n\nThere are two steps in auto-tuning.\nThe first step is defining a search space.\nThe second step is running a search algorithm to explore through this space.\nIn this tutorial, you can learn how to perform these two steps in TVM.\nThe whole wo [...]
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "Install dependencies\n--------------------\nTo use autotvm package in TVM, we need to install some extra dependencies.\nThis step (installing xgboost) can be skipped as it doesn't need XGBoost\n(change \"3\" to \"2\" if you use python2):\n\n.. code-block:: bash\n\n  pip3 install --user psutil xgboost cloudpickle\n\nTo make TVM run faster in tuning, it is recommended to use cython\nas FFI of TVM. In the root directory of TVM, execute\n(change \"3\" to \"2\" if you use python2):\n [...]
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "collapsed": false
-      },
-      "outputs": [],
-      "source": [
-        "import logging\nimport sys\n\nimport numpy as np\nimport tvm\nfrom tvm import te\nimport tvm.testing\n\n# the module is called `autotvm`\nfrom tvm import autotvm"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "Step 1:  Define the search space\n--------------------------------\nIn this section, we will rewrite a deterministic TVM schedule code to a\ntunable schedule template. You can regard the process of search space definition\nas the parameterization of our existing schedule code.\n\nTo begin with, here is how we implement a blocked matrix multiplication in TVM.\n\n"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "collapsed": false
-      },
-      "outputs": [],
-      "source": [
-        "# Matmul V0: Constant tiling factor\ndef matmul_v0(N, L, M, dtype):\n    A = te.placeholder((N, L), name=\"A\", dtype=dtype)\n    B = te.placeholder((L, M), name=\"B\", dtype=dtype)\n\n    k = te.reduce_axis((0, L), name=\"k\")\n    C = te.compute((N, M), lambda i, j: te.sum(A[i, k] * B[k, j], axis=k), name=\"C\")\n    s = te.create_schedule(C.op)\n\n    # schedule\n    y, x = s[C].op.axis\n    k = s[C].op.reduce_axis[0]\n\n    yo, yi = s[C].split(y, 8)\n    xo, xi = s[C].split( [...]
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "Parametrize the schedule\n^^^^^^^^^^^^^^^^^^^^^^^^\nIn the previous schedule code, we use a constant \"8\" as tiling factor.\nHowever, it might not be the best one because the best tiling factor depends\non real hardware environment and input shape.\n\nIf you want the schedule code to be portable across a wider range of input shapes\nand target hardware, it is better to define a set of candidate values and\npick the best one according to the measurement results on target hardwar [...]
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "collapsed": false
-      },
-      "outputs": [],
-      "source": [
-        "# Matmul V1: List candidate values\n@autotvm.template(\"tutorial/matmul_v1\")  # 1. use a decorator\ndef matmul_v1(N, L, M, dtype):\n    A = te.placeholder((N, L), name=\"A\", dtype=dtype)\n    B = te.placeholder((L, M), name=\"B\", dtype=dtype)\n\n    k = te.reduce_axis((0, L), name=\"k\")\n    C = te.compute((N, M), lambda i, j: te.sum(A[i, k] * B[k, j], axis=k), name=\"C\")\n    s = te.create_schedule(C.op)\n\n    # schedule\n    y, x = s[C].op.axis\n    k = s[C].op.reduce_ax [...]
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "Here we make four modifications to the previous schedule code and get\na tunable \"template\". We can explain the modifications one by one.\n\n1. Use a decorator to mark this function as a simple template.\n2. Get a config object:\n   You can regard this :code:`cfg` as an argument of this function but\n   we obtain it in a different way. With this argument, this function is no longer\n   a deterministic schedule code. Instead, we can pass different configurations to\n   this fun [...]
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "Use better space definition API\n^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\nIn the previous template, we manually list all possible values for a knob.\nThis is the lowest level API to define the space.\nHowever, we also provide another set of API to make the space definition\neasier and smarter. It is recommended to use this set of high level API.\n\nIn the following example, we use :any:`ConfigSpace.define_split` to define a split\nknob. It will enumerate all the possible ways to split a [...]
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "collapsed": false
-      },
-      "outputs": [],
-      "source": [
-        "@autotvm.template(\"tutorial/matmul\")\ndef matmul(N, L, M, dtype):\n    A = te.placeholder((N, L), name=\"A\", dtype=dtype)\n    B = te.placeholder((L, M), name=\"B\", dtype=dtype)\n\n    k = te.reduce_axis((0, L), name=\"k\")\n    C = te.compute((N, M), lambda i, j: te.sum(A[i, k] * B[k, j], axis=k), name=\"C\")\n    s = te.create_schedule(C.op)\n\n    # schedule\n    y, x = s[C].op.axis\n    k = s[C].op.reduce_axis[0]\n\n    ##### define space begin #####\n    cfg = autotvm.g [...]
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "<div class=\"alert alert-info\"><h4>Note</h4><p>More Explanation on :code:`cfg.defile_split`</p></div>\n\n In this template, :code:`cfg.define_split(\"tile_y\", y, num_outputs=2)` will enumerate\n all possible combinations that can split axis y into two axes with factors of the length of y.\n For example, if the length of y is 32 and we want to split it into two axes\n using factors of 32, then there are 6 possible values for\n (length of outer axis, length of inner axis) pair,  [...]
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "Step 2:  Search through the space\n---------------------------------\nIn step 1, we build the search space by extending our old schedule code\ninto a template. The next step is to pick a tuner and explore in this space.\n\nAuto-tuners in TVM\n^^^^^^^^^^^^^^^^^^\nThe job for a tuner can be described by following pseudo code\n\n  .. code-block:: c\n\n   ct = 0\n   while ct < max_number_of_trials:\n       propose a batch of configs\n       measure this batch of configs on real hard [...]
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "Begin tuning\n^^^^^^^^^^^^\nHere we continue our matrix multiplication example.\nFirst we should create a tuning task.\nWe can also inspect the initialized search space.\nIn this case, for a 512x512 square matrix multiplication, the space size\nis 10x10=100\n\n"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "collapsed": false
-      },
-      "outputs": [],
-      "source": [
-        "N, L, M = 512, 512, 512\ntask = autotvm.task.create(\"tutorial/matmul\", args=(N, L, M, \"float32\"), target=\"llvm\")\nprint(task.config_space)"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "Then we need to define how to measure the generated code and pick a tuner.\nSince our space is small, a random tuner is just okay.\n\nWe only make 10 trials in this tutorial for demonstration. In practice,\nyou can do more trials according to your time budget.\nWe will log the tuning results into a log file. This file can be\nused to get the best config later.\n\n"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "collapsed": false
-      },
-      "outputs": [],
-      "source": [
-        "# logging config (for printing tuning log to the screen)\nlogging.getLogger(\"autotvm\").setLevel(logging.DEBUG)\nlogging.getLogger(\"autotvm\").addHandler(logging.StreamHandler(sys.stdout))\n\n# There are two steps for measuring a config: build and run.\n# By default, we use all CPU cores to compile program. Then measure them sequentially.\n# We measure 5 times and take average to reduce variance.\nmeasure_option = autotvm.measure_option(builder=\"local\", runner=autotvm.LocalR [...]
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "Finally we apply history best from the cache file and check its correctness.\nWe can call the function :code:`matmul` directly under the\n:any:`autotvm.apply_history_best` context. When we call this function,\nit will query the dispatch context with its argument and get the best config\nwith the same argument.\n\n"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "collapsed": false
-      },
-      "outputs": [],
-      "source": [
-        "# apply history best from log file\nwith autotvm.apply_history_best(\"matmul.log\"):\n    with tvm.target.Target(\"llvm\"):\n        s, arg_bufs = matmul(N, L, M, \"float32\")\n        func = tvm.build(s, arg_bufs)\n\n# check correctness\na_np = np.random.uniform(size=(N, L)).astype(np.float32)\nb_np = np.random.uniform(size=(L, M)).astype(np.float32)\nc_np = a_np.dot(b_np)\n\nc_tvm = tvm.nd.empty(c_np.shape)\nfunc(tvm.nd.array(a_np), tvm.nd.array(b_np), c_tvm)\n\ntvm.testing.as [...]
-      ]
-    }
-  ],
-  "metadata": {
-    "kernelspec": {
-      "display_name": "Python 3",
-      "language": "python",
-      "name": "python3"
-    },
-    "language_info": {
-      "codemirror_mode": {
-        "name": "ipython",
-        "version": 3
-      },
-      "file_extension": ".py",
-      "mimetype": "text/x-python",
-      "name": "python",
-      "nbconvert_exporter": "python",
-      "pygments_lexer": "ipython3",
-      "version": "3.6.12"
-    }
-  },
-  "nbformat": 4,
-  "nbformat_minor": 0
-}
\ No newline at end of file
diff --git a/docs/_downloads/1087d6f8ab0d37227d1a26042de7ac88/auto_tuning_with_python.py b/docs/_downloads/1087d6f8ab0d37227d1a26042de7ac88/auto_tuning_with_python.py
new file mode 100644
index 0000000..0256776
--- /dev/null
+++ b/docs/_downloads/1087d6f8ab0d37227d1a26042de7ac88/auto_tuning_with_python.py
@@ -0,0 +1,477 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Compiling and Optimizing a Model with the Python AutoScheduler
+==============================================================
+**Author**:
+`Chris Hoge <https://github.com/hogepodge>`_
+
+In the `TVMC Tutorial <tvmc_command_line_driver>`_, we covered how to compile, run, and tune a
+pre-trained vision model, ResNet-50-v2, using the command line interface for
+TVM, TVMC. TVM is more than just a command-line tool, though; it is an
+optimizing framework with APIs available for a number of different languages
+that gives you tremendous flexibility in working with machine learning models.
+
+In this tutorial we will cover the same ground we did with TVMC, but show how
+it is done with the Python API. Upon completion of this section, we will have
+used the Python API for TVM to accomplish the following tasks:
+
+* Compile a pre-trained ResNet 50 v2 model for the TVM runtime.
+* Run a real image through the compiled model, and interpret the output and model
+  performance.
+* Tune the model on a CPU using TVM.
+* Re-compile an optimized model using the tuning data collected by TVM.
+* Run the image through the optimized model, and compare the output and model
+  performance.
+
+The goal of this section is to give you an overview of TVM's capabilities and
+how to use them through the Python API.
+"""
+
+################################################################################
+# TVM is a deep learning compiler framework, with a number of different modules
+# available for working with deep learning models and operators. In this
+# tutorial we will work through how to load, compile, and optimize a model
+# using the Python API.
+#
+# We begin by importing a number of dependencies, including ``onnx`` for
+# loading and converting the model, helper utilities for downloading test data,
+# the Python Imaging Library for working with the image data, ``numpy`` for pre-
+# and post-processing of the image data, the TVM Relay framework, and the TVM
+# Graph Executor.
+
+import onnx
+from tvm.contrib.download import download_testdata
+from PIL import Image
+import numpy as np
+import tvm.relay as relay
+import tvm
+from tvm.contrib import graph_executor
+
+################################################################################
+# Downloading and Loading the ONNX Model
+# --------------------------------------
+#
+# For this tutorial, we will be working with ResNet-50 v2. ResNet-50 is a
+# convolutional neural network that is 50 layers deep and designed to classify
+# images. The model we will be using has been pre-trained on more than a
+# million images with 1000 different classifications. The network has an input
+# image size of 224x224. If you are interested in exploring more of how the
+# ResNet-50 model is structured, we recommend downloading
+# `Netron <https://netron.app>`_, a freely available ML model viewer.
+#
+# TVM provides a helper library to download pre-trained models. Given a model
+# URL, file name, and model type, the helper will download the model and save
+# it to disk. In the case of an ONNX model, you can then load it into memory
+# using the ``onnx`` library.
+#
+# .. note:: Working with Other Model Formats
+#
+#   TVM supports many popular model formats. A list can be found in the `Compile
+#   Deep Learning Models
+#   <https://tvm.apache.org/docs/tutorials/index.html#compile-deep-learning-models>`_
+#   section of the TVM Documentation.
+
+model_url = "".join(
+    [
+        "https://github.com/onnx/models/raw/",
+        "master/vision/classification/resnet/model/",
+        "resnet50-v2-7.onnx",
+    ]
+)
+
+model_path = download_testdata(model_url, "resnet50-v2-7.onnx", module="onnx")
+onnx_model = onnx.load(model_path)
+
+################################################################################
+# Downloading, Preprocessing, and Loading the Test Image
+# ------------------------------------------------------
+#
+# Each model is particular when it comes to expected tensor shapes, formats and
+# data types. For this reason, most models require some pre- and
+# post-processing, to ensure the input is valid and to interpret the output.
+# TVMC has adopted NumPy's ``.npz`` format for both input and output data.
+#
+# As input for this tutorial, we will use the image of a cat, but feel free to
+# substitute an image of your choosing.
+#
+# .. image:: https://s3.amazonaws.com/model-server/inputs/kitten.jpg
+#    :height: 224px
+#    :width: 224px
+#    :align: center
+#
+# Download the image data, then convert it to a numpy array to use as an input to the model.
+
+img_url = "https://s3.amazonaws.com/model-server/inputs/kitten.jpg"
+img_path = download_testdata(img_url, "imagenet_cat.png", module="data")
+
+# Resize it to 224x224
+resized_image = Image.open(img_path).resize((224, 224))
+img_data = np.asarray(resized_image).astype("float32")
+
+# Our input image is in HWC layout while ONNX expects CHW input, so convert the array
+img_data = np.transpose(img_data, (2, 0, 1))
+
+# Normalize according to the ImageNet input specification
+imagenet_mean = np.array([0.485, 0.456, 0.406])
+imagenet_stddev = np.array([0.229, 0.224, 0.225])
+norm_img_data = np.zeros(img_data.shape).astype("float32")
+for i in range(img_data.shape[0]):
+    norm_img_data[i, :, :] = (img_data[i, :, :] / 255 - imagenet_mean[i]) / imagenet_stddev[i]
+
+# Add the batch dimension, as we are expecting 4-dimensional input: NCHW.
+img_data = np.expand_dims(norm_img_data, axis=0)
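+
+# A quick sanity check (plain NumPy; the expected shape assumes the resize and
+# batching steps above ran as written):
+#
+#   print(img_data.shape)  # (1, 3, 224, 224)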
+
+###############################################################################
+# Compile the Model With Relay
+# ----------------------------
+#
+# The next step is to compile the ResNet model. We begin by importing the model
+# to Relay using the ``from_onnx`` importer. We then build the model, with
+# standard optimizations, into a TVM library. Finally, we create a TVM graph
+# executor module from the library.
+
+target = "llvm"
+
+######################################################################
+# .. note:: Defining the Correct Target
+#
+#   Specifying the correct target can have a huge impact on the performance of
+#   the compiled module, as it can take advantage of hardware features
+#   available on the target. For more information, please refer to `Auto-tuning
+#   a convolutional network for x86 CPU
+#   <https://tvm.apache.org/docs/tutorials/autotvm/tune_relay_x86.html#define-network>`_.
+#   We recommend identifying which CPU you are running, along with optional
+#   features, and setting the target appropriately. For example, for some
+#   processors ``target = "llvm -mcpu=skylake"``, or ``target = "llvm
+#   -mcpu=skylake-avx512"`` for processors with the AVX-512 vector instruction
+#   set.
+#
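+#   As a minimal sketch (using the ``tvm.target.Target`` API; attribute names
+#   here may vary across TVM versions), you can construct a target object up
+#   front to confirm that the options parse as expected:
+#
+#   .. code-block:: python
+#
+#     cpu_target = tvm.target.Target("llvm -mcpu=skylake-avx512")
+#     print(cpu_target.kind.name)      # "llvm"
+#     print(cpu_target.attrs["mcpu"])  # "skylake-avx512"
+#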
+
+# The input name may vary across model types. You can use a tool like Netron
+# to check input names, or inspect the model programmatically as sketched below.
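+#
+# A small sketch using the standard ``onnx`` Python API (not TVM-specific) to
+# list the graph's declared inputs:
+#
+#   print([inp.name for inp in onnx_model.graph.input])
+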
+input_name = "data"
+shape_dict = {input_name: img_data.shape}
+
+mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)
+
+with tvm.transform.PassContext(opt_level=3):
+    lib = relay.build(mod, target=target, params=params)
+
+dev = tvm.device(str(target), 0)
+module = graph_executor.GraphModule(lib["default"](dev))
+
+######################################################################
+# Execute on the TVM Runtime
+# --------------------------
+# Now that we've compiled the model, we can use the TVM runtime to make
+# predictions with it. To use TVM to run the model and make predictions, we
+# need two things:
+#
+# - The compiled model, which we just produced.
+# - Valid input to the model to make predictions on.
+
+dtype = "float32"
+module.set_input(input_name, img_data)
+module.run()
+output_shape = (1, 1000)
+tvm_output = module.get_output(0, tvm.nd.empty(output_shape)).asnumpy()
+
+################################################################################
+# Collect Basic Performance Data
+# ------------------------------
+# We want to collect some basic performance data associated with this
+# unoptimized model and compare it to a tuned model later. To help account for
+# CPU noise, we run the computation in multiple batches of multiple
+# repetitions, then gather some basic statistics on the mean, median, and
+# standard deviation.
+import timeit
+
+timing_number = 10
+timing_repeat = 10
+unoptimized = (
+    np.array(timeit.Timer(lambda: module.run()).repeat(repeat=timing_repeat, number=timing_number))
+    * 1000
+    / timing_number
+)
+unoptimized = {
+    "mean": np.mean(unoptimized),
+    "median": np.median(unoptimized),
+    "std": np.std(unoptimized),
+}
+
+print(unoptimized)
+
+################################################################################
+# Postprocess the output
+# ----------------------
+#
+# As previously mentioned, each model will have its own particular way of
+# providing output tensors.
+#
+# In our case, we need to run some post-processing to render the outputs from
+# ResNet-50-V2 into a more human-readable form, using the lookup-table provided
+# for the model.
+
+from scipy.special import softmax
+
+# Download a list of labels
+labels_url = "https://s3.amazonaws.com/onnx-model-zoo/synset.txt"
+labels_path = download_testdata(labels_url, "synset.txt", module="data")
+
+with open(labels_path, "r") as f:
+    labels = [l.rstrip() for l in f]
+
+# Open the output and read the output tensor
+scores = softmax(tvm_output)
+scores = np.squeeze(scores)
+ranks = np.argsort(scores)[::-1]
+for rank in ranks[0:5]:
+    print("class='%s' with probability=%f" % (labels[rank], scores[rank]))
+
+################################################################################
+# This should produce the following output:
+#
+# .. code-block:: bash
+#
+#     # class='n02123045 tabby, tabby cat' with probability=0.610553
+#     # class='n02123159 tiger cat' with probability=0.367179
+#     # class='n02124075 Egyptian cat' with probability=0.019365
+#     # class='n02129604 tiger, Panthera tigris' with probability=0.001273
+#     # class='n04040759 radiator' with probability=0.000261
+
+################################################################################
+# Tune the model
+# --------------
+# The previous model was compiled to work on the TVM runtime, but did not
+# include any platform-specific optimization. In this section, we will show you
+# how to build an optimized model using TVM to target your working platform.
+#
+# In some cases, we might not get the expected performance when running
+# inference using our compiled module. In cases like this, we can make use of
+# the auto-tuner to find a better configuration for our model and get a boost
+# in performance. Tuning in TVM refers to the process by which a model is
+# optimized to run faster on a given target. This differs from training or
+# fine-tuning in that it does not affect the accuracy of the model, but only
+# the runtime performance. As part of the tuning process, TVM will try running
+# many different operator implementation variants to see which perform best.
+# The results of these runs are stored in a tuning records file.
+#
+# In the simplest form, tuning requires you to provide three things:
+#
+# - the target specification of the device you intend to run this model on
+# - the path to an output file in which the tuning records will be stored
+# - a path to the model to be tuned.
+#
+
+import tvm.auto_scheduler as auto_scheduler
+from tvm.autotvm.tuner import XGBTuner
+from tvm import autotvm
+
+# Set up some basic parameters for the runner. The runner takes compiled code
+# that is generated with a specific set of parameters and measures the
+# performance of it. ``number`` specifies the number of times we will run each
+# configuration per measurement, while ``repeat`` specifies how many
+# measurements we will take of each configuration. ``min_repeat_ms`` specifies
+# the minimum duration of a single measurement, in milliseconds. If a
+# measurement finishes in less than this time, the number of runs will be
+# increased. This option is necessary for accurate tuning on GPUs, and is not
+# required for CPU tuning. Setting this value to 0 disables it. The ``timeout``
+# places an upper limit, in seconds, on how long to run the test code for each
+# configuration.
+
+number = 10
+repeat = 1
+min_repeat_ms = 0  # since we're tuning on a CPU, can be set to 0
+timeout = 10  # in seconds
+
+# create a TVM runner
+runner = autotvm.LocalRunner(
+    number=number,
+    repeat=repeat,
+    timeout=timeout,
+    min_repeat_ms=min_repeat_ms,
+)
+
+# Create a simple structure for holding tuning options. We use an XGBoost
+# algorithm for guiding the search. For a production job, you will want to set
+# the number of trials to be larger than the value of 10 used here. For CPU we
+# recommend 1500, for GPU 3000-4000. The number of trials required can depend
+# on the particular model and processor, so it's worth spending some time
+# evaluating performance across a range of values to find the best balance
+# between tuning time and model optimization. Because running tuning is time
+# intensive, we set the number of trials to 10, but do not recommend a value
+# this small. The ``early_stopping`` parameter is the minimum number of trials
+# to run before a condition that stops the search early can be applied. The
+# ``measure_option`` indicates where trial code will be built, and where it
+# will be run. In this case, we're using the ``LocalRunner`` we just created
+# and a ``LocalBuilder``. The ``tuning_records`` option specifies a file to
+# write the tuning data to.
+
+tuning_option = {
+    "tuner": "xgb",
+    "trials": 10,
+    "early_stopping": 100,
+    "measure_option": autotvm.measure_option(
+        builder=autotvm.LocalBuilder(build_func="default"), runner=runner
+    ),
+    "tuning_records": "resnet-50-v2-autotuning.json",
+}
+
+################################################################################
+# .. note:: Defining the Tuning Search Algorithm
+#
+#   By default this search is guided using an `XGBoost Grid` algorithm.
+#   Depending on your model complexity and amount of time available, you might
+#   want to choose a different algorithm.
+
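+################################################################################
+# As a sketch (not executed here), switching tuners only changes how the tuner
+# object is constructed below. The alternatives shown all ship with
+# ``tvm.autotvm.tuner``:
+#
+# .. code-block:: python
+#
+#     from tvm.autotvm.tuner import GATuner, GridSearchTuner, RandomTuner, XGBTuner
+#
+#     # tuner_obj = XGBTuner(task, loss_type="rank")  # gradient-boosted model (used below)
+#     # tuner_obj = RandomTuner(task)                 # uniform random sampling
+#     # tuner_obj = GATuner(task)                     # genetic-algorithm search
+#     # tuner_obj = GridSearchTuner(task)             # exhaustive grid search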
+
+################################################################################
+# .. note:: Setting Tuning Parameters
+#
+#   In this example, in the interest of time, we set the number of trials and
+#   early stopping to 10. You will likely see more performance improvements if
+#   you set these values to be higher but this comes at the expense of time
+#   spent tuning. The number of trials required for convergence will vary
+#   depending on the specifics of the model and the target platform.
+
+# begin by extracting the tasks from the ONNX model
+tasks = autotvm.task.extract_from_program(mod["main"], target=target, params=params)
+
+# Tune the extracted tasks sequentially.
+for i, task in enumerate(tasks):
+    prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))
+    tuner_obj = XGBTuner(task, loss_type="rank")
+    tuner_obj.tune(
+        n_trial=min(tuning_option["trials"], len(task.config_space)),
+        early_stopping=tuning_option["early_stopping"],
+        measure_option=tuning_option["measure_option"],
+        callbacks=[
+            autotvm.callback.progress_bar(tuning_option["trials"], prefix=prefix),
+            autotvm.callback.log_to_file(tuning_option["tuning_records"]),
+        ],
+    )
+
+################################################################################
+# The output from this tuning process will look something like this:
+#
+# .. code-block:: bash
+#
+#   # [Task  1/24]  Current/Best:   10.71/  21.08 GFLOPS | Progress: (60/1000) | 111.77 s Done.
+#   # [Task  1/24]  Current/Best:    9.32/  24.18 GFLOPS | Progress: (192/1000) | 365.02 s Done.
+#   # [Task  2/24]  Current/Best:   22.39/ 177.59 GFLOPS | Progress: (960/1000) | 976.17 s Done.
+#   # [Task  3/24]  Current/Best:   32.03/ 153.34 GFLOPS | Progress: (800/1000) | 776.84 s Done.
+#   # [Task  4/24]  Current/Best:   11.96/ 156.49 GFLOPS | Progress: (960/1000) | 632.26 s Done.
+#   # [Task  5/24]  Current/Best:   23.75/ 130.78 GFLOPS | Progress: (800/1000) | 739.29 s Done.
+#   # [Task  6/24]  Current/Best:   38.29/ 198.31 GFLOPS | Progress: (1000/1000) | 624.51 s Done.
+#   # [Task  7/24]  Current/Best:    4.31/ 210.78 GFLOPS | Progress: (1000/1000) | 701.03 s Done.
+#   # [Task  8/24]  Current/Best:   50.25/ 185.35 GFLOPS | Progress: (972/1000) | 538.55 s Done.
+#   # [Task  9/24]  Current/Best:   50.19/ 194.42 GFLOPS | Progress: (1000/1000) | 487.30 s Done.
+#   # [Task 10/24]  Current/Best:   12.90/ 172.60 GFLOPS | Progress: (972/1000) | 607.32 s Done.
+#   # [Task 11/24]  Current/Best:   62.71/ 203.46 GFLOPS | Progress: (1000/1000) | 581.92 s Done.
+#   # [Task 12/24]  Current/Best:   36.79/ 224.71 GFLOPS | Progress: (1000/1000) | 675.13 s Done.
+#   # [Task 13/24]  Current/Best:    7.76/ 219.72 GFLOPS | Progress: (1000/1000) | 519.06 s Done.
+#   # [Task 14/24]  Current/Best:   12.26/ 202.42 GFLOPS | Progress: (1000/1000) | 514.30 s Done.
+#   # [Task 15/24]  Current/Best:   31.59/ 197.61 GFLOPS | Progress: (1000/1000) | 558.54 s Done.
+#   # [Task 16/24]  Current/Best:   31.63/ 206.08 GFLOPS | Progress: (1000/1000) | 708.36 s Done.
+#   # [Task 17/24]  Current/Best:   41.18/ 204.45 GFLOPS | Progress: (1000/1000) | 736.08 s Done.
+#   # [Task 18/24]  Current/Best:   15.85/ 222.38 GFLOPS | Progress: (980/1000) | 516.73 s Done.
+#   # [Task 19/24]  Current/Best:   15.78/ 203.41 GFLOPS | Progress: (1000/1000) | 587.13 s Done.
+#   # [Task 20/24]  Current/Best:   30.47/ 205.92 GFLOPS | Progress: (980/1000) | 471.00 s Done.
+#   # [Task 21/24]  Current/Best:   46.91/ 227.99 GFLOPS | Progress: (308/1000) | 219.18 s Done.
+#   # [Task 22/24]  Current/Best:   13.33/ 207.66 GFLOPS | Progress: (1000/1000) | 761.74 s Done.
+#   # [Task 23/24]  Current/Best:   53.29/ 192.98 GFLOPS | Progress: (1000/1000) | 799.90 s Done.
+#   # [Task 24/24]  Current/Best:   25.03/ 146.14 GFLOPS | Progress: (1000/1000) | 1112.55 s Done.
+
+################################################################################
+# Compiling an Optimized Model with Tuning Data
+# ----------------------------------------------
+#
+# As an output of the tuning process above, we obtained the tuning records
+# stored in ``resnet-50-v2-autotuning.json``. The compiler will use the results to
+# generate high performance code for the model on your specified target.
+#
+# Now that tuning data for the model has been collected, we can re-compile the
+# model using optimized operators to speed up our computations.
+
+with autotvm.apply_history_best(tuning_option["tuning_records"]):
+    with tvm.transform.PassContext(opt_level=3, config={}):
+        lib = relay.build(mod, target=target, params=params)
+
+dev = tvm.device(str(target), 0)
+module = graph_executor.GraphModule(lib["default"](dev))
+
+################################################################################
+# Verify that the optimized model runs and produces the same results:
+
+dtype = "float32"
+module.set_input(input_name, img_data)
+module.run()
+output_shape = (1, 1000)
+tvm_output = module.get_output(0, tvm.nd.empty(output_shape)).asnumpy()
+
+scores = softmax(tvm_output)
+scores = np.squeeze(scores)
+ranks = np.argsort(scores)[::-1]
+for rank in ranks[0:5]:
+    print("class='%s' with probability=%f" % (labels[rank], scores[rank]))
+
+################################################################################
+# Verifying that the predictions are the same:
+#
+# .. code-block:: bash
+#
+#   # class='n02123045 tabby, tabby cat' with probability=0.610550
+#   # class='n02123159 tiger cat' with probability=0.367181
+#   # class='n02124075 Egyptian cat' with probability=0.019365
+#   # class='n02129604 tiger, Panthera tigris' with probability=0.001273
+#   # class='n04040759 radiator' with probability=0.000261
+
+################################################################################
+# Comparing the Tuned and Untuned Models
+# --------------------------------------
+# We want to collect some basic performance data associated with this optimized
+# model to compare it to the unoptimized model. Depending on your underlying
+# hardware, number of iterations, and other factors, you should see a performance
+# improvement when comparing the optimized model to the unoptimized model.
+
+import timeit
+
+timing_number = 10
+timing_repeat = 10
+optimized = (
+    np.array(timeit.Timer(lambda: module.run()).repeat(repeat=timing_repeat, number=timing_number))
+    * 1000
+    / timing_number
+)
+optimized = {"mean": np.mean(optimized), "median": np.median(optimized), "std": np.std(optimized)}
+
+
+print("optimized: %s" % (optimized))
+print("unoptimized: %s" % (unoptimized))
+
+################################################################################
+# Final Remarks
+# -------------
+#
+# In this tutorial, we gave a short example of how to use the TVM Python API
+# to compile, run, and tune a model. We also discussed the need for pre- and
+# post-processing of inputs and outputs. After the tuning process, we
+# demonstrated how to compare the performance of the unoptimized and optimized
+# models.
+#
+# Here we presented a simple example using ResNet 50 V2 locally. However, TVM
+# supports many more features including cross-compilation, remote execution and
+# profiling/benchmarking.
diff --git a/docs/_downloads/10e16681be542cc483fa89e9b4678a27/opt_matmul_auto_tensorcore.py b/docs/_downloads/10e16681be542cc483fa89e9b4678a27/opt_matmul_auto_tensorcore.py
index f5450b9..03682a0 100644
--- a/docs/_downloads/10e16681be542cc483fa89e9b4678a27/opt_matmul_auto_tensorcore.py
+++ b/docs/_downloads/10e16681be542cc483fa89e9b4678a27/opt_matmul_auto_tensorcore.py
@@ -252,8 +252,8 @@ def test_gemm(N, L, M, dtype, layout):
 if not tvm.gpu(0).exist or not tvm.runtime.enabled("cuda"):
     raise Exception("skip building this tutorial because cuda is not enabled..")
 
-ctx = tvm.gpu()
-if not nvcc.have_tensorcore(ctx.compute_version):
+dev = tvm.gpu()
+if not nvcc.have_tensorcore(dev.compute_version):
     raise Exception("the gpu has no tensorcore, skipping...")
 
 M, N, L = 512, 32, 512
@@ -385,14 +385,14 @@ def tune_and_evaluate(M, N, L, dtype, layout):
                 for k in range(32):
                     b_np[i, j] = b_np[i, j] | ((b_np_int[i, j * 32 + k] & 0xF) << (31 - k))
 
-    c_tvm = tvm.nd.array(np.zeros(c_np.shape, dtype=c_np_type), ctx=ctx)
-    a_tvm = tvm.nd.array(a_np, ctx=ctx)
-    b_tvm = tvm.nd.array(b_np, ctx=ctx)
+    c_tvm = tvm.nd.array(np.zeros(c_np.shape, dtype=c_np_type), device=dev)
+    a_tvm = tvm.nd.array(a_np, device=dev)
+    b_tvm = tvm.nd.array(b_np, device=dev)
     func(a_tvm, b_tvm, c_tvm)
 
     tvm.testing.assert_allclose(c_np, c_tvm.asnumpy(), rtol=1e-3)
 
-    evaluator = func.time_evaluator(func.entry_name, ctx, number=100)
+    evaluator = func.time_evaluator(func.entry_name, dev, number=100)
     print("Time cost of this operator: %f" % evaluator(a_tvm, b_tvm, c_tvm).mean)
 
 
diff --git a/docs/_downloads/143c743c62f58570eabd77fd3395ca8c/scan.py b/docs/_downloads/143c743c62f58570eabd77fd3395ca8c/scan.py
index 5f51320..8124b56 100644
--- a/docs/_downloads/143c743c62f58570eabd77fd3395ca8c/scan.py
+++ b/docs/_downloads/143c743c62f58570eabd77fd3395ca8c/scan.py
@@ -83,12 +83,12 @@ print(tvm.lower(s, [X, s_scan], simple_mode=True))
 # numpy to verify the correctness of the result.
 #
 fscan = tvm.build(s, [X, s_scan], "cuda", name="myscan")
-ctx = tvm.gpu(0)
+dev = tvm.gpu(0)
 n = 1024
 m = 10
 a_np = np.random.uniform(size=(m, n)).astype(s_scan.dtype)
-a = tvm.nd.array(a_np, ctx)
-b = tvm.nd.array(np.zeros((m, n), dtype=s_scan.dtype), ctx)
+a = tvm.nd.array(a_np, dev)
+b = tvm.nd.array(np.zeros((m, n), dtype=s_scan.dtype), dev)
 fscan(a, b)
 tvm.testing.assert_allclose(b.asnumpy(), np.cumsum(a_np, axis=0))
 
diff --git a/docs/_downloads/15c94550bd86853c42df8e5d139d700a/tune_sparse_x86.py b/docs/_downloads/15c94550bd86853c42df8e5d139d700a/tune_sparse_x86.py
new file mode 100644
index 0000000..a635a74
--- /dev/null
+++ b/docs/_downloads/15c94550bd86853c42df8e5d139d700a/tune_sparse_x86.py
@@ -0,0 +1,316 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Auto-scheduling Sparse Matrix Multiplication on CPU with Custom Sketch Rule
+===========================================================================
+**Author**: `Chengfan Jia <https://github.com/jcf94/>`_
+
+This is a tutorial on how to use the auto-scheduler to tune a sparse matrix multiplication for
+CPUs.
+
+The auto-scheduler is designed to automatically explore the schedule with the best performance for
+a given computation declaration. Sometimes, however, we may want to tune special ops that are not
+well supported by the auto-scheduler's default sketch rules, and so get poor performance.
+Fortunately, the auto-scheduler currently allows users to provide a CustomSketch to cover these
+cases.
+
+We use sparse matrix multiplication as an example in this tutorial to demonstrate how to implement
+and plug a custom sketch rule to the auto-scheduler's search policy.
+
+Note that this tutorial will not run on Windows or recent versions of macOS. To
+get it to run, you will need to wrap the body of this tutorial in a :code:`if
+__name__ == "__main__":` block.
+"""
+
+import os
+
+import numpy as np
+import tvm
+from tvm import te, auto_scheduler, runtime, topi
+from tvm.auto_scheduler import _ffi_api
+from tvm.topi.utils import get_const_tuple
+from tvm.topi.sparse.utils import random_bsr_matrix
+
+######################################################################
+# Define the computation
+# ^^^^^^^^^^^^^^^^^^^^^^
+# To begin with, let us define the computation of a sparse matmul with several relu and bias add operations.
+# The function should return the list of input/output tensors.
+# From these tensors, the auto-scheduler can get the whole computational graph.
+
+
+@auto_scheduler.register_workload
+def sparse_dense(M, N, K, w_data_shape, w_indices_shape, w_indptr_shape, dtype):
+    X = te.placeholder(shape=(M, K), dtype=dtype)
+    W_data = te.placeholder(shape=w_data_shape, dtype=dtype)
+    W_indices = te.placeholder(shape=w_indices_shape, dtype="int32")
+    W_indptr = te.placeholder(shape=w_indptr_shape, dtype="int32")
+    B = te.placeholder(shape=(M, N), dtype=dtype)
+
+    out = topi.nn.sparse_dense(topi.nn.relu(X), W_data, W_indices, W_indptr)
+    out = te.compute((M, N), lambda i, j: out[i, j] + B[i, j], name="BiasAdd")
+    out = topi.nn.relu(out)
+
+    return [X, W_data, W_indices, W_indptr, B, out]
+
+
+######################################################################
+# Special step for sparse workload
+# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+# During schedule tuning, the auto-scheduler uses random inputs to measure the performance of a
+# generated schedule. However, we cannot directly use a random array as the input of a sparse op,
+# because the "indices" and "indptr" arrays are meaningful for the computation.
+#
+# To solve this problem, we register them as special buffers and load them when measuring the
+# program.
+# See `tvm.auto_scheduler.measure.py` for more details.
+
+# Define the basic shapes of this sparse computation
+M = 128
+K = 256
+N = 512
+BS_R = 16
+BS_C = 1
+density = 0.6
+
+# Generate the test data with numpy
+X_np = np.random.randn(M, K).astype("float32")
+X_np = np.maximum(np.zeros((M, K), dtype="float32"), X_np)  # Relu
+W_sp_np = random_bsr_matrix(N, K, BS_R, BS_C, density=density, dtype="float32")
+W_np = W_sp_np.todense()
+Y_np = X_np @ W_np.T  # Process the matrix multiplication
+B_np = np.random.randn(M, N).astype("float32")
+Y_np = Y_np + B_np  # Bias add
+Y_np = np.maximum(np.zeros((M, N), dtype="float32"), Y_np)  # Relu
+
+######################################################################
+# Create the search task
+# ^^^^^^^^^^^^^^^^^^^^^^
+# We then create a search task with the shapes defined above (M=128, K=256,
+# N=512) and dtype="float32".
+# If your machine supports AVX instructions, you can
+#
+#   - replace "llvm" below with "llvm -mcpu=core-avx2" to enable AVX2
+#   - replace "llvm" below with "llvm -mcpu=skylake-avx512" to enable AVX-512
+
+target = tvm.target.Target("llvm")
+
+# Register the sparse data to task inputs
+prefix = "sparse_dense_bsr_%d_%d_%d_%d_%.2f_" % (N, K, BS_R, BS_C, density)
+task = tvm.auto_scheduler.SearchTask(
+    func=sparse_dense,
+    args=(M, N, K, W_sp_np.data.shape, W_sp_np.indices.shape, W_sp_np.indptr.shape, "float32"),
+    target=target,
+    task_inputs={
+        prefix + "W_data": runtime.ndarray.array(W_sp_np.data),
+        prefix + "W_indices": runtime.ndarray.array(W_sp_np.indices),
+        prefix + "W_indptr": runtime.ndarray.array(W_sp_np.indptr),
+    },
+    task_inputs_save_to_file=True,
+)
+
+# Inspect the computational graph
+print("Computational DAG:")
+print(task.compute_dag)
+
+######################################################################
+# Write the custom sketch for sparse dense op
+# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+# Before tuning, we will need to define the CustomSketchRule for the sparse dense op.
+#
+# CustomSketchRule consists of two parts: the condition function and the apply function.
+#
+#   - condition function: describe when to apply this sketch rule. For example, we can only apply
+#     the rule to the sparse ops by matching their name and tag.
+#   - apply function: describe how to generate the initial sketch. You can implement it using
+#     auto-scheduler provided loop state APIs.
+
+
+def meet_condition_func(search_policy, state, stage_id):
+    state = auto_scheduler.loop_state.State(state, search_policy.search_task.compute_dag)
+    if state.stages[stage_id].op.tag in [
+        "sparse_dense_sp_rhs_bsrmm",
+        "sparse_dense_sp_rhs_bsrmm_block",
+    ]:
+        return auto_scheduler.PreloadCustomSketchRule.APPLY_AND_SKIP_REST
+    else:
+        return auto_scheduler.PreloadCustomSketchRule.PASS
+
+
+def apply_func(search_policy, state, stage_id):
+    ret = []
+    s0 = auto_scheduler.loop_state.State(state, search_policy.search_task.compute_dag)
+    if s0.stages[stage_id].op.tag == "sparse_dense_sp_rhs_bsrmm_block":
+        return [s0.state_object, stage_id - 1]
+
+    sparse_dense = s0.stages[stage_id].op
+    sparse_dense_block = s0.stages[stage_id - 1].op
+    assert sparse_dense.tag == "sparse_dense_sp_rhs_bsrmm"
+    assert sparse_dense_block.tag == "sparse_dense_sp_rhs_bsrmm_block"
+
+    # Set the default consumer of compute block
+    consumer = sparse_dense
+
+    # If sparse dense has a single elementwise consumer
+    # We can compute inline the sparse_dense output stage
+    consumers = _ffi_api.SearchPolicyUtilsGetConsumers(
+        search_policy.search_task, s0.state_object, stage_id
+    )
+    if len(consumers) == 1:
+        consumer_id = int(consumers.items()[0][0])
+        if _ffi_api.SearchPolicyUtilsIsElementwiseMatch(
+            search_policy.search_task, s0.state_object, stage_id, consumer_id
+        ):
+            consumer = s0.stages[consumer_id].op
+            s0.compute_inline(sparse_dense)
+
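+    # Tile the block stage by splitting its spatial iterators, mirror those
+    # splits on the consumer with follow_split, then reorder both stages and
+    # attach the block computation under the consumer's outer tile (n0).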
+    i, nb_j, j, row_offset, c = s0[sparse_dense_block].iters
+    m, n = s0[consumer].iters
+    i0, i1, i2 = s0.split(sparse_dense_block, i, [None, None])
+    m0, m1 = s0.follow_split(consumer, m, len(s0.transform_steps) - 1, 1)
+    j0, j1 = s0.split(sparse_dense_block, nb_j, [None])
+    n0, n1 = s0.follow_split(consumer, n, len(s0.transform_steps) - 1, 1)
+    s0.reorder(sparse_dense_block, [i0, j0, i1, j1, row_offset, i2, j, c])
+    s0.reorder(consumer, [m0, n0, m1, n1])
+    s0.compute_at(sparse_dense_block, consumer, n0)
+
+    ret.append([s0.state_object, stage_id - 2])
+
+    return ret
+
+
+######################################################################
+# Next, we set parameters for the auto-scheduler with the custom sketch plugged in.
+#
+# * :code:`num_measure_trials` is the number of measurement trials we can use during the search.
+#   We only make 10 trials in this tutorial for a fast demonstration. In practice, 1000 is a
+#   good value for the search to converge. You can do more trials according to your time budget.
+# * In addition, we use :code:`RecordToFile` to dump measurement records into a file
+#   `sparse_dense.json`.
+#   The measurement records can be used to query the history best, resume the
+#   search (a sketch of resuming appears after the code below), and do more
+#   analyses later.
+# * see :any:`auto_scheduler.TuningOptions` for more parameters
+# * Here, we need to create an :code:`auto_scheduler.SketchPolicy` object, and add the custom sketch
+#   rule as one of its `init_search_callbacks`.
+
+log_file = "sparse_dense.json"
+tune_option = auto_scheduler.TuningOptions(
+    num_measure_trials=10,
+    measure_callbacks=[auto_scheduler.RecordToFile(log_file)],
+    verbose=2,
+)
+
+search_policy = auto_scheduler.SketchPolicy(
+    task,
+    program_cost_model=auto_scheduler.XGBModel(),
+    init_search_callbacks=[
+        auto_scheduler.PreloadCustomSketchRule(meet_condition_func, apply_func, "SparseDense")
+    ],
+)
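+
+######################################################################
+# As a sketch (not executed here), a later session could resume the search
+# from the same log file by preloading the measured states into a new search
+# policy alongside the custom sketch rule:
+#
+# .. code-block:: python
+#
+#     resumed_policy = auto_scheduler.SketchPolicy(
+#         task,
+#         program_cost_model=auto_scheduler.XGBModel(),
+#         init_search_callbacks=[
+#             auto_scheduler.PreloadMeasuredStates(log_file),
+#             auto_scheduler.PreloadCustomSketchRule(
+#                 meet_condition_func, apply_func, "SparseDense"
+#             ),
+#         ],
+#     )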
+
+######################################################################
+# Run the search
+# ^^^^^^^^^^^^^^
+# Now we have all the inputs ready.
+# We can kick off the search and let the auto-scheduler do its magic.
+# After some measurement trials, we can load the best schedule from the log
+# file and apply it.
+
+# Run auto-tuning (search)
+# Notice: the search can take a long time. Comment out the following line to
+# skip it and reuse records from an earlier run instead.
+task.tune(tune_option, search_policy)
+
+# Apply the best schedule
+sch, args = task.apply_best(log_file)
+
+######################################################################
+# We can lower the schedule to see the IR after auto-scheduling.
+# The auto-scheduler correctly performs optimizations including multi-level tiling,
+# layout transformation, parallelization, vectorization, unrolling, and operator fusion.
+
+print("Lowered TIR:")
+print(tvm.lower(sch, args, simple_mode=True))
+
+######################################################################
+# Check correctness and evaluate performance
+# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+# We build the binary and check its correctness and performance.
+
+func = tvm.build(sch, args, target)
+
+dev = tvm.cpu()
+
+X_tvm = tvm.nd.array(X_np, device=dev)
+W_data_tvm = tvm.nd.array(W_sp_np.data, device=dev)
+W_indices_tvm = tvm.nd.array(W_sp_np.indices, device=dev)
+W_indptr_tvm = tvm.nd.array(W_sp_np.indptr, device=dev)
+B_tvm = tvm.nd.array(B_np, device=dev)
+Y_tvm = tvm.nd.empty(Y_np.shape, device=dev)
+
+func(X_tvm, W_data_tvm, W_indices_tvm, W_indptr_tvm, B_tvm, Y_tvm)
+
+# Check results
+tvm.testing.assert_allclose(Y_np, Y_tvm.asnumpy(), atol=1e-4, rtol=1e-4)
+
+# Evaluate execution time.
+evaluator = func.time_evaluator(func.entry_name, dev, min_repeat_ms=500)
+print(
+    "Execution time of this operator: %.3f ms"
+    % (
+        np.median(evaluator(X_tvm, W_data_tvm, W_indices_tvm, W_indptr_tvm, B_tvm, Y_tvm).results)
+        * 1000
+    )
+)
+
+######################################################################
+# .. note:: Tuning result example
+#
+#   .. code-block:: c
+#
+#    ----------------------------------------------------------------------
+#    Lowered TIR:
+#    primfn(placeholder_5: handle, placeholder_6: handle, placeholder_7: handle, placeholder_8: handle, placeholder_9: handle, compute_1: handle) -> ()
+#      attr = {"global_symbol": "main", "tir.noalias": True}
+#      buffers = {placeholder_2: Buffer(placeholder_10: Pointer(float32), float32, [9831, 16, 1], []),
+#                 placeholder_4: Buffer(placeholder_11: Pointer(int32), int32, [33], []),
+#                 placeholder_3: Buffer(placeholder_12: Pointer(float32), float32, [512, 512], []),
+#                 compute: Buffer(compute_2: Pointer(float32), float32, [512, 512], []),
+#                 placeholder_1: Buffer(placeholder_13: Pointer(float32), float32, [512, 512], []),
+#                 placeholder: Buffer(placeholder_14: Pointer(int32), int32, [9831], [])}
+#      buffer_map = {placeholder_7: placeholder, placeholder_9: placeholder_1, placeholder_6: placeholder_2, compute_1: compute, placeholder_5: placeholder_3, placeholder_8: placeholder_4} {
+#      for (i0.outer.i1.outer.fused: int32, 0, 1024) "parallel" {
+#        attr [compute_3: Pointer(float32)] "storage_scope" = "global";
+#        allocate(compute_3, float32, [256]) {
+#          for (nb_j.inner: int32, 0, 2) {
+#            for (i.inner.init: int32, 0, 8) {
+#              for (j.init: int32, 0, 16) {
+#                compute_3[(((i.inner.init*32) + (nb_j.inner*16)) + j.init)] = 0f32
+#              }
+#            }
+#            for (elem_idx: int32, 0, ((int32*)placeholder_11[(((floormod(i0.outer.i1.outer.fused, 16)*2) + nb_j.inner) + 1)] - (int32*)placeholder_11[((floormod(i0.outer.i1.outer.fused, 16)*2) + nb_j.inner)])) {
+#              for (i.inner: int32, 0, 8) {
+#                for (j: int32, 0, 16) {
+#                  compute_3[(((i.inner*32) + (nb_j.inner*16)) + j)] = ((float32*)compute_3[(((i.inner*32) + (nb_j.inner*16)) + j)] + ((float32*)placeholder_10[((((int32*)placeholder_11[((floormod(i0.outer.i1.outer.fused, 16)*2) + nb_j.inner)]*16) + (elem_idx*16)) + j)]*max((float32*)placeholder_12[(((floordiv(i0.outer.i1.outer.fused, 16)*4096) + (i.inner*512)) + (int32*)placeholder_14[((int32*)placeholder_11[((floormod(i0.outer.i1.outer.fused, 16)*2) + nb_j.inner)] + elem_idx)])], 0f32)))
+#                }
+#              }
+#            }
+#          }
+#          for (i0.inner: int32, 0, 8) {
+#            compute_2[ramp((((floordiv(i0.outer.i1.outer.fused, 16)*4096) + (i0.inner*512)) + (floormod(i0.outer.i1.outer.fused, 16)*32)), 1, 32)] = max(((float32x32*)compute_3[ramp((i0.inner*32), 1, 32)] + (float32x32*)placeholder_13[ramp((((floordiv(i0.outer.i1.outer.fused, 16)*4096) + (i0.inner*512)) + (floormod(i0.outer.i1.outer.fused, 16)*32)), 1, 32)]), broadcast(0f32, 32))
+#          }
+#        }
+#      }
+#    }
diff --git a/docs/_downloads/1604460dde2b82fb9db809bb388890f8/deploy_prequantized_tflite.ipynb b/docs/_downloads/1604460dde2b82fb9db809bb388890f8/deploy_prequantized_tflite.ipynb
index 7017b5d..c4c942e 100644
--- a/docs/_downloads/1604460dde2b82fb9db809bb388890f8/deploy_prequantized_tflite.ipynb
+++ b/docs/_downloads/1604460dde2b82fb9db809bb388890f8/deploy_prequantized_tflite.ipynb
@@ -155,7 +155,7 @@
       },
       "outputs": [],
       "source": [
-        "def run_tvm(lib):\n    from tvm.contrib import graph_runtime\n\n    rt_mod = graph_runtime.GraphModule(lib[\"default\"](tvm.cpu(0)))\n    rt_mod.set_input(\"input\", data)\n    rt_mod.run()\n    tvm_res = rt_mod.get_output(0).asnumpy()\n    tvm_pred = np.squeeze(tvm_res).argsort()[-5:][::-1]\n    return tvm_pred, rt_mod"
+        "def run_tvm(lib):\n    from tvm.contrib import graph_executor\n\n    rt_mod = graph_executor.GraphModule(lib[\"default\"](tvm.cpu(0)))\n    rt_mod.set_input(\"input\", data)\n    rt_mod.run()\n    tvm_res = rt_mod.get_output(0).asnumpy()\n    tvm_pred = np.squeeze(tvm_res).argsort()[-5:][::-1]\n    return tvm_pred, rt_mod"
       ]
     },
     {
@@ -284,7 +284,7 @@
       },
       "outputs": [],
       "source": [
-        "n_repeat = 100  # should be bigger to make the measurement more accurate\nctx = tvm.cpu(0)\nftimer = rt_mod.module.time_evaluator(\"run\", ctx, number=1, repeat=n_repeat)\nprof_res = np.array(ftimer().results) * 1e3\nprint(\"Elapsed average ms:\", np.mean(prof_res))"
+        "n_repeat = 100  # should be bigger to make the measurement more accurate\ndev = tvm.cpu(0)\nftimer = rt_mod.module.time_evaluator(\"run\", dev, number=1, repeat=n_repeat)\nprof_res = np.array(ftimer().results) * 1e3\nprint(\"Elapsed average ms:\", np.mean(prof_res))"
       ]
     },
     {
diff --git a/docs/_downloads/18fb1ab3ed0a0c9f304520f2beaf4fd6/tvmc_command_line_driver.py b/docs/_downloads/18fb1ab3ed0a0c9f304520f2beaf4fd6/tvmc_command_line_driver.py
index bcdf03e..fffbfbf 100644
--- a/docs/_downloads/18fb1ab3ed0a0c9f304520f2beaf4fd6/tvmc_command_line_driver.py
+++ b/docs/_downloads/18fb1ab3ed0a0c9f304520f2beaf4fd6/tvmc_command_line_driver.py
@@ -15,31 +15,33 @@
 # specific language governing permissions and limitations
 # under the License.
 """
-Getting Started with TVM command line driver - TVMC
-===================================================
+Compiling and Optimizing a Model with TVMC
+==========================================
 **Authors**:
 `Leandro Nunes <https://github.com/leandron>`_,
-`Matthew Barrett <https://github.com/mbaret>`_
-
-This tutorial is an introduction to working with TVMC, the TVM command
-line driver. TVMC is a tool that exposes TVM features such as
-auto-tuning, compiling, profiling and execution of models, via a
-command line interface.
-
-In this tutorial we are going to use TVMC to compile, run and tune a
-ResNet-50 on a x86 CPU.
-
-We are going to start by downloading ResNet 50 V2. Then, we are going
-to use TVMC to compile this model into a TVM module, and use the
-compiled module to generate predictions. Finally, we are going to experiment
-with the auto-tuning options, that can be used to help the compiler to
-improve network performance.
-
-The final goal is to give an overview of TVMC's capabilities and also
-some guidance on where to look for more information.
+`Matthew Barrett <https://github.com/mbaret>`_,
+`Chris Hoge <https://github.com/hogepodge>`_
+
+In this section, we will work with TVMC, the TVM command line driver. TVMC is a
+tool that exposes TVM features such as auto-tuning, compiling, profiling and
+execution of models through a command line interface.
+
+Upon completion of this section, we will have used TVMC to accomplish the
+following tasks:
+
+* Compile a pre-trained ResNet 50 v2 model for the TVM runtime.
+* Run a real image through the compiled model, and interpret the output and
+  model performance.
+* Tune the model on a CPU using TVM.
+* Re-compile an optimized model using the tuning data collected by TVM.
+* Run the image through the optimized model, and compare the output and model
+  performance.
+
+The goal of this section is to give you an overview of TVM and TVMC's
+capabilities, and set the stage for understanding how TVM works.
 """
 
-######################################################################
+################################################################################
 # Using TVMC
 # ----------
 #
@@ -61,32 +63,35 @@ some guidance on where to look for more information.
 #
 #   tvmc --help
 #
-#
-# As you can see in the help page, the main features are
-# accessible via the subcommands ``tune``, ``compile`` and ``run``.
-# To read about specific options under a given subcommand, use
-# ``tvmc <subcommand> --help``.
-#
-# In the following sections we will use TVMC to tune, compile and
-# run a model. But first, we need a model.
+# The main features of TVM available through ``tvmc`` are the subcommands
+# ``compile``, ``run``, and ``tune``. To read about specific options under
+# a given subcommand, use ``tvmc <subcommand> --help``. We will cover each of
+# these commands in this tutorial, but first we need to download a pre-trained
+# model to work with.
 #
 
 
-######################################################################
-# Obtaining the model
+################################################################################
+# Obtaining the Model
 # -------------------
 #
-# We are going to use ResNet-50 V2 as an example to experiment with TVMC.
-# The version below is in ONNX format. To download the file, you can use
-# the command below:
+# For this tutorial, we will be working with ResNet-50 v2. ResNet-50 is a
+# convolutional neural network that is 50-layers deep and designed to classify
+# images. The model we will be using has been pre-trained on more than a
+# million images with 1000 different classifications. The network has an input
+# image size of 224x224. If you are interested exploring more of how the
+# ResNet-50 model is structured, we recommend downloading `Netron
+# <https://netron.app>`, a freely available ML model viewer.
+#
+# For this tutorial we will be using the model in ONNX format.
 #
 # .. code-block:: bash
 #
 #   wget https://github.com/onnx/models/raw/master/vision/classification/resnet/model/resnet50-v2-7.onnx
 #
-#
 
-######################################################################
+
+################################################################################
 # .. note:: Supported model formats
 #
 #   TVMC supports models created with Keras, ONNX, TensorFlow, TFLite
@@ -96,241 +101,398 @@ some guidance on where to look for more information.
 #
 
 
-######################################################################
-# Compiling the model
-# -------------------
+################################################################################
+# Compiling an ONNX Model to the TVM Runtime
+# ------------------------------------------
 #
-# The next step once we've downloaded ResNet-50, is to compile it,
-# To accomplish that, we are going to use ``tvmc compile``. The
-# output we get from the compilation process is a TAR package,
-# that can be used to run our model on the target device.
+# Once we've downloaded the ResNet-50 model, the next step is to compile it. To
+# accomplish that, we are going to use ``tvmc compile``. The output we get from
+# the compilation process is a TAR package of the model compiled to a dynamic
+# library for our target platform. We can run that model on our target device
+# using the TVM runtime.
 #
 # .. code-block:: bash
 #
 #   tvmc compile \
-#     --target "llvm" \
-#     --output compiled_module.tar \
-#     resnet50-v2-7.onnx
+#   --target "llvm" \
+#   --output resnet50-v2-7-tvm.tar \
+#   resnet50-v2-7.onnx
 #
-# Once compilation finishes, the output ``compiled_module.tar`` will be created. This
-# can be directly loaded by your application and run via the TVM runtime APIs.
+# Let's take a look at the files that ``tvmc compile`` packed into the module:
 #
+# .. code-block:: bash
+#
+#   mkdir model
+#   tar -xvf resnet50-v2-7-tvm.tar -C model
+#   ls model
+#
+# You will see three files listed.
+#
+# * ``mod.so`` is the model, represented as a C++ library, that can be loaded
+#   by the TVM runtime.
+# * ``mod.json`` is a text representation of the TVM Relay computation graph.
+# * ``mod.params`` is a file containing the parameters for the pre-trained
+#   model.
+#
+# This module can be directly loaded by your application, and the model can be
+# run via the TVM runtime APIs.
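+#
+# As a minimal sketch (assuming the archive was extracted into ``model/`` as
+# above; the paths are illustrative), loading these artifacts from Python
+# might look like this:
+#
+# .. code-block:: python
+#
+#     import tvm
+#     from tvm.contrib import graph_executor
+#
+#     # load the compiled library, graph description, and parameters
+#     lib = tvm.runtime.load_module("model/mod.so")
+#     graph = open("model/mod.json").read()
+#     params = bytearray(open("model/mod.params", "rb").read())
+#
+#     # create a graph executor on the CPU and load the weights
+#     module = graph_executor.create(graph, lib, tvm.cpu(0))
+#     module.load_params(params)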
 
 
-######################################################################
-# .. note:: Defining the correct target
+################################################################################
+# .. note:: Defining the Correct Target
 #
 #   Specifying the correct target (option ``--target``) can have a huge
 #   impact on the performance of the compiled module, as it can take
 #   advantage of hardware features available on the target. For more
 #   information, please refer to `Auto-tuning a convolutional network
 #   for x86 CPU <https://tvm.apache.org/docs/tutorials/autotvm/tune_relay_x86.html#define-network>`_.
+#   We recommend identifying which CPU you are running, along with optional features,
+#   and setting the target appropriately.
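+#
+#   As a sketch, on a machine whose CPU supports AVX-512 the compile step
+#   might become:
+#
+#   .. code-block:: bash
+#
+#     tvmc compile \
+#     --target "llvm -mcpu=skylake-avx512" \
+#     --output resnet50-v2-7-tvm.tar \
+#     resnet50-v2-7.onnx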
 #
 
-
-######################################################################
-#
-# In the next step, we are going to use the compiled module, providing it
-# with some inputs, to generate some predictions.
-#
-
-
-######################################################################
-# Input pre-processing
-# --------------------
+################################################################################
+# Running the Model from The Compiled Module with TVMC
+# ----------------------------------------------------
 #
-# In order to generate predictions, we will need two things:
+# Now that we've compiled the model to this module, we can use the TVM runtime
+# to make predictions with it. TVMC has the TVM runtime built into it,
+# allowing you to run compiled TVM models. To use TVMC to run the model and
+# make predictions, we need two things:
 #
-# - the compiled module, which we just produced;
-# - a valid input to the model
+# - The compiled module, which we just produced.
+# - Valid input to the model to make predictions on.
 #
-# Each model is particular when it comes to expected tensor shapes, formats and data
-# types. For this reason, most models require some pre and
-# post processing, to ensure the input(s) is valid and to interpret the output(s).
+# Each model is particular when it comes to expected tensor shapes, formats and
+# data types. For this reason, most models require some pre and
+# post-processing, to ensure the input is valid and to interpret the output.
+# TVMC has adopted NumPy's ``.npz`` format for both input and output data. This
+# is a well-supported NumPy format to serialize multiple arrays into a file.
 #
-# In TVMC, we adopted NumPy's ``.npz`` format for both input and output data.
-# This is a well-supported NumPy format to serialize multiple arrays into a file.
-#
-# We will use the usual cat image, similar to other TVM tutorials:
+# As input for this tutorial, we will use the image of a cat, but you can feel
+# free to substitute an image of your choosing.
 #
 # .. image:: https://s3.amazonaws.com/model-server/inputs/kitten.jpg
 #    :height: 224px
 #    :width: 224px
 #    :align: center
+
+
+################################################################################
+# Input pre-processing
+# ~~~~~~~~~~~~~~~~~~~~
 #
 # For our ResNet 50 V2 model, the input is expected to be in ImageNet format.
 # Here is an example of a script to pre-process an image for ResNet 50 V2.
 #
-from tvm.contrib.download import download_testdata
-from PIL import Image
-import numpy as np
-
-img_url = "https://s3.amazonaws.com/model-server/inputs/kitten.jpg"
-img_path = download_testdata(img_url, "imagenet_cat.png", module="data")
-
-# Resize it to 224x224
-resized_image = Image.open(img_path).resize((224, 224))
-img_data = np.asarray(resized_image).astype("float32")
-
-# ONNX expects NCHW input, so convert the array
-img_data = np.transpose(img_data, (2, 0, 1))
-
-# Normalize according to ImageNet
-imagenet_mean = np.array([0.485, 0.456, 0.406])
-imagenet_stddev = np.array([0.229, 0.224, 0.225])
-norm_img_data = np.zeros(img_data.shape).astype("float32")
-for i in range(img_data.shape[0]):
-    norm_img_data[i, :, :] = (img_data[i, :, :] / 255 - imagenet_mean[i]) / imagenet_stddev[i]
-
-# Add batch dimension
-img_data = np.expand_dims(norm_img_data, axis=0)
-
-# Save to .npz (outputs imagenet_cat.npz)
-np.savez("imagenet_cat", data=img_data)
-
+# .. code-block:: python
+#    :caption: preprocess.py
+#    :name: preprocess.py
+#
+#     #!python ./preprocess.py
+#     from tvm.contrib.download import download_testdata
+#     from PIL import Image
+#     import numpy as np
+#
+#     img_url = "https://s3.amazonaws.com/model-server/inputs/kitten.jpg"
+#     img_path = download_testdata(img_url, "imagenet_cat.png", module="data")
+#
+#     # Resize it to 224x224
+#     resized_image = Image.open(img_path).resize((224, 224))
+#     img_data = np.asarray(resized_image).astype("float32")
+#
+#     # ONNX expects NCHW input, so convert the array
+#     img_data = np.transpose(img_data, (2, 0, 1))
+#
+#     # Normalize according to ImageNet
+#     imagenet_mean = np.array([0.485, 0.456, 0.406])
+#     imagenet_stddev = np.array([0.229, 0.224, 0.225])
+#     norm_img_data = np.zeros(img_data.shape).astype("float32")
+#     for i in range(img_data.shape[0]):
+#         norm_img_data[i, :, :] = (img_data[i, :, :] / 255 - imagenet_mean[i]) / imagenet_stddev[i]
+#
+#     # Add batch dimension
+#     img_data = np.expand_dims(norm_img_data, axis=0)
+#
+#     # Save to .npz (outputs imagenet_cat.npz)
+#     np.savez("imagenet_cat", data=img_data)
+#
 
-######################################################################
-# Running the compiled module
-# ---------------------------
+################################################################################
+# Running the Compiled Module
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~
 #
-# With both the compiled module and input file in hand, we can run it by
-# invoking ``tvmc run``.
+# With both the model and input data in hand, we can now run TVMC to make a
+# prediction:
 #
 # .. code-block:: bash
 #
-#    tvmc run \
-#      --inputs imagenet_cat.npz \
-#      --output predictions.npz \
-#      compiled_module.tar
+#     tvmc run \
+#     --inputs imagenet_cat.npz \
+#     --output predictions.npz \
+#     resnet50-v2-7-tvm.tar
 #
-# When running the above command, a new file ``predictions.npz`` should
-# be produced. It contains the output tensors.
+# Recall that the ``.tar`` model file includes a C++ library, a description of
+# the Relay model, and the parameters for the model. TVMC includes the TVM
+# runtime, which can load the model and make predictions against input. When
+# running the above command, TVMC outputs a new file, ``predictions.npz``, that
+# contains the model output tensors in NumPy format.
 #
 # In this example, we are running the model on the same machine that we used
-# for compilation. In some cases we might want to run it remotely via
-# an RPC Tracker. To read more about these options please check ``tvmc
-# run --help``.
-#
+# for compilation. In some cases we might want to run it remotely via an RPC
+# Tracker. To read more about these options please check ``tvmc run --help``.
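+#
+# As a sketch (assuming an RPC tracker is running at 127.0.0.1:9190 with a
+# device registered under the hypothetical key ``mydevice``), a remote run
+# might look like:
+#
+# .. code-block:: bash
+#
+#     tvmc run \
+#     --inputs imagenet_cat.npz \
+#     --output predictions.npz \
+#     --rpc-tracker 127.0.0.1:9190 \
+#     --rpc-key mydevice \
+#     resnet50-v2-7-tvm.tar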
 
-######################################################################
-# Output post-processing
-# ----------------------
+################################################################################
+# Output Post-Processing
+# ~~~~~~~~~~~~~~~~~~~~~~
 #
-# As previously mentioned, each model will have its own particular way
-# of providing output tensors.
+# As previously mentioned, each model will have its own particular way of
+# providing output tensors.
 #
-# In our case, we need to run some post-processing to render the
-# outputs from ResNet 50 V2 into a more human-readable form.
+# In our case, we need to run some post-processing to render the outputs from
+# ResNet 50 V2 into a more human-readable form, using the lookup-table provided
+# for the model.
 #
-# The script below shows an example of the post-processing to extract
-# labels from the output of our compiled module.
+# The script below shows an example of the post-processing to extract labels
+# from the output of our compiled module.
 #
-import os.path
-import numpy as np
-
-from scipy.special import softmax
-
-from tvm.contrib.download import download_testdata
-
-# Download a list of labels
-labels_url = "https://s3.amazonaws.com/onnx-model-zoo/synset.txt"
-labels_path = download_testdata(labels_url, "synset.txt", module="data")
-
-with open(labels_path, "r") as f:
-    labels = [l.rstrip() for l in f]
-
-output_file = "predictions.npz"
-
-# Open the output and read the output tensor
-if os.path.exists(output_file):
-    with np.load(output_file) as data:
-        scores = softmax(data["output_0"])
-        scores = np.squeeze(scores)
-        ranks = np.argsort(scores)[::-1]
-
-        for rank in ranks[0:5]:
-            print("class='%s' with probability=%f" % (labels[rank], scores[rank]))
-
-
-########################################################################
-# When running the script, a list of predictions should be printed similar
-# the the example below.
+# .. code-block:: python
+#     :caption: postprocess.py
+#     :name: postprocess.py
+#
+#     #!python ./postprocess.py
+#     import os.path
+#     import numpy as np
+#
+#     from scipy.special import softmax
+#
+#     from tvm.contrib.download import download_testdata
+#
+#     # Download a list of labels
+#     labels_url = "https://s3.amazonaws.com/onnx-model-zoo/synset.txt"
+#     labels_path = download_testdata(labels_url, "synset.txt", module="data")
+#
+#     with open(labels_path, "r") as f:
+#         labels = [l.rstrip() for l in f]
+#
+#     output_file = "predictions.npz"
+#
+#     # Open the output and read the output tensor
+#     if os.path.exists(output_file):
+#         with np.load(output_file) as data:
+#             scores = softmax(data["output_0"])
+#             scores = np.squeeze(scores)
+#             ranks = np.argsort(scores)[::-1]
+#
+#             for rank in ranks[0:5]:
+#                 print("class='%s' with probability=%f" % (labels[rank], scores[rank]))
+#
+# Running this script should produce the following output:
 #
 # .. code-block:: bash
 #
-#   $ python post_processing.py
-#   class=n02123045 tabby, tabby cat ; probability=446.000000
-#   class=n02123159 tiger cat ; probability=675.000000
-#   class=n02124075 Egyptian cat ; probability=836.000000
-#   class=n02129604 tiger, Panthera tigris ; probability=917.000000
-#   class=n04040759 radiator ; probability=213.000000
+#     python postprocess.py
 #
+#     # class='n02123045 tabby, tabby cat' with probability=0.610553
+#     # class='n02123159 tiger cat' with probability=0.367179
+#     # class='n02124075 Egyptian cat' with probability=0.019365
+#     # class='n02129604 tiger, Panthera tigris' with probability=0.001273
+#     # class='n04040759 radiator' with probability=0.000261
+#
+# Try replacing the cat image with other images, and see what sort of
+# predictions the ResNet model makes.
 
-
-######################################################################
-# Tuning the model
-# ----------------
+################################################################################
+# Automatically Tuning the ResNet Model
+# -------------------------------------
+#
+# The previous model was compiled to work on the TVM runtime, but did not
+# include any platform-specific optimizations. In this section, we will show you
+# how to build an optimized model using TVMC to target your working platform.
 #
 # In some cases, we might not get the expected performance when running
-# inferences using our compiled module. In cases like this, we can make use
-# of the auto-tuner, to find a better configuration for our model and
-# get a boost in performance.
-#
-# Tuning in TVM refers to the process by which a model is optimized
-# to run faster on a given target. This differs from training or
-# fine-tuning in that it does not affect the accuracy of the model,
-# but only the runtime performance.
-#
-# As part of the tuning process, TVM will try running many different
-# operator implementation variants to see which perform best. The
-# results of these runs are stored in a tuning records file, which is
+# inferences using our compiled module.  In cases like this, we can make use of
+# the auto-tuner, to find a better configuration for our model and get a boost
+# in performance. Tuning in TVM refers to the process by which a model is
+# optimized to run faster on a given target. This differs from training or
+# fine-tuning in that it does not affect the accuracy of the model, but only
+# the runtime performance. As part of the tuning process, TVM will try running
+# many different operator implementation variants to see which perform best.
+# The results of these runs are stored in a tuning records file, which is
 # ultimately the output of the ``tune`` subcommand.
 #
 # In the simplest form, tuning requires you to provide three things:
 #
-# - the target specification of the device you intend to run this model on;
-# - the path to an output file in which the tuning records will be stored, and finally,
+# - the target specification of the device you intend to run this model on
+# - the path to an output file in which the tuning records will be stored, and
+#   finally
 # - a path to the model to be tuned.
 #
-#
 # The example below demonstrates how that works in practice:
 #
 # .. code-block:: bash
 #
-#   tvmc tune \
+#     tvmc tune \
 #     --target "llvm" \
-#     --output autotuner_records.json \
+#     --output resnet50-v2-7-autotuner_records.json \
 #     resnet50-v2-7.onnx
 #
+# In this example, you will see better results if you indicate a more specific
+# target for the ``--target`` flag. For example, on an Intel i7 processor you
+# could use ``--target "llvm -mcpu=skylake"``. For this tuning example, we are
+# tuning locally on the CPU using LLVM as the compiler for the specified
+# architecture.
+#
+# TVMC will perform a search against the parameter space for the model, trying
+# out different configurations for operators and choosing the one that runs
+# fastest on your platform. Although this is a guided search based on the CPU
+# and model operations, it can still take several hours to complete the search.
+# The output of this search will be saved to the
+# ``resnet50-v2-7-autotuner_records.json`` file, which will later be used to
+# compile an optimized model.
+#
+# .. note:: Defining the Tuning Search Algorithm
+#
+#   By default this search is guided using an `XGBoost Grid` algorithm.
+#   Depending on your model complexity and amount of time available, you might
+#   want to choose a different algorithm. A full list is available by
+#   consulting ``tvmc tune --help``.
+#
+# The output will look something like this for a consumer-level Intel CPU:
+#
+# .. code-block:: bash
+#
+#   tvmc tune   --target "llvm -mcpu=broadwell"   --output resnet50-v2-7-autotuner_records.json   resnet50-v2-7.onnx
+#   # [Task  1/24]  Current/Best:    9.65/  23.16 GFLOPS | Progress: (60/1000) | 130.74 s Done.
+#   # [Task  1/24]  Current/Best:    3.56/  23.16 GFLOPS | Progress: (192/1000) | 381.32 s Done.
+#   # [Task  2/24]  Current/Best:   13.13/  58.61 GFLOPS | Progress: (960/1000) | 1190.59 s Done.
+#   # [Task  3/24]  Current/Best:   31.93/  59.52 GFLOPS | Progress: (800/1000) | 727.85 s Done.
+#   # [Task  4/24]  Current/Best:   16.42/  57.80 GFLOPS | Progress: (960/1000) | 559.74 s Done.
+#   # [Task  5/24]  Current/Best:   12.42/  57.92 GFLOPS | Progress: (800/1000) | 766.63 s Done.
+#   # [Task  6/24]  Current/Best:   20.66/  59.25 GFLOPS | Progress: (1000/1000) | 673.61 s Done.
+#   # [Task  7/24]  Current/Best:   15.48/  59.60 GFLOPS | Progress: (1000/1000) | 953.04 s Done.
+#   # [Task  8/24]  Current/Best:   31.97/  59.33 GFLOPS | Progress: (972/1000) | 559.57 s Done.
+#   # [Task  9/24]  Current/Best:   34.14/  60.09 GFLOPS | Progress: (1000/1000) | 479.32 s Done.
+#   # [Task 10/24]  Current/Best:   12.53/  58.97 GFLOPS | Progress: (972/1000) | 642.34 s Done.
+#   # [Task 11/24]  Current/Best:   30.94/  58.47 GFLOPS | Progress: (1000/1000) | 648.26 s Done.
+#   # [Task 12/24]  Current/Best:   23.66/  58.63 GFLOPS | Progress: (1000/1000) | 851.59 s Done.
+#   # [Task 13/24]  Current/Best:   25.44/  59.76 GFLOPS | Progress: (1000/1000) | 534.58 s Done.
+#   # [Task 14/24]  Current/Best:   26.83/  58.51 GFLOPS | Progress: (1000/1000) | 491.67 s Done.
+#   # [Task 15/24]  Current/Best:   33.64/  58.55 GFLOPS | Progress: (1000/1000) | 529.85 s Done.
+#   # [Task 16/24]  Current/Best:   14.93/  57.94 GFLOPS | Progress: (1000/1000) | 645.55 s Done.
+#   # [Task 17/24]  Current/Best:   28.70/  58.19 GFLOPS | Progress: (1000/1000) | 756.88 s Done.
+#   # [Task 18/24]  Current/Best:   19.01/  60.43 GFLOPS | Progress: (980/1000) | 514.69 s Done.
+#   # [Task 19/24]  Current/Best:   14.61/  57.30 GFLOPS | Progress: (1000/1000) | 614.44 s Done.
+#   # [Task 20/24]  Current/Best:   10.47/  57.68 GFLOPS | Progress: (980/1000) | 479.80 s Done.
+#   # [Task 21/24]  Current/Best:   34.37/  58.28 GFLOPS | Progress: (308/1000) | 225.37 s Done.
+#   # [Task 22/24]  Current/Best:   15.75/  57.71 GFLOPS | Progress: (1000/1000) | 1024.05 s Done.
+#   # [Task 23/24]  Current/Best:   23.23/  58.92 GFLOPS | Progress: (1000/1000) | 999.34 s Done.
+#   # [Task 24/24]  Current/Best:   17.27/  55.25 GFLOPS | Progress: (1000/1000) | 1428.74 s Done.
+#
+# Tuning sessions can take a long time, so ``tvmc tune`` offers many options to customize your tuning
+# process, in terms of number of repetitions (``--repeat`` and ``--number``, for example), the tuning
+# algorithm to be used, and so on. Check ``tvmc tune --help`` for more information.
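+#
+# As a sketch (confirm the exact flags with ``tvmc tune --help`` on your
+# version), a customized run with a random tuner and more trials might look
+# like:
+#
+# .. code-block:: bash
+#
+#   tvmc tune \
+#   --target "llvm" \
+#   --output resnet50-v2-7-autotuner_records.json \
+#   --repeat 3 \
+#   --number 5 \
+#   --trials 1500 \
+#   --tuner random \
+#   resnet50-v2-7.onnx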
+#
+
+################################################################################
+# Compiling an Optimized Model with Tuning Data
+# ----------------------------------------------
+#
+# As an output of the tuning process above, we obtained the tuning records
+# stored in ``resnet50-v2-7-autotuner_records.json``. This file can be used in
+# two ways:
+#
+# - As input to further tuning (via ``tvmc tune --tuning-records``).
+# - As input to the compiler.
+#
+# The compiler will use the results to generate high performance code for the
+# model on your specified target. To do that we can use ``tvmc compile
+# --tuning-records``. Check ``tvmc compile --help`` for more information.
+#
+# Now that tuning data for the model has been collected, we can re-compile the
+# model using optimized operators to speed up our computations.
+#
+# .. code-block:: bash
+#
+#   tvmc compile \
+#   --target "llvm" \
+#   --tuning-records resnet50-v2-7-autotuner_records.json  \
+#   --output resnet50-v2-7-tvm_autotuned.tar \
+#   resnet50-v2-7.onnx
+#
+# Verify that the optimized model runs and produces the same results:
+#
+# .. code-block:: bash
+#
+#   tvmc run \
+#   --inputs imagenet_cat.npz \
+#   --output predictions.npz \
+#   resnet50-v2-7-tvm_autotuned.tar
+#
+#   python postprocess.py
+#
+# Verifying that the predictions are the same:
+#
+# .. code-block:: bash
+#
+#   # class='n02123045 tabby, tabby cat' with probability=0.610550
+#   # class='n02123159 tiger cat' with probability=0.367181
+#   # class='n02124075 Egyptian cat' with probability=0.019365
+#   # class='n02129604 tiger, Panthera tigris' with probability=0.001273
+#   # class='n04040759 radiator' with probability=0.000261
+
+################################################################################
+# Comparing the Tuned and Untuned Models
+# --------------------------------------
+#
+# TVMC gives you tools for basic performance benchmarking between the models.
+# You can specify the number of repetitions and have TVMC report on the model
+# run time (independent of runtime startup). We can get a rough idea of how much
+# tuning has improved the model performance. For example, on a test Intel i7
+# system, we see that the tuned model runs 47% faster than the untuned model:
+#
+# .. code-block:: bash
 #
-# Tuning sessions can take a long time, so ``tvmc tune`` offers many options to
-# customize your tuning process, in terms of number of repetitions (``--repeat`` and
-# ``--number``, for example), the tuning algorithm to be use, and so on.
-# Check ``tvmc tune --help`` for more information.
+#   tvmc run \
+#   --inputs imagenet_cat.npz \
+#   --output predictions.npz  \
+#   --print-time \
+#   --repeat 100 \
+#   resnet50-v2-7-tvm_autotuned.tar
 #
-# As an output of the tuning process above, we obtained the tuning records stored
-# in ``autotuner_records.json``. This file can be used in two ways:
+#   # Execution time summary:
+#   # mean (s)   max (s)    min (s)    std (s)
+#   # 0.09219    0.11573    0.08985    0.00315
 #
-# - as an input to further tuning (via ``tvmc tune --tuning-records``), or
-# - as an input to the compiler
+#   tvmc run \
+#   --inputs imagenet_cat.npz \
+#   --output predictions.npz  \
+#   --print-time \
+#   --repeat 100 \
+#   resnet50-v2-7-tvm.tar
 #
-# The compiler will use the results to generate high performance code for the model
-# on your specified target. To do that we can use ``tvmc compile --tuning-records``.
-# Check ``tvmc compile --help`` for more information.
+#   # Execution time summary:
+#   # mean (s)   max (s)    min (s)    std (s)
+#   # 0.19332    0.21997    0.18504    0.00711
 #
 
 
-######################################################################
+################################################################################
 # Final Remarks
 # -------------
 #
-# In this tutorial, we presented TVMC, a command line driver for TVM.
-# We demonstrated how to compile, run and tune a model, as well
-# as discussed the need for pre and post processing of inputs and outputs.
+# In this tutorial, we presented TVMC, a command line driver for TVM. We
+# demonstrated how to compile, run, and tune a model. We also discussed the
+# need for pre- and post-processing of inputs and outputs. After the tuning
+# process, we demonstrated how to compare the performance of the unoptimized
+# and optimized models.
 #
 # Here we presented a simple example using ResNet 50 V2 locally. However, TVMC
 # supports many more features including cross-compilation, remote execution and
 # profiling/benchmarking.
 #
-# To see what other options are available, please have a look at ``tvmc --help``.
+# To see what other options are available, please have a look at ``tvmc
+# --help``.
 #
+# In the next tutorial, `Compiling and Optimizing a Model with the Python
+# AutoScheduler <auto_tuning_with_python>`_, we will cover the same compilation
+# and optimization steps using the Python interface.
diff --git a/docs/_downloads/2354a24ad8bc07194943c49f2fb48874/tune_conv2d_cuda.ipynb b/docs/_downloads/2354a24ad8bc07194943c49f2fb48874/tune_conv2d_cuda.ipynb
index 9daf994..3e2489f 100644
--- a/docs/_downloads/2354a24ad8bc07194943c49f2fb48874/tune_conv2d_cuda.ipynb
+++ b/docs/_downloads/2354a24ad8bc07194943c49f2fb48874/tune_conv2d_cuda.ipynb
@@ -87,7 +87,7 @@
       },
       "outputs": [],
       "source": [
-        "# inspect the best config\ndispatch_context = autotvm.apply_history_best(\"conv2d.log\")\nbest_config = dispatch_context.query(task.target, task.workload)\nprint(\"\\nBest config:\")\nprint(best_config)\n\n# apply history best from log file\nwith autotvm.apply_history_best(\"conv2d.log\"):\n    with tvm.target.Target(\"cuda\"):\n        s, arg_bufs = conv2d_no_batching(N, H, W, CO, CI, KH, KW, strides, padding)\n        func = tvm.build(s, arg_bufs)\n\n# check correctness\na_np  [...]
+        "# inspect the best config\ndispatch_context = autotvm.apply_history_best(\"conv2d.log\")\nbest_config = dispatch_context.query(task.target, task.workload)\nprint(\"\\nBest config:\")\nprint(best_config)\n\n# apply history best from log file\nwith autotvm.apply_history_best(\"conv2d.log\"):\n    with tvm.target.Target(\"cuda\"):\n        s, arg_bufs = conv2d_no_batching(N, H, W, CO, CI, KH, KW, strides, padding)\n        func = tvm.build(s, arg_bufs)\n\n# check correctness\na_np  [...]
       ]
     }
   ],
diff --git a/docs/_downloads/24a7471da81b18c4ba77d215289aed2f/relay_quick_start.ipynb b/docs/_downloads/24a7471da81b18c4ba77d215289aed2f/relay_quick_start.ipynb
index 3ae8ec5..f6be35e 100644
--- a/docs/_downloads/24a7471da81b18c4ba77d215289aed2f/relay_quick_start.ipynb
+++ b/docs/_downloads/24a7471da81b18c4ba77d215289aed2f/relay_quick_start.ipynb
@@ -33,7 +33,7 @@
       },
       "outputs": [],
       "source": [
-        "import numpy as np\n\nfrom tvm import relay\nfrom tvm.relay import testing\nimport tvm\nfrom tvm import te\nfrom tvm.contrib import graph_runtime\nimport tvm.testing"
+        "import numpy as np\n\nfrom tvm import relay\nfrom tvm.relay import testing\nimport tvm\nfrom tvm import te\nfrom tvm.contrib import graph_executor\nimport tvm.testing"
       ]
     },
     {
@@ -76,7 +76,7 @@
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "Run the generate library\n------------------------\nNow we can create graph runtime and run the module on Nvidia GPU.\n\n"
+        "Run the generate library\n------------------------\nNow we can create graph executor and run the module on Nvidia GPU.\n\n"
       ]
     },
     {
@@ -87,7 +87,7 @@
       },
       "outputs": [],
       "source": [
-        "# create random input\nctx = tvm.gpu()\ndata = np.random.uniform(-1, 1, size=data_shape).astype(\"float32\")\n# create module\nmodule = graph_runtime.GraphModule(lib[\"default\"](ctx))\n# set input and parameters\nmodule.set_input(\"data\", data)\n# run\nmodule.run()\n# get output\nout = module.get_output(0, tvm.nd.empty(out_shape)).asnumpy()\n\n# Print first 10 elements of output\nprint(out.flatten()[0:10])"
+        "# create random input\ndev = tvm.gpu()\ndata = np.random.uniform(-1, 1, size=data_shape).astype(\"float32\")\n# create module\nmodule = graph_executor.GraphModule(lib[\"default\"](dev))\n# set input and parameters\nmodule.set_input(\"data\", data)\n# run\nmodule.run()\n# get output\nout = module.get_output(0, tvm.nd.empty(out_shape)).asnumpy()\n\n# Print first 10 elements of output\nprint(out.flatten()[0:10])"
       ]
     },
     {
@@ -116,7 +116,7 @@
       },
       "outputs": [],
       "source": [
-        "# load the module back.\nloaded_lib = tvm.runtime.load_module(path_lib)\ninput_data = tvm.nd.array(np.random.uniform(size=data_shape).astype(\"float32\"))\n\nmodule = graph_runtime.GraphModule(loaded_lib[\"default\"](ctx))\nmodule.run(data=input_data)\nout_deploy = module.get_output(0).asnumpy()\n\n# Print first 10 elements of output\nprint(out_deploy.flatten()[0:10])\n\n# check whether the output from deployed module is consistent with original one\ntvm.testing.assert_allclose( [...]
+        "# load the module back.\nloaded_lib = tvm.runtime.load_module(path_lib)\ninput_data = tvm.nd.array(np.random.uniform(size=data_shape).astype(\"float32\"))\n\nmodule = graph_executor.GraphModule(loaded_lib[\"default\"](dev))\nmodule.run(data=input_data)\nout_deploy = module.get_output(0).asnumpy()\n\n# Print first 10 elements of output\nprint(out_deploy.flatten()[0:10])\n\n# check whether the output from deployed module is consistent with original one\ntvm.testing.assert_allclose [...]
       ]
     }
   ],
diff --git a/docs/_downloads/272a5a893d007658546dc0eaf0a7aeed/tune_relay_cuda.py b/docs/_downloads/272a5a893d007658546dc0eaf0a7aeed/tune_relay_cuda.py
index 148ebbf..50485c4 100644
--- a/docs/_downloads/272a5a893d007658546dc0eaf0a7aeed/tune_relay_cuda.py
+++ b/docs/_downloads/272a5a893d007658546dc0eaf0a7aeed/tune_relay_cuda.py
@@ -67,7 +67,7 @@ import tvm
 from tvm import relay, autotvm
 import tvm.relay.testing
 from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner
-import tvm.contrib.graph_runtime as runtime
+import tvm.contrib.graph_executor as runtime
 
 #################################################################
 # Define Network
@@ -237,14 +237,14 @@ def tune_and_evaluate(tuning_opt):
             lib = relay.build_module.build(mod, target=target, params=params)
 
         # load parameters
-        ctx = tvm.context(str(target), 0)
-        module = runtime.GraphModule(lib["default"](ctx))
+        dev = tvm.device(str(target), 0)
+        module = runtime.GraphModule(lib["default"](dev))
         data_tvm = tvm.nd.array((np.random.uniform(size=input_shape)).astype(dtype))
         module.set_input("data", data_tvm)
 
         # evaluate
         print("Evaluate inference time cost...")
-        ftimer = module.module.time_evaluator("run", ctx, number=1, repeat=600)
+        ftimer = module.module.time_evaluator("run", dev, number=1, repeat=600)
         prof_res = np.array(ftimer().results) * 1000  # convert to millisecond
         print(
             "Mean inference time (std dev): %.2f ms (%.2f ms)"
diff --git a/docs/_downloads/2771a7fc8bf8eeb7788823ff349aacc0/tune_network_cuda.py b/docs/_downloads/2771a7fc8bf8eeb7788823ff349aacc0/tune_network_cuda.py
index 5ed3cee..7b5619c 100644
--- a/docs/_downloads/2771a7fc8bf8eeb7788823ff349aacc0/tune_network_cuda.py
+++ b/docs/_downloads/2771a7fc8bf8eeb7788823ff349aacc0/tune_network_cuda.py
@@ -49,7 +49,7 @@ import numpy as np
 import tvm
 from tvm import relay, auto_scheduler
 import tvm.relay.testing
-from tvm.contrib import graph_runtime
+from tvm.contrib import graph_executor
 
 #################################################################
 # Define a Network
@@ -252,7 +252,7 @@ def run_tuning():
 #   The last line also prints the total number of measurement trials,
 #   total time spent on auto-tuning and the id of the next task to tune.
 #
-#   There will also be some "dmlc::Error"s and CUDA errors, because the
+#   There will also be some "tvm::Error"s and CUDA errors, because the
 #   auto-scheduler will try some invalid schedules.
 #   You can safely ignore them if the tuning can continue, because these
 #   errors are isolated from the main process.
@@ -280,15 +280,15 @@ with auto_scheduler.ApplyHistoryBest(log_file):
     with tvm.transform.PassContext(opt_level=3, config={"relay.backend.use_auto_scheduler": True}):
         lib = relay.build(mod, target=target, params=params)
 
-# Create graph runtime
-ctx = tvm.context(str(target), 0)
-module = graph_runtime.GraphModule(lib["default"](ctx))
+# Create graph executor
+dev = tvm.device(str(target), 0)
+module = graph_executor.GraphModule(lib["default"](dev))
 data_tvm = tvm.nd.array((np.random.uniform(size=input_shape)).astype(dtype))
 module.set_input("data", data_tvm)
 
 # Evaluate
 print("Evaluate inference time cost...")
-ftimer = module.module.time_evaluator("run", ctx, repeat=3, min_repeat_ms=500)
+ftimer = module.module.time_evaluator("run", dev, repeat=3, min_repeat_ms=500)
 prof_res = np.array(ftimer().results) * 1e3  # convert to millisecond
 print("Mean inference time (std dev): %.2f ms (%.2f ms)" % (np.mean(prof_res), np.std(prof_res)))
 
diff --git a/docs/_downloads/2c0ed53a9ebd68caf76cd8235fae2711/tune_relay_mobile_gpu.ipynb b/docs/_downloads/2c0ed53a9ebd68caf76cd8235fae2711/tune_relay_mobile_gpu.ipynb
index 705ba34..369ce55 100644
--- a/docs/_downloads/2c0ed53a9ebd68caf76cd8235fae2711/tune_relay_mobile_gpu.ipynb
+++ b/docs/_downloads/2c0ed53a9ebd68caf76cd8235fae2711/tune_relay_mobile_gpu.ipynb
@@ -33,7 +33,7 @@
       },
       "outputs": [],
       "source": [
-        "import os\n\nimport numpy as np\n\nimport tvm\nfrom tvm import relay, autotvm\nimport tvm.relay.testing\nfrom tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner\nfrom tvm.contrib.utils import tempdir\nimport tvm.contrib.graph_runtime as runtime"
+        "import os\n\nimport numpy as np\n\nimport tvm\nfrom tvm import relay, autotvm\nimport tvm.relay.testing\nfrom tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner\nfrom tvm.contrib.utils import tempdir\nimport tvm.contrib.graph_executor as runtime"
       ]
     },
     {
@@ -90,7 +90,7 @@
       },
       "outputs": [],
       "source": [
-        "#### DEVICE CONFIG ####\n\ntarget = tvm.target.Target(\"opencl -device=mali\")\n\n# Replace \"aarch64-linux-gnu\" with the correct target of your board.\n# This target host is used for cross compilation. You can query it by :code:`gcc -v` on your device.\ntarget_host = \"llvm -mtriple=aarch64-linux-gnu\"\n\n# Also replace this with the device key in your tracker\ndevice_key = \"rk3399\"\n\n# Set this to True if you use android phone\nuse_android = False\n\n#### TUNING OPTION ### [...]
+        "#### DEVICE CONFIG ####\n# Replace \"aarch64-linux-gnu\" with the correct target of your board.\n# This target host is used for cross compilation. You can query it by :code:`gcc -v` on your device.\ntarget = tvm.target.Target(\"opencl -device=mali\", host=\"llvm -mtriple=aarch64-linux-gnu\")\n\n# Also replace this with the device key in your tracker\ndevice_key = \"rk3399\"\n\n# Set this to True if you use android phone\nuse_android = False\n\n#### TUNING OPTION ####\nnetwork =  [...]
       ]
     },
     {
@@ -133,7 +133,7 @@
       },
       "outputs": [],
       "source": [
-        "def tune_and_evaluate(tuning_opt):\n    # extract workloads from relay program\n    print(\"Extract tasks...\")\n    mod, params, input_shape, _ = get_network(network, batch_size=1)\n    tasks = autotvm.task.extract_from_program(\n        mod[\"main\"],\n        target=target,\n        target_host=target_host,\n        params=params,\n        ops=(relay.op.get(\"nn.conv2d\"),),\n    )\n\n    # run tuning tasks\n    print(\"Tuning...\")\n    tune_tasks(tasks, **tuning_opt)\n\n    [...]
+        "def tune_and_evaluate(tuning_opt):\n    # extract workloads from relay program\n    print(\"Extract tasks...\")\n    mod, params, input_shape, _ = get_network(network, batch_size=1)\n    tasks = autotvm.task.extract_from_program(\n        mod[\"main\"],\n        target=target,\n        params=params,\n        ops=(relay.op.get(\"nn.conv2d\"),),\n    )\n\n    # run tuning tasks\n    print(\"Tuning...\")\n    tune_tasks(tasks, **tuning_opt)\n\n    # compile kernels with history be [...]
       ]
     },
     {
diff --git a/docs/_downloads/2c8ef0390ad4c53ca85671fa36c33b26/tune_conv2d_cuda.py b/docs/_downloads/2c8ef0390ad4c53ca85671fa36c33b26/tune_conv2d_cuda.py
index dc8e6e5..d14f9c3 100644
--- a/docs/_downloads/2c8ef0390ad4c53ca85671fa36c33b26/tune_conv2d_cuda.py
+++ b/docs/_downloads/2c8ef0390ad4c53ca85671fa36c33b26/tune_conv2d_cuda.py
@@ -230,15 +230,15 @@ a_np = np.random.uniform(size=(N, CI, H, W)).astype(np.float32)
 w_np = np.random.uniform(size=(CO, CI, KH, KW)).astype(np.float32)
 c_np = conv2d_nchw_python(a_np, w_np, strides, padding)
 
-ctx = tvm.gpu()
-a_tvm = tvm.nd.array(a_np, ctx=ctx)
-w_tvm = tvm.nd.array(w_np, ctx=ctx)
-c_tvm = tvm.nd.empty(c_np.shape, ctx=ctx)
+dev = tvm.gpu()
+a_tvm = tvm.nd.array(a_np, device=dev)
+w_tvm = tvm.nd.array(w_np, device=dev)
+c_tvm = tvm.nd.empty(c_np.shape, device=dev)
 func(a_tvm, w_tvm, c_tvm)
 
 tvm.testing.assert_allclose(c_np, c_tvm.asnumpy(), rtol=1e-2)
 
 # Evaluate running time. Here we choose a large repeat number (400) to reduce the noise
 # and the overhead of kernel launch. You can also use nvprof to validate the result.
-evaluator = func.time_evaluator(func.entry_name, ctx, number=400)
+evaluator = func.time_evaluator(func.entry_name, dev, number=400)
 print("Time cost of this operator: %f" % evaluator(a_tvm, w_tvm, c_tvm).mean)
diff --git a/docs/_downloads/2d6bc109462266f309063f204dc15e8e/autotvm_matmul.py b/docs/_downloads/2d6bc109462266f309063f204dc15e8e/autotvm_matmul.py
new file mode 100644
index 0000000..930e003
--- /dev/null
+++ b/docs/_downloads/2d6bc109462266f309063f204dc15e8e/autotvm_matmul.py
@@ -0,0 +1,376 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Optimizing Operators with Templates and AutoTVM
+===============================================
+**Authors**:
+`Lianmin Zheng <https://github.com/merrymercy>`_,
+`Chris Hoge <https://github.com/hogepodge>`_
+
+In this tutorial, we will show how the TVM Tensor Expression (TE) language
+can be used to write scheduling templates that can be searched by AutoTVM to
+find optimal configurations of scheduling variables. This process is called
+Auto-Tuning, and builds on TE to help automate the process of optimizing
+operations.
+
+This tutorial builds on the previous `tutorial on how to write a matrix
+multiplication using TE <tensor_expr_get_started>`_.
+
+There are two steps in auto-tuning.
+
+- The first step is defining a search space.
+- The second step is running a search algorithm to explore through this space.
+
+In this tutorial, you can learn how to perform these two steps in TVM. The whole
+workflow is illustrated by a matrix multiplication example.
+
+.. note::
+  Note that this tutorial will not run on Windows or recent versions of macOS.
+  To get it to run, you will need to wrap the body of this tutorial in a
+  :code:`if __name__ == "__main__":` block.
+"""
+
+################################################################################
+# Install dependencies
+# --------------------
+# To use the autotvm package in TVM, we need to install some extra dependencies.
+#
+# .. code-block:: bash
+#
+#   pip3 install --user psutil xgboost cloudpickle
+#
+# To make TVM run faster in tuning, it is recommended to use cython as the FFI
+# of TVM. In the root directory of TVM, execute:
+#
+# .. code-block:: bash
+#
+#   pip3 install --user cython
+#   sudo make cython3
+#
+# Now return to python code. Begin by importing the required packages.
+
+import logging
+import sys
+
+import numpy as np
+import tvm
+from tvm import te
+import tvm.testing
+
+# the module is called `autotvm`
+from tvm import autotvm
+
+################################################################################
+# Basic Matrix Multiplication with TE
+# -----------------------------------
+# Recall the basic implementation of matrix multiplication using TE. We write
+# it down here with a few changes. We will wrap the multiplication in a python
+# function definition. For simplicity, we will focus our attention on a split
+# optimization, using a fixed value that defines the block size of the
+# reordering.
+
+
+def matmul_basic(N, L, M, dtype):
+
+    A = te.placeholder((N, L), name="A", dtype=dtype)
+    B = te.placeholder((L, M), name="B", dtype=dtype)
+
+    k = te.reduce_axis((0, L), name="k")
+    C = te.compute((N, M), lambda i, j: te.sum(A[i, k] * B[k, j], axis=k), name="C")
+    s = te.create_schedule(C.op)
+
+    # schedule
+    y, x = s[C].op.axis
+    k = s[C].op.reduce_axis[0]
+
+    yo, yi = s[C].split(y, 8)
+    xo, xi = s[C].split(x, 8)
+
+    s[C].reorder(yo, xo, k, yi, xi)
+
+    return s, [A, B, C]
+
+
+################################################################################
+# Matrix Multiplication with AutoTVM
+# ----------------------------------
+# In the previous schedule code, we use a constant "8" as the tiling factor.
+# However, it might not be the best one because the best tiling factor depends
+# on the real hardware environment and the input shape.
+#
+# If you want the schedule code to be portable across a wider range of input
+# shapes and target hardware, it is better to define a set of candidate values
+# and pick the best one according to the measurement results on target
+# hardware.
+#
+# In autotvm, we can define a tunable parameter, or a "knob", for this kind of
+# value.
+
+################################################################################
+# A Basic Matrix Multiplication Template
+# --------------------------------------
+# We begin with an example of how to create a tunable parameter set for the
+# block size of the `split` scheduling operation.
+
+# Matmul V1: List candidate values
+@autotvm.template("tutorial/matmul_v1")  # 1. use a decorator
+def matmul_v1(N, L, M, dtype):
+    A = te.placeholder((N, L), name="A", dtype=dtype)
+    B = te.placeholder((L, M), name="B", dtype=dtype)
+
+    k = te.reduce_axis((0, L), name="k")
+    C = te.compute((N, M), lambda i, j: te.sum(A[i, k] * B[k, j], axis=k), name="C")
+    s = te.create_schedule(C.op)
+
+    # schedule
+    y, x = s[C].op.axis
+    k = s[C].op.reduce_axis[0]
+
+    # 2. get the config object
+    cfg = autotvm.get_config()
+
+    # 3. define search space
+    cfg.define_knob("tile_y", [1, 2, 4, 8, 16])
+    cfg.define_knob("tile_x", [1, 2, 4, 8, 16])
+
+    # 4. schedule according to config
+    yo, yi = s[C].split(y, cfg["tile_y"].val)
+    xo, xi = s[C].split(x, cfg["tile_x"].val)
+
+    s[C].reorder(yo, xo, k, yi, xi)
+
+    return s, [A, B, C]
+
+
+################################################################################
+# Here we make four modifications to the previous schedule code and get a
+# tunable "template". We can explain the modifications one by one.
+#
+# 1. Use a decorator to mark this function as a simple template.
+# 2. Get a config object: You can regard this :code:`cfg` as an argument to
+#    this function, though we obtain it in a different way. With this argument,
+#    this function is no longer a deterministic schedule. Instead, we can pass
+#    different configurations to this function and get different schedules. A
+#    function that uses a configuration object like this is called a "template".
+#
+#    To make the template function more compact, we can do two things to define
+#    the parameter search space within a single function.
+#
+#    1. Define a search space across a set of values. This is done by making
+#       :code:`cfg` a :any:`ConfigSpace` object. It will collect all of the
+#       tunable knobs in this function and build a search space from it.
+#    2. Schedule according to an entity in this space. This is done by making
+#       :code:`cfg` a :any:`ConfigEntity` object. When it is a
+#       :any:`ConfigEntity`, it will ignore all space definition API (namely,
+#       :code:`cfg.define_XXXXX(...)`). Instead, it will store deterministic
+#       values for all tunable knobs, and we schedule according to these values.
+#
+#    During auto-tuning, we will first call this template with a
+#    :any:`ConfigSpace` object to build the search space. Then we call this
+#    template with different :any:`ConfigEntity` in the built space to get
+#    different schedules. Finally we will measure the code generated by
+#    different schedules and pick the best one.
+#
+# 3. Define two tunable knobs. The first one is :code:`tile_y` with 5 possible
+#    values. The second one is :code:`tile_x` with the same list of possible
+#    values. These two knobs are independent, so they span a search space of
+#    size 5x5 = 25.
+# 4. The configuration knobs are passed to the :code:`split` schedule
+#    operation, allowing us to schedule according to the deterministic values
+#    we previously defined in :code:`cfg`, as sketched below.
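+#
+# As a small sketch (using only calls that already appear in the template
+# above), defining a knob and then reading the chosen value looks like this.
+# Note that a concrete value is only available when ``cfg`` is a
+# :any:`ConfigEntity`:
+#
+# .. code-block:: python
+#
+#   cfg = autotvm.get_config()
+#   cfg.define_knob("tile_y", [1, 2, 4, 8, 16])
+#   tile_y = cfg["tile_y"].val  # the chosen tiling factor for this config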
+
+################################################################################
+# A Matrix Multiplication Template with the Advanced Parameter API
+# ----------------------------------------------------------------
+# In the previous template, we manually listed all of the possible values for a
+# knob. This is the lowest level API to define the space, and gives an explicit
+# enumeration of the parameter space to search. However, we also provide
+# another set of APIs that can make the definition of the search space easier
+# and smarter. Where possible, we recommend you use this higher-level API.
+#
+# In the following example, we use :any:`ConfigSpace.define_split` to define a
+# split knob. It will enumerate all the possible ways to split an axis and
+# construct the space.
+#
+# We also have :any:`ConfigSpace.define_reorder` for reorder knobs and
+# :any:`ConfigSpace.define_annotate` for annotations such as unrolling,
+# vectorization, and thread binding. When the high-level API cannot meet your
+# requirements, you can always fall back to using the low-level API.
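+#
+# For example, a sketch of these higher-level knobs (not used in this
+# tutorial's template; the axis names are those produced by ``split`` above):
+#
+# .. code-block:: python
+#
+#   cfg.define_reorder("reorder", [yo, xo, yi, xi], policy="all")
+#   cfg.define_annotate("ann", [yi, xi], policy="try_unroll_vec")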
+
+
+@autotvm.template("tutorial/matmul")
+def matmul(N, L, M, dtype):
+    A = te.placeholder((N, L), name="A", dtype=dtype)
+    B = te.placeholder((L, M), name="B", dtype=dtype)
+
+    k = te.reduce_axis((0, L), name="k")
+    C = te.compute((N, M), lambda i, j: te.sum(A[i, k] * B[k, j], axis=k), name="C")
+    s = te.create_schedule(C.op)
+
+    # schedule
+    y, x = s[C].op.axis
+    k = s[C].op.reduce_axis[0]
+
+    ##### define space begin #####
+    cfg = autotvm.get_config()
+    cfg.define_split("tile_y", y, num_outputs=2)
+    cfg.define_split("tile_x", x, num_outputs=2)
+    ##### define space end #####
+
+    # schedule according to config
+    yo, yi = cfg["tile_y"].apply(s, C, y)
+    xo, xi = cfg["tile_x"].apply(s, C, x)
+
+    s[C].reorder(yo, xo, k, yi, xi)
+
+    return s, [A, B, C]
+
+
+################################################################################
+# .. note:: More Explanation on :code:`cfg.define_split`
+#
+#  In this template, :code:`cfg.define_split("tile_y", y, num_outputs=2)` will
+#  enumerate all possible combinations that can split axis y into two axes with
+#  factors of the length of y. For example, if the length of y is 32 and we
+#  want to split it into two axes using factors of 32, then there are 6
+#  possible values for (length of outer axis, length of inner axis) pair,
+#  namely (32, 1), (16, 2), (8, 4), (4, 8), (2, 16) or (1, 32). These are the
+#  6 possible values of ``tile_y``.
+#
+#  During scheduling, :code:`cfg["tile_y"]` is a :code:`SplitEntity` object.
+#  It stores the lengths of the outer and inner axes in
+#  :code:`cfg["tile_y"].size` (a tuple with two elements). In this template,
+#  we apply it by using :code:`yo, yi = cfg["tile_y"].apply(s, C, y)`.
+#  This is equivalent to :code:`yo, yi = s[C].split(y,
+#  cfg["tile_y"].size[1])` or :code:`yo, yi = s[C].split(y,
+#  nparts=cfg["tile_y"].size[0])`.
+#
+#  The advantage of using the cfg.apply API is that it makes multi-level
+#  splits (that is, when ``num_outputs >= 3``) easier.
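+#
+#  To make the enumeration concrete, the factor pairs for an axis of length 32
+#  can be listed with plain Python (an illustration only, not an AutoTVM API):
+#
+#  .. code-block:: python
+#
+#    pairs = [(32 // f, f) for f in [1, 2, 4, 8, 16, 32]]
+#    # [(32, 1), (16, 2), (8, 4), (4, 8), (2, 16), (1, 32)]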
+
+################################################################################
+# Step 2: Use AutoTVM to Optimize the Matrix Multiplication
+# ---------------------------------------------------------
+# In Step 1, we wrote a matrix multiplication template that allowed us to
+# parameterize the block size used in the ``split`` schedule. We can now conduct
+# a search over this parameter space. The next step is to pick a tuner to guide
+# the exploration of this space.
+#
+# Auto-tuners in TVM
+# ~~~~~~~~~~~~~~~~~~
+# The job for a tuner can be described by the following pseudo code:
+#
+#   .. code-block:: c
+#
+#    ct = 0
+#    while ct < max_number_of_trials:
+#        propose a batch of configs
+#        measure this batch of configs on real hardware and get results
+#        ct += batch_size
+#
+# When proposing the next batch of configs, the tuner can take different
+# strategies. Some of the tuner strategies provided by TVM include:
+#
+# * :any:`RandomTuner`: Enumerates the space in a random order
+# * :any:`GridSearchTuner`: Enumerates the space in a grid search order
+# * :any:`GATuner`: Uses a genetic algorithm to search through the space
+# * :any:`XGBTuner`: Uses a model-based method. Trains an XGBoost model to
+#   predict the speed of the lowered IR and picks the next batch according to
+#   the prediction.
+#
+# You can choose the tuner according to the size of your space, your time
+# budget, and other factors. For example, if your space is very small (less
+# than 1000), a gridsearch tuner or a random tuner is good enough. If your
+# space is at the level of 10^9 (this is the space size of a conv2d operator on
+# a CUDA GPU), the :any:`XGBTuner` can explore it more efficiently and find
+# better configs.
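+#
+# For example, switching to the model-based tuner is a one-line change,
+# mirroring the :any:`RandomTuner` construction used below (a sketch; ``task``
+# is the tuning task created in the next step):
+#
+# .. code-block:: python
+#
+#   tuner = autotvm.tuner.XGBTuner(task)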
+
+################################################################################
+# Begin tuning
+# ~~~~~~~~~~~~
+# Here we continue our matrix multiplication example. First we create a tuning
+# task. We can also inspect the initialized search space. In this case, for a
+# 512x512 square matrix multiplication, the space size is 10x10=100. Note that
+# the task and search space are independent of the tuner picked.
+
+N, L, M = 512, 512, 512
+task = autotvm.task.create("tutorial/matmul", args=(N, L, M, "float32"), target="llvm")
+print(task.config_space)
+
+################################################################################
+# Then we need to define how to measure the generated code and pick a tuner.
+# Since our space is small, a random tuner is just okay.
+#
+# We only make 10 trials in this tutorial for demonstration. In practice, you
+# can do more trials according to your time budget. We will log the tuning
+# results into a log file. This file can be used to choose the best
+# configuration discovered by the tuner later.
+
+# logging config (for printing tuning log to the screen)
+logging.getLogger("autotvm").setLevel(logging.DEBUG)
+logging.getLogger("autotvm").addHandler(logging.StreamHandler(sys.stdout))
+
+################################################################################
+# There are two steps for measuring a config: build and run. By default, we use
+# all CPU cores to compile the program, then measure the configs sequentially.
+# To help reduce variance, we take 5 measurements and average them.
+measure_option = autotvm.measure_option(builder="local", runner=autotvm.LocalRunner(number=5))
+
+# Begin tuning with RandomTuner, log records to file `matmul.log`
+# You can use alternatives like XGBTuner.
+tuner = autotvm.tuner.RandomTuner(task)
+tuner.tune(
+    n_trial=10,
+    measure_option=measure_option,
+    callbacks=[autotvm.callback.log_to_file("matmul.log")],
+)
+
+################################################################################
+# With tuning completed, we can choose the configuration from the log file that
+# has the best measured performance and compile the schedule with the
+# corresponding parameters. We also do a quick verification that the schedule
+# is producing correct answers. We can call the function :code:`matmul` directly
+# under the :any:`autotvm.apply_history_best` context. When we call this
+# function, it will query the dispatch context with its argument and get the
+# best config with the same argument.
+
+# apply history best from log file
+with autotvm.apply_history_best("matmul.log"):
+    with tvm.target.Target("llvm"):
+        s, arg_bufs = matmul(N, L, M, "float32")
+        func = tvm.build(s, arg_bufs)
+
+# check correctness
+a_np = np.random.uniform(size=(N, L)).astype(np.float32)
+b_np = np.random.uniform(size=(L, M)).astype(np.float32)
+c_np = a_np.dot(b_np)
+
+c_tvm = tvm.nd.empty(c_np.shape)
+func(tvm.nd.array(a_np), tvm.nd.array(b_np), c_tvm)
+
+tvm.testing.assert_allclose(c_np, c_tvm.asnumpy(), rtol=1e-4)
+
+################################################################################
+# Final Notes and Summary
+# -----------------------
+# In this tutorial, we have shown how to build operator templates that allow
+# TVM to search a parameter space and choose optimized schedule configurations.
+# To gain a deeper understanding of how this works, we recommend expanding on
+# this example by adding new search parameters to the schedule based on
+# schedule operations demonstrated in the `Getting Started With Tensor
+# Expressions <tensor_expr_get_started>`_ tutorial. In the upcoming sections, we
+# will demonstrate the AutoScheduler, a method for TVM to optimize common
+# operators without the need for a user-defined template.
diff --git a/docs/_downloads/2e974b05b6d59fcf944f96d27106b994/from_keras.ipynb b/docs/_downloads/2e974b05b6d59fcf944f96d27106b994/from_keras.ipynb
index 2380bf8..5738c9b 100644
--- a/docs/_downloads/2e974b05b6d59fcf944f96d27106b994/from_keras.ipynb
+++ b/docs/_downloads/2e974b05b6d59fcf944f96d27106b994/from_keras.ipynb
@@ -80,7 +80,7 @@
       },
       "outputs": [],
       "source": [
-        "shape_dict = {\"input_1\": data.shape}\nmod, params = relay.frontend.from_keras(keras_resnet50, shape_dict)\n# compile the model\ntarget = \"cuda\"\nctx = tvm.gpu(0)\nwith tvm.transform.PassContext(opt_level=3):\n    executor = relay.build_module.create_executor(\"graph\", mod, ctx, target)"
+        "shape_dict = {\"input_1\": data.shape}\nmod, params = relay.frontend.from_keras(keras_resnet50, shape_dict)\n# compile the model\ntarget = \"cuda\"\ndev = tvm.gpu(0)\nwith tvm.transform.PassContext(opt_level=3):\n    executor = relay.build_module.create_executor(\"graph\", mod, dev, target)"
       ]
     },
     {
diff --git a/docs/_downloads/2f6dcf56b15f857f94b6d320c1ace6e5/from_coreml.ipynb b/docs/_downloads/2f6dcf56b15f857f94b6d320c1ace6e5/from_coreml.ipynb
index 04f282f..4671279 100644
--- a/docs/_downloads/2f6dcf56b15f857f94b6d320c1ace6e5/from_coreml.ipynb
+++ b/docs/_downloads/2f6dcf56b15f857f94b6d320c1ace6e5/from_coreml.ipynb
@@ -98,7 +98,7 @@
       },
       "outputs": [],
       "source": [
-        "from tvm.contrib import graph_runtime\n\nctx = tvm.cpu(0)\ndtype = \"float32\"\nm = graph_runtime.GraphModule(lib[\"default\"](ctx))\n# set inputs\nm.set_input(\"image\", tvm.nd.array(x.astype(dtype)))\n# execute\nm.run()\n# get outputs\ntvm_output = m.get_output(0)\ntop1 = np.argmax(tvm_output.asnumpy()[0])"
+        "from tvm.contrib import graph_executor\n\ndev = tvm.cpu(0)\ndtype = \"float32\"\nm = graph_executor.GraphModule(lib[\"default\"](dev))\n# set inputs\nm.set_input(\"image\", tvm.nd.array(x.astype(dtype)))\n# execute\nm.run()\n# get outputs\ntvm_output = m.get_output(0)\ntop1 = np.argmax(tvm_output.asnumpy()[0])"
       ]
     },
     {
diff --git a/docs/_downloads/30015213c2882505d466865fafaed52d/from_caffe2.ipynb b/docs/_downloads/30015213c2882505d466865fafaed52d/from_caffe2.ipynb
index 5f6fc42..0185e28 100644
--- a/docs/_downloads/30015213c2882505d466865fafaed52d/from_caffe2.ipynb
+++ b/docs/_downloads/30015213c2882505d466865fafaed52d/from_caffe2.ipynb
@@ -87,7 +87,7 @@
       },
       "outputs": [],
       "source": [
-        "import tvm\nfrom tvm import te\nfrom tvm.contrib import graph_runtime\n\n# context x86 CPU, use tvm.gpu(0) if you run on GPU\nctx = tvm.cpu(0)\n# create a runtime executor module\nm = graph_runtime.GraphModule(lib[\"default\"](ctx))\n# set inputs\nm.set_input(input_name, tvm.nd.array(data.astype(\"float32\")))\n# execute\nm.run()\n# get outputs\ntvm_out = m.get_output(0)\ntop1_tvm = np.argmax(tvm_out.asnumpy()[0])"
+        "import tvm\nfrom tvm import te\nfrom tvm.contrib import graph_executor\n\n# context x86 CPU, use tvm.gpu(0) if you run on GPU\ndev = tvm.cpu(0)\n# create a runtime executor module\nm = graph_executor.GraphModule(lib[\"default\"](dev))\n# set inputs\nm.set_input(input_name, tvm.nd.array(data.astype(\"float32\")))\n# execute\nm.run()\n# get outputs\ntvm_out = m.get_output(0)\ntop1_tvm = np.argmax(tvm_out.asnumpy()[0])"
       ]
     },
     {
diff --git a/docs/_downloads/33a19782c8aaf9fc62e565c57df5caca/deploy_sparse.py b/docs/_downloads/33a19782c8aaf9fc62e565c57df5caca/deploy_sparse.py
index 9641fb8..1fcb1b3 100644
--- a/docs/_downloads/33a19782c8aaf9fc62e565c57df5caca/deploy_sparse.py
+++ b/docs/_downloads/33a19782c8aaf9fc62e565c57df5caca/deploy_sparse.py
@@ -81,8 +81,8 @@ import time
 import itertools
 import numpy as np
 import tensorflow as tf
-from tvm import relay
-from tvm.contrib import graph_runtime
+from tvm import relay, runtime
+from tvm.contrib import graph_executor
 from tvm.relay import data_dep_optimization as ddo
 from tensorflow.python.framework.convert_to_constants import (
     convert_variables_to_constants_v2,
@@ -106,7 +106,7 @@ seq_len = 128
 # appropriately for your specific machine. CUDA and ROCm are also supported.
 target = "llvm"
 # Which device to run on. Should be one of tvm.cpu() or tvm.gpu().
-ctx = tvm.cpu()
+dev = tvm.cpu()
 # If true, then a sparse variant of the network will be run and
 # benchmarked.
 measure_sparse = True
@@ -196,7 +196,7 @@ def import_graphdef(
             with open(os.path.join(abs_path, relay_file), "w") as fo:
                 fo.write(tvm.ir.save_json(mod))
             with open(os.path.join(abs_path, relay_params), "wb") as fo:
-                fo.write(relay.save_param_dict(params))
+                fo.write(runtime.save_param_dict(params))
 
     return mod, params, shape_dict
 
@@ -208,18 +208,18 @@ def import_graphdef(
 # the weights are sparse, we won't see any speedup because we are using
 # regular dense matrix multiplications on these dense (but mostly zero)
 # tensors instead of sparse aware kernels.
-def run_relay_graph(mod, params, shape_dict, target, ctx):
+def run_relay_graph(mod, params, shape_dict, target, dev):
     with relay.build_config(opt_level=3):
         lib = relay.build(mod, target=target, params=params)
     input_shape = shape_dict["input_1"]
     dummy_data = np.random.uniform(size=input_shape, low=0, high=input_shape[1]).astype("int32")
 
-    m = graph_runtime.GraphModule(lib["default"](ctx))
+    m = graph_executor.GraphModule(lib["default"](dev))
     m.set_input(0, dummy_data)
     m.run()
     tvm_output = m.get_output(0)
 
-    ftimer = m.module.time_evaluator("run", ctx, repeat=5, number=5)
+    ftimer = m.module.time_evaluator("run", dev, repeat=5, number=5)
     prof_res = np.array(ftimer().results) * 1000
     print(
         "%-20s %-19s (%s)"
@@ -228,9 +228,9 @@ def run_relay_graph(mod, params, shape_dict, target, ctx):
     return tvm_output
 
 
-def run_dense(mod, params, shape_dict, target, ctx):
+def run_dense(mod, params, shape_dict, target, dev):
     print("Dense Model Benchmark:")
-    return run_relay_graph(mod, params, shape_dict, target, ctx)
+    return run_relay_graph(mod, params, shape_dict, target, dev)
 
 
 ###############################################################################
@@ -295,13 +295,13 @@ def random_sparse_bert_params(func, params, density, BS_R, BS_C):
     return new_params
 
 
-def run_sparse(mod, params, shape_dict, target, ctx, bs_r, sparsity, gen_weights):
+def run_sparse(mod, params, shape_dict, target, dev, bs_r, sparsity, gen_weights):
     mod, params = ddo.simplify_fc_transpose.convert(mod["main"], params)
     if gen_weights:
         params = random_sparse_bert_params(mod, params, BS_R=bs_r, BS_C=1, density=1 - sparsity)
     mod, params = ddo.bsr_dense.convert(mod, params, (bs_r, 1), sparsity_threshold=0.8)
     print("Block Sparse Model with {blocksize}x1 blocks:".format(blocksize=bs_r))
-    return run_relay_graph(mod, params, shape_dict, target, ctx)
+    return run_relay_graph(mod, params, shape_dict, target, dev)
 
 
 ###############################################################################
@@ -312,10 +312,10 @@ def run_sparse(mod, params, shape_dict, target, ctx, bs_r, sparsity, gen_weights
 # you'll need to uncomment the last line first.
 def benchmark():
     mod, params, shape_dict = import_graphdef(name, batch_size, seq_len)
-    run_dense(mod, params, shape_dict, target, ctx)
+    run_dense(mod, params, shape_dict, target, dev)
     if measure_sparse:
         gen_weights = "prune" not in name
-        run_sparse(mod, params, shape_dict, target, ctx, bs_r, sparsity, gen_weights)
+        run_sparse(mod, params, shape_dict, target, dev, bs_r, sparsity, gen_weights)
 
 
 # benchmark()
diff --git a/docs/_downloads/37c76200603adf82ebeffc23bdef8d31/tensor_expr_get_started.py b/docs/_downloads/37c76200603adf82ebeffc23bdef8d31/tensor_expr_get_started.py
index 7f1bb6a..4f8f041 100644
--- a/docs/_downloads/37c76200603adf82ebeffc23bdef8d31/tensor_expr_get_started.py
+++ b/docs/_downloads/37c76200603adf82ebeffc23bdef8d31/tensor_expr_get_started.py
@@ -17,303 +17,886 @@
 """
 .. _tutorial-tensor-expr-get-started:
 
-Get Started with Tensor Expression
-==================================
+Working with Operators Using Tensor Expressions
+===============================================
 **Author**: `Tianqi Chen <https://tqchen.github.io>`_
 
-This is an introductory tutorial to the Tensor expression language in TVM.
-TVM uses a domain specific tensor expression for efficient kernel construction.
+In this tutorial we will turn our attention to how TVM works with Tensor
+Expressions (TE) to create a space to search for performant configurations. TE
+describes tensor computations in a pure functional language (that is, each
+expression has no side effects). When viewed in the context of TVM as a whole,
+Relay describes a computation as a set of operators, and each of these
+operators can be represented as a TE expression that takes input tensors and
+produces an output tensor. It's important to note that the tensor isn't
+necessarily a fully materialized array; rather, it is a representation of a
+computation. If you want to produce a computation from a
+TE, you will need to use the scheduling features of TVM.
 
-In this tutorial, we will demonstrate the basic workflow to use
-the tensor expression language.
+This is an introductory tutorial to the Tensor expression language in TVM. TVM
+uses a domain specific tensor expression for efficient kernel construction. We
+will demonstrate the basic workflow with two examples of using the tensor expression
+language. The first example introduces TE and scheduling with vector
+addition. The second expands on these concepts with a step-by-step optimization
+of a matrix multiplication with TE. This matrix multiplication example will
+serve as the comparative basis for future tutorials covering more advanced
+features of TVM.
 """
-from __future__ import absolute_import, print_function
+
+################################################################################
+# Example 1: Writing and Scheduling Vector Addition in TE for CPU
+# ---------------------------------------------------------------
+#
+# Let's look at an example in Python in which we will implement a TE for
+# vector addition, followed by a schedule targeted towards a CPU. We begin by
+# initializing a TVM environment.
 
 import tvm
 import tvm.testing
 from tvm import te
 import numpy as np
 
-# Global declarations of environment.
+# You will get better performance if you can identify the CPU you are targeting
+# and specify it. If you're using llvm, you can find the CPU type with the
+# command ``llc --version``, and you can check ``/proc/cpuinfo`` for
+# additional extensions that your processor might support. For example, you
+# could use ``tgt = "llvm -mcpu=skylake"``.
 
-tgt_host = "llvm"
-# Change it to respective GPU if gpu is enabled Ex: cuda, opencl, rocm
-tgt = "cuda"
+tgt = tvm.target.Target(target="llvm", host="llvm")
+
+################################################################################
+# Describing the Vector Computation
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# We describe a vector addition computation. TVM adopts tensor semantics, with
+# each intermediate result represented as a multi-dimensional array. The user
+# needs to describe the computation rule that generates the tensors. We first
+# define a symbolic variable n to represent the shape. We then define two
+# placeholder Tensors, ``A`` and ``B``, with given shape ``(n,)``. We then
+# describe the result tensor ``C``, with a ``compute`` operation. The
+# ``compute`` defines a computation, with the output conforming to the
+# specified tensor shape and the computation to be performed at each position
+# in the tensor defined by the lambda function. Note that while ``n`` is a
+# variable, it defines a consistent shape between the ``A``, ``B`` and ``C``
+# tensors. Remember, no actual computation happens during this phase, as we
+# are only declaring how the computation should be done.
 
-######################################################################
-# Vector Add Example
-# ------------------
-# In this tutorial, we will use a vector addition example to demonstrate
-# the workflow.
-#
 
-######################################################################
-# Describe the Computation
-# ------------------------
-# As a first step, we need to describe our computation.
-# TVM adopts tensor semantics, with each intermediate result
-# represented as a multi-dimensional array. The user needs to describe
-# the computation rule that generates the tensors.
-#
-# We first define a symbolic variable n to represent the shape.
-# We then define two placeholder Tensors, A and B, with given shape (n,)
-#
-# We then describe the result tensor C, with a compute operation.  The
-# compute function takes the shape of the tensor, as well as a lambda
-# function that describes the computation rule for each position of
-# the tensor.
-#
-# No computation happens during this phase, as we are only declaring how
-# the computation should be done.
-#
 n = te.var("n")
 A = te.placeholder((n,), name="A")
 B = te.placeholder((n,), name="B")
 C = te.compute(A.shape, lambda i: A[i] + B[i], name="C")
-print(type(C))
 
-######################################################################
-# Schedule the Computation
-# ------------------------
-# While the above lines describe the computation rule, we can compute
-# C in many ways since the axis of C can be computed in a data
-# parallel manner.  TVM asks the user to provide a description of the
-# computation called a schedule.
-#
-# A schedule is a set of transformation of computation that transforms
-# the loop of computations in the program.
+################################################################################
+# .. note:: Lambda Functions
 #
-# After we construct the schedule, by default the schedule computes
-# C in a serial manner in a row-major order.
+# The second argument to the ``te.compute`` method is the function that
+# performs the computation. In this example, we're using an anonymous function,
+# also known as a ``lambda`` function, to define the computation, in this case
+# addition on the ``i``th element of ``A`` and ``B``.
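+#
+# As a sketch, the same computation could be written with a named Python
+# function instead of a lambda, since ``te.compute`` accepts any callable over
+# the index variables:
+#
+# .. code-block:: python
+#
+#   def add_elements(i):
+#       return A[i] + B[i]
+#
+#   C = te.compute(A.shape, add_elements, name="C")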
+
+################################################################################
+# Create a Default Schedule for the Computation
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# While the above lines describe the computation rule, we can compute ``C`` in
+# many different ways to fit different devices. For a tensor with multiple
+# axes, you can choose which axis to iterate over first, or computations can be
+# split across different threads. TVM requires the user to provide a
+# schedule, which is a description of how the computation should be performed.
+# Scheduling operations within TE can change loop orders, split computations
+# across different threads, and group blocks of data together, among other
+# operations. An important concept behind schedules is that they only describe
+# how the computation is performed, so different schedules for the same TE will
+# produce the same result.
+#
+# TVM allows you to create a naive schedule that will compute ``C`` by
+# iterating in row-major order.
 #
 # .. code-block:: c
 #
 #   for (int i = 0; i < n; ++i) {
 #     C[i] = A[i] + B[i];
 #   }
-#
+
 s = te.create_schedule(C.op)
 
 ######################################################################
-# We used the split construct to split the first axis of C,
-# this will split the original iteration axis into product of
-# two iterations. This is equivalent to the following code.
-#
-# .. code-block:: c
-#
-#   for (int bx = 0; bx < ceil(n / 64); ++bx) {
-#     for (int tx = 0; tx < 64; ++tx) {
-#       int i = bx * 64 + tx;
-#       if (i < n) {
-#         C[i] = A[i] + B[i];
-#       }
-#     }
-#   }
-#
-bx, tx = s[C].split(C.op.axis[0], factor=64)
+# Compile and Evaluate the Default Schedule
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# With the TE expression and a schedule, we can produce runnable code for our
+# target language and architecture, in this case LLVM and a CPU. We provide
+# TVM with the schedule, a list of the TE expressions that are in the schedule,
+# the target and host, and the name of the function we are producing. The
+# result is a type-erased function that can be called directly from Python.
+#
+# In the following line, we use tvm.build to create a function. The build
+# function takes the schedule, the desired signature of the function (including
+# the inputs and outputs) as well as the target language we want to compile to.
 
-######################################################################
-# Finally we bind the iteration axis bx and tx to threads in the GPU
-# compute grid. These are GPU specific constructs that allow us
-# to generate code that runs on GPU.
-#
-if tgt == "cuda" or tgt == "rocm" or tgt.startswith("opencl"):
-    s[C].bind(bx, te.thread_axis("blockIdx.x"))
-    s[C].bind(tx, te.thread_axis("threadIdx.x"))
+fadd = tvm.build(s, [A, B, C], tgt, name="myadd")
 
-######################################################################
-# Compilation
-# -----------
-# After we have finished specifying the schedule, we can compile it
-# into a TVM function. By default TVM compiles into a type-erased
-# function that can be directly called from the python side.
-#
-# In the following line, we use tvm.build to create a function.
-# The build function takes the schedule, the desired signature of the
-# function (including the inputs and outputs) as well as target language
-# we want to compile to.
-#
-# The result of compilation fadd is a GPU device function (if GPU is
-# involved) as well as a host wrapper that calls into the GPU
-# function.  fadd is the generated host wrapper function, it contains
-# a reference to the generated device function internally.
-#
-fadd = tvm.build(s, [A, B, C], tgt, target_host=tgt_host, name="myadd")
+################################################################################
+# Let's run the function, and compare the output to the same computation in
+# numpy. The compiled TVM function exposes a concise C API that can be invoked
+# from any language. We begin by creating a device (a CPU in this example)
+# that TVM can compile the schedule to. In this case the device is an LLVM CPU
+# target. We can then initialize the tensors on our device and perform the
+# custom addition operation. To verify that the computation is correct, we can
+# compare the output tensor ``c`` to the same computation performed by numpy.
 
-######################################################################
-# Run the Function
-# ----------------
-# The compiled TVM function is exposes a concise C API
-# that can be invoked from any language.
-#
-# We provide a minimal array API in python to aid quick testing and prototyping.
-# The array API is based on the `DLPack <https://github.com/dmlc/dlpack>`_ standard.
-#
-# - We first create a GPU context.
-# - Then tvm.nd.array copies the data to the GPU.
-# - fadd runs the actual computation.
-# - asnumpy() copies the GPU array back to the CPU and we can use this to verify correctness
-#
-ctx = tvm.context(tgt, 0)
+dev = tvm.device(tgt.kind.name, 0)
 
 n = 1024
-a = tvm.nd.array(np.random.uniform(size=n).astype(A.dtype), ctx)
-b = tvm.nd.array(np.random.uniform(size=n).astype(B.dtype), ctx)
-c = tvm.nd.array(np.zeros(n, dtype=C.dtype), ctx)
+a = tvm.nd.array(np.random.uniform(size=n).astype(A.dtype), dev)
+b = tvm.nd.array(np.random.uniform(size=n).astype(B.dtype), dev)
+c = tvm.nd.array(np.zeros(n, dtype=C.dtype), dev)
 fadd(a, b, c)
 tvm.testing.assert_allclose(c.asnumpy(), a.asnumpy() + b.asnumpy())
 
-######################################################################
-# Inspect the Generated Code
-# --------------------------
-# You can inspect the generated code in TVM. The result of tvm.build
-# is a TVM Module. fadd is the host module that contains the host wrapper,
-# it also contains a device module for the CUDA (GPU) function.
-#
-# The following code fetches the device module and prints the content code.
-#
-if tgt == "cuda" or tgt == "rocm" or tgt.startswith("opencl"):
-    dev_module = fadd.imported_modules[0]
-    print("-----GPU code-----")
-    print(dev_module.get_source())
-else:
-    print(fadd.get_source())
+################################################################################
+# To compare how fast this version is relative to numpy, create a helper
+# function to profile the TVM generated code.
+import timeit
 
-######################################################################
+np_repeat = 100
+np_running_time = timeit.timeit(
+    setup="import numpy\n"
+    "n = 32768\n"
+    'dtype = "float32"\n'
+    "a = numpy.random.rand(n, 1).astype(dtype)\n"
+    "b = numpy.random.rand(n, 1).astype(dtype)\n",
+    stmt="answer = a + b",
+    number=np_repeat,
+)
+print("Numpy running time: %f" % (np_running_time / np_repeat))
+
+
+def evaluate_addition(func, target, optimization, log):
+    dev = tvm.device(target.kind.name, 0)
+    n = 32768
+    a = tvm.nd.array(np.random.uniform(size=n).astype(A.dtype), dev)
+    b = tvm.nd.array(np.random.uniform(size=n).astype(B.dtype), dev)
+    c = tvm.nd.array(np.zeros(n, dtype=C.dtype), dev)
+
+    evaluator = func.time_evaluator(func.entry_name, dev, number=10)
+    mean_time = evaluator(a, b, c).mean
+    print("%s: %f" % (optimization, mean_time))
+
+    log.append((optimization, mean_time))
+
+
+log = [("numpy", np_running_time / np_repeat)]
+evaluate_addition(fadd, tgt, "naive", log=log)
+
+################################################################################
+# Updating the Schedule to Use Parallelism
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# Now that we've illustrated the fundamentals of TE, let's go deeper into what
+# schedules do, and how they can be used to optimize tensor expressions for
+# different architectures. A schedule is a series of steps that are applied to
+# an expression to transform it in a number of different ways. When a schedule
+# is applied to an expression in TE, the inputs and outputs remain the same,
+# but when compiled the implementation of the expression can change. This
+# tensor addition, in the default schedule, is run serially but is easy to
+# parallelize across all of the processor threads. We can apply the parallel
+# schedule operation to our computation.
+
+s[C].parallel(C.op.axis[0])
+
+################################################################################
+# The ``tvm.lower`` command will generate the Intermediate Representation (IR)
+# of the TE, with the corresponding schedule. By lowering the expression as we
+# apply different schedule operations, we can see the effect of scheduling on
+# the ordering of the computation. We use the flag ``simple_mode=True`` to
+# return a readable C-style statement.
+
+print(tvm.lower(s, [A, B, C], simple_mode=True))
+
+################################################################################
+# It's now possible for TVM to run these blocks on independent threads. Let's
+# compile and run this new schedule with the parallel operation applied:
+
+fadd_parallel = tvm.build(s, [A, B, C], tgt, name="myadd_parallel")
+fadd_parallel(a, b, c)
+
+tvm.testing.assert_allclose(c.asnumpy(), a.asnumpy() + b.asnumpy())
+
+evaluate_addition(fadd_parallel, tgt, "parallel", log=log)
+
+################################################################################
+# Updating the Schedule to Use Vectorization
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# Modern CPUs also have the ability to perform SIMD operations on floating
+# point values, and we can apply another schedule to our computation expression
+# to take advantage of this. Accomplishing this requires multiple steps: first
+# we have to split the schedule into inner and outer loops using the split
+# scheduling primitive. The inner loops can then apply SIMD instructions via
+# the vectorize scheduling primitive, and the outer loops can be parallelized
+# using the parallel scheduling primitive. Choose the split factor to be the
+# number of threads on your CPU.
+
+# Recreate the schedule, since we modified it with the parallel operation in
+# the previous example
+n = te.var("n")
+A = te.placeholder((n,), name="A")
+B = te.placeholder((n,), name="B")
+C = te.compute(A.shape, lambda i: A[i] + B[i], name="C")
+
+s = te.create_schedule(C.op)
+
+# This factor should be chosen to match the number of threads appropriate for
+# your CPU. This will vary depending on architecture, but a good rule is
+# setting this factor to equal the number of available CPU cores.
+factor = 4
+
+outer, inner = s[C].split(C.op.axis[0], factor=factor)
+s[C].parallel(outer)
+s[C].vectorize(inner)
+
+fadd_vector = tvm.build(s, [A, B, C], tgt, name="myadd_vector")
+
+evaluate_addition(fadd_vector, tgt, "vector", log=log)
+
+print(tvm.lower(s, [A, B, C], simple_mode=True))
+
+################################################################################
+# Comparing the Different Schedules
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# We can now compare the performance of the different schedules:
+
+baseline = log[0][1]
+print("%s\t%s\t%s" % ("Operator".rjust(20), "Timing".rjust(20), "Performance".rjust(20)))
+for result in log:
+    print(
+        "%s\t%s\t%s"
+        % (result[0].rjust(20), str(result[1]).rjust(20), str(result[1] / baseline).rjust(20))
+    )
+
+
+################################################################################
 # .. note:: Code Specialization
 #
-#   As you may have noticed, the declarations of A, B and C all
-#   take the same shape argument, n. TVM will take advantage of this
-#   to pass only a single shape argument to the kernel, as you will find in
-#   the printed device code. This is one form of specialization.
-#
-#   On the host side, TVM will automatically generate check code
-#   that checks the constraints in the parameters. So if you pass
-#   arrays with different shapes into fadd, an error will be raised.
+#   As you may have noticed, the declarations of ``A``, ``B`` and ``C`` all
+#   take the same shape argument, ``n``. TVM will take advantage of this to
+#   pass only a single shape argument to the kernel, as you will find in the
+#   printed device code. This is one form of specialization.
 #
-#   We can do more specializations. For example, we can write
-#   :code:`n = tvm.runtime.convert(1024)` instead of :code:`n = te.var("n")`,
-#   in the computation declaration. The generated function will
-#   only take vectors with length 1024.
+#   On the host side, TVM will automatically generate check code that checks
+#   the constraints in the parameters. So if you pass arrays with different
+#   shapes into fadd, an error will be raised.
 #
+#   We can do more specializations. For example, we can write :code:`n =
+#   tvm.runtime.convert(1024)` instead of :code:`n = te.var("n")`, in the
+#   computation declaration. The generated function will only take vectors with
+#   length 1024.
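+#
+#   As a minimal sketch of that fixed-shape variant (reusing the element-wise
+#   addition declared earlier; shown for illustration only)::
+#
+#     n = tvm.runtime.convert(1024)
+#     A = te.placeholder((n,), name="A")
+#     B = te.placeholder((n,), name="B")
+#     C = te.compute(A.shape, lambda i: A[i] + B[i], name="C")
+#     # A function built from a schedule of this C accepts only
+#     # length-1024 vectors.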
 
-######################################################################
-# Save Compiled Module
-# --------------------
-# Besides runtime compilation, we can save the compiled modules into
-# a file and load them back later. This is called ahead of time compilation.
+################################################################################
+# We've defined, scheduled, and compiled a vector addition operator, which we
+# were then able to execute on the TVM runtime. We can save the operator as a
+# library, which we can then load later using the TVM runtime.
+
+################################################################################
+# Targeting Vector Addition for GPUs (Optional)
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# TVM is capable of targeting multiple architectures. In the next example, we
+# will target compilation of the vector addition to GPUs.
+
+# If you want to run this code, set ``run_cuda = True``.
+# Note that by default this example is not run in the docs CI.
+
+run_cuda = False
+if run_cuda:
+    # Change this target to the correct backend for your GPU. For example: cuda (NVIDIA GPUs),
+    # rocm (Radeon GPUs), or OpenCL (opencl).
+    tgt_gpu = tvm.target.Target(target="cuda", host="llvm")
+
+    # Recreate the schedule
+    n = te.var("n")
+    A = te.placeholder((n,), name="A")
+    B = te.placeholder((n,), name="B")
+    C = te.compute(A.shape, lambda i: A[i] + B[i], name="C")
+    print(type(C))
+
+    s = te.create_schedule(C.op)
+
+    bx, tx = s[C].split(C.op.axis[0], factor=64)
+
+    ################################################################################
+    # Finally we must bind the iteration axes bx and tx to threads in the GPU
+    # compute grid. The naive schedule is not valid for GPUs, and these are
+    # specific constructs that allow us to generate code that runs on a GPU.
+
+    s[C].bind(bx, te.thread_axis("blockIdx.x"))
+    s[C].bind(tx, te.thread_axis("threadIdx.x"))
+
+    ######################################################################
+    # Compilation
+    # -----------
+    # After we have finished specifying the schedule, we can compile it
+    # into a TVM function. By default TVM compiles into a type-erased
+    # function that can be directly called from the python side.
+    #
+    # In the following line, we use tvm.build to create a function.
+    # The build function takes the schedule, the desired signature of the
+    # function (including the inputs and outputs) as well as the target
+    # language we want to compile to.
+    #
+    # The result of compilation, fadd, is a GPU device function (if a GPU is
+    # involved) as well as a host wrapper that calls into the GPU
+    # function. fadd is the generated host wrapper function; internally it
+    # contains a reference to the generated device function.
+
+    fadd = tvm.build(s, [A, B, C], target=tgt_gpu, name="myadd")
+
+    ################################################################################
+    # The compiled TVM function exposes a concise C API that can be invoked from
+    # any language.
+    #
+    # We provide a minimal array API in python to aid quick testing and prototyping.
+    # The array API is based on the `DLPack <https://github.com/dmlc/dlpack>`_ standard.
+    #
+    # - We first create a GPU device.
+    # - Then tvm.nd.array copies the data to the GPU.
+    # - ``fadd`` runs the actual computation
+    # - ``asnumpy()`` copies the GPU array back to the CPU (so we can verify correctness).
+    #
+    # Note that copying the data to and from the memory on the GPU is a required step.
+
+    dev = tvm.device(tgt_gpu.kind.name, 0)
+
+    n = 1024
+    a = tvm.nd.array(np.random.uniform(size=n).astype(A.dtype), dev)
+    b = tvm.nd.array(np.random.uniform(size=n).astype(B.dtype), dev)
+    c = tvm.nd.array(np.zeros(n, dtype=C.dtype), dev)
+    fadd(a, b, c)
+    tvm.testing.assert_allclose(c.asnumpy(), a.asnumpy() + b.asnumpy())
+
+    ################################################################################
+    # Inspect the Generated GPU Code
+    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    # You can inspect the generated code in TVM. The result of tvm.build is a TVM
+    # Module. fadd is the host module that contains the host wrapper; it also
+    # contains a device module for the CUDA (GPU) function.
+    #
+    # The following code fetches the device module and prints its source code.
+
+    if (
+        tgt_gpu.kind.name == "cuda"
+        or tgt_gpu.kind.name == "rocm"
+        or tgt_gpu.kind.name.startswith("opencl")
+    ):
+        dev_module = fadd.imported_modules[0]
+        print("-----GPU code-----")
+        print(dev_module.get_source())
+    else:
+        print(fadd.get_source())
+
+################################################################################
+# Saving and Loading Compiled Modules
+# -----------------------------------
+# Besides runtime compilation, we can save the compiled modules into a file and
+# load them back later.
 #
 # The following code performs these steps:
 #
 # - It saves the compiled host module into an object file.
 # - Then it saves the device module into a ptx file.
 # - cc.create_shared calls a compiler (gcc) to create a shared library
-#
+
 from tvm.contrib import cc
 from tvm.contrib import utils
 
 temp = utils.tempdir()
 fadd.save(temp.relpath("myadd.o"))
-if tgt == "cuda":
+if tgt.kind.name == "cuda":
     fadd.imported_modules[0].save(temp.relpath("myadd.ptx"))
-if tgt == "rocm":
+if tgt.kind.name == "rocm":
     fadd.imported_modules[0].save(temp.relpath("myadd.hsaco"))
-if tgt.startswith("opencl"):
+if tgt.kind.name.startswith("opencl"):
     fadd.imported_modules[0].save(temp.relpath("myadd.cl"))
 cc.create_shared(temp.relpath("myadd.so"), [temp.relpath("myadd.o")])
 print(temp.listdir())
 
-######################################################################
+################################################################################
 # .. note:: Module Storage Format
 #
-#   The CPU (host) module is directly saved as a shared library (.so).
-#   There can be multiple customized formats of the device code.
-#   In our example, the device code is stored in ptx, as well as a meta
-#   data json file. They can be loaded and linked separately via import.
-#
+#   The CPU (host) module is directly saved as a shared library (.so). There
+#   can be multiple customized formats of the device code. In our example, the
+#   device code is stored in ptx, as well as a meta data json file. They can be
+#   loaded and linked separately via import.
 
-######################################################################
+################################################################################
 # Load Compiled Module
-# --------------------
-# We can load the compiled module from the file system and run the code.
-# The following code loads the host and device module separately and
-# re-links them together. We can verify that the newly loaded function works.
-#
+# ~~~~~~~~~~~~~~~~~~~~
+# We can load the compiled module from the file system and run the code. The
+# following code loads the host and device module separately and links them
+# together. We can verify that the newly loaded function works.
+
 fadd1 = tvm.runtime.load_module(temp.relpath("myadd.so"))
-if tgt == "cuda":
+if tgt.kind.name == "cuda":
     fadd1_dev = tvm.runtime.load_module(temp.relpath("myadd.ptx"))
     fadd1.import_module(fadd1_dev)
 
-if tgt == "rocm":
+if tgt.kind.name == "rocm":
     fadd1_dev = tvm.runtime.load_module(temp.relpath("myadd.hsaco"))
     fadd1.import_module(fadd1_dev)
 
-if tgt.startswith("opencl"):
+if tgt.kind.name.startswith("opencl"):
     fadd1_dev = tvm.runtime.load_module(temp.relpath("myadd.cl"))
     fadd1.import_module(fadd1_dev)
 
 fadd1(a, b, c)
 tvm.testing.assert_allclose(c.asnumpy(), a.asnumpy() + b.asnumpy())
 
-######################################################################
+################################################################################
 # Pack Everything into One Library
-# --------------------------------
-# In the above example, we store the device and host code separately.
-# TVM also supports export everything as one shared library.
-# Under the hood, we pack the device modules into binary blobs and link
-# them together with the host code.
-# Currently we support packing of Metal, OpenCL and CUDA modules.
-#
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# In the above example, we store the device and host code separately. TVM also
+# supports exporting everything as one shared library. Under the hood, we pack
+# the device modules into binary blobs and link them together with the host
+# code. Currently we support packing of Metal, OpenCL and CUDA modules.
+
 fadd.export_library(temp.relpath("myadd_pack.so"))
 fadd2 = tvm.runtime.load_module(temp.relpath("myadd_pack.so"))
 fadd2(a, b, c)
 tvm.testing.assert_allclose(c.asnumpy(), a.asnumpy() + b.asnumpy())
 
-######################################################################
+################################################################################
 # .. note:: Runtime API and Thread-Safety
 #
-#   The compiled modules of TVM do not depend on the TVM compiler.
-#   Instead, they only depend on a minimum runtime library.
-#   The TVM runtime library wraps the device drivers and provides
-#   thread-safe and device agnostic calls into the compiled functions.
-#
-#   This means that you can call the compiled TVM functions from any thread,
-#   on any GPUs.
+#   The compiled modules of TVM do not depend on the TVM compiler. Instead,
+#   they only depend on a minimum runtime library. The TVM runtime library
+#   wraps the device drivers and provides thread-safe and device agnostic calls
+#   into the compiled functions.
 #
+#   This means that you can call the compiled TVM functions from any thread, on
+#   any GPUs, provided that you have compiled the code for that GPU.
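+#
+#   As a minimal sketch of this property (assuming the CPU build of ``fadd2``
+#   and the input arrays ``a`` and ``b`` from above), the same compiled
+#   function can be invoked from several Python threads, each writing to its
+#   own output buffer::
+#
+#     import threading
+#
+#     outs = [tvm.nd.array(np.zeros(1024, dtype="float32")) for _ in range(4)]
+#     threads = [threading.Thread(target=fadd2, args=(a, b, out)) for out in outs]
+#     for t in threads:
+#         t.start()
+#     for t in threads:
+#         t.join()
+#     for out in outs:
+#         tvm.testing.assert_allclose(out.asnumpy(), a.asnumpy() + b.asnumpy())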
 
-######################################################################
+################################################################################
 # Generate OpenCL Code
 # --------------------
-# TVM provides code generation features into multiple backends,
-# we can also generate OpenCL code or LLVM code that runs on CPU backends.
+# TVM provides code generation features for multiple backends. We can also
+# generate OpenCL code or LLVM code that runs on CPU backends.
+#
+# The following code block generates OpenCL code, creates arrays on an OpenCL
+# device, and verifies the correctness of the code.
-#
-if tgt.startswith("opencl"):
+
+if tgt.kind.name.startswith("opencl"):
     fadd_cl = tvm.build(s, [A, B, C], tgt, name="myadd")
     print("------opencl code------")
     print(fadd_cl.imported_modules[0].get_source())
-    ctx = tvm.cl(0)
+    dev = tvm.cl(0)
     n = 1024
-    a = tvm.nd.array(np.random.uniform(size=n).astype(A.dtype), ctx)
-    b = tvm.nd.array(np.random.uniform(size=n).astype(B.dtype), ctx)
-    c = tvm.nd.array(np.zeros(n, dtype=C.dtype), ctx)
+    a = tvm.nd.array(np.random.uniform(size=n).astype(A.dtype), dev)
+    b = tvm.nd.array(np.random.uniform(size=n).astype(B.dtype), dev)
+    c = tvm.nd.array(np.zeros(n, dtype=C.dtype), dev)
     fadd_cl(a, b, c)
     tvm.testing.assert_allclose(c.asnumpy(), a.asnumpy() + b.asnumpy())
 
-######################################################################
-# Summary
-# -------
-# This tutorial provides a walk through of TVM workflow using
-# a vector add example. The general workflow is
+################################################################################
+# .. note:: TE Scheduling Primitives
+#
+#   TVM includes a number of different scheduling primitives:
+#
+#   - split: splits a specified axis into two axes by the defined factor.
+#   - tile: splits a computation across two axes by the defined factors.
+#   - fuse: fuses two consecutive axes of one computation.
+#   - reorder: can reorder the axes of a computation into a defined order.
+#   - bind: can bind a computation to a specific thread, useful in GPU programming.
+#   - compute_at: by default, TVM will compute tensors at the outermost level
+#     of the function, or the root. compute_at specifies that one
+#     tensor should be computed at the first axis of computation for another
+#     operator.
+#   - compute_inline: when marked inline, a computation will be expanded then
+#     inserted into the address where the tensor is required.
+#   - compute_root: moves a computation to the outermost layer, or root, of the
+#     function. This means that stage of the computation will be fully computed
+#     before it moves on to the next stage.
+#
+#   A complete description of these primitives can be found in the
+#   `Schedule Primitives <https://tvm.apache.org/docs/tutorials/language/schedule_primitives.html>`_ docs page.
+
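+################################################################################
+# Here is a small, illustrative sketch of a few of these primitives (``split``,
+# ``reorder``, and ``fuse``) applied to a hypothetical 2-D computation; the
+# names here are for illustration only::
+#
+#   X = te.placeholder((1024, 1024), name="X")
+#   Y = te.compute((1024, 1024), lambda i, j: X[i, j] * 2.0, name="Y")
+#   s2 = te.create_schedule(Y.op)
+#   i, j = s2[Y].op.axis
+#   io, ii = s2[Y].split(i, factor=32)  # split i into outer and inner loops
+#   s2[Y].reorder(io, j, ii)            # change the loop nest order
+#   fused = s2[Y].fuse(io, j)           # fuse two consecutive axes into one
+#   print(tvm.lower(s2, [X, Y], simple_mode=True))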
+################################################################################
+# Example 2: Manually Optimizing Matrix Multiplication with TE
+# ------------------------------------------------------------
+#
+# Now we will consider a second, more advanced example, demonstrating how with
+# just 18 lines of python code TVM speeds up a common matrix multiplication operation by 18x.
+#
+# Matrix multiplication is a compute intensive operation. There are two
+# important optimizations for good CPU performance:
+#
+# 1. Increase the cache hit rate of memory access. Both complex numerical
+#    computation and hot-spot memory access can be accelerated by a high cache
+#    hit rate. This requires us to transform the original memory access pattern
+#    into a pattern that fits the cache policy.
+# 2. SIMD (Single Instruction Multiple Data), also known as the vector
+#    processing unit. On each cycle, instead of processing a single value, SIMD
+#    can process a small batch of data. This requires us to transform the data
+#    access pattern in the loop body into a uniform pattern so that the LLVM
+#    backend can lower it to SIMD instructions.
+#
+# The techniques used in this tutorial are a subset of tricks mentioned in this
+# `repository <https://github.com/flame/how-to-optimize-gemm>`_. Some of them
+# have been applied automatically by the TVM abstraction, but some of them
+# cannot be applied automatically due to TVM constraints.
+
+################################################################################
+# Preparation and Performance Baseline
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# We begin by collecting performance data on the ``numpy`` implementation of
+# matrix multiplication.
+
+import tvm
+import tvm.testing
+from tvm import te
+import numpy
+import timeit
+
+# The size of the matrix
+# (M, K) x (K, N)
+# You are free to try out different shapes, sometimes TVM optimization outperforms numpy with MKL.
+M = 1024
+K = 1024
+N = 1024
+
+# The default tensor data type in tvm
+dtype = "float32"
+
+# You will want to adjust the target to match any vector extensions your CPU
+# might support. For example, if you're using the Intel AVX2 (Advanced Vector
+# Extensions) ISA for SIMD, you can get the best performance by changing the
+# following line to ``llvm -mcpu=core-avx2``, or to whichever specific CPU type
+# you use. Recall that we are using llvm; you can get the CPU type from the
+# command ``llc --version``, and you can check ``/proc/cpuinfo`` for additional
+# extensions that your processor might support.
+
+target = tvm.target.Target(target="llvm", host="llvm")
+dev = tvm.device(target.kind.name, 0)
+
+# Random generated tensor for testing
+a = tvm.nd.array(numpy.random.rand(M, K).astype(dtype), dev)
+b = tvm.nd.array(numpy.random.rand(K, N).astype(dtype), dev)
+
+# Repeatedly perform a matrix multiplication to get a performance baseline
+# for the default numpy implementation
+np_repeat = 100
+np_running_time = timeit.timeit(
+    setup="import numpy\n"
+    "M = " + str(M) + "\n"
+    "K = " + str(K) + "\n"
+    "N = " + str(N) + "\n"
+    'dtype = "float32"\n'
+    "a = numpy.random.rand(M, K).astype(dtype)\n"
+    "b = numpy.random.rand(K, N).astype(dtype)\n",
+    stmt="answer = numpy.dot(a, b)",
+    number=np_repeat,
+)
+print("Numpy running time: %f" % (np_running_time / np_repeat))
+
+answer = numpy.dot(a.asnumpy(), b.asnumpy())
+
+################################################################################
+# Now we write a basic matrix multiplication using TVM TE and verify that it
+# produces the same results as the numpy implementation. We also write a
+# function that will help us measure the performance of the schedule
+# optimizations.
+
+# TVM Matrix Multiplication using TE
+k = te.reduce_axis((0, K), "k")
+A = te.placeholder((M, K), name="A")
+B = te.placeholder((K, N), name="B")
+C = te.compute((M, N), lambda x, y: te.sum(A[x, k] * B[k, y], axis=k), name="C")
+
+# Default schedule
+s = te.create_schedule(C.op)
+func = tvm.build(s, [A, B, C], target=target, name="mmult")
+
+c = tvm.nd.array(numpy.zeros((M, N), dtype=dtype), dev)
+func(a, b, c)
+tvm.testing.assert_allclose(c.asnumpy(), answer, rtol=1e-5)
+
+
+def evaluate_operation(s, vars, target, name, optimization, log):
+    func = tvm.build(s, vars, target=target, name=name)
+    assert func
+
+    c = tvm.nd.array(numpy.zeros((M, N), dtype=dtype), dev)
+    func(a, b, c)
+    tvm.testing.assert_allclose(c.asnumpy(), answer, rtol=1e-5)
+
+    evaluator = func.time_evaluator(func.entry_name, dev, number=10)
+    mean_time = evaluator(a, b, c).mean
+    print("%s: %f" % (optimization, mean_time))
+    log.append((optimization, mean_time))
+
+
+log = []
+
+evaluate_operation(s, [A, B, C], target=target, name="mmult", optimization="none", log=log)
+
+################################################################################
+# Let's take a look at the intermediate representation of the operator and
+# default schedule using the TVM lower function. Note how the implementation is
+# essentially a naive implementation of a matrix multiplication, using three
+# nested loops over the indices of the A and B matrices.
+
+print(tvm.lower(s, [A, B, C], simple_mode=True))
+
+################################################################################
+# Optimization 1: Blocking
+# ~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# An important trick to enhance the cache hit rate is blocking, where you
+# structure memory access such that the accesses inside a block form a small
+# neighborhood with high memory locality. In this tutorial, we pick a block
+# factor of 32. This will result in a block that fills a 32 * 32 * sizeof(float)
+# area of memory. This corresponds to a cache size of 4 KB, in relation to a
+# reference cache size of 32 KB for the L1 cache.
+#
+# We begin by creating a default schedule for the ``C`` operation, then apply a
+# ``tile`` scheduling primitive to it with the specified block factor, with the
+# scheduling primitive returning the resulting loop order from outermost to
+# innermost, as a vector ``[x_outer, y_outer, x_inner, y_inner]``. We then get
+# the reduction axis for the output of the operation, and perform a split operation
+# on it using a factor of 4. This factor doesn't directly impact the blocking
+# optimization we're working on right now, but will be useful later when we
+# apply vectorization.
+#
+# Now that the operation has been blocked, we can reorder the computation to
+# put the reduction operation into the outermost loop of the computation,
+# helping to guarantee that the blocked data remains in cache. This completes
+# the schedule, and we can build and test the performance compared to the naive
+# schedule.
+
+bn = 32
+
+# Blocking by loop tiling
+xo, yo, xi, yi = s[C].tile(C.op.axis[0], C.op.axis[1], bn, bn)
+(k,) = s[C].op.reduce_axis
+ko, ki = s[C].split(k, factor=4)
+
+# Hoist reduction domain outside the blocking loop
+s[C].reorder(xo, yo, ko, ki, xi, yi)
+
+evaluate_operation(s, [A, B, C], target=target, name="mmult", optimization="blocking", log=log)
+
+################################################################################
+# By reordering the computation to take advantage of caching, you should see a
+# significant improvement in the performance of the computation. Now, print the
+# internal representation and compare it to the original:
+
+print(tvm.lower(s, [A, B, C], simple_mode=True))
+
+################################################################################
+# Optimization 2: Vectorization
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# Another important optimization trick is vectorization. When the memory access
+# pattern is uniform, the compiler can detect this pattern and pass the
+# continuous memory to the SIMD vector processor. In TVM, we can use the
+# ``vectorize`` interface to hint this pattern to the compiler, taking
+# advantage of this hardware feature.
+#
+# In this tutorial, we chose to vectorize the inner loop row data since it is
+# already cache friendly from our previous optimizations.
+
+# Apply the vectorization optimization
+s[C].vectorize(yi)
+
+evaluate_operation(s, [A, B, C], target=target, name="mmult", optimization="vectorization", log=log)
+
+# The generated IR after vectorization
+print(tvm.lower(s, [A, B, C], simple_mode=True))
+
+################################################################################
+# Optimization 3: Loop Permutation
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# If we look at the above IR, we can see the inner loop row data is vectorized
+# and B is transformed into PackedB (this is evident from the `(float32x32*)B2`
+# portion of the inner loop). The traversal of PackedB is now sequential, so we
+# will look at the access pattern of A. In the current schedule, A is accessed
+# column by column, which is not cache friendly. If we swap the nested loop
+# order of `ki` and the inner axis `xi`, the access pattern for the A matrix
+# will be more cache friendly.
+
+s = te.create_schedule(C.op)
+xo, yo, xi, yi = s[C].tile(C.op.axis[0], C.op.axis[1], bn, bn)
+(k,) = s[C].op.reduce_axis
+ko, ki = s[C].split(k, factor=4)
+
+# re-ordering
+s[C].reorder(xo, yo, ko, xi, ki, yi)
+s[C].vectorize(yi)
+
+evaluate_operation(
+    s, [A, B, C], target=target, name="mmult", optimization="loop permutation", log=log
+)
+
+# Again, print the new generated IR
+print(tvm.lower(s, [A, B, C], simple_mode=True))
+
+################################################################################
+# Optimization 4: Array Packing
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# Another important trick is array packing. This trick reorders the storage
+# dimensions of the array to convert a continuous access pattern on a certain
+# dimension into a sequential pattern after flattening.
+#
+# .. image:: https://github.com/dmlc/web-data/raw/main/tvm/tutorial/array-packing.png
+#    :align: center
+#
+# As shown in the figure above, after blocking the computations, we can observe
+# the array access pattern of B (after flattening), which is regular but
+# discontinuous. We expect that after some transformation we can get a
+# continuous access pattern. By reordering a ``[16][16]`` array to a
+# ``[16/4][16][4]`` array, the access pattern of B will be sequential when
+# grabbing the corresponding value from the packed array.
+#
+# To accomplish this, we are going to have to start with a new default
+# schedule, taking into account the new packing of B. It's worth taking a
+# moment to comment on this: TE is a powerful and expressive language for
+# writing optimized operators, but it often requires some knowledge of the
+# underlying algorithm, data structures, and hardware target that you are
+# writing for. Later in the tutorial, we will discuss some of the options for
+# letting TVM take that burden. Regardless, let's move on with the new
+# optimized schedule.
+
+# We have to re-write the algorithm slightly.
+packedB = te.compute((N // bn, K, bn), lambda x, y, z: B[y, x * bn + z], name="packedB")
+C = te.compute(
+    (M, N),
+    lambda x, y: te.sum(A[x, k] * packedB[y // bn, k, tvm.tir.indexmod(y, bn)], axis=k),
+    name="C",
+)
+
+s = te.create_schedule(C.op)
+
+xo, yo, xi, yi = s[C].tile(C.op.axis[0], C.op.axis[1], bn, bn)
+(k,) = s[C].op.reduce_axis
+ko, ki = s[C].split(k, factor=4)
+
+s[C].reorder(xo, yo, ko, xi, ki, yi)
+s[C].vectorize(yi)
+
+x, y, z = s[packedB].op.axis
+s[packedB].vectorize(z)
+s[packedB].parallel(x)
+
+evaluate_operation(s, [A, B, C], target=target, name="mmult", optimization="array packing", log=log)
+
+# Here is the generated IR after array packing.
+print(tvm.lower(s, [A, B, C], simple_mode=True))
+
+################################################################################
+# Optimization 5: Optimizing Block Writing Through Caching
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# Up to this point all of our optimizations have focused on efficiently
+# accessing and computing the data from the `A` and `B` matrices to compute the
+# `C` matrix. After the blocking optimization, the operator will write result
+# to `C` block by block, and the access pattern is not sequential. We can
+# address this by using a sequential cache array, using a combination of
+# `cache_write`, `compute_at`, and `unroll` to hold the block results and write
+# to `C` when all the block results are ready.
+
+s = te.create_schedule(C.op)
+
+# Allocate write cache
+CC = s.cache_write(C, "global")
+
+xo, yo, xi, yi = s[C].tile(C.op.axis[0], C.op.axis[1], bn, bn)
+
+# Write cache is computed at yo
+s[CC].compute_at(s[C], yo)
+
+# New inner axes
+xc, yc = s[CC].op.axis
+
+(k,) = s[CC].op.reduce_axis
+ko, ki = s[CC].split(k, factor=4)
+s[CC].reorder(ko, xc, ki, yc)
+s[CC].unroll(ki)
+s[CC].vectorize(yc)
+
+x, y, z = s[packedB].op.axis
+s[packedB].vectorize(z)
+s[packedB].parallel(x)
+
+evaluate_operation(s, [A, B, C], target=target, name="mmult", optimization="block caching", log=log)
+
+# Here is the generated IR after write cache blocking.
+print(tvm.lower(s, [A, B, C], simple_mode=True))
+
+################################################################################
+# Optimization 6: Parallelization
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# So far, our computation is only designed to use a single core. Nearly all
+# modern processors have multiple cores, and our computation can benefit from
+# running in parallel. The final optimization is to take advantage of
+# thread-level parallelization.
+
+# parallel
+s[C].parallel(xo)
+
+x, y, z = s[packedB].op.axis
+s[packedB].vectorize(z)
+s[packedB].parallel(x)
+
+evaluate_operation(
+    s, [A, B, C], target=target, name="mmult", optimization="parallelization", log=log
+)
+
+# Here is the generated IR after parallelization.
+print(tvm.lower(s, [A, B, C], simple_mode=True))
+
+################################################################################
+# Summary of Matrix Multiplication Example
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# After applying the above simple optimizations with only 18 lines of code, our
+# generated code can begin to approach the performance of `numpy` with the Math
+# Kernel Library (MKL). Since we've been logging the performance as we've been
+# working, we can compare the results.
+
+baseline = log[0][1]
+print("%s\t%s\t%s" % ("Operator".rjust(20), "Timing".rjust(20), "Performance".rjust(20)))
+for result in log:
+    print(
+        "%s\t%s\t%s"
+        % (result[0].rjust(20), str(result[1]).rjust(20), str(result[1] / baseline).rjust(20))
+    )
+
+################################################################################
+# Note that the outputs on the web page reflect the running times on a
+# non-exclusive Docker container, and should be considered unreliable. It is
+# highly encouraged to run the tutorial by yourself to observe the performance
+# gain achieved by TVM, and to carefully work through each example to
+# understand the iterative improvements that are made to the matrix
+# multiplication operation.
+
+################################################################################
+# Final Notes and Summary
+# -----------------------
+# As mentioned earlier, applying optimizations using TE and scheduling
+# primitives can require some knowledge of the underlying architecture and
+# algorithms. However, TE was designed to act as a foundation for more complex
+# algorithms that can search the space of potential optimizations. With the
+# knowledge you have from this introduction to TE, we can now begin to explore
+# how TVM can automate the schedule optimization process.
+#
+# This tutorial provided a walkthrough of the TVM Tensor Expression (TE)
+# workflow using vector addition and matrix multiplication examples. The
+# general workflow is:
 #
 # - Describe your computation via a series of operations.
 # - Describe how we want to compute using schedule primitives.
 # - Compile to the target function we want.
 # - Optionally, save the function to be loaded later.
 #
-# You are more than welcome to checkout other examples and
-# tutorials to learn more about the supported operations, scheduling primitives
-# and other features in TVM.
-#
+# Upcoming tutorials expand on the matrix multiplication example, and show how
+# you can build generic templates of the matrix multiplication and other
+# operations with tunable parameters that allow you to automatically optimize
+# the computation for specific platforms.
diff --git a/docs/_downloads/38606228ff7130fbd6473b7c0625ddcd/deploy_model_on_android.ipynb b/docs/_downloads/38606228ff7130fbd6473b7c0625ddcd/deploy_model_on_android.ipynb
index 141a5a4..e40b1da 100644
--- a/docs/_downloads/38606228ff7130fbd6473b7c0625ddcd/deploy_model_on_android.ipynb
+++ b/docs/_downloads/38606228ff7130fbd6473b7c0625ddcd/deploy_model_on_android.ipynb
@@ -26,14 +26,14 @@
       },
       "outputs": [],
       "source": [
-        "import os\nimport numpy as np\nfrom PIL import Image\nimport keras\nfrom keras.applications.mobilenet_v2 import MobileNetV2\nimport tvm\nfrom tvm import te\nimport tvm.relay as relay\nfrom tvm import rpc\nfrom tvm.contrib import utils, ndk, graph_runtime as runtime\nfrom tvm.contrib.download import download_testdata"
+        "import os\nimport numpy as np\nfrom PIL import Image\nimport keras\nfrom keras.applications.mobilenet_v2 import MobileNetV2\nimport tvm\nfrom tvm import te\nimport tvm.relay as relay\nfrom tvm import rpc\nfrom tvm.contrib import utils, ndk, graph_executor as runtime\nfrom tvm.contrib.download import download_testdata"
       ]
     },
     {
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "Setup Environment\n-----------------\nSince there are many required packages for Android, it is recommended to use the official Docker Image.\n\nFirst, to build and run Docker Image, we can run the following command.\n\n.. code-block:: bash\n\n  git clone --recursive https://github.com/apache/tvm tvm\n  cd tvm\n  docker build -t tvm.demo_android -f docker/Dockerfile.demo_android ./docker\n  docker run --pid=host -h tvm -v $PWD:/workspace \\\n         -w /workspace -p 9190:9190 - [...]
+        "Setup Environment\n-----------------\nSince there are many required packages for Android, it is recommended to use the official Docker Image.\n\nFirst, to build and run Docker Image, we can run the following command.\n\n.. code-block:: bash\n\n  git clone --recursive https://github.com/apache/tvm tvm\n  cd tvm\n  docker build -t tvm.demo_android -f docker/Dockerfile.demo_android ./docker\n  docker run --pid=host -h tvm -v $PWD:/workspace \\\n         -w /workspace -p 9190:9190 - [...]
       ]
     },
     {
@@ -119,7 +119,7 @@
       },
       "outputs": [],
       "source": [
-        "local_demo = True\n\n# by default on CPU target will execute.\n# select 'cpu', 'opencl' and 'vulkan'\ntest_target = \"cpu\"\n\n# Change target configuration.\n# Run `adb shell cat /proc/cpuinfo` to find the arch.\narch = \"arm64\"\ntarget = \"llvm -mtriple=%s-linux-android\" % arch\ntarget_host = None\n\nif local_demo:\n    target_host = None\n    target = \"llvm\"\nelif test_target == \"opencl\":\n    target_host = target\n    target = \"opencl\"\nelif test_target == \"vulkan\" [...]
+        "local_demo = True\n\n# by default on CPU target will execute.\n# select 'cpu', 'opencl' and 'vulkan'\ntest_target = \"cpu\"\n\n# Change target configuration.\n# Run `adb shell cat /proc/cpuinfo` to find the arch.\narch = \"arm64\"\ntarget = tvm.target.Target(\"llvm -mtriple=%s-linux-android\" % arch)\n\nif local_demo:\n    target = tvm.target.Target(\"llvm\")\nelif test_target == \"opencl\":\n    target = tvm.target.Target(\"opencl\", host=target)\nelif test_target == \"vulkan\" [...]
       ]
     },
     {
@@ -137,7 +137,7 @@
       },
       "outputs": [],
       "source": [
-        "tracker_host = os.environ.get(\"TVM_TRACKER_HOST\", \"0.0.0.0\")\ntracker_port = int(os.environ.get(\"TVM_TRACKER_PORT\", 9190))\nkey = \"android\"\n\nif local_demo:\n    remote = rpc.LocalSession()\nelse:\n    tracker = rpc.connect_tracker(tracker_host, tracker_port)\n    # When running a heavy model, we should increase the `session_timeout`\n    remote = tracker.request(key, priority=0, session_timeout=60)\n\nif local_demo:\n    ctx = remote.cpu(0)\nelif test_target == \"openc [...]
+        "tracker_host = os.environ.get(\"TVM_TRACKER_HOST\", \"0.0.0.0\")\ntracker_port = int(os.environ.get(\"TVM_TRACKER_PORT\", 9190))\nkey = \"android\"\n\nif local_demo:\n    remote = rpc.LocalSession()\nelse:\n    tracker = rpc.connect_tracker(tracker_host, tracker_port)\n    # When running a heavy model, we should increase the `session_timeout`\n    remote = tracker.request(key, priority=0, session_timeout=60)\n\nif local_demo:\n    dev = remote.cpu(0)\nelif test_target == \"openc [...]
       ]
     },
     {
@@ -155,7 +155,7 @@
       },
       "outputs": [],
       "source": [
-        "# set input data\nmodule.set_input(input_name, tvm.nd.array(x.astype(dtype)))\n# run\nmodule.run()\n# get output\nout = module.get_output(0)\n\n# get top1 result\ntop1 = np.argmax(out.asnumpy())\nprint(\"TVM prediction top-1: {}\".format(synset[top1]))\n\nprint(\"Evaluate inference time cost...\")\nftimer = module.module.time_evaluator(\"run\", ctx, number=1, repeat=10)\nprof_res = np.array(ftimer().results) * 1000  # convert to millisecond\nprint(\"Mean inference time (std dev) [...]
+        "# set input data\nmodule.set_input(input_name, tvm.nd.array(x.astype(dtype)))\n# run\nmodule.run()\n# get output\nout = module.get_output(0)\n\n# get top1 result\ntop1 = np.argmax(out.asnumpy())\nprint(\"TVM prediction top-1: {}\".format(synset[top1]))\n\nprint(\"Evaluate inference time cost...\")\nftimer = module.module.time_evaluator(\"run\", dev, number=1, repeat=10)\nprof_res = np.array(ftimer().results) * 1000  # convert to millisecond\nprint(\"Mean inference time (std dev) [...]
       ]
     },
     {
diff --git a/docs/_downloads/3961fdfa7abff1b6dc996faa43b4c40f/deploy_model_on_android.py b/docs/_downloads/3961fdfa7abff1b6dc996faa43b4c40f/deploy_model_on_android.py
index ff7ef44..158280f 100644
--- a/docs/_downloads/3961fdfa7abff1b6dc996faa43b4c40f/deploy_model_on_android.py
+++ b/docs/_downloads/3961fdfa7abff1b6dc996faa43b4c40f/deploy_model_on_android.py
@@ -34,7 +34,7 @@ import tvm
 from tvm import te
 import tvm.relay as relay
 from tvm import rpc
-from tvm.contrib import utils, ndk, graph_runtime as runtime
+from tvm.contrib import utils, ndk, graph_executor as runtime
 from tvm.contrib.download import download_testdata
 
 
@@ -71,7 +71,7 @@ from tvm.contrib.download import download_testdata
 #         -DUSE_RPC=ON \
 #         -DUSE_SORT=ON \
 #         -DUSE_VULKAN=ON \
-#         -DUSE_GRAPH_RUNTIME=ON \
+#         -DUSE_GRAPH_EXECUTOR=ON \
 #         ..
 #   make -j10
 #
@@ -257,25 +257,21 @@ test_target = "cpu"
 # Change target configuration.
 # Run `adb shell cat /proc/cpuinfo` to find the arch.
 arch = "arm64"
-target = "llvm -mtriple=%s-linux-android" % arch
-target_host = None
+target = tvm.target.Target("llvm -mtriple=%s-linux-android" % arch)
 
 if local_demo:
-    target_host = None
-    target = "llvm"
+    target = tvm.target.Target("llvm")
 elif test_target == "opencl":
-    target_host = target
-    target = "opencl"
+    target = tvm.target.Target("opencl", host=target)
 elif test_target == "vulkan":
-    target_host = target
-    target = "vulkan"
+    target = tvm.target.Target("vulkan", host=target)
 
 input_name = "input_1"
 shape_dict = {input_name: x.shape}
 mod, params = relay.frontend.from_keras(keras_mobilenet_v2, shape_dict)
 
 with tvm.transform.PassContext(opt_level=3):
-    lib = relay.build(mod, target=target, target_host=target_host, params=params)
+    lib = relay.build(mod, target=target, params=params)
 
 # After `relay.build`, you will get three return values: graph,
 # library and the new parameter, since we do some optimization that will
@@ -305,20 +301,20 @@ else:
     remote = tracker.request(key, priority=0, session_timeout=60)
 
 if local_demo:
-    ctx = remote.cpu(0)
+    dev = remote.cpu(0)
 elif test_target == "opencl":
-    ctx = remote.cl(0)
+    dev = remote.cl(0)
 elif test_target == "vulkan":
-    ctx = remote.vulkan(0)
+    dev = remote.vulkan(0)
 else:
-    ctx = remote.cpu(0)
+    dev = remote.cpu(0)
 
 # upload the library to remote device and load it
 remote.upload(lib_fname)
 rlib = remote.load_module("net.so")
 
 # create the remote runtime module
-module = runtime.GraphModule(rlib["default"](ctx))
+module = runtime.GraphModule(rlib["default"](dev))
 
 ######################################################################
 # Execute on TVM
@@ -336,7 +332,7 @@ top1 = np.argmax(out.asnumpy())
 print("TVM prediction top-1: {}".format(synset[top1]))
 
 print("Evaluate inference time cost...")
-ftimer = module.module.time_evaluator("run", ctx, number=1, repeat=10)
+ftimer = module.module.time_evaluator("run", dev, number=1, repeat=10)
 prof_res = np.array(ftimer().results) * 1000  # convert to millisecond
 print("Mean inference time (std dev): %.2f ms (%.2f ms)" % (np.mean(prof_res), np.std(prof_res)))
 
diff --git a/docs/_downloads/41a7d69f33d4708ae5b843474adf9c9e/tune_matmul_x86.py b/docs/_downloads/41a7d69f33d4708ae5b843474adf9c9e/tune_matmul_x86.py
new file mode 100644
index 0000000..931f877
--- /dev/null
+++ b/docs/_downloads/41a7d69f33d4708ae5b843474adf9c9e/tune_matmul_x86.py
@@ -0,0 +1,214 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Optimizing Operators with Auto-scheduling
+=========================================
+**Author**: `Lianmin Zheng <https://github.com/merrymercy>`_, \
+            `Chengfan Jia <https://github.com/jcf94/>`_
+
+In this tutorial, we will show how TVM's Auto Scheduling feature can find
+optimal schedules without the need for writing a custom template.
+
+Different from the template-based :ref:`autotvm tutorial <autotvm_matmul>`, which relies on
+manual templates to define the search space, the auto-scheduler does not
+require any templates.  Users only need to write the computation declaration
+without any schedule commands or templates.  The auto-scheduler can
+automatically generate a large search space and find a good schedule in the
+space.
+
+We use matrix multiplication as an example in this tutorial.
+
+.. note::
+  Note that this tutorial will not run on Windows or recent versions of macOS. To
+  get it to run, you will need to wrap the body of this tutorial in a :code:`if
+  __name__ == "__main__":` block.
+"""
+
+import os
+
+import numpy as np
+import tvm
+from tvm import te, auto_scheduler
+
+################################################################################
+# Defining the Matrix Multiplication
+# ----------------------------------
+# To start, we define a matrix multiplication with a bias addition.  Note that
+# this uses standard operations available in TVM's Tensor Expression language.
+# The major difference is the use of the `auto_scheduler` decorator at the top
+# of the function definition.  The function should return a list of
+# input/output tensors.  From these tensors, the auto-scheduler can get the
+# whole computational graph.
+
+
+@auto_scheduler.register_workload  # Note the auto_scheduler decorator
+def matmul_add(N, L, M, dtype):
+    A = te.placeholder((N, L), name="A", dtype=dtype)
+    B = te.placeholder((L, M), name="B", dtype=dtype)
+    C = te.placeholder((N, M), name="C", dtype=dtype)
+
+    k = te.reduce_axis((0, L), name="k")
+    matmul = te.compute(
+        (N, M),
+        lambda i, j: te.sum(A[i, k] * B[k, j], axis=k),
+        name="matmul",
+        attrs={"layout_free_placeholders": [B]},  # enable automatic layout transform for tensor B
+    )
+    out = te.compute((N, M), lambda i, j: matmul[i, j] + C[i, j], name="out")
+
+    return [A, B, C, out]
+
+
+################################################################################
+# Create the search task
+# ----------------------
+# With the function defined, we can now create the task for the auto_scheduler
+# to search against. We specify the particular parameters for this matrix
+# multiplication, in this case a multiplication of two square matrices of size
+# 1024x1024. We then create a search task with N=L=M=1024 and dtype="float32".
+#
+# .. note:: Improve performance with custom targets
+#   In order for TVM to take full advantage of specific hardware platforms,
+#   you will want to manually specify your CPU capabilities. For example:
+#   - replace "llvm" below with "llvm -mcpu=core-avx2" to enable AVX2
+#   - replace "llvm" below with "llvm -mcpu=skylake-avx512" to enable AVX-512
+
+target = tvm.target.Target("llvm")
+N = L = M = 1024
+task = tvm.auto_scheduler.SearchTask(func=matmul_add, args=(N, L, M, "float32"), target=target)
+
+# Inspect the computational graph
+print("Computational DAG:")
+print(task.compute_dag)
+
+################################################################################
+# Set Parameters for Auto-Scheduler
+# ---------------------------------
+# Next, we set parameters for the auto-scheduler.
+#
+# * :code:`num_measure_trials` is the number of measurement trials we can use
+#   during the search.  We only make 10 trials in this tutorial for a fast
+#   demonstration. In practice, 1000 is a good value for the search to converge.
+#   You can do more trials according to your time budget.
+# * In addition, we use :code:`RecordToFile` to log measurement records into a
+#   file `matmul.json`.  The measurement records can be used to query the history
+#   best, resume the search, and do more analyses later.
+# * see :any:`auto_scheduler.TuningOptions` for more parameters
+
+log_file = "matmul.json"
+tune_option = auto_scheduler.TuningOptions(
+    num_measure_trials=10,
+    measure_callbacks=[auto_scheduler.RecordToFile(log_file)],
+    verbose=2,
+)
+
+################################################################################
+# Run the search
+# --------------
+# Now we get all inputs ready. Pretty simple, isn't it?  We can kick off the
+# search and let the auto-scheduler do its magic.  After some measurement
+# trials, we can load the best schedule from the log file and apply it.
+
+# Run auto-tuning (search)
+task.tune(tune_option)
+# Apply the best schedule
+sch, args = task.apply_best(log_file)
+
+################################################################################
+# Inspecting the Optimized Schedule
+# ---------------------------------
+# We can lower the schedule to see the IR after auto-scheduling.  The
+# auto-scheduler correctly performs optimizations including multi-level tiling,
+# layout transformation, parallelization, vectorization, unrolling, and
+# operator fusion.
+
+print("Lowered TIR:")
+print(tvm.lower(sch, args, simple_mode=True))
+
+################################################################################
+# Check correctness and evaluate performance
+# ------------------------------------------
+# We build the binary and check its correctness and performance.
+
+func = tvm.build(sch, args, target)
+a_np = np.random.uniform(size=(N, L)).astype(np.float32)
+b_np = np.random.uniform(size=(L, M)).astype(np.float32)
+c_np = np.random.uniform(size=(N, M)).astype(np.float32)
+out_np = a_np.dot(b_np) + c_np
+
+dev = tvm.cpu()
+a_tvm = tvm.nd.array(a_np, device=dev)
+b_tvm = tvm.nd.array(b_np, device=dev)
+c_tvm = tvm.nd.array(c_np, device=dev)
+out_tvm = tvm.nd.empty(out_np.shape, device=dev)
+func(a_tvm, b_tvm, c_tvm, out_tvm)
+
+# Check results
+np.testing.assert_allclose(out_np, out_tvm.asnumpy(), rtol=1e-3)
+
+# Evaluate execution time.
+evaluator = func.time_evaluator(func.entry_name, dev, min_repeat_ms=500)
+print(
+    "Execution time of this operator: %.3f ms"
+    % (np.median(evaluator(a_tvm, b_tvm, c_tvm, out_tvm).results) * 1000)
+)
+
+
+################################################################################
+# Using the record file
+# ---------------------
+# During the search, all measurement records are logged into the record file
+# "matmul.json". The measurement records can be used to re-apply search
+# results, resume the search, and perform other analyses.
+#
+# Here is an example where we load the best schedule from a file, and print the
+# equivalent python schedule API. This can be used for debugging and learning
+# the behavior of the auto-scheduler.
+
+print("Equivalent python schedule:")
+print(task.print_best(log_file))
+
+################################################################################
+# A more complicated example is to resume the search.  In this case, we need to
+# create the search policy and cost model by ourselves and resume the status of
+# search policy and cost model with the log file.  In the example below we
+# resume the status and do 5 more trials.
+
+
+def resume_search(task, log_file):
+    print("Resume search:")
+    cost_model = auto_scheduler.XGBModel()
+    cost_model.update_from_file(log_file)
+    search_policy = auto_scheduler.SketchPolicy(
+        task, cost_model, init_search_callbacks=[auto_scheduler.PreloadMeasuredStates(log_file)]
+    )
+    tune_option = auto_scheduler.TuningOptions(
+        num_measure_trials=5, measure_callbacks=[auto_scheduler.RecordToFile(log_file)]
+    )
+    task.tune(tune_option, search_policy=search_policy)
+
+
+resume_search(task, log_file)
+
+################################################################################
+# Final Notes and Summary
+# -----------------------
+# In this tutorial, we have shown how to use the TVM Auto-Scheduler to
+# automatically optimize a matrix multiplication, without the need to specify a
+# search template.  This concludes a series of examples, starting from the
+# Tensor Expression (TE) language, that demonstrates how TVM can optimize
+# computational operations.
diff --git a/docs/_downloads/440add54bfa6dfb4fa9ed5037187aa4c/opt_gemm.ipynb b/docs/_downloads/440add54bfa6dfb4fa9ed5037187aa4c/opt_gemm.ipynb
index 90cb47f..0896acc 100644
--- a/docs/_downloads/440add54bfa6dfb4fa9ed5037187aa4c/opt_gemm.ipynb
+++ b/docs/_downloads/440add54bfa6dfb4fa9ed5037187aa4c/opt_gemm.ipynb
@@ -33,7 +33,7 @@
       },
       "outputs": [],
       "source": [
-        "import tvm\nimport tvm.testing\nfrom tvm import te\nimport numpy\nimport timeit\n\n# The size of the matrix\n# (M, K) x (K, N)\n# You are free to try out different shapes, sometimes TVM optimization outperforms numpy with MKL.\nM = 1024\nK = 1024\nN = 1024\n\n# The default tensor type in tvm\ndtype = \"float32\"\n\n# using Intel AVX2(Advanced Vector Extensions) ISA for SIMD\n# To get the best performance, please change the following line\n# to llvm -mcpu=core-avx2, or specific t [...]
+        "import tvm\nimport tvm.testing\nfrom tvm import te\nimport numpy\nimport timeit\n\n# The size of the matrix\n# (M, K) x (K, N)\n# You are free to try out different shapes, sometimes TVM optimization outperforms numpy with MKL.\nM = 1024\nK = 1024\nN = 1024\n\n# The default tensor type in tvm\ndtype = \"float32\"\n\n# using Intel AVX2(Advanced Vector Extensions) ISA for SIMD\n# To get the best performance, please change the following line\n# to llvm -mcpu=core-avx2, or specific t [...]
       ]
     },
     {
@@ -69,7 +69,7 @@
       },
       "outputs": [],
       "source": [
-        "bn = 32\ns = te.create_schedule(C.op)\n\n# Blocking by loop tiling\nxo, yo, xi, yi = s[C].tile(C.op.axis[0], C.op.axis[1], bn, bn)\n(k,) = s[C].op.reduce_axis\nko, ki = s[C].split(k, factor=4)\n\n# Hoist reduction domain outside the blocking loop\ns[C].reorder(xo, yo, ko, ki, xi, yi)\n\nfunc = tvm.build(s, [A, B, C], target=target, name=\"mmult\")\nassert func\n\nc = tvm.nd.array(numpy.zeros((M, N), dtype=dtype), ctx)\nfunc(a, b, c)\ntvm.testing.assert_allclose(c.asnumpy(), answ [...]
+        "bn = 32\ns = te.create_schedule(C.op)\n\n# Blocking by loop tiling\nxo, yo, xi, yi = s[C].tile(C.op.axis[0], C.op.axis[1], bn, bn)\n(k,) = s[C].op.reduce_axis\nko, ki = s[C].split(k, factor=4)\n\n# Hoist reduction domain outside the blocking loop\ns[C].reorder(xo, yo, ko, ki, xi, yi)\n\nfunc = tvm.build(s, [A, B, C], target=target, name=\"mmult\")\nassert func\n\nc = tvm.nd.array(numpy.zeros((M, N), dtype=dtype), dev)\nfunc(a, b, c)\ntvm.testing.assert_allclose(c.asnumpy(), answ [...]
       ]
     },
     {
@@ -105,7 +105,7 @@
       },
       "outputs": [],
       "source": [
-        "s = te.create_schedule(C.op)\nxo, yo, xi, yi = s[C].tile(C.op.axis[0], C.op.axis[1], bn, bn)\n(k,) = s[C].op.reduce_axis\nko, ki = s[C].split(k, factor=4)\n\ns[C].reorder(xo, yo, ko, ki, xi, yi)\n\n# Vectorization\ns[C].vectorize(yi)\n\nfunc = tvm.build(s, [A, B, C], target=target, name=\"mmult\")\nassert func\n\nc = tvm.nd.array(numpy.zeros((M, N), dtype=dtype), ctx)\nfunc(a, b, c)\ntvm.testing.assert_allclose(c.asnumpy(), answer, rtol=1e-5)\n\nevaluator = func.time_evaluator(f [...]
+        "s = te.create_schedule(C.op)\nxo, yo, xi, yi = s[C].tile(C.op.axis[0], C.op.axis[1], bn, bn)\n(k,) = s[C].op.reduce_axis\nko, ki = s[C].split(k, factor=4)\n\ns[C].reorder(xo, yo, ko, ki, xi, yi)\n\n# Vectorization\ns[C].vectorize(yi)\n\nfunc = tvm.build(s, [A, B, C], target=target, name=\"mmult\")\nassert func\n\nc = tvm.nd.array(numpy.zeros((M, N), dtype=dtype), dev)\nfunc(a, b, c)\ntvm.testing.assert_allclose(c.asnumpy(), answer, rtol=1e-5)\n\nevaluator = func.time_evaluator(f [...]
       ]
     },
     {
@@ -141,7 +141,7 @@
       },
       "outputs": [],
       "source": [
-        "s = te.create_schedule(C.op)\nxo, yo, xi, yi = s[C].tile(C.op.axis[0], C.op.axis[1], bn, bn)\n(k,) = s[C].op.reduce_axis\nko, ki = s[C].split(k, factor=4)\n\n# re-ordering\ns[C].reorder(xo, yo, ko, xi, ki, yi)\ns[C].vectorize(yi)\n\nfunc = tvm.build(s, [A, B, C], target=target, name=\"mmult\")\nassert func\n\nc = tvm.nd.array(numpy.zeros((M, N), dtype=dtype), ctx)\nfunc(a, b, c)\ntvm.testing.assert_allclose(c.asnumpy(), answer, rtol=1e-5)\n\nevaluator = func.time_evaluator(func. [...]
+        "s = te.create_schedule(C.op)\nxo, yo, xi, yi = s[C].tile(C.op.axis[0], C.op.axis[1], bn, bn)\n(k,) = s[C].op.reduce_axis\nko, ki = s[C].split(k, factor=4)\n\n# re-ordering\ns[C].reorder(xo, yo, ko, xi, ki, yi)\ns[C].vectorize(yi)\n\nfunc = tvm.build(s, [A, B, C], target=target, name=\"mmult\")\nassert func\n\nc = tvm.nd.array(numpy.zeros((M, N), dtype=dtype), dev)\nfunc(a, b, c)\ntvm.testing.assert_allclose(c.asnumpy(), answer, rtol=1e-5)\n\nevaluator = func.time_evaluator(func. [...]
       ]
     },
     {
@@ -184,7 +184,7 @@
       },
       "outputs": [],
       "source": [
-        "# We have to re-write the algorithm slightly.\npackedB = te.compute((N / bn, K, bn), lambda x, y, z: B[y, x * bn + z], name=\"packedB\")\nC = te.compute(\n    (M, N),\n    lambda x, y: te.sum(A[x, k] * packedB[y // bn, k, tvm.tir.indexmod(y, bn)], axis=k),\n    name=\"C\",\n)\n\ns = te.create_schedule(C.op)\n\nxo, yo, xi, yi = s[C].tile(C.op.axis[0], C.op.axis[1], bn, bn)\n(k,) = s[C].op.reduce_axis\nko, ki = s[C].split(k, factor=4)\n\ns[C].reorder(xo, yo, ko, xi, ki, yi)\ns[C]. [...]
+        "# We have to re-write the algorithm slightly.\npackedB = te.compute((N / bn, K, bn), lambda x, y, z: B[y, x * bn + z], name=\"packedB\")\nC = te.compute(\n    (M, N),\n    lambda x, y: te.sum(A[x, k] * packedB[y // bn, k, tvm.tir.indexmod(y, bn)], axis=k),\n    name=\"C\",\n)\n\ns = te.create_schedule(C.op)\n\nxo, yo, xi, yi = s[C].tile(C.op.axis[0], C.op.axis[1], bn, bn)\n(k,) = s[C].op.reduce_axis\nko, ki = s[C].split(k, factor=4)\n\ns[C].reorder(xo, yo, ko, xi, ki, yi)\ns[C]. [...]
       ]
     },
     {
@@ -220,7 +220,7 @@
       },
       "outputs": [],
       "source": [
-        "s = te.create_schedule(C.op)\n\n# Allocate write cache\nCC = s.cache_write(C, \"global\")\n\nxo, yo, xi, yi = s[C].tile(C.op.axis[0], C.op.axis[1], bn, bn)\n\n# Write cache is computed at yo\ns[CC].compute_at(s[C], yo)\n\n# New inner axes\nxc, yc = s[CC].op.axis\n\n(k,) = s[CC].op.reduce_axis\nko, ki = s[CC].split(k, factor=4)\ns[CC].reorder(ko, xc, ki, yc)\ns[CC].unroll(ki)\ns[CC].vectorize(yc)\n\nx, y, z = s[packedB].op.axis\ns[packedB].vectorize(z)\ns[packedB].parallel(x)\n\n [...]
+        "s = te.create_schedule(C.op)\n\n# Allocate write cache\nCC = s.cache_write(C, \"global\")\n\nxo, yo, xi, yi = s[C].tile(C.op.axis[0], C.op.axis[1], bn, bn)\n\n# Write cache is computed at yo\ns[CC].compute_at(s[C], yo)\n\n# New inner axes\nxc, yc = s[CC].op.axis\n\n(k,) = s[CC].op.reduce_axis\nko, ki = s[CC].split(k, factor=4)\ns[CC].reorder(ko, xc, ki, yc)\ns[CC].unroll(ki)\ns[CC].vectorize(yc)\n\nx, y, z = s[packedB].op.axis\ns[packedB].vectorize(z)\ns[packedB].parallel(x)\n\n [...]
       ]
     },
     {
@@ -256,7 +256,7 @@
       },
       "outputs": [],
       "source": [
-        "s = te.create_schedule(C.op)\n\nCC = s.cache_write(C, \"global\")\n\nxo, yo, xi, yi = s[C].tile(C.op.axis[0], C.op.axis[1], bn, bn)\n\ns[CC].compute_at(s[C], yo)\n\nxc, yc = s[CC].op.axis\n\n(k,) = s[CC].op.reduce_axis\nko, ki = s[CC].split(k, factor=4)\ns[CC].reorder(ko, xc, ki, yc)\ns[CC].unroll(ki)\ns[CC].vectorize(yc)\n\n# parallel\ns[C].parallel(xo)\n\nx, y, z = s[packedB].op.axis\ns[packedB].vectorize(z)\ns[packedB].parallel(x)\n\nfunc = tvm.build(s, [A, B, C], target=targ [...]
+        "s = te.create_schedule(C.op)\n\nCC = s.cache_write(C, \"global\")\n\nxo, yo, xi, yi = s[C].tile(C.op.axis[0], C.op.axis[1], bn, bn)\n\ns[CC].compute_at(s[C], yo)\n\nxc, yc = s[CC].op.axis\n\n(k,) = s[CC].op.reduce_axis\nko, ki = s[CC].split(k, factor=4)\ns[CC].reorder(ko, xc, ki, yc)\ns[CC].unroll(ki)\ns[CC].vectorize(yc)\n\n# parallel\ns[C].parallel(xo)\n\nx, y, z = s[packedB].op.axis\ns[packedB].vectorize(z)\ns[packedB].parallel(x)\n\nfunc = tvm.build(s, [A, B, C], target=targ [...]
       ]
     },
     {
diff --git a/docs/_downloads/48779ddff800bd9d4b8b3bd7ef8f054c/using_external_lib.py b/docs/_downloads/48779ddff800bd9d4b8b3bd7ef8f054c/using_external_lib.py
index 8e7fcd7..232f618 100644
--- a/docs/_downloads/48779ddff800bd9d4b8b3bd7ef8f054c/using_external_lib.py
+++ b/docs/_downloads/48779ddff800bd9d4b8b3bd7ef8f054c/using_external_lib.py
@@ -34,7 +34,7 @@ To begin with, we import Relay and TVM.
 import tvm
 from tvm import te
 import numpy as np
-from tvm.contrib import graph_runtime as runtime
+from tvm.contrib import graph_executor as runtime
 from tvm import relay
 from tvm.relay import testing
 import tvm.testing
@@ -77,9 +77,9 @@ logging.basicConfig(level=logging.DEBUG)  # to dump TVM IR after fusion
 target = "cuda"
 lib = relay.build_module.build(net, target, params=params)
 
-ctx = tvm.context(target, 0)
+dev = tvm.device(target, 0)
 data = np.random.uniform(-1, 1, size=data_shape).astype("float32")
-module = runtime.GraphModule(lib["default"](ctx))
+module = runtime.GraphModule(lib["default"](dev))
 module.set_input("data", data)
 module.run()
 out_shape = (batch_size, out_channels, 224, 224)
@@ -498,9 +498,9 @@ net, params = testing.create_workload(simple_net)
 target = "cuda -libs=cudnn"  # use cudnn for convolution
 lib = relay.build_module.build(net, target, params=params)
 
-ctx = tvm.context(target, 0)
+dev = tvm.device(target, 0)
 data = np.random.uniform(-1, 1, size=data_shape).astype("float32")
-module = runtime.GraphModule(lib["default"](ctx))
+module = runtime.GraphModule(lib["default"](dev))
 module.set_input("data", data)
 module.run()
 out_shape = (batch_size, out_channels, 224, 224)
diff --git a/docs/_downloads/48bd751ebaae08fce134e559f86a25cc/tune_relay_vta.ipynb b/docs/_downloads/48bd751ebaae08fce134e559f86a25cc/tune_relay_vta.ipynb
index a14424b..f5c8ee8 100644
--- a/docs/_downloads/48bd751ebaae08fce134e559f86a25cc/tune_relay_vta.ipynb
+++ b/docs/_downloads/48bd751ebaae08fce134e559f86a25cc/tune_relay_vta.ipynb
@@ -33,7 +33,7 @@
       },
       "outputs": [],
       "source": [
-        "import os\nfrom mxnet.gluon.model_zoo import vision\nimport numpy as np\nfrom PIL import Image\n\nfrom tvm import topi\nimport tvm\nfrom tvm import te\nfrom tvm import rpc, autotvm, relay\nfrom tvm.contrib import graph_runtime, utils, download\nfrom tvm.autotvm.measure.measure_methods import request_remote\nfrom tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner\n\nimport vta\nfrom vta.testing import simulator\nfrom vta.top import graph_pack"
+        "import os\nfrom mxnet.gluon.model_zoo import vision\nimport numpy as np\nfrom PIL import Image\n\nfrom tvm import topi\nimport tvm\nfrom tvm import te\nfrom tvm import rpc, autotvm, relay\nfrom tvm.contrib import graph_executor, utils, download\nfrom tvm.autotvm.measure.measure_methods import request_remote\nfrom tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner\n\nimport vta\nfrom vta.testing import simulator\nfrom vta.top import graph_pack"
       ]
     },
     {
@@ -144,7 +144,7 @@
       },
       "outputs": [],
       "source": [
-        "def tune_and_evaluate(tuning_opt):\n\n    # Register VTA tuning tasks\n    register_vta_tuning_tasks()\n\n    # Perform task extraction on Relay program\n    print(\"Extract tasks...\")\n    relay_prog, params = compile_network(env, target, network, start_pack, stop_pack)\n    mod = tvm.IRModule.from_expr(relay_prog)\n    tasks = autotvm.task.extract_from_program(\n        mod,\n        params=params,\n        ops=(relay.op.get(\"nn.conv2d\"),),\n        target=target,\n         [...]
+        "def tune_and_evaluate(tuning_opt):\n\n    # Register VTA tuning tasks\n    register_vta_tuning_tasks()\n\n    # Perform task extraction on Relay program\n    print(\"Extract tasks...\")\n    relay_prog, params = compile_network(env, target, network, start_pack, stop_pack)\n    mod = tvm.IRModule.from_expr(relay_prog)\n    tasks = autotvm.task.extract_from_program(\n        mod,\n        params=params,\n        ops=(relay.op.get(\"nn.conv2d\"),),\n        target=target,\n         [...]
       ]
     },
     {
diff --git a/docs/_downloads/4c010cd30d80efae3f2dfe5743a098a5/install.ipynb b/docs/_downloads/4c010cd30d80efae3f2dfe5743a098a5/install.ipynb
new file mode 100644
index 0000000..3e572ac
--- /dev/null
+++ b/docs/_downloads/4c010cd30d80efae3f2dfe5743a098a5/install.ipynb
@@ -0,0 +1,57 @@
+{
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "%matplotlib inline"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "\nInstalling TVM\n==============\n**Authors**:\n`Jocelyn Shiue <https://github.com/>`_,\n`Chris Hoge <https://github.com/hogepodge>`_\n\nDepending on your needs and your working environment, there are a few different\nmethods for installing TVM. These include:\n    * Installing from source \n    * Installing from third-party binary package.\n\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Installing from Source\n----------------------\nInstalling from source is the recommended method for installing TVM. It will\nallow you to enable specific features such as GPU support, microcontroller\nsupport (uTVM), and a debugging runtime, and other features. You will also\nwant to install from source if you want to actively contribute to the TVM\nproject. The full instructions are on the `Install TVM From Source\n</install/from_source.html>`_ page.\n\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Installing From Binary Packages\n--------------------------------\nYou may install convenient third-party binary package distributions to\nquickly try things out. TLCPack is a thirdparty volunteer community that\nbuilds binary packages from TVM source. It offers support matrix with\ninstructions to install on different platforms, with different features.\nCheckout  `TLCPack <https://tlcpack.ai>`_ to learn more. Note that the\nthirdparty binary packages could contain additional l [...]
+      ]
+    }
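Whichever method you choose, a quick smoke test from Python confirms that the
installation is usable; a minimal sketch (assuming ``tvm`` is on your Python
path):

.. code-block:: python

    # Verify that the tvm package imports and report its version.
    import tvm
    print(tvm.__version__)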
+  ],
+  "metadata": {
+    "kernelspec": {
+      "display_name": "Python 3",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.6.12"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}
\ No newline at end of file
diff --git a/docs/_downloads/4dd41316d6ea7ff2b6993aab65428bf5/cross_compilation_and_rpc.ipynb b/docs/_downloads/4dd41316d6ea7ff2b6993aab65428bf5/cross_compilation_and_rpc.ipynb
index 2f09bdd..0598d4b 100644
--- a/docs/_downloads/4dd41316d6ea7ff2b6993aab65428bf5/cross_compilation_and_rpc.ipynb
+++ b/docs/_downloads/4dd41316d6ea7ff2b6993aab65428bf5/cross_compilation_and_rpc.ipynb
@@ -108,7 +108,7 @@
       },
       "outputs": [],
       "source": [
-        "remote.upload(path)\nfunc = remote.load_module(\"lib.tar\")\n\n# create arrays on the remote device\nctx = remote.cpu()\na = tvm.nd.array(np.random.uniform(size=1024).astype(A.dtype), ctx)\nb = tvm.nd.array(np.zeros(1024, dtype=A.dtype), ctx)\n# the function will run on the remote device\nfunc(a, b)\nnp.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)"
+        "remote.upload(path)\nfunc = remote.load_module(\"lib.tar\")\n\n# create arrays on the remote device\ndev = remote.cpu()\na = tvm.nd.array(np.random.uniform(size=1024).astype(A.dtype), dev)\nb = tvm.nd.array(np.zeros(1024, dtype=A.dtype), dev)\n# the function will run on the remote device\nfunc(a, b)\nnp.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)"
       ]
     },
     {
@@ -126,7 +126,7 @@
       },
       "outputs": [],
       "source": [
-        "time_f = func.time_evaluator(func.entry_name, ctx, number=10)\ncost = time_f(a, b).mean\nprint(\"%g secs/op\" % cost)"
+        "time_f = func.time_evaluator(func.entry_name, dev, number=10)\ncost = time_f(a, b).mean\nprint(\"%g secs/op\" % cost)"
       ]
     },
     {
@@ -144,7 +144,7 @@
       },
       "outputs": [],
       "source": [
-        "def run_opencl():\n    # NOTE: This is the setting for my rk3399 board. You need to modify\n    # them according to your environment.\n    target_host = \"llvm -mtriple=aarch64-linux-gnu\"\n    opencl_device_host = \"10.77.1.145\"\n    opencl_device_port = 9090\n\n    # create schedule for the above \"add one\" compute declaration\n    s = te.create_schedule(B.op)\n    xo, xi = s[B].split(B.op.axis[0], factor=32)\n    s[B].bind(xo, te.thread_axis(\"blockIdx.x\"))\n    s[B].bind( [...]
+        "def run_opencl():\n    # NOTE: This is the setting for my rk3399 board. You need to modify\n    # them according to your environment.\n    opencl_device_host = \"10.77.1.145\"\n    opencl_device_port = 9090\n    target = tvm.target.Target(\"opencl\", host=\"llvm -mtriple=aarch64-linux-gnu\")\n\n    # create schedule for the above \"add one\" compute declaration\n    s = te.create_schedule(B.op)\n    xo, xi = s[B].split(B.op.axis[0], factor=32)\n    s[B].bind(xo, te.thread_axis(\ [...]
       ]
     },
     {
diff --git a/docs/_downloads/4e9540fc014621d8d3bd14869c1ab227/scan.ipynb b/docs/_downloads/4e9540fc014621d8d3bd14869c1ab227/scan.ipynb
index 2193c48..74e530b 100644
--- a/docs/_downloads/4e9540fc014621d8d3bd14869c1ab227/scan.ipynb
+++ b/docs/_downloads/4e9540fc014621d8d3bd14869c1ab227/scan.ipynb
@@ -80,7 +80,7 @@
       },
       "outputs": [],
       "source": [
-        "fscan = tvm.build(s, [X, s_scan], \"cuda\", name=\"myscan\")\nctx = tvm.gpu(0)\nn = 1024\nm = 10\na_np = np.random.uniform(size=(m, n)).astype(s_scan.dtype)\na = tvm.nd.array(a_np, ctx)\nb = tvm.nd.array(np.zeros((m, n), dtype=s_scan.dtype), ctx)\nfscan(a, b)\ntvm.testing.assert_allclose(b.asnumpy(), np.cumsum(a_np, axis=0))"
+        "fscan = tvm.build(s, [X, s_scan], \"cuda\", name=\"myscan\")\ndev = tvm.gpu(0)\nn = 1024\nm = 10\na_np = np.random.uniform(size=(m, n)).astype(s_scan.dtype)\na = tvm.nd.array(a_np, dev)\nb = tvm.nd.array(np.zeros((m, n), dtype=s_scan.dtype), dev)\nfscan(a, b)\ntvm.testing.assert_allclose(b.asnumpy(), np.cumsum(a_np, axis=0))"
       ]
     },
     {
diff --git a/docs/_downloads/4f4a49a5483a0d0aa4af30f58c3c8664/deploy_quantized.ipynb b/docs/_downloads/4f4a49a5483a0d0aa4af30f58c3c8664/deploy_quantized.ipynb
index 94eabbc..294b0b6 100644
--- a/docs/_downloads/4f4a49a5483a0d0aa4af30f58c3c8664/deploy_quantized.ipynb
+++ b/docs/_downloads/4f4a49a5483a0d0aa4af30f58c3c8664/deploy_quantized.ipynb
@@ -26,7 +26,7 @@
       },
       "outputs": [],
       "source": [
-        "import tvm\nfrom tvm import te\nfrom tvm import relay\nimport mxnet as mx\nfrom tvm.contrib.download import download_testdata\nfrom mxnet import gluon\nimport logging\nimport os\n\nbatch_size = 1\nmodel_name = \"resnet18_v1\"\ntarget = \"cuda\"\nctx = tvm.context(target)"
+        "import tvm\nfrom tvm import te\nfrom tvm import relay\nimport mxnet as mx\nfrom tvm.contrib.download import download_testdata\nfrom mxnet import gluon\nimport logging\nimport os\n\nbatch_size = 1\nmodel_name = \"resnet18_v1\"\ntarget = \"cuda\"\ndev = tvm.device(target)"
       ]
     },
     {
@@ -116,7 +116,7 @@
       },
       "outputs": [],
       "source": [
-        "def run_inference(mod):\n    executor = relay.create_executor(\"vm\", mod, ctx, target)\n    val_data, batch_fn = get_val_data()\n    for i, batch in enumerate(val_data):\n        data, label = batch_fn(batch)\n        prediction = executor.evaluate()(data)\n        if i > 10:  # only run inference on a few samples in this tutorial\n            break\n\n\ndef main():\n    mod, params = get_model()\n    mod = quantize(mod, params, data_aware=True)\n    run_inference(mod)\n\n\nif  [...]
+        "def run_inference(mod):\n    executor = relay.create_executor(\"vm\", mod, dev, target)\n    val_data, batch_fn = get_val_data()\n    for i, batch in enumerate(val_data):\n        data, label = batch_fn(batch)\n        prediction = executor.evaluate()(data)\n        if i > 10:  # only run inference on a few samples in this tutorial\n            break\n\n\ndef main():\n    mod, params = get_model()\n    mod = quantize(mod, params, data_aware=True)\n    run_inference(mod)\n\n\nif  [...]
       ]
     }
   ],
diff --git a/docs/_downloads/50b174352ccf0a0defcbd8e6b40145e2/from_tensorflow.py b/docs/_downloads/50b174352ccf0a0defcbd8e6b40145e2/from_tensorflow.py
index 5cdc395..9c8d0f6 100644
--- a/docs/_downloads/50b174352ccf0a0defcbd8e6b40145e2/from_tensorflow.py
+++ b/docs/_downloads/50b174352ccf0a0defcbd8e6b40145e2/from_tensorflow.py
@@ -70,14 +70,12 @@ label_map_url = os.path.join(repo_base, label_map)
 
 # Target settings
 # Use these commented settings to build for cuda.
-# target = 'cuda'
-# target_host = 'llvm'
+# target = tvm.target.Target("cuda", host="llvm")
 # layout = "NCHW"
-# ctx = tvm.gpu(0)
-target = "llvm"
-target_host = "llvm"
+# dev = tvm.gpu(0)
+target = tvm.target.Target("llvm", host="llvm")
 layout = None
-ctx = tvm.cpu(0)
+dev = tvm.cpu(0)
 
 ######################################################################
 # Download required files
@@ -145,17 +143,17 @@ print("Tensorflow protobuf imported to relay frontend.")
 #   lib: target library which can be deployed on target with TVM runtime.
 
 with tvm.transform.PassContext(opt_level=3):
-    lib = relay.build(mod, target=target, target_host=target_host, params=params)
+    lib = relay.build(mod, target, params=params)
 
 ######################################################################
 # Execute the portable graph on TVM
 # ---------------------------------
 # Now we can try deploying the compiled model on target.
 
-from tvm.contrib import graph_runtime
+from tvm.contrib import graph_executor
 
 dtype = "uint8"
-m = graph_runtime.GraphModule(lib["default"](ctx))
+m = graph_executor.GraphModule(lib["default"](dev))
 # set inputs
 m.set_input("DecodeJpeg/contents", tvm.nd.array(x.astype(dtype)))
 # execute
diff --git a/docs/_downloads/5605e45978c6b29c0434162ad6899d08/autotvm_matmul.ipynb b/docs/_downloads/5605e45978c6b29c0434162ad6899d08/autotvm_matmul.ipynb
new file mode 100644
index 0000000..861cdd7
--- /dev/null
+++ b/docs/_downloads/5605e45978c6b29c0434162ad6899d08/autotvm_matmul.ipynb
@@ -0,0 +1,222 @@
+{
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "%matplotlib inline"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "\nOptimizing Operators with Templates and AutoTVM\n===============================================\n**Authors**:\n`Lianmin Zheng <https://github.com/merrymercy>`_,\n`Chris Hoge <https://github.com/hogepodge>`_\n\nIn this tutorial, we will now show how the TVM Template Extension (TE) language\ncan be used to write scheduling templates that can be searched by AutoTVM to\nfind optimal configurations of scheduling variables. This process is called\nAuto-Tuning, and builds on TE to h [...]
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Install dependencies\n--------------------\nTo use autotvm package in TVM, we need to install some extra dependencies.\n\n.. code-block:: bash\n\n  pip3 install --user psutil xgboost cloudpickle\n\nTo make TVM run faster in tuning, it is recommended to use cython as FFI of\nTVM. In the root directory of TVM, execute:\n\n.. code-block:: bash\n\n  pip3 install --user cython\n  sudo make cython3\n\nNow return to python code. Begin by importing the required packages.\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "import logging\nimport sys\n\nimport numpy as np\nimport tvm\nfrom tvm import te\nimport tvm.testing\n\n# the module is called `autotvm`\nfrom tvm import autotvm"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Basic Matrix Multiplication with TE\n-----------------------------------\nRecall the basic implementation of matrix multiplication using TE. We write\nit down here with a few changes. We will wrap the multiplication in a python\nfunction definition. For simplicity, we will focus our attention on a split\noptimization, using a fixed value that defines the block size of the\nreordering.\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "def matmul_basic(N, L, M, dtype):\n\n    a = te.placeholder((n, l), name=\"a\", dtype=dtype)\n    B = te.placeholder((L, M), name=\"B\", dtype=dtype)\n\n    k = te.reduce_axis((0, L), name=\"k\")\n    C = te.compute((N, M), lambda i, j: te.sum(A[i, k] * B[k, j], axis=k), name=\"C\")\n    s = te.create_schedule(C.op)\n\n    # schedule\n    y, x = s[C].op.axis\n    k = s[C].op.reduce_axis[0]\n\n    yo, yi = s[C].split(y, 8)\n    xo, xi = s[C].split(x, 8)\n\n    s[C].reorder(yo, xo [...]
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Matrix Multiplication with AutoTVM\n----------------------------------\nIn the previous schedule code, we use a constant \"8\" as the tiling factor.\nHowever, it might not be the best one because the best tiling factor depends\non real hardware environment and input shape.\n\nIf you want the schedule code to be portable across a wider range of input\nshapes and target hardware, it is better to define a set of candidate values\nand pick the best one according to the measurement r [...]
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "A Basic Matrix Multiplication Template\n--------------------------------------\nWe begin with an example of how to create a tunable parameter set for the\nblock size of the `split` scheduling operation.\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "# Matmul V1: List candidate values\n@autotvm.template(\"tutorial/matmul_v1\")  # 1. use a decorator\ndef matmul_v1(N, L, M, dtype):\n    A = te.placeholder((N, L), name=\"A\", dtype=dtype)\n    B = te.placeholder((L, M), name=\"B\", dtype=dtype)\n\n    k = te.reduce_axis((0, L), name=\"k\")\n    C = te.compute((N, M), lambda i, j: te.sum(A[i, k] * B[k, j], axis=k), name=\"C\")\n    s = te.create_schedule(C.op)\n\n    # schedule\n    y, x = s[C].op.axis\n    k = s[C].op.reduce_ax [...]
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Here we make four modifications to the previous schedule code and get a\ntunable \"template\". We can explain the modifications one by one.\n\n1. Use a decorator to mark this function as a simple template.\n2. Get a config object: You can regard this :code:`cfg` as an argument of\n   this function but we obtain it in a different way. With this argument, this\n   function is no longer a deterministic schedule. Instead, we can pass\n   different configurations to this function and [...]
+      ]
+    },
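For reference, the heart of those modifications is the knob definition and
its use in the schedule. A minimal sketch of that portion of ``matmul_v1``
(assuming the same ``s``, ``C``, ``y``, and ``x`` names defined in the
template cell above; the surrounding compute and schedule code is unchanged):

.. code-block:: python

    cfg = autotvm.get_config()                    # 2. get the config object
    cfg.define_knob("tile_y", [1, 2, 4, 8, 16])   # 3. define the search space
    cfg.define_knob("tile_x", [1, 2, 4, 8, 16])
    yo, yi = s[C].split(y, cfg["tile_y"].val)     # 4. schedule according to config
    xo, xi = s[C].split(x, cfg["tile_x"].val)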
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "A Matrix Multiplication Template with the Advanced Parameter API\n----------------------------------------------------------------\nIn the previous template, we manually listed all of the possible values for a\nknob. This is the lowest level API to define the space, and gives an explicit\nenumeration of the parameter space to search. However, we also provide\nanother set of APIs that can make the definition of the search space easier\nand smarter. Where possible, we receomment y [...]
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "@autotvm.template(\"tutorial/matmul\")\ndef matmul(N, L, M, dtype):\n    A = te.placeholder((N, L), name=\"A\", dtype=dtype)\n    B = te.placeholder((L, M), name=\"B\", dtype=dtype)\n\n    k = te.reduce_axis((0, L), name=\"k\")\n    C = te.compute((N, M), lambda i, j: te.sum(A[i, k] * B[k, j], axis=k), name=\"C\")\n    s = te.create_schedule(C.op)\n\n    # schedule\n    y, x = s[C].op.axis\n    k = s[C].op.reduce_axis[0]\n\n    ##### define space begin #####\n    cfg = autotvm.g [...]
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "<div class=\"alert alert-info\"><h4>Note</h4><p>More Explanation on :code:`cfg.define_split`</p></div>\n\n In this template, :code:`cfg.define_split(\"tile_y\", y, num_outputs=2)` will\n enumerate all possible combinations that can split axis y into two axes with\n factors of the length of y. For example, if the length of y is 32 and we\n want to split it into two axes using factors of 32, then there are 6\n possible values for (length of outer axis, length of inner axis) pair,\ [...]
+      ]
+    },
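A minimal plain-Python illustration of that enumeration (just the arithmetic
behind it, not the AutoTVM API itself):

.. code-block:: python

    # All (outer, inner) pairs whose product is 32 -- the candidates that
    # cfg.define_split("tile_y", y, num_outputs=2) enumerates for length 32.
    length = 32
    pairs = [(length // f, f) for f in range(1, length + 1) if length % f == 0]
    print(pairs)  # [(32, 1), (16, 2), (8, 4), (4, 8), (2, 16), (1, 32)]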
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Step 2: Use AutoTVM to Optimize the Matrix Multiplication\n---------------------------------------------------------\nIn Step 1, we wrote a matrix multiplication template that allowed us to\nparamaterize the block size used in the `split` schedule. We can now conduct\na search over this parameter space. The next step is to pick a tuner to guide\nthe exploration of this space.\n\nAuto-tuners in TVM\n~~~~~~~~~~~~~~~~~~\nThe job for a tuner can be described by following pseudo code [...]
+      ]
+    },
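Since every tuner exposes the same interface, switching strategies is a
one-line change. A sketch, assuming the ``task`` and ``measure_option``
objects created later in this tutorial:

.. code-block:: python

    # Any of the imported tuners can drive the same task; XGBTuner fits a
    # cost model to past measurements, while RandomTuner samples uniformly.
    tuner = autotvm.tuner.XGBTuner(task)
    # tuner = autotvm.tuner.GridSearchTuner(task)  # exhaustive search
    # tuner = autotvm.tuner.GATuner(task)          # genetic algorithm
    tuner.tune(
        n_trial=10,
        measure_option=measure_option,
        callbacks=[autotvm.callback.log_to_file("matmul.log")],
    )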
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Begin tuning\n~~~~~~~~~~~~\nHere we continue our matrix multiplication example. First we create a tuning\ntask. We can also inspect the initialized search space. In this case, for a\n512x512 square matrix multiplication, the space size is 10x10=100 Note that\nthe task and search space are independent of the tuner picked.\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "N, L, M = 512, 512, 512\ntask = autotvm.task.create(\"tutorial/matmul\", args=(N, L, M, \"float32\"), target=\"llvm\")\nprint(task.config_space)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Then we need to define how to measure the generated code and pick a tuner.\nSince our space is small, a random tuner is just okay.\n\nWe only make 10 trials in this tutorial for demonstration. In practice, you\ncan do more trials according to your time budget. We will log the tuning\nresults into a log file. This file can be used to choose the best\nconfiguration discovered by the tuner later.\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "# logging config (for printing tuning log to the screen)\nlogging.getLogger(\"autotvm\").setLevel(logging.DEBUG)\nlogging.getLogger(\"autotvm\").addHandler(logging.StreamHandler(sys.stdout))"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "There are two steps for measuring a config: build and run. By default, we use\nall CPU cores to compile program. We then measure them sequentially. To help\nreduce variance, we take 5 measurements and average them.\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "measure_option = autotvm.measure_option(builder=\"local\", runner=autotvm.LocalRunner(number=5))\n\n# Begin tuning with RandomTuner, log records to file `matmul.log`\n# You can use alternatives like XGBTuner.\ntuner = autotvm.tuner.RandomTuner(task)\ntuner.tune(\n    n_trial=10,\n    measure_option=measure_option,\n    callbacks=[autotvm.callback.log_to_file(\"matmul.log\")],\n)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "With tuning completed, we can choose the configuration from the log file that\nhas the best measured performance and compile the schedule with the\ncorresponding parameters. We also do a quick verfication that the schedule is\nproducing correct answers.  We can call the function :code:`matmul` directly\nunder the :any:`autotvm.apply_history_best` context. When we call this\nfunction, it will query the dispatch context with its argument and get the\nbest config with the same argu [...]
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "# apply history best from log file\nwith autotvm.apply_history_best(\"matmul.log\"):\n    with tvm.target.Target(\"llvm\"):\n        s, arg_bufs = matmul(N, L, M, \"float32\")\n        func = tvm.build(s, arg_bufs)\n\n# check correctness\na_np = np.random.uniform(size=(N, L)).astype(np.float32)\nb_np = np.random.uniform(size=(L, M)).astype(np.float32)\nc_np = a_np.dot(b_np)\n\nc_tvm = tvm.nd.empty(c_np.shape)\nfunc(tvm.nd.array(a_np), tvm.nd.array(b_np), c_tvm)\n\ntvm.testing.as [...]
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Final Notes and Summary\n-----------------------\nIn this tutorial, we have shown how to build operator templates that allow\nTVM to search a parameter space and choose optimized schedule configurations.\nTo gain a deeper understanding of how this works, we recommend expanding on\nthis example by adding new search parameters to the schedule based on\nschedule operations demonstated in the `Getting Started With Tensor\nExpressions <tensor_expr_get_started>_` tutorial In the upcom [...]
+      ]
+    }
+  ],
+  "metadata": {
+    "kernelspec": {
+      "display_name": "Python 3",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.6.12"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}
\ No newline at end of file
diff --git a/docs/_downloads/578004d7db54caef0007609ae5540c72/intro_topi.ipynb b/docs/_downloads/578004d7db54caef0007609ae5540c72/intro_topi.ipynb
index c027c43..f70c263 100644
--- a/docs/_downloads/578004d7db54caef0007609ae5540c72/intro_topi.ipynb
+++ b/docs/_downloads/578004d7db54caef0007609ae5540c72/intro_topi.ipynb
@@ -159,7 +159,7 @@
       },
       "outputs": [],
       "source": [
-        "func = tvm.build(sg, [a, b, g], \"cuda\")\nctx = tvm.gpu(0)\na_np = np.random.uniform(size=(x, y, y)).astype(a.dtype)\nb_np = np.random.uniform(size=(y, y)).astype(b.dtype)\ng_np = np.sum(np.add(a_np + b_np, a_np * b_np) / 2.0)\na_nd = tvm.nd.array(a_np, ctx)\nb_nd = tvm.nd.array(b_np, ctx)\ng_nd = tvm.nd.array(np.zeros(g_np.shape, dtype=g_np.dtype), ctx)\nfunc(a_nd, b_nd, g_nd)\ntvm.testing.assert_allclose(g_nd.asnumpy(), g_np, rtol=1e-5)"
+        "func = tvm.build(sg, [a, b, g], \"cuda\")\ndev = tvm.gpu(0)\na_np = np.random.uniform(size=(x, y, y)).astype(a.dtype)\nb_np = np.random.uniform(size=(y, y)).astype(b.dtype)\ng_np = np.sum(np.add(a_np + b_np, a_np * b_np) / 2.0)\na_nd = tvm.nd.array(a_np, dev)\nb_nd = tvm.nd.array(b_np, dev)\ng_nd = tvm.nd.array(np.zeros(g_np.shape, dtype=g_np.dtype), dev)\nfunc(a_nd, b_nd, g_nd)\ntvm.testing.assert_allclose(g_nd.asnumpy(), g_np, rtol=1e-5)"
       ]
     },
     {
diff --git a/docs/_downloads/5b32f1dc3e9e2fc5ac5be0918758b967/deploy_quantized.py b/docs/_downloads/5b32f1dc3e9e2fc5ac5be0918758b967/deploy_quantized.py
index e75f6e9..b2210b8 100644
--- a/docs/_downloads/5b32f1dc3e9e2fc5ac5be0918758b967/deploy_quantized.py
+++ b/docs/_downloads/5b32f1dc3e9e2fc5ac5be0918758b967/deploy_quantized.py
@@ -39,7 +39,7 @@ import os
 batch_size = 1
 model_name = "resnet18_v1"
 target = "cuda"
-ctx = tvm.context(target)
+dev = tvm.device(target)
 
 ###############################################################################
 # Prepare the Dataset
@@ -146,7 +146,7 @@ def quantize(mod, params, data_aware):
 # -------------
 # We create a Relay VM to build and execute the model.
 def run_inference(mod):
-    executor = relay.create_executor("vm", mod, ctx, target)
+    executor = relay.create_executor("vm", mod, dev, target)
     val_data, batch_fn = get_val_data()
     for i, batch in enumerate(val_data):
         data, label = batch_fn(batch)
diff --git a/docs/_downloads/5bd1bb9c6505ea40407fa19f01579414/reduction.py b/docs/_downloads/5bd1bb9c6505ea40407fa19f01579414/reduction.py
index cffa10e..f782ac6 100644
--- a/docs/_downloads/5bd1bb9c6505ea40407fa19f01579414/reduction.py
+++ b/docs/_downloads/5bd1bb9c6505ea40407fa19f01579414/reduction.py
@@ -137,9 +137,9 @@ print(fcuda.imported_modules[0].get_source())
 # Verify the correctness of result kernel by comparing it to numpy.
 #
 nn = 128
-ctx = tvm.gpu(0)
-a = tvm.nd.array(np.random.uniform(size=(nn, nn)).astype(A.dtype), ctx)
-b = tvm.nd.array(np.zeros(nn, dtype=B.dtype), ctx)
+dev = tvm.gpu(0)
+a = tvm.nd.array(np.random.uniform(size=(nn, nn)).astype(A.dtype), dev)
+b = tvm.nd.array(np.zeros(nn, dtype=B.dtype), dev)
 fcuda(a, b)
 tvm.testing.assert_allclose(b.asnumpy(), np.sum(a.asnumpy(), axis=1), rtol=1e-4)
 
diff --git a/docs/_downloads/5c443f88ea44ce77c5ccade429af6e74/deploy_prequantized_tflite.py b/docs/_downloads/5c443f88ea44ce77c5ccade429af6e74/deploy_prequantized_tflite.py
index 121ad9d..e0f9a6b 100644
--- a/docs/_downloads/5c443f88ea44ce77c5ccade429af6e74/deploy_prequantized_tflite.py
+++ b/docs/_downloads/5c443f88ea44ce77c5ccade429af6e74/deploy_prequantized_tflite.py
@@ -168,9 +168,9 @@ def run_tflite_model(tflite_model_buf, input_data):
 ###############################################################################
 # Lets run TVM compiled pre-quantized model inference and get the TVM prediction.
 def run_tvm(lib):
-    from tvm.contrib import graph_runtime
+    from tvm.contrib import graph_executor
 
-    rt_mod = graph_runtime.GraphModule(lib["default"](tvm.cpu(0)))
+    rt_mod = graph_executor.GraphModule(lib["default"](tvm.cpu(0)))
     rt_mod.set_input("input", data)
     rt_mod.run()
     tvm_res = rt_mod.get_output(0).asnumpy()
@@ -231,8 +231,8 @@ print("TFLite Top-5 labels:", tflite_pred)
 # -------------------
 # Here we give an example of how to measure performance of TVM compiled models.
 n_repeat = 100  # should be bigger to make the measurement more accurate
-ctx = tvm.cpu(0)
-ftimer = rt_mod.module.time_evaluator("run", ctx, number=1, repeat=n_repeat)
+dev = tvm.cpu(0)
+ftimer = rt_mod.module.time_evaluator("run", dev, number=1, repeat=n_repeat)
 prof_res = np.array(ftimer().results) * 1e3
 print("Elapsed average ms:", np.mean(prof_res))
 
diff --git a/docs/_downloads/612f9e42b0247df5c8ab277534e2af65/tune_relay_vta.py b/docs/_downloads/612f9e42b0247df5c8ab277534e2af65/tune_relay_vta.py
index ed2671c..7deb740 100644
--- a/docs/_downloads/612f9e42b0247df5c8ab277534e2af65/tune_relay_vta.py
+++ b/docs/_downloads/612f9e42b0247df5c8ab277534e2af65/tune_relay_vta.py
@@ -62,7 +62,7 @@ from tvm import topi
 import tvm
 from tvm import te
 from tvm import rpc, autotvm, relay
-from tvm.contrib import graph_runtime, utils, download
+from tvm.contrib import graph_executor, utils, download
 from tvm.autotvm.measure.measure_methods import request_remote
 from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner
 
@@ -431,9 +431,9 @@ def tune_and_evaluate(tuning_opt):
         remote.upload(temp.relpath("graphlib.tar"))
         lib = remote.load_module("graphlib.tar")
 
-        # Generate the graph runtime
+        # Generate the graph executor
         ctx = remote.ext_dev(0) if device == "vta" else remote.cpu(0)
-        m = graph_runtime.GraphModule(lib["default"](ctx))
+        m = graph_executor.GraphModule(lib["default"](ctx))
 
         # upload parameters to device
         image = tvm.nd.array((np.random.uniform(size=(1, 3, 224, 224))).astype("float32"))
diff --git a/docs/_downloads/65bd9927a152de6eed3444185b24287f/tensorize.ipynb b/docs/_downloads/65bd9927a152de6eed3444185b24287f/tensorize.ipynb
index 85ca07f..ec63911 100644
--- a/docs/_downloads/65bd9927a152de6eed3444185b24287f/tensorize.ipynb
+++ b/docs/_downloads/65bd9927a152de6eed3444185b24287f/tensorize.ipynb
@@ -152,7 +152,7 @@
       },
       "outputs": [],
       "source": [
-        "func = tvm.build(s, [A, B, C], target=\"llvm\", name=\"gemv\")\n\nfrom tvm.topi.utils import get_const_tuple\n\ndtype = A.dtype\nctx = tvm.context(\"cpu\", 0)\na = np.random.uniform(size=get_const_tuple(A.shape)).astype(dtype)\nb = np.random.uniform(size=get_const_tuple(B.shape)).astype(dtype)\nc = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=dtype), ctx)\nfunc(tvm.nd.array(a, ctx), tvm.nd.array(b, ctx), c)\ntvm.testing.assert_allclose(c.asnumpy(), np.dot(a, b.T), rtol=1e-3)"
+        "func = tvm.build(s, [A, B, C], target=\"llvm\", name=\"gemv\")\n\nfrom tvm.topi.utils import get_const_tuple\n\ndtype = A.dtype\ndev = tvm.device(\"cpu\", 0)\na = np.random.uniform(size=get_const_tuple(A.shape)).astype(dtype)\nb = np.random.uniform(size=get_const_tuple(B.shape)).astype(dtype)\nc = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=dtype), dev)\nfunc(tvm.nd.array(a, dev), tvm.nd.array(b, dev), c)\ntvm.testing.assert_allclose(c.asnumpy(), np.dot(a, b.T), rtol=1e-3)"
       ]
     },
     {
@@ -206,7 +206,7 @@
       },
       "outputs": [],
       "source": [
-        "gemv = intrin_gemv(factor, factor)\ns[C].tensorize(yi, gemv)\ns[C].pragma(yo, \"import_llvm\", gemv_impl())\n\nfunc = tvm.build(s, [A, B, C], target=\"llvm\", name=\"gemv\")\na = np.random.uniform(size=get_const_tuple(A.shape)).astype(dtype)\nb = np.random.uniform(size=get_const_tuple(B.shape)).astype(dtype)\nc = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=dtype), ctx)\nfunc(tvm.nd.array(a, ctx), tvm.nd.array(b, ctx), c)\ntvm.testing.assert_allclose(c.asnumpy(), np.dot [...]
+        "gemv = intrin_gemv(factor, factor)\ns[C].tensorize(yi, gemv)\ns[C].pragma(yo, \"import_llvm\", gemv_impl())\n\nfunc = tvm.build(s, [A, B, C], target=\"llvm\", name=\"gemv\")\na = np.random.uniform(size=get_const_tuple(A.shape)).astype(dtype)\nb = np.random.uniform(size=get_const_tuple(B.shape)).astype(dtype)\nc = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=dtype), dev)\nfunc(tvm.nd.array(a, dev), tvm.nd.array(b, dev), c)\ntvm.testing.assert_allclose(c.asnumpy(), np.dot [...]
       ]
     },
     {
diff --git a/docs/_downloads/6748a8fb7e82692825b259c20af8372a/opt_conv_cuda.ipynb b/docs/_downloads/6748a8fb7e82692825b259c20af8372a/opt_conv_cuda.ipynb
index a7716f2..117d38d 100644
--- a/docs/_downloads/6748a8fb7e82692825b259c20af8372a/opt_conv_cuda.ipynb
+++ b/docs/_downloads/6748a8fb7e82692825b259c20af8372a/opt_conv_cuda.ipynb
@@ -123,7 +123,7 @@
       },
       "outputs": [],
       "source": [
-        "func = tvm.build(s, [A, W, B], \"cuda\")\nctx = tvm.gpu(0)\na_np = np.random.uniform(size=(in_size, in_size, in_channel, batch)).astype(A.dtype)\nw_np = np.random.uniform(size=(kernel, kernel, in_channel, out_channel)).astype(W.dtype)\na = tvm.nd.array(a_np, ctx)\nw = tvm.nd.array(w_np, ctx)\nb = tvm.nd.array(np.zeros((out_size, out_size, out_channel, batch), dtype=B.dtype), ctx)\nfunc(a, w, b)\nevaluator = func.time_evaluator(func.entry_name, ctx, number=1)\nprint(\"Convolution [...]
+        "func = tvm.build(s, [A, W, B], \"cuda\")\ndev = tvm.gpu(0)\na_np = np.random.uniform(size=(in_size, in_size, in_channel, batch)).astype(A.dtype)\nw_np = np.random.uniform(size=(kernel, kernel, in_channel, out_channel)).astype(W.dtype)\na = tvm.nd.array(a_np, dev)\nw = tvm.nd.array(w_np, dev)\nb = tvm.nd.array(np.zeros((out_size, out_size, out_channel, batch), dtype=B.dtype), dev)\nfunc(a, w, b)\nevaluator = func.time_evaluator(func.entry_name, dev, number=1)\nprint(\"Convolution [...]
       ]
     }
   ],
diff --git a/docs/_downloads/678f3c372a599a18d909aed0fefb30be/tune_conv2d_layer_cuda.py b/docs/_downloads/678f3c372a599a18d909aed0fefb30be/tune_conv2d_layer_cuda.py
index 396bdb0..41fdcbb 100644
--- a/docs/_downloads/678f3c372a599a18d909aed0fefb30be/tune_conv2d_layer_cuda.py
+++ b/docs/_downloads/678f3c372a599a18d909aed0fefb30be/tune_conv2d_layer_cuda.py
@@ -145,18 +145,18 @@ bias_np = np.random.uniform(size=(1, CO, 1, 1)).astype(np.float32)
 conv_np = conv2d_nchw_python(data_np, weight_np, strides, padding)
 out_np = np.maximum(conv_np + bias_np, 0.0)
 
-ctx = tvm.gpu()
-data_tvm = tvm.nd.array(data_np, ctx=ctx)
-weight_tvm = tvm.nd.array(weight_np, ctx=ctx)
-bias_tvm = tvm.nd.array(bias_np, ctx=ctx)
-out_tvm = tvm.nd.empty(out_np.shape, ctx=ctx)
+dev = tvm.gpu()
+data_tvm = tvm.nd.array(data_np, device=dev)
+weight_tvm = tvm.nd.array(weight_np, device=dev)
+bias_tvm = tvm.nd.array(bias_np, device=dev)
+out_tvm = tvm.nd.empty(out_np.shape, device=dev)
 func(data_tvm, weight_tvm, bias_tvm, out_tvm)
 
 # Check results
 np.testing.assert_allclose(out_np, out_tvm.asnumpy(), rtol=1e-3)
 
 # Evaluate execution time
-evaluator = func.time_evaluator(func.entry_name, ctx, min_repeat_ms=500)
+evaluator = func.time_evaluator(func.entry_name, dev, min_repeat_ms=500)
 print(
     "Execution time of this operator: %.3f ms"
     % (np.median(evaluator(data_tvm, weight_tvm, bias_tvm, out_tvm).results) * 1000)
diff --git a/docs/_downloads/6a91d98d4242322072303282a1f2de9c/relay_quick_start.py b/docs/_downloads/6a91d98d4242322072303282a1f2de9c/relay_quick_start.py
index 444b915..fa92076 100644
--- a/docs/_downloads/6a91d98d4242322072303282a1f2de9c/relay_quick_start.py
+++ b/docs/_downloads/6a91d98d4242322072303282a1f2de9c/relay_quick_start.py
@@ -43,7 +43,7 @@ from tvm import relay
 from tvm.relay import testing
 import tvm
 from tvm import te
-from tvm.contrib import graph_runtime
+from tvm.contrib import graph_executor
 import tvm.testing
 
 ######################################################################
@@ -104,13 +104,13 @@ with tvm.transform.PassContext(opt_level=opt_level):
 #####################################################################
 # Run the generate library
 # ------------------------
-# Now we can create graph runtime and run the module on Nvidia GPU.
+# Now we can create graph executor and run the module on Nvidia GPU.
 
 # create random input
-ctx = tvm.gpu()
+dev = tvm.gpu()
 data = np.random.uniform(-1, 1, size=data_shape).astype("float32")
 # create module
-module = graph_runtime.GraphModule(lib["default"](ctx))
+module = graph_executor.GraphModule(lib["default"](dev))
 # set input and parameters
 module.set_input("data", data)
 # run
@@ -143,7 +143,7 @@ print(temp.listdir())
 loaded_lib = tvm.runtime.load_module(path_lib)
 input_data = tvm.nd.array(np.random.uniform(size=data_shape).astype("float32"))
 
-module = graph_runtime.GraphModule(loaded_lib["default"](ctx))
+module = graph_executor.GraphModule(loaded_lib["default"](dev))
 module.run(data=input_data)
 out_deploy = module.get_output(0).asnumpy()
 
diff --git a/docs/_downloads/6be1519353297beeea03fe17712dc16f/using_external_lib.ipynb b/docs/_downloads/6be1519353297beeea03fe17712dc16f/using_external_lib.ipynb
index df72db3..d84f953 100644
--- a/docs/_downloads/6be1519353297beeea03fe17712dc16f/using_external_lib.ipynb
+++ b/docs/_downloads/6be1519353297beeea03fe17712dc16f/using_external_lib.ipynb
@@ -26,7 +26,7 @@
       },
       "outputs": [],
       "source": [
-        "import tvm\nfrom tvm import te\nimport numpy as np\nfrom tvm.contrib import graph_runtime as runtime\nfrom tvm import relay\nfrom tvm.relay import testing\nimport tvm.testing"
+        "import tvm\nfrom tvm import te\nimport numpy as np\nfrom tvm.contrib import graph_executor as runtime\nfrom tvm import relay\nfrom tvm.relay import testing\nimport tvm.testing"
       ]
     },
     {
@@ -62,7 +62,7 @@
       },
       "outputs": [],
       "source": [
-        "import logging\n\nlogging.basicConfig(level=logging.DEBUG)  # to dump TVM IR after fusion\n\ntarget = \"cuda\"\nlib = relay.build_module.build(net, target, params=params)\n\nctx = tvm.context(target, 0)\ndata = np.random.uniform(-1, 1, size=data_shape).astype(\"float32\")\nmodule = runtime.GraphModule(lib[\"default\"](ctx))\nmodule.set_input(\"data\", data)\nmodule.run()\nout_shape = (batch_size, out_channels, 224, 224)\nout = module.get_output(0, tvm.nd.empty(out_shape))\nout_c [...]
+        "import logging\n\nlogging.basicConfig(level=logging.DEBUG)  # to dump TVM IR after fusion\n\ntarget = \"cuda\"\nlib = relay.build_module.build(net, target, params=params)\n\ndev = tvm.device(target, 0)\ndata = np.random.uniform(-1, 1, size=data_shape).astype(\"float32\")\nmodule = runtime.GraphModule(lib[\"default\"](dev))\nmodule.set_input(\"data\", data)\nmodule.run()\nout_shape = (batch_size, out_channels, 224, 224)\nout = module.get_output(0, tvm.nd.empty(out_shape))\nout_cu [...]
       ]
     },
     {
@@ -87,7 +87,7 @@
       },
       "outputs": [],
       "source": [
-        "net, params = testing.create_workload(simple_net)\ntarget = \"cuda -libs=cudnn\"  # use cudnn for convolution\nlib = relay.build_module.build(net, target, params=params)\n\nctx = tvm.context(target, 0)\ndata = np.random.uniform(-1, 1, size=data_shape).astype(\"float32\")\nmodule = runtime.GraphModule(lib[\"default\"](ctx))\nmodule.set_input(\"data\", data)\nmodule.run()\nout_shape = (batch_size, out_channels, 224, 224)\nout = module.get_output(0, tvm.nd.empty(out_shape))\nout_cu [...]
+        "net, params = testing.create_workload(simple_net)\ntarget = \"cuda -libs=cudnn\"  # use cudnn for convolution\nlib = relay.build_module.build(net, target, params=params)\n\ndev = tvm.device(target, 0)\ndata = np.random.uniform(-1, 1, size=data_shape).astype(\"float32\")\nmodule = runtime.GraphModule(lib[\"default\"](dev))\nmodule.set_input(\"data\", data)\nmodule.run()\nout_shape = (batch_size, out_channels, 224, 224)\nout = module.get_output(0, tvm.nd.empty(out_shape))\nout_cud [...]
       ]
     },
     {
diff --git a/docs/_downloads/6c8a9d3bc4c689f8680a968349965ee5/from_pytorch.ipynb b/docs/_downloads/6c8a9d3bc4c689f8680a968349965ee5/from_pytorch.ipynb
index 6cb27ba..676eb53 100644
--- a/docs/_downloads/6c8a9d3bc4c689f8680a968349965ee5/from_pytorch.ipynb
+++ b/docs/_downloads/6c8a9d3bc4c689f8680a968349965ee5/from_pytorch.ipynb
@@ -98,7 +98,7 @@
       },
       "outputs": [],
       "source": [
-        "target = \"llvm\"\ntarget_host = \"llvm\"\nctx = tvm.cpu(0)\nwith tvm.transform.PassContext(opt_level=3):\n    lib = relay.build(mod, target=target, target_host=target_host, params=params)"
+        "target = tvm.target.Target(\"llvm\", host=\"llvm\")\ndev = tvm.cpu(0)\nwith tvm.transform.PassContext(opt_level=3):\n    lib = relay.build(mod, target=target, params=params)"
       ]
     },
     {
@@ -116,7 +116,7 @@
       },
       "outputs": [],
       "source": [
-        "from tvm.contrib import graph_runtime\n\ndtype = \"float32\"\nm = graph_runtime.GraphModule(lib[\"default\"](ctx))\n# Set inputs\nm.set_input(input_name, tvm.nd.array(img.astype(dtype)))\n# Execute\nm.run()\n# Get outputs\ntvm_output = m.get_output(0)"
+        "from tvm.contrib import graph_executor\n\ndtype = \"float32\"\nm = graph_executor.GraphModule(lib[\"default\"](dev))\n# Set inputs\nm.set_input(input_name, tvm.nd.array(img.astype(dtype)))\n# Execute\nm.run()\n# Get outputs\ntvm_output = m.get_output(0)"
       ]
     },
     {
diff --git a/docs/_downloads/70a0767409e81bb5aaa9ce4e7a151dec/tensor_expr_get_started.ipynb b/docs/_downloads/70a0767409e81bb5aaa9ce4e7a151dec/tensor_expr_get_started.ipynb
index 6032e06..2e5b2b7 100644
--- a/docs/_downloads/70a0767409e81bb5aaa9ce4e7a151dec/tensor_expr_get_started.ipynb
+++ b/docs/_downloads/70a0767409e81bb5aaa9ce4e7a151dec/tensor_expr_get_started.ipynb
@@ -15,7 +15,32 @@
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "\n\nGet Started with Tensor Expression\n==================================\n**Author**: `Tianqi Chen <https://tqchen.github.io>`_\n\nThis is an introductory tutorial to the Tensor expression language in TVM.\nTVM uses a domain specific tensor expression for efficient kernel construction.\n\nIn this tutorial, we will demonstrate the basic workflow to use\nthe tensor expression language.\n\n"
+        "\n\nWorking with Operators Using Tensor Expressions\n===============================================\n**Author**: `Tianqi Chen <https://tqchen.github.io>`_\n\nIn this tutorial we will turn our attention to how TVM works with Tensor\nExpressions (TE) to create a space to search for performant configurations. TE\ndescribes tensor computations in a pure functional language (that is each\nexpression has no side effects). When viewed in context of the TVM as a whole,\nRelay describes [...]
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Example 1: Writing and Scheduling Vector Addition in TE for CPU\n---------------------------------------------------------------\n\nLet's look at an example in Python in which we will implement a TE for\nvector addition, followed by a schedule targeted towards a CPU. We begin by initializing a TVM\nenvironment.\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "import tvm\nimport tvm.testing\nfrom tvm import te\nimport numpy as np\n\n# You will get better performance if you can identify the CPU you are targeting\n# and specify it. If you're using llvm, you can get this information from the\n# command ``llc --version`` to get the CPU type, and you can check\n# ``/proc/cpuinfo`` for additional extensions that your processor might\n# support. For example, ``tgt = \"llvm -mcpu=`skylake`\n\ntgt = tvm.target.Target(target=\"llvm\", host=\"llvm\")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Describing the Vector Computation\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\nWe describe a vector addition computation. TVM adopts tensor semantics, with\neach intermediate result represented as a multi-dimensional array. The user\nneeds to describe the computation rule that generates the tensors. We first\ndefine a symbolic variable n to represent the shape. We then define two\nplaceholder Tensors, ``A`` and ``B``, with given shape ``(n,)``. We then\ndescribe the result tensor ``C``,  [...]
       ]
     },
     {
@@ -26,21 +51,21 @@
       },
       "outputs": [],
       "source": [
-        "from __future__ import absolute_import, print_function\n\nimport tvm\nimport tvm.testing\nfrom tvm import te\nimport numpy as np\n\n# Global declarations of environment.\n\ntgt_host = \"llvm\"\n# Change it to respective GPU if gpu is enabled Ex: cuda, opencl, rocm\ntgt = \"cuda\""
+        "n = te.var(\"n\")\nA = te.placeholder((n,), name=\"A\")\nB = te.placeholder((n,), name=\"B\")\nC = te.compute(A.shape, lambda i: A[i] + B[i], name=\"C\")"
       ]
     },
     {
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "Vector Add Example\n------------------\nIn this tutorial, we will use a vector addition example to demonstrate\nthe workflow.\n\n\n"
+        "<div class=\"alert alert-info\"><h4>Note</h4><p>Lambda Functions</p></div>\n\nThe second argument to the ``te.compute`` method is the function that\nperforms the computation. In this example, we're using an anonymous function,\nalso known as a ``lambda`` function, to define the computation, in this case\naddition on the ``i``th element of ``A`` and ``B``.\n\n"
       ]
     },
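The lambda is only a convenience; ``te.compute`` accepts any callable that
maps index variables to an expression. A named function works the same way, as
in this sketch reusing the ``A`` and ``B`` placeholders defined above (the
name ``add_elements`` is illustrative, not part of the tutorial):

.. code-block:: python

    # Equivalent to the lambda form: a named compute rule for elementwise add.
    def add_elements(i):
        return A[i] + B[i]

    C_named = te.compute(A.shape, add_elements, name="C_named")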
     {
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "Describe the Computation\n------------------------\nAs a first step, we need to describe our computation.\nTVM adopts tensor semantics, with each intermediate result\nrepresented as a multi-dimensional array. The user needs to describe\nthe computation rule that generates the tensors.\n\nWe first define a symbolic variable n to represent the shape.\nWe then define two placeholder Tensors, A and B, with given shape (n,)\n\nWe then describe the result tensor C, with a compute oper [...]
+        "Create a Default Schedule for the Computation\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\nWhile the above lines describe the computation rule, we can compute ``C`` in\nmany different ways to fit different devices. For a tensor with multiple\naxes, you can choose which axis to iterate over first, or computations can be\nsplit across different threads. TVM requires that the user to provide a\nschedule, which is a description of how the computation should be performed.\nSched [...]
       ]
     },
     {
@@ -51,14 +76,14 @@
       },
       "outputs": [],
       "source": [
-        "n = te.var(\"n\")\nA = te.placeholder((n,), name=\"A\")\nB = te.placeholder((n,), name=\"B\")\nC = te.compute(A.shape, lambda i: A[i] + B[i], name=\"C\")\nprint(type(C))"
+        "s = te.create_schedule(C.op)"
       ]
     },
     {
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "Schedule the Computation\n------------------------\nWhile the above lines describe the computation rule, we can compute\nC in many ways since the axis of C can be computed in a data\nparallel manner.  TVM asks the user to provide a description of the\ncomputation called a schedule.\n\nA schedule is a set of transformation of computation that transforms\nthe loop of computations in the program.\n\nAfter we construct the schedule, by default the schedule computes\nC in a serial ma [...]
+        "Compile and Evaluate the Default Schedule\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\nWith the TE expression and a schedule, we can produce runnable code for our\ntarget language and architecture, in this case LLVM and a CPU. We provide\nTVM with the schedule, a list of the TE expressions that are in the schedule,\nthe target and host, and the name of the function we are producing. The result\nof the output is a type-erased function that can be called directly from Python.\n\nIn [...]
       ]
     },
     {
@@ -69,14 +94,50 @@
       },
       "outputs": [],
       "source": [
-        "s = te.create_schedule(C.op)"
+        "fadd = tvm.build(s, [A, B, C], tgt, name=\"myadd\")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Let's run the function, and compare the output to the same computation in\nnumpy. The compiled TVM function is exposes a concise C API that can be invoked\nfrom any language. We begin by creating a device, which is a device (CPU in this\nexample) that TVM can compile the schedule to. In this case the device is an\nLLVM CPU target. We can then initialize the tensors in our device and\nperform the custom addition operation. To verify that the computation is\ncorrect, we can compar [...]
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "dev = tvm.device(tgt.kind.name, 0)\n\nn = 1024\na = tvm.nd.array(np.random.uniform(size=n).astype(A.dtype), dev)\nb = tvm.nd.array(np.random.uniform(size=n).astype(B.dtype), dev)\nc = tvm.nd.array(np.zeros(n, dtype=C.dtype), dev)\nfadd(a, b, c)\ntvm.testing.assert_allclose(c.asnumpy(), a.asnumpy() + b.asnumpy())"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "To get a comparison of how fast this version is compared to numpy, create a\nhelper function to run a profile of the TVM generated code.\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "import timeit\n\nnp_repeat = 100\nnp_running_time = timeit.timeit(\n    setup=\"import numpy\\n\"\n    \"n = 32768\\n\"\n    'dtype = \"float32\"\\n'\n    \"a = numpy.random.rand(n, 1).astype(dtype)\\n\"\n    \"b = numpy.random.rand(n, 1).astype(dtype)\\n\",\n    stmt=\"answer = a + b\",\n    number=np_repeat,\n)\nprint(\"Numpy running time: %f\" % (np_running_time / np_repeat))\n\n\ndef evaluate_addition(func, target, optimization, log):\n    dev = tvm.device(target.kind.name,  [...]
       ]
     },
     {
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "We used the split construct to split the first axis of C,\nthis will split the original iteration axis into product of\ntwo iterations. This is equivalent to the following code.\n\n.. code-block:: c\n\n  for (int bx = 0; bx < ceil(n / 64); ++bx) {\n    for (int tx = 0; tx < 64; ++tx) {\n      int i = bx * 64 + tx;\n      if (i < n) {\n        C[i] = A[i] + B[i];\n      }\n    }\n  }\n\n\n"
+        "Updating the Schedule to Use Paralleism\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\nNow that we've illustrated the fundamentals of TE, let's go deeper into what\nschedules do, and how they can be used to optimize tensor expressions for\ndifferent architectures. A schedule is a series of steps that are applied to\nan expression to transform it in a number of different ways. When a schedule\nis applied to an expression in TE, the inputs and outputs remain the same,\nbut when compi [...]
       ]
     },
     {
@@ -87,14 +148,14 @@
       },
       "outputs": [],
       "source": [
-        "bx, tx = s[C].split(C.op.axis[0], factor=64)"
+        "s[C].parallel(C.op.axis[0])"
       ]
     },
     {
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "Finally we bind the iteration axis bx and tx to threads in the GPU\ncompute grid. These are GPU specific constructs that allow us\nto generate code that runs on GPU.\n\n\n"
+        "The ``tvm.lower`` command will generate the Intermediate Representation (IR)\nof the TE, with the corresponding schedule. By lowering the expression as we\napply different schedule operations, we can see the effect of scheduling on\nthe ordering of the computation. We use the flag ``simple_mode=True`` to\nreturn a readable C-style statement.\n\n"
       ]
     },
     {
@@ -105,14 +166,14 @@
       },
       "outputs": [],
       "source": [
-        "if tgt == \"cuda\" or tgt == \"rocm\" or tgt.startswith(\"opencl\"):\n    s[C].bind(bx, te.thread_axis(\"blockIdx.x\"))\n    s[C].bind(tx, te.thread_axis(\"threadIdx.x\"))"
+        "print(tvm.lower(s, [A, B, C], simple_mode=True))"
       ]
     },
     {
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "Compilation\n-----------\nAfter we have finished specifying the schedule, we can compile it\ninto a TVM function. By default TVM compiles into a type-erased\nfunction that can be directly called from the python side.\n\nIn the following line, we use tvm.build to create a function.\nThe build function takes the schedule, the desired signature of the\nfunction (including the inputs and outputs) as well as target language\nwe want to compile to.\n\nThe result of compilation fadd is [...]
+        "It's now possible for TVM to run these blocks on independent threads. Let's\ncompile and run this new schedule with the parallel operation applied:\n\n"
       ]
     },
     {
@@ -123,14 +184,14 @@
       },
       "outputs": [],
       "source": [
-        "fadd = tvm.build(s, [A, B, C], tgt, target_host=tgt_host, name=\"myadd\")"
+        "fadd_parallel = tvm.build(s, [A, B, C], tgt, name=\"myadd_parallel\")\nfadd_parallel(a, b, c)\n\ntvm.testing.assert_allclose(c.asnumpy(), a.asnumpy() + b.asnumpy())\n\nevaluate_addition(fadd_parallel, tgt, \"parallel\", log=log)"
       ]
     },
     {
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "Run the Function\n----------------\nThe compiled TVM function is exposes a concise C API\nthat can be invoked from any language.\n\nWe provide a minimal array API in python to aid quick testing and prototyping.\nThe array API is based on the `DLPack <https://github.com/dmlc/dlpack>`_ standard.\n\n- We first create a GPU context.\n- Then tvm.nd.array copies the data to the GPU.\n- fadd runs the actual computation.\n- asnumpy() copies the GPU array back to the CPU and we can use t [...]
+        "Updating the Schedule to Use Vectorization\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\nModern CPUs also have the ability to perform SIMD operations on floating\npoint values, and we can apply another schedule to our computation expression\nto take advantage of this. Accomplishing this requires multiple steps: first\nwe have to split the schedule into inner and outer loops using the split\nscheduling primitive. The inner loops can use vectorization to use SIMD\ninstructions usin [...]
       ]
     },
     {
@@ -141,14 +202,14 @@
       },
       "outputs": [],
       "source": [
-        "ctx = tvm.context(tgt, 0)\n\nn = 1024\na = tvm.nd.array(np.random.uniform(size=n).astype(A.dtype), ctx)\nb = tvm.nd.array(np.random.uniform(size=n).astype(B.dtype), ctx)\nc = tvm.nd.array(np.zeros(n, dtype=C.dtype), ctx)\nfadd(a, b, c)\ntvm.testing.assert_allclose(c.asnumpy(), a.asnumpy() + b.asnumpy())"
+        "# Recreate the schedule, since we modified it with the parallel operation in\n# the previous example\nn = te.var(\"n\")\nA = te.placeholder((n,), name=\"A\")\nB = te.placeholder((n,), name=\"B\")\nC = te.compute(A.shape, lambda i: A[i] + B[i], name=\"C\")\n\ns = te.create_schedule(C.op)\n\n# This factor should be chosen to match the number of threads appropriate for\n# your CPU. This will vary depending on architecture, but a good rule is\n# setting this factor to equal the numb [...]
       ]
     },
     {
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "Inspect the Generated Code\n--------------------------\nYou can inspect the generated code in TVM. The result of tvm.build\nis a TVM Module. fadd is the host module that contains the host wrapper,\nit also contains a device module for the CUDA (GPU) function.\n\nThe following code fetches the device module and prints the content code.\n\n\n"
+        "Comparing the Diferent Schedules\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\nWe can now compare the different schedules\n\n"
       ]
     },
     {
@@ -159,21 +220,46 @@
       },
       "outputs": [],
       "source": [
-        "if tgt == \"cuda\" or tgt == \"rocm\" or tgt.startswith(\"opencl\"):\n    dev_module = fadd.imported_modules[0]\n    print(\"-----GPU code-----\")\n    print(dev_module.get_source())\nelse:\n    print(fadd.get_source())"
+        "baseline = log[0][1]\nprint(\"%s\\t%s\\t%s\" % (\"Operator\".rjust(20), \"Timing\".rjust(20), \"Performance\".rjust(20)))\nfor result in log:\n    print(\n        \"%s\\t%s\\t%s\"\n        % (result[0].rjust(20), str(result[1]).rjust(20), str(result[1] / baseline).rjust(20))\n    )"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "<div class=\"alert alert-info\"><h4>Note</h4><p>Code Specialization\n\n  As you may have noticed, the declarations of ``A``, ``B`` and ``C`` all\n  take the same shape argument, ``n``. TVM will take advantage of this to\n  pass only a single shape argument to the kernel, as you will find in the\n  printed device code. This is one form of specialization.\n\n  On the host side, TVM will automatically generate check code that checks\n  the constraints in the parameters. So if you p [...]
+      ]
+    },
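A minimal sketch (not part of the original notebook) of the host-side shape
check described in the note above, assuming the ``fadd`` function and ``dev``
device from the preceding cells; since ``A``, ``B`` and ``C`` share the
symbolic size ``n``, the generated wrapper rejects mismatched inputs:

.. code-block:: python

    import numpy as np

    a_bad = tvm.nd.array(np.zeros(8, dtype="float32"), dev)   # length 8
    b_ok = tvm.nd.array(np.zeros(16, dtype="float32"), dev)   # length 16
    c_ok = tvm.nd.array(np.zeros(16, dtype="float32"), dev)
    try:
        fadd(a_bad, b_ok, c_ok)  # the generated constraint check fires here
    except tvm.TVMError as err:
        print("shape constraint violated:", err)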
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "We've defined, scheduled, and compiled a vector addition operator, which we\nwere then able to execute on the TVM runtime. We can save the operator as a\nlibrary, which we can then load later using the TVM runtime.\n\n"
       ]
     },
     {
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "<div class=\"alert alert-info\"><h4>Note</h4><p>Code Specialization\n\n  As you may have noticed, the declarations of A, B and C all\n  take the same shape argument, n. TVM will take advantage of this\n  to pass only a single shape argument to the kernel, as you will find in\n  the printed device code. This is one form of specialization.\n\n  On the host side, TVM will automatically generate check code\n  that checks the constraints in the parameters. So if you pass\n  arrays wi [...]
+        "Targeting Vector Addition for GPUs (Optional)\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\nTVM is capable of targeting multiple architectures. In the next example, we\nwill target compilation of the vector addition to GPUs.\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "# If you want to run this code, change ``run_cuda = True``\n# Note that by default this example is not run in the docs CI.\n\nrun_cuda = False\nif run_cuda:\n    # Change this target to the correct backend for you gpu. For example: cuda (NVIDIA GPUs),\n    # rocm (Radeon GPUS), OpenCL (opencl).\n    tgt_gpu = tvm.target.Target(target=\"cuda\", host=\"llvm\")\n\n    # Recreate the schedule\n    n = te.var(\"n\")\n    A = te.placeholder((n,), name=\"A\")\n    B = te.placeholder((n [...]
       ]
     },
     {
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "Save Compiled Module\n--------------------\nBesides runtime compilation, we can save the compiled modules into\na file and load them back later. This is called ahead of time compilation.\n\nThe following code first performs the following steps:\n\n- It saves the compiled host module into an object file.\n- Then it saves the device module into a ptx file.\n- cc.create_shared calls a compiler (gcc) to create a shared library\n\n\n"
+        "Saving and Loading Compiled Modules\n-----------------------------------\nBesides runtime compilation, we can save the compiled modules into a file and\nload them back later.\n\nThe following code first performs the following steps:\n\n- It saves the compiled host module into an object file.\n- Then it saves the device module into a ptx file.\n- cc.create_shared calls a compiler (gcc) to create a shared library\n\n"
       ]
     },
     {
@@ -184,21 +270,21 @@
       },
       "outputs": [],
       "source": [
-        "from tvm.contrib import cc\nfrom tvm.contrib import utils\n\ntemp = utils.tempdir()\nfadd.save(temp.relpath(\"myadd.o\"))\nif tgt == \"cuda\":\n    fadd.imported_modules[0].save(temp.relpath(\"myadd.ptx\"))\nif tgt == \"rocm\":\n    fadd.imported_modules[0].save(temp.relpath(\"myadd.hsaco\"))\nif tgt.startswith(\"opencl\"):\n    fadd.imported_modules[0].save(temp.relpath(\"myadd.cl\"))\ncc.create_shared(temp.relpath(\"myadd.so\"), [temp.relpath(\"myadd.o\")])\nprint(temp.listdir())"
+        "from tvm.contrib import cc\nfrom tvm.contrib import utils\n\ntemp = utils.tempdir()\nfadd.save(temp.relpath(\"myadd.o\"))\nif tgt.kind.name == \"cuda\":\n    fadd.imported_modules[0].save(temp.relpath(\"myadd.ptx\"))\nif tgt.kind.name == \"rocm\":\n    fadd.imported_modules[0].save(temp.relpath(\"myadd.hsaco\"))\nif tgt.kind.name.startswith(\"opencl\"):\n    fadd.imported_modules[0].save(temp.relpath(\"myadd.cl\"))\ncc.create_shared(temp.relpath(\"myadd.so\"), [temp.relpath(\"my [...]
       ]
     },
     {
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "<div class=\"alert alert-info\"><h4>Note</h4><p>Module Storage Format\n\n  The CPU (host) module is directly saved as a shared library (.so).\n  There can be multiple customized formats of the device code.\n  In our example, the device code is stored in ptx, as well as a meta\n  data json file. They can be loaded and linked separately via import.</p></div>\n\n\n"
+        "<div class=\"alert alert-info\"><h4>Note</h4><p>Module Storage Format\n\n  The CPU (host) module is directly saved as a shared library (.so). There\n  can be multiple customized formats of the device code. In our example, the\n  device code is stored in ptx, as well as a meta data json file. They can be\n  loaded and linked separately via import.</p></div>\n\n"
       ]
     },
     {
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "Load Compiled Module\n--------------------\nWe can load the compiled module from the file system and run the code.\nThe following code loads the host and device module separately and\nre-links them together. We can verify that the newly loaded function works.\n\n\n"
+        "Load Compiled Module\n~~~~~~~~~~~~~~~~~~~~\nWe can load the compiled module from the file system and run the code. The\nfollowing code loads the host and device module separately and links them\ntogether. We can verify that the newly loaded function works.\n\n"
       ]
     },
     {
@@ -209,14 +295,14 @@
       },
       "outputs": [],
       "source": [
-        "fadd1 = tvm.runtime.load_module(temp.relpath(\"myadd.so\"))\nif tgt == \"cuda\":\n    fadd1_dev = tvm.runtime.load_module(temp.relpath(\"myadd.ptx\"))\n    fadd1.import_module(fadd1_dev)\n\nif tgt == \"rocm\":\n    fadd1_dev = tvm.runtime.load_module(temp.relpath(\"myadd.hsaco\"))\n    fadd1.import_module(fadd1_dev)\n\nif tgt.startswith(\"opencl\"):\n    fadd1_dev = tvm.runtime.load_module(temp.relpath(\"myadd.cl\"))\n    fadd1.import_module(fadd1_dev)\n\nfadd1(a, b, c)\ntvm.tes [...]
+        "fadd1 = tvm.runtime.load_module(temp.relpath(\"myadd.so\"))\nif tgt.kind.name == \"cuda\":\n    fadd1_dev = tvm.runtime.load_module(temp.relpath(\"myadd.ptx\"))\n    fadd1.import_module(fadd1_dev)\n\nif tgt.kind.name == \"rocm\":\n    fadd1_dev = tvm.runtime.load_module(temp.relpath(\"myadd.hsaco\"))\n    fadd1.import_module(fadd1_dev)\n\nif tgt.kind.name.startswith(\"opencl\"):\n    fadd1_dev = tvm.runtime.load_module(temp.relpath(\"myadd.cl\"))\n    fadd1.import_module(fadd1_d [...]
       ]
     },
     {
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "Pack Everything into One Library\n--------------------------------\nIn the above example, we store the device and host code separately.\nTVM also supports export everything as one shared library.\nUnder the hood, we pack the device modules into binary blobs and link\nthem together with the host code.\nCurrently we support packing of Metal, OpenCL and CUDA modules.\n\n\n"
+        "Pack Everything into One Library\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\nIn the above example, we store the device and host code separately. TVM also\nsupports export everything as one shared library. Under the hood, we pack\nthe device modules into binary blobs and link them together with the host\ncode. Currently we support packing of Metal, OpenCL and CUDA modules.\n\n"
       ]
     },
     {
@@ -234,14 +320,226 @@
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "<div class=\"alert alert-info\"><h4>Note</h4><p>Runtime API and Thread-Safety\n\n  The compiled modules of TVM do not depend on the TVM compiler.\n  Instead, they only depend on a minimum runtime library.\n  The TVM runtime library wraps the device drivers and provides\n  thread-safe and device agnostic calls into the compiled functions.\n\n  This means that you can call the compiled TVM functions from any thread,\n  on any GPUs.</p></div>\n\n\n"
+        "<div class=\"alert alert-info\"><h4>Note</h4><p>Runtime API and Thread-Safety\n\n  The compiled modules of TVM do not depend on the TVM compiler. Instead,\n  they only depend on a minimum runtime library. The TVM runtime library\n  wraps the device drivers and provides thread-safe and device agnostic calls\n  into the compiled functions.\n\n  This means that you can call the compiled TVM functions from any thread, on\n  any GPUs, provided that you have compiled the code for that [...]
+      ]
+    },
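As a rough illustration of the thread-safety property described above, the
same compiled module can be invoked concurrently from several Python threads.
A sketch, assuming the CPU-targeted ``fadd`` built earlier in this tutorial:

.. code-block:: python

    import threading
    import numpy as np

    dev = tvm.cpu(0)

    def worker():
        n = 1024
        a = tvm.nd.array(np.random.uniform(size=n).astype("float32"), dev)
        b = tvm.nd.array(np.random.uniform(size=n).astype("float32"), dev)
        c = tvm.nd.array(np.zeros(n, dtype="float32"), dev)
        fadd(a, b, c)
        tvm.testing.assert_allclose(c.asnumpy(), a.asnumpy() + b.asnumpy())

    # Each thread calls the same compiled function on its own buffers.
    threads = [threading.Thread(target=worker) for _ in range(4)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()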
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Generate OpenCL Code\n--------------------\nTVM provides code generation features into multiple backends. We can also\ngenerate OpenCL code or LLVM code that runs on CPU backends.\n\nThe following code blocks generate OpenCL code, creates array on an OpenCL\ndevice, and verifies the correctness of the code.\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "if tgt.kind.name.startswith(\"opencl\"):\n    fadd_cl = tvm.build(s, [A, B, C], tgt, name=\"myadd\")\n    print(\"------opencl code------\")\n    print(fadd_cl.imported_modules[0].get_source())\n    dev = tvm.cl(0)\n    n = 1024\n    a = tvm.nd.array(np.random.uniform(size=n).astype(A.dtype), dev)\n    b = tvm.nd.array(np.random.uniform(size=n).astype(B.dtype), dev)\n    c = tvm.nd.array(np.zeros(n, dtype=C.dtype), dev)\n    fadd_cl(a, b, c)\n    tvm.testing.assert_allclose(c.as [...]
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "<div class=\"alert alert-info\"><h4>Note</h4><p>TE Scheduling Primitives\n\n  TVM includes a number of different scheduling primitives:\n\n  - split: splits a specified axis into two axises by the defined factor.\n  - tile: tiles will split a computation across two axes by the defined factors.\n  - fuse: fuses two consecutive axises of one computation.\n  - reorder: can reorder the axises of a computation into a defined order.\n  - bind: can bind a computation to a specific thre [...]
+      ]
+    },
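To make the list above concrete, here is a small illustrative sketch (not one
of the measured examples) that applies ``split``, ``reorder`` and ``fuse`` to
a toy element-wise computation and prints the lowered loop nest:

.. code-block:: python

    import tvm
    from tvm import te

    n = te.var("n")
    A = te.placeholder((n, n), name="A")
    B = te.compute((n, n), lambda i, j: A[i, j] * 2.0, name="B")

    s = te.create_schedule(B.op)
    i, j = s[B].op.axis
    jo, ji = s[B].split(j, factor=8)  # split: one axis becomes two
    s[B].reorder(jo, i, ji)           # reorder: change the loop nesting order
    fused = s[B].fuse(jo, i)          # fuse: merge two consecutive axes
    print(tvm.lower(s, [A, B], simple_mode=True))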
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Example 2: Manually Optimizing Matrix Multiplication with TE\n------------------------------------------------------------\n\nNow we will consider a second, more advanced example, demonstrating how with\njust 18 lines of python code TVM speeds up a common matrix multiplication operation by 18x.\n\n**Matrix multiplication is a compute intensive operation. There are two important optimizations for good CPU performance:**\n1. Increase the cache hit rate of memory access. Both compl [...]
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Preparation and Performance Baseline\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\nWe begin by collecting performance data on the `numpy` implementation of\nmatrix multiplication.\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "import tvm\nimport tvm.testing\nfrom tvm import te\nimport numpy\n\n# The size of the matrix\n# (M, K) x (K, N)\n# You are free to try out different shapes, sometimes TVM optimization outperforms numpy with MKL.\nM = 1024\nK = 1024\nN = 1024\n\n# The default tensor data type in tvm\ndtype = \"float32\"\n\n# You will want to adjust the target to match any CPU vector extensions you\n# might have. For example, if you're using using Intel AVX2 (Advanced Vector\n# Extensions) ISA for [...]
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Now we write a basic matrix multiplication using TVM TE and verify that it\nproduces the same results as the numpy implementation. We also write a\nfunction that will help us measure the performance of the schedule\noptimizations.\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "# TVM Matrix Multiplication using TE\nk = te.reduce_axis((0, K), \"k\")\nA = te.placeholder((M, K), name=\"A\")\nB = te.placeholder((K, N), name=\"B\")\nC = te.compute((M, N), lambda x, y: te.sum(A[x, k] * B[k, y], axis=k), name=\"C\")\n\n# Default schedule\ns = te.create_schedule(C.op)\nfunc = tvm.build(s, [A, B, C], target=target, name=\"mmult\")\n\nc = tvm.nd.array(numpy.zeros((M, N), dtype=dtype), dev)\nfunc(a, b, c)\ntvm.testing.assert_allclose(c.asnumpy(), answer, rtol=1e- [...]
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Let's take a look at the intermediate representation of the operator and\ndefault schedule using the TVM lower function. Note how the implementation is\nessentially a naive implementation of a matrix multiplication, using three\nnested loops over the indices of the A and B matrices.\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "print(tvm.lower(s, [A, B, C], simple_mode=True))"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Optimization 1: Blocking\n~~~~~~~~~~~~~~~~~~~~~~~~\n\nA important trick to enhance the cache hit rate is blocking, where you\nstructure memory access such that the inside a block is a small neighborhood\nthat has high memory locality. In this tutorial, we pick a block factor of\n32. This will result in a block that will fill a 32 * 32 * sizeof(float) area\nof memory. This corresponds to a cache size of 4KB, in relation to a\nreference cache size of 32 KB for L1 cache.\n\nWe begi [...]
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "bn = 32\n\n# Blocking by loop tiling\nxo, yo, xi, yi = s[C].tile(C.op.axis[0], C.op.axis[1], bn, bn)\n(k,) = s[C].op.reduce_axis\nko, ki = s[C].split(k, factor=4)\n\n# Hoist reduction domain outside the blocking loop\ns[C].reorder(xo, yo, ko, ki, xi, yi)\n\nevaluate_operation(s, [A, B, C], target=target, name=\"mmult\", optimization=\"blocking\", log=log)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "By reordering the computation to take advantage of caching, you should see a\nsignificant improvement in the performance of the computation. Now, print the\ninternal representation and compare it to the original:\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "print(tvm.lower(s, [A, B, C], simple_mode=True))"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Optimization 2: Vectorization\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\nAnother important optimization trick is vectorization. When the memory access\npattern is uniform, the compiler can detect this pattern and pass the\ncontinuous memory to the SIMD vector processor. In TVM, we can use the\n``vectorize`` interface to hint the compiler this pattern, taking advantage\nof this hardware feature.\n\nIn this tutorial, we chose to vectorize the inner loop row data since it is\nalready cache  [...]
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "# Apply the vectorization optimization\ns[C].vectorize(yi)\n\nevaluate_operation(s, [A, B, C], target=target, name=\"mmult\", optimization=\"vectorization\", log=log)\n\n# The generalized IR after vectorization\nprint(tvm.lower(s, [A, B, C], simple_mode=True))"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Optimization 3: Loop Permutation\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\nIf we look at the above IR, we can see the inner loop row data is vectorized\nand B is transformed into PackedB (this is evident by the `(float32x32*)B2`\nportion of the inner loop). The traversal of PackedB is sequential now. So we\nwill look at the access pattern of A. In current schedule, A is accessed\ncolumn by column which is not cache friendly. If we change the nested loop\norder of `ki` and inner axes  [...]
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "s = te.create_schedule(C.op)\nxo, yo, xi, yi = s[C].tile(C.op.axis[0], C.op.axis[1], bn, bn)\n(k,) = s[C].op.reduce_axis\nko, ki = s[C].split(k, factor=4)\n\n# re-ordering\ns[C].reorder(xo, yo, ko, xi, ki, yi)\ns[C].vectorize(yi)\n\nevaluate_operation(\n    s, [A, B, C], target=target, name=\"mmult\", optimization=\"loop permutation\", log=log\n)\n\n# Again, print the new generalized IR\nprint(tvm.lower(s, [A, B, C], simple_mode=True))"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Optimization 4: Array Packing\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\nAnother important trick is array packing. This trick is to reorder the\nstorage dimension of the array to convert the continuous access pattern on\ncertain dimension to a sequential pattern after flattening.\n\n![](https://github.com/dmlc/web-data/raw/main/tvm/tutorial/array-packing.png)\n\n   :align: center\n\nJust as it is shown in the figure above, after blocking the computations, we\ncan observe the array access [...]
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "# We have to re-write the algorithm slightly.\npackedB = te.compute((N / bn, K, bn), lambda x, y, z: B[y, x * bn + z], name=\"packedB\")\nC = te.compute(\n    (M, N),\n    lambda x, y: te.sum(A[x, k] * packedB[y // bn, k, tvm.tir.indexmod(y, bn)], axis=k),\n    name=\"C\",\n)\n\ns = te.create_schedule(C.op)\n\nxo, yo, xi, yi = s[C].tile(C.op.axis[0], C.op.axis[1], bn, bn)\n(k,) = s[C].op.reduce_axis\nko, ki = s[C].split(k, factor=4)\n\ns[C].reorder(xo, yo, ko, xi, ki, yi)\ns[C]. [...]
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Optimization 5: Optimizing Block Writing Through Caching\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\nUp to this point all of our optimizations have focused on efficiently\naccessing and computing the data from the `A` and `B` matrices to compute the\n`C` matrix. After the blocking optimization, the operator will write result\nto `C` block by block, and the access pattern is not sequential. We can\naddress this by using a sequential cache array, using a combinati [...]
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "s = te.create_schedule(C.op)\n\n# Allocate write cache\nCC = s.cache_write(C, \"global\")\n\nxo, yo, xi, yi = s[C].tile(C.op.axis[0], C.op.axis[1], bn, bn)\n\n# Write cache is computed at yo\ns[CC].compute_at(s[C], yo)\n\n# New inner axes\nxc, yc = s[CC].op.axis\n\n(k,) = s[CC].op.reduce_axis\nko, ki = s[CC].split(k, factor=4)\ns[CC].reorder(ko, xc, ki, yc)\ns[CC].unroll(ki)\ns[CC].vectorize(yc)\n\nx, y, z = s[packedB].op.axis\ns[packedB].vectorize(z)\ns[packedB].parallel(x)\n\n [...]
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Optimization 6: Parallelization\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\nSo far, our computation is only designed to use a single core. Nearly all\nmodern processors have multiple cores, and computation can benefit from\nrunning computations in parallel. The final optimization is to take advantage\nof thread-level parallelization.\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "# parallel\ns[C].parallel(xo)\n\nx, y, z = s[packedB].op.axis\ns[packedB].vectorize(z)\ns[packedB].parallel(x)\n\nevaluate_operation(\n    s, [A, B, C], target=target, name=\"mmult\", optimization=\"parallelization\", log=log\n)\n\n# Here is the generated IR after parallelization.\nprint(tvm.lower(s, [A, B, C], simple_mode=True))"
       ]
     },
     {
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "Generate OpenCL Code\n--------------------\nTVM provides code generation features into multiple backends,\nwe can also generate OpenCL code or LLVM code that runs on CPU backends.\n\nThe following code blocks generate OpenCL code, creates array on an OpenCL\ndevice, and verifies the correctness of the code.\n\n\n"
+        "Summary of Matrix Multiplication Example\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\nAfter applying the above simple optimizations with only 18 lines of code, our\ngenerated code can begin to approach the performance of `numpy` with the Math\nKernel Library (MKL). Since we've been logging the performance as we've been\nworking, we can compare the results.\n\n"
       ]
     },
     {
@@ -252,14 +550,21 @@
       },
       "outputs": [],
       "source": [
-        "if tgt.startswith(\"opencl\"):\n    fadd_cl = tvm.build(s, [A, B, C], tgt, name=\"myadd\")\n    print(\"------opencl code------\")\n    print(fadd_cl.imported_modules[0].get_source())\n    ctx = tvm.cl(0)\n    n = 1024\n    a = tvm.nd.array(np.random.uniform(size=n).astype(A.dtype), ctx)\n    b = tvm.nd.array(np.random.uniform(size=n).astype(B.dtype), ctx)\n    c = tvm.nd.array(np.zeros(n, dtype=C.dtype), ctx)\n    fadd_cl(a, b, c)\n    tvm.testing.assert_allclose(c.asnumpy(), a [...]
+        "baseline = log[0][1]\nprint(\"%s\\t%s\\t%s\" % (\"Operator\".rjust(20), \"Timing\".rjust(20), \"Performance\".rjust(20)))\nfor result in log:\n    print(\n        \"%s\\t%s\\t%s\"\n        % (result[0].rjust(20), str(result[1]).rjust(20), str(result[1] / baseline).rjust(20))\n    )"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Note that the outputs on the web page reflect the running times on a\nnon-exclusive Docker container, and should be considered unreliable. It is\nhighly encouraged to run the tutorial by yourself to observe the performance\ngain achieved by TVM, and to carefully work through each example to\nunderstand the iterative improvements that are made to the matrix\nmultiplication operation.\n\n"
       ]
     },
     {
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "Summary\n-------\nThis tutorial provides a walk through of TVM workflow using\na vector add example. The general workflow is\n\n- Describe your computation via a series of operations.\n- Describe how we want to compute use schedule primitives.\n- Compile to the target function we want.\n- Optionally, save the function to be loaded later.\n\nYou are more than welcome to checkout other examples and\ntutorials to learn more about the supported operations, scheduling primitives\nand [...]
+        "Final Notes and Summary\n-----------------------\nAs mentioned earlier, how to apply optimizations using TE and scheduling\nprimitives can require some knowledge of the underlying architecture and\nalgorithms. However, TE was designed to act as a foundation for more complex\nalgorithms that can search the potential optimization. With the knowledge you\nhave from this introduction to TE, we can now begin to explore how TVM can\nautomate the schedule optimization process.\n\nThis  [...]
       ]
     }
   ],
diff --git a/docs/_downloads/70d345c5409f99cb5de9dc44f147ff6f/build_gcn.py b/docs/_downloads/70d345c5409f99cb5de9dc44f147ff6f/build_gcn.py
index b832d18..e73dc2d 100644
--- a/docs/_downloads/70d345c5409f99cb5de9dc44f147ff6f/build_gcn.py
+++ b/docs/_downloads/70d345c5409f99cb5de9dc44f147ff6f/build_gcn.py
@@ -175,7 +175,7 @@ print("Test accuracy of DGL results: {:.2%}".format(acc))
 #                                        = ((H * W)^t * A^t)^t
 #                                        = ((W^t * H^t) * A^t)^t
 from tvm import relay
-from tvm.contrib import graph_runtime
+from tvm.contrib import graph_executor
 import tvm
 from tvm import te
 
@@ -335,9 +335,9 @@ mod["main"] = func
 with tvm.transform.PassContext(opt_level=0):  # Currently only support opt_level=0
     lib = relay.build(mod, target, params=params)
 
-# Generate graph runtime
-ctx = tvm.context(target, 0)
-m = graph_runtime.GraphModule(lib["default"](ctx))
+# Generate graph executor
+dev = tvm.device(target, 0)
+m = graph_executor.GraphModule(lib["default"](dev))
 
 ######################################################################
 # Run the TVM model, test for accuracy and verify with DGL
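The hunk above, and many of the hunks that follow, apply the same mechanical
API migration: ``tvm.contrib.graph_runtime`` becomes
``tvm.contrib.graph_executor`` and ``tvm.context`` becomes ``tvm.device``. A
minimal sketch of the updated runtime pattern, where ``lib`` is assumed to be
the result of a ``relay.build(...)`` call and the input name ``"data"`` is a
placeholder:

.. code-block:: python

    import numpy as np
    import tvm
    from tvm.contrib import graph_executor

    dev = tvm.device("llvm", 0)  # formerly tvm.context(target, 0)
    m = graph_executor.GraphModule(lib["default"](dev))  # formerly graph_runtime
    m.set_input("data", tvm.nd.array(np.zeros((1, 3, 224, 224), "float32")))
    m.run()
    out = m.get_output(0)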
diff --git a/docs/_downloads/72871483681951fd0400ddc905113f11/from_caffe2.py b/docs/_downloads/72871483681951fd0400ddc905113f11/from_caffe2.py
index 34581c6..a3378de 100644
--- a/docs/_downloads/72871483681951fd0400ddc905113f11/from_caffe2.py
+++ b/docs/_downloads/72871483681951fd0400ddc905113f11/from_caffe2.py
@@ -105,12 +105,12 @@ with transform.PassContext(opt_level=3):
 # The process is no different from other examples.
 import tvm
 from tvm import te
-from tvm.contrib import graph_runtime
+from tvm.contrib import graph_executor
 
 # context x86 CPU, use tvm.gpu(0) if you run on GPU
-ctx = tvm.cpu(0)
+dev = tvm.cpu(0)
 # create a runtime executor module
-m = graph_runtime.GraphModule(lib["default"](ctx))
+m = graph_executor.GraphModule(lib["default"](dev))
 # set inputs
 m.set_input(input_name, tvm.nd.array(data.astype("float32")))
 # execute
diff --git a/docs/_downloads/739deb9ab034a5315ce6ba6bf7e5ff44/tune_relay_cuda.ipynb b/docs/_downloads/739deb9ab034a5315ce6ba6bf7e5ff44/tune_relay_cuda.ipynb
index 82a5712..924fe57 100644
--- a/docs/_downloads/739deb9ab034a5315ce6ba6bf7e5ff44/tune_relay_cuda.ipynb
+++ b/docs/_downloads/739deb9ab034a5315ce6ba6bf7e5ff44/tune_relay_cuda.ipynb
@@ -33,7 +33,7 @@
       },
       "outputs": [],
       "source": [
-        "import os\n\nimport numpy as np\n\nimport tvm\nfrom tvm import relay, autotvm\nimport tvm.relay.testing\nfrom tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner\nimport tvm.contrib.graph_runtime as runtime"
+        "import os\n\nimport numpy as np\n\nimport tvm\nfrom tvm import relay, autotvm\nimport tvm.relay.testing\nfrom tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner\nimport tvm.contrib.graph_executor as runtime"
       ]
     },
     {
@@ -112,7 +112,7 @@
       },
       "outputs": [],
       "source": [
-        "def tune_and_evaluate(tuning_opt):\n    # extract workloads from relay program\n    print(\"Extract tasks...\")\n    mod, params, input_shape, out_shape = get_network(network, batch_size=1)\n    tasks = autotvm.task.extract_from_program(\n        mod[\"main\"], target=target, params=params, ops=(relay.op.get(\"nn.conv2d\"),)\n    )\n\n    # run tuning tasks\n    print(\"Tuning...\")\n    tune_tasks(tasks, **tuning_opt)\n\n    # compile kernels with history best records\n    with [...]
+        "def tune_and_evaluate(tuning_opt):\n    # extract workloads from relay program\n    print(\"Extract tasks...\")\n    mod, params, input_shape, out_shape = get_network(network, batch_size=1)\n    tasks = autotvm.task.extract_from_program(\n        mod[\"main\"], target=target, params=params, ops=(relay.op.get(\"nn.conv2d\"),)\n    )\n\n    # run tuning tasks\n    print(\"Tuning...\")\n    tune_tasks(tasks, **tuning_opt)\n\n    # compile kernels with history best records\n    with [...]
       ]
     },
     {
diff --git a/docs/_downloads/78bebde8ea0f8558ac1a6fe12999f99f/tune_network_mali.py b/docs/_downloads/78bebde8ea0f8558ac1a6fe12999f99f/tune_network_mali.py
index ca1067b..35751fa 100644
--- a/docs/_downloads/78bebde8ea0f8558ac1a6fe12999f99f/tune_network_mali.py
+++ b/docs/_downloads/78bebde8ea0f8558ac1a6fe12999f99f/tune_network_mali.py
@@ -49,7 +49,7 @@ import numpy as np
 import tvm
 from tvm import relay, auto_scheduler
 import tvm.relay.testing
-from tvm.contrib import graph_runtime
+from tvm.contrib import graph_executor
 import os
 
 #################################################################
@@ -139,8 +139,7 @@ layout = "NHWC"
 use_ndk = True
 # Path to cross compiler
 os.environ["TVM_NDK_CC"] = "/usr/bin/aarch64-linux-gnu-g++"
-target_host = tvm.target.Target("llvm -mtriple=aarch64-linux-gnu")
-target = tvm.target.Target("opencl -device=mali")
+target = tvm.target.Target("opencl -device=mali", host="llvm -mtriple=aarch64-linux-gnu")
 dtype = "float32"
 log_file = "%s-%s-B%d-%s.json" % (network, layout, batch_size, target.kind.name)
 
@@ -170,7 +169,7 @@ device_key = "rk3399"
 # Extract tasks from the network
 print("Extract tasks...")
 mod, params, input_shape, output_shape = get_network(network, batch_size, layout, dtype=dtype)
-tasks, task_weights = auto_scheduler.extract_tasks(mod["main"], params, target, target_host)
+tasks, task_weights = auto_scheduler.extract_tasks(mod["main"], params, target)
 
 for idx, task in enumerate(tasks):
     print("========== Task %d  (workload key: %s) ==========" % (idx, task.workload_key))
@@ -182,14 +181,14 @@ for idx, task in enumerate(tasks):
 #
 #     from tvm.auto_scheduler.utils import request_remote
 #     remote = request_remote(device_key, "0.0.0.0", 9190)
-#     ctx = remote.cl()
-#     max_shared_memory_per_block = ctx.max_shared_memory_per_block
+#     dev = remote.cl()
+#     max_shared_memory_per_block = dev.max_shared_memory_per_block
#     # There is no explicit local memory limitation
#     # so we can use INT32_MAX to disable the check on local_memory.
 #     max_local_memory_per_block = 2147483647 # INT32_MAX
-#     max_threads_per_block = ctx.max_threads_per_block
-#     max_vthread_extent = int(ctx.warp_size / 4) if int(ctx.warp_size / 4) > 1 else ctx.warp_size
-#     warp_size = ctx.warp_size
+#     max_threads_per_block = dev.max_threads_per_block
+#     max_vthread_extent = int(dev.warp_size / 4) if int(dev.warp_size / 4) > 1 else dev.warp_size
+#     warp_size = dev.warp_size
 #     hardware_params = auto_scheduler.HardwareParams(-1, 16, 64,
 #                                                     max_shared_memory_per_block, max_local_memory_per_block,
 #                                                     max_threads_per_block, max_vthread_extent, warp_size)
@@ -198,7 +197,9 @@ for idx, task in enumerate(tasks):
 #
 #   .. code-block:: python
 #
-#     tasks, task_weights = auto_scheduler.extract_tasks(mod["main"], params, target, target_host, hardware_params)
+#    tasks, task_weights = auto_scheduler.extract_tasks(
+#        mod["main"], params, target, hardware_params = hardware_params
+#    )
 #
 
 #################################################################
@@ -240,14 +241,14 @@ def tune_and_evaluate():
         with tvm.transform.PassContext(
             opt_level=3, config={"relay.backend.use_auto_scheduler": True}
         ):
-            lib = relay.build(mod, target=target, target_host=target_host, params=params)
+            lib = relay.build(mod, target, params=params)
 
-    # Create graph runtime
+    # Create graph executor
     print("=============== Request Remote ===============")
     from tvm.auto_scheduler.utils import request_remote
 
     remote = request_remote(device_key, "0.0.0.0", 9190)
-    ctx = remote.cl()
+    dev = remote.cl()
     from tvm.contrib import utils, ndk
 
     temp = utils.tempdir()
@@ -256,14 +257,14 @@ def tune_and_evaluate():
     lib.export_library(path_lib, ndk.create_shared)
     remote.upload(path_lib)
     loaded_lib = remote.load_module(filename)
-    module = graph_runtime.GraphModule(loaded_lib["default"](ctx))
+    module = graph_executor.GraphModule(loaded_lib["default"](dev))
     data = (np.random.uniform(size=input_shape)).astype(dtype)
     data_tvm = tvm.nd.array(data)
     module.set_input("data", data_tvm)
 
     # Evaluate
     print("Evaluate inference time cost...")
-    ftimer = module.module.time_evaluator("run", ctx, repeat=3, min_repeat_ms=500)
+    ftimer = module.module.time_evaluator("run", dev, repeat=3, min_repeat_ms=500)
     prof_res = np.array(ftimer().results) * 1e3  # convert to millisecond
     print(
         "Mean inference time (std dev): %.2f ms (%.2f ms)" % (np.mean(prof_res), np.std(prof_res))
@@ -329,7 +330,7 @@ def tune_and_evaluate():
 #   The last line also prints the total number of measurement trials,
 #   total time spent on auto-tuning and the id of the next task to tune.
 #
-#   There will also be some "dmlc::Error"s errors, because the
+#   There will also be some "tvm::Error" errors, because the
 #   auto-scheduler will try some invalid schedules.
 #   You can safely ignore them if the tuning can continue, because these
 #   errors are isolated from the main process.
diff --git a/docs/_downloads/78da213eae381b8ff94cc356ee7c5423/deploy_prequantized.ipynb b/docs/_downloads/78da213eae381b8ff94cc356ee7c5423/deploy_prequantized.ipynb
index d7017cc..945bc55 100644
--- a/docs/_downloads/78da213eae381b8ff94cc356ee7c5423/deploy_prequantized.ipynb
+++ b/docs/_downloads/78da213eae381b8ff94cc356ee7c5423/deploy_prequantized.ipynb
@@ -51,7 +51,7 @@
       },
       "outputs": [],
       "source": [
-        "def get_transform():\n    import torchvision.transforms as transforms\n\n    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])\n    return transforms.Compose(\n        [\n            transforms.Resize(256),\n            transforms.CenterCrop(224),\n            transforms.ToTensor(),\n            normalize,\n        ]\n    )\n\n\ndef get_real_image(im_height, im_width):\n    img_url = \"https://github.com/dmlc/mxnet.js/blob/main/data/cat.png? [...]
+        "def get_transform():\n    import torchvision.transforms as transforms\n\n    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])\n    return transforms.Compose(\n        [\n            transforms.Resize(256),\n            transforms.CenterCrop(224),\n            transforms.ToTensor(),\n            normalize,\n        ]\n    )\n\n\ndef get_real_image(im_height, im_width):\n    img_url = \"https://github.com/dmlc/mxnet.js/blob/main/data/cat.png? [...]
       ]
     },
     {
@@ -231,7 +231,7 @@
       },
       "outputs": [],
       "source": [
-        "n_repeat = 100  # should be bigger to make the measurement more accurate\nctx = tvm.cpu(0)\nftimer = rt_mod.module.time_evaluator(\"run\", ctx, number=1, repeat=n_repeat)\nprof_res = np.array(ftimer().results) * 1e3\nprint(\"Elapsed average ms:\", np.mean(prof_res))"
+        "n_repeat = 100  # should be bigger to make the measurement more accurate\ndev = tvm.cpu(0)\nftimer = rt_mod.module.time_evaluator(\"run\", dev, number=1, repeat=n_repeat)\nprof_res = np.array(ftimer().results) * 1e3\nprint(\"Elapsed average ms:\", np.mean(prof_res))"
       ]
     },
     {
diff --git a/docs/_downloads/7ece74acc230c7d55086182cc8884b09/extern_op.py b/docs/_downloads/7ece74acc230c7d55086182cc8884b09/extern_op.py
index 794101a..277af71 100644
--- a/docs/_downloads/7ece74acc230c7d55086182cc8884b09/extern_op.py
+++ b/docs/_downloads/7ece74acc230c7d55086182cc8884b09/extern_op.py
@@ -77,11 +77,11 @@ s = te.create_schedule(D.op)
 # -----------------
 # We can verify that the result matches what we expected.
 #
-ctx = tvm.cpu(0)
+dev = tvm.cpu(0)
 f = tvm.build(s, [A, B, D, bias], "llvm")
-a = tvm.nd.array(np.random.uniform(size=(n, l)).astype(A.dtype), ctx)
-b = tvm.nd.array(np.random.uniform(size=(l, m)).astype(B.dtype), ctx)
-d = tvm.nd.array(np.zeros((n, m), dtype=D.dtype), ctx)
+a = tvm.nd.array(np.random.uniform(size=(n, l)).astype(A.dtype), dev)
+b = tvm.nd.array(np.random.uniform(size=(l, m)).astype(B.dtype), dev)
+d = tvm.nd.array(np.zeros((n, m), dtype=D.dtype), dev)
 bb = 10.0
 f(a, b, d, bb)
 tvm.testing.assert_allclose(d.asnumpy(), np.dot(a.asnumpy(), b.asnumpy()) + 10, rtol=1e-5)
@@ -125,8 +125,8 @@ B = te.extern(
 )
 s = te.create_schedule(B.op)
 f = tvm.build(s, [A, B], "llvm")
-a = tvm.nd.array(np.random.uniform(size=(n,)).astype(A.dtype), ctx)
-b = tvm.nd.array(np.random.uniform(size=(n,)).astype(B.dtype), ctx)
+a = tvm.nd.array(np.random.uniform(size=(n,)).astype(A.dtype), dev)
+b = tvm.nd.array(np.random.uniform(size=(n,)).astype(B.dtype), dev)
 f(a, b)
 tvm.testing.assert_allclose(b.asnumpy(), a.asnumpy() + 1, rtol=1e-5)
 
diff --git a/docs/_downloads/8246644805c8dfcb0b33ca356cc1fafc/deploy_ssd_gluoncv.ipynb b/docs/_downloads/8246644805c8dfcb0b33ca356cc1fafc/deploy_ssd_gluoncv.ipynb
index 337ba47..9202cb2 100644
--- a/docs/_downloads/8246644805c8dfcb0b33ca356cc1fafc/deploy_ssd_gluoncv.ipynb
+++ b/docs/_downloads/8246644805c8dfcb0b33ca356cc1fafc/deploy_ssd_gluoncv.ipynb
@@ -26,7 +26,7 @@
       },
       "outputs": [],
       "source": [
-        "import tvm\nfrom tvm import te\n\nfrom matplotlib import pyplot as plt\nfrom tvm import relay\nfrom tvm.contrib import graph_runtime\nfrom tvm.contrib.download import download_testdata\nfrom gluoncv import model_zoo, data, utils"
+        "import tvm\nfrom tvm import te\n\nfrom matplotlib import pyplot as plt\nfrom tvm import relay\nfrom tvm.contrib import graph_executor\nfrom tvm.contrib.download import download_testdata\nfrom gluoncv import model_zoo, data, utils"
       ]
     },
     {
@@ -98,7 +98,7 @@
       },
       "outputs": [],
       "source": [
-        "def run(lib, ctx):\n    # Build TVM runtime\n    m = graph_runtime.GraphModule(lib[\"default\"](ctx))\n    tvm_input = tvm.nd.array(x.asnumpy(), ctx=ctx)\n    m.set_input(\"data\", tvm_input)\n    # execute\n    m.run()\n    # get outputs\n    class_IDs, scores, bounding_boxs = m.get_output(0), m.get_output(1), m.get_output(2)\n    return class_IDs, scores, bounding_boxs\n\n\nfor target in [\"llvm\", \"cuda\"]:\n    ctx = tvm.context(target, 0)\n    if ctx.exist:\n        lib =  [...]
+        "def run(lib, dev):\n    # Build TVM runtime\n    m = graph_executor.GraphModule(lib[\"default\"](dev))\n    tvm_input = tvm.nd.array(x.asnumpy(), device=dev)\n    m.set_input(\"data\", tvm_input)\n    # execute\n    m.run()\n    # get outputs\n    class_IDs, scores, bounding_boxs = m.get_output(0), m.get_output(1), m.get_output(2)\n    return class_IDs, scores, bounding_boxs\n\n\nfor target in [\"llvm\", \"cuda\"]:\n    dev = tvm.device(target, 0)\n    if dev.exist:\n        lib [...]
       ]
     },
     {
diff --git a/docs/_downloads/835a4def1e256b7a1f711621fc031418/from_darknet.ipynb b/docs/_downloads/835a4def1e256b7a1f711621fc031418/from_darknet.ipynb
index c19a0fa..ac4c11e 100644
--- a/docs/_downloads/835a4def1e256b7a1f711621fc031418/from_darknet.ipynb
+++ b/docs/_downloads/835a4def1e256b7a1f711621fc031418/from_darknet.ipynb
@@ -80,7 +80,7 @@
       },
       "outputs": [],
       "source": [
-        "target = \"llvm\"\ntarget_host = \"llvm\"\nctx = tvm.cpu(0)\ndata = np.empty([batch_size, net.c, net.h, net.w], dtype)\nshape = {\"data\": data.shape}\nprint(\"Compiling the model...\")\nwith tvm.transform.PassContext(opt_level=3):\n    lib = relay.build(mod, target=target, target_host=target_host, params=params)\n\n[neth, netw] = shape[\"data\"][2:]  # Current image shape is 608x608"
+        "target = tvm.target.Target(\"llvm\", host=\"llvm\")\ndev = tvm.cpu(0)\ndata = np.empty([batch_size, net.c, net.h, net.w], dtype)\nshape = {\"data\": data.shape}\nprint(\"Compiling the model...\")\nwith tvm.transform.PassContext(opt_level=3):\n    lib = relay.build(mod, target=target, params=params)\n\n[neth, netw] = shape[\"data\"][2:]  # Current image shape is 608x608"
       ]
     },
     {
@@ -116,7 +116,7 @@
       },
       "outputs": [],
       "source": [
-        "from tvm.contrib import graph_runtime\n\nm = graph_runtime.GraphModule(lib[\"default\"](ctx))\n\n# set inputs\nm.set_input(\"data\", tvm.nd.array(data.astype(dtype)))\n# execute\nprint(\"Running the test image...\")\n\n# detection\n# thresholds\nthresh = 0.5\nnms_thresh = 0.45\n\nm.run()\n# get outputs\ntvm_out = []\nif MODEL_NAME == \"yolov2\":\n    layer_out = {}\n    layer_out[\"type\"] = \"Region\"\n    # Get the region layer attributes (n, out_c, out_h, out_w, classes, coor [...]
+        "from tvm.contrib import graph_executor\n\nm = graph_executor.GraphModule(lib[\"default\"](dev))\n\n# set inputs\nm.set_input(\"data\", tvm.nd.array(data.astype(dtype)))\n# execute\nprint(\"Running the test image...\")\n\n# detection\n# thresholds\nthresh = 0.5\nnms_thresh = 0.45\n\nm.run()\n# get outputs\ntvm_out = []\nif MODEL_NAME == \"yolov2\":\n    layer_out = {}\n    layer_out[\"type\"] = \"Region\"\n    # Get the region layer attributes (n, out_c, out_h, out_w, classes, co [...]
       ]
     }
   ],
diff --git a/docs/_downloads/836dc3852acf09662e9eb37c4c5e1e1b/opt_gemm.py b/docs/_downloads/836dc3852acf09662e9eb37c4c5e1e1b/opt_gemm.py
index 971269d..72a8b0a 100644
--- a/docs/_downloads/836dc3852acf09662e9eb37c4c5e1e1b/opt_gemm.py
+++ b/docs/_downloads/836dc3852acf09662e9eb37c4c5e1e1b/opt_gemm.py
@@ -75,11 +75,11 @@ dtype = "float32"
 # To get the best performance, please change the following line
 # to llvm -mcpu=core-avx2, or specific type of CPU you use
 target = "llvm"
-ctx = tvm.context(target, 0)
+dev = tvm.device(target, 0)
 
 # Random generated tensor for testing
-a = tvm.nd.array(numpy.random.rand(M, K).astype(dtype), ctx)
-b = tvm.nd.array(numpy.random.rand(K, N).astype(dtype), ctx)
+a = tvm.nd.array(numpy.random.rand(M, K).astype(dtype), dev)
+b = tvm.nd.array(numpy.random.rand(K, N).astype(dtype), dev)
 
 np_repeat = 100
 np_runing_time = timeit.timeit(
@@ -108,11 +108,11 @@ s = te.create_schedule(C.op)
 func = tvm.build(s, [A, B, C], target=target, name="mmult")
 assert func
 
-c = tvm.nd.array(numpy.zeros((M, N), dtype=dtype), ctx)
+c = tvm.nd.array(numpy.zeros((M, N), dtype=dtype), dev)
 func(a, b, c)
 tvm.testing.assert_allclose(c.asnumpy(), answer, rtol=1e-5)
 
-evaluator = func.time_evaluator(func.entry_name, ctx, number=1)
+evaluator = func.time_evaluator(func.entry_name, dev, number=1)
 print("Baseline: %f" % evaluator(a, b, c).mean)
 
 ################################################################################################
@@ -143,13 +143,13 @@ s[C].reorder(xo, yo, ko, ki, xi, yi)
 func = tvm.build(s, [A, B, C], target=target, name="mmult")
 assert func
 
-c = tvm.nd.array(numpy.zeros((M, N), dtype=dtype), ctx)
+c = tvm.nd.array(numpy.zeros((M, N), dtype=dtype), dev)
 func(a, b, c)
 tvm.testing.assert_allclose(c.asnumpy(), answer, rtol=1e-5)
 
 # By simply tiling the loop 32x32, and hoisting ko, ki outside the blocking loops,
 # we can see big speedup compared with the baseline.
-evaluator = func.time_evaluator(func.entry_name, ctx, number=10)
+evaluator = func.time_evaluator(func.entry_name, dev, number=10)
 print("Opt1: %f" % evaluator(a, b, c).mean)
 
 ################################################################################################
@@ -179,11 +179,11 @@ s[C].vectorize(yi)
 func = tvm.build(s, [A, B, C], target=target, name="mmult")
 assert func
 
-c = tvm.nd.array(numpy.zeros((M, N), dtype=dtype), ctx)
+c = tvm.nd.array(numpy.zeros((M, N), dtype=dtype), dev)
 func(a, b, c)
 tvm.testing.assert_allclose(c.asnumpy(), answer, rtol=1e-5)
 
-evaluator = func.time_evaluator(func.entry_name, ctx, number=10)
+evaluator = func.time_evaluator(func.entry_name, dev, number=10)
 print("Opt2: %f" % evaluator(a, b, c).mean)
 
 ################################################################################################
@@ -212,11 +212,11 @@ s[C].vectorize(yi)
 func = tvm.build(s, [A, B, C], target=target, name="mmult")
 assert func
 
-c = tvm.nd.array(numpy.zeros((M, N), dtype=dtype), ctx)
+c = tvm.nd.array(numpy.zeros((M, N), dtype=dtype), dev)
 func(a, b, c)
 tvm.testing.assert_allclose(c.asnumpy(), answer, rtol=1e-5)
 
-evaluator = func.time_evaluator(func.entry_name, ctx, number=10)
+evaluator = func.time_evaluator(func.entry_name, dev, number=10)
 print("Opt3: %f" % evaluator(a, b, c).mean)
 
 ################################################################################################
@@ -268,11 +268,11 @@ s[packedB].parallel(x)
 func = tvm.build(s, [A, B, C], target=target, name="mmult")
 assert func
 
-c = tvm.nd.array(numpy.zeros((M, N), dtype=dtype), ctx)
+c = tvm.nd.array(numpy.zeros((M, N), dtype=dtype), dev)
 func(a, b, c)
 tvm.testing.assert_allclose(c.asnumpy(), answer, rtol=1e-5)
 
-evaluator = func.time_evaluator(func.entry_name, ctx, number=10)
+evaluator = func.time_evaluator(func.entry_name, dev, number=10)
 print("Opt4: %f" % evaluator(a, b, c).mean)
 
 ################################################################################################
@@ -314,11 +314,11 @@ s[packedB].parallel(x)
 func = tvm.build(s, [A, B, C], target=target, name="mmult")
 assert func
 
-c = tvm.nd.array(numpy.zeros((M, N), dtype=dtype), ctx)
+c = tvm.nd.array(numpy.zeros((M, N), dtype=dtype), dev)
 func(a, b, c)
 tvm.testing.assert_allclose(c.asnumpy(), answer, rtol=1e-5)
 
-evaluator = func.time_evaluator(func.entry_name, ctx, number=10)
+evaluator = func.time_evaluator(func.entry_name, dev, number=10)
 print("Opt5: %f" % evaluator(a, b, c).mean)
 
 ################################################################################################
@@ -357,11 +357,11 @@ s[packedB].parallel(x)
 func = tvm.build(s, [A, B, C], target=target, name="mmult")
 assert func
 
-c = tvm.nd.array(numpy.zeros((M, N), dtype=dtype), ctx)
+c = tvm.nd.array(numpy.zeros((M, N), dtype=dtype), dev)
 func(a, b, c)
 tvm.testing.assert_allclose(c.asnumpy(), answer, rtol=1e-5)
 
-evaluator = func.time_evaluator(func.entry_name, ctx, number=50)
+evaluator = func.time_evaluator(func.entry_name, dev, number=50)
 opt6_time = evaluator(a, b, c).mean
 print("Opt6: %f" % opt6_time)
 
diff --git a/docs/_downloads/83dedc6352b4016772e17480ef01345d/deploy_model_on_rasp.py b/docs/_downloads/83dedc6352b4016772e17480ef01345d/deploy_model_on_rasp.py
index cae9d90..a59665f 100644
--- a/docs/_downloads/83dedc6352b4016772e17480ef01345d/deploy_model_on_rasp.py
+++ b/docs/_downloads/83dedc6352b4016772e17480ef01345d/deploy_model_on_rasp.py
@@ -30,7 +30,7 @@ import tvm
 from tvm import te
 import tvm.relay as relay
 from tvm import rpc
-from tvm.contrib import utils, graph_runtime as runtime
+from tvm.contrib import utils, graph_executor as runtime
 from tvm.contrib.download import download_testdata
 
 ######################################################################
@@ -217,8 +217,8 @@ remote.upload(lib_fname)
 rlib = remote.load_module("net.tar")
 
 # create the remote runtime module
-ctx = remote.cpu(0)
-module = runtime.GraphModule(rlib["default"](ctx))
+dev = remote.cpu(0)
+module = runtime.GraphModule(rlib["default"](dev))
 # set input data
 module.set_input("data", tvm.nd.array(x.astype("float32")))
 # run
diff --git a/docs/_downloads/85ba00b8ada85b8c5367f37b526a8caa/tune_relay_x86.py b/docs/_downloads/85ba00b8ada85b8c5367f37b526a8caa/tune_relay_x86.py
index 30e62ef..dd5d405 100644
--- a/docs/_downloads/85ba00b8ada85b8c5367f37b526a8caa/tune_relay_x86.py
+++ b/docs/_downloads/85ba00b8ada85b8c5367f37b526a8caa/tune_relay_x86.py
@@ -36,7 +36,7 @@ from tvm import relay, autotvm
 from tvm.relay import testing
 from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner
 from tvm.autotvm.graph_tuner import DPTuner, PBQPTuner
-import tvm.contrib.graph_runtime as runtime
+import tvm.contrib.graph_executor as runtime
 
 #################################################################
 # Define network
@@ -213,14 +213,14 @@ def tune_and_evaluate(tuning_opt):
             lib = relay.build_module.build(mod, target=target, params=params)
 
         # upload parameters to device
-        ctx = tvm.cpu()
+        dev = tvm.cpu()
         data_tvm = tvm.nd.array((np.random.uniform(size=data_shape)).astype(dtype))
-        module = runtime.GraphModule(lib["default"](ctx))
+        module = runtime.GraphModule(lib["default"](dev))
         module.set_input(input_name, data_tvm)
 
         # evaluate
         print("Evaluate inference time cost...")
-        ftimer = module.module.time_evaluator("run", ctx, number=100, repeat=3)
+        ftimer = module.module.time_evaluator("run", dev, number=100, repeat=3)
         prof_res = np.array(ftimer().results) * 1000  # convert to millisecond
         print(
             "Mean inference time (std dev): %.2f ms (%.2f ms)"
diff --git a/docs/_downloads/8631d5082613ab80110d8237562cd480/extern_op.ipynb b/docs/_downloads/8631d5082613ab80110d8237562cd480/extern_op.ipynb
index 10e212e..301b0ae 100644
--- a/docs/_downloads/8631d5082613ab80110d8237562cd480/extern_op.ipynb
+++ b/docs/_downloads/8631d5082613ab80110d8237562cd480/extern_op.ipynb
@@ -62,7 +62,7 @@
       },
       "outputs": [],
       "source": [
-        "ctx = tvm.cpu(0)\nf = tvm.build(s, [A, B, D, bias], \"llvm\")\na = tvm.nd.array(np.random.uniform(size=(n, l)).astype(A.dtype), ctx)\nb = tvm.nd.array(np.random.uniform(size=(l, m)).astype(B.dtype), ctx)\nd = tvm.nd.array(np.zeros((n, m), dtype=D.dtype), ctx)\nbb = 10.0\nf(a, b, d, bb)\ntvm.testing.assert_allclose(d.asnumpy(), np.dot(a.asnumpy(), b.asnumpy()) + 10, rtol=1e-5)"
+        "dev = tvm.cpu(0)\nf = tvm.build(s, [A, B, D, bias], \"llvm\")\na = tvm.nd.array(np.random.uniform(size=(n, l)).astype(A.dtype), dev)\nb = tvm.nd.array(np.random.uniform(size=(l, m)).astype(B.dtype), dev)\nd = tvm.nd.array(np.zeros((n, m), dtype=D.dtype), dev)\nbb = 10.0\nf(a, b, d, bb)\ntvm.testing.assert_allclose(d.asnumpy(), np.dot(a.asnumpy(), b.asnumpy()) + 10, rtol=1e-5)"
       ]
     },
     {
@@ -98,7 +98,7 @@
       },
       "outputs": [],
       "source": [
-        "@tvm.register_func(\"tvm.contrib.my_tvm_addone\")\ndef my_tvm_addone(x, y):\n    print(\"my_tvm_addone signatures: %s, %s\" % (type(x), type(y)))\n    tvm.nd.array(x.asnumpy() + 1).copyto(y)\n\n\nA = te.placeholder((n,), name=\"A\")\nB = te.extern(\n    A.shape,\n    [A],\n    lambda ins, outs: tvm.tir.call_packed(\"tvm.contrib.my_tvm_addone\", ins[0], outs[0]),\n    name=\"C\",\n)\ns = te.create_schedule(B.op)\nf = tvm.build(s, [A, B], \"llvm\")\na = tvm.nd.array(np.random.unif [...]
+        "@tvm.register_func(\"tvm.contrib.my_tvm_addone\")\ndef my_tvm_addone(x, y):\n    print(\"my_tvm_addone signatures: %s, %s\" % (type(x), type(y)))\n    tvm.nd.array(x.asnumpy() + 1).copyto(y)\n\n\nA = te.placeholder((n,), name=\"A\")\nB = te.extern(\n    A.shape,\n    [A],\n    lambda ins, outs: tvm.tir.call_packed(\"tvm.contrib.my_tvm_addone\", ins[0], outs[0]),\n    name=\"C\",\n)\ns = te.create_schedule(B.op)\nf = tvm.build(s, [A, B], \"llvm\")\na = tvm.nd.array(np.random.unif [...]
       ]
     },
     {
diff --git a/docs/_downloads/870680567a5bf1e4697356b416e302b4/opt_matmul_auto_tensorcore.ipynb b/docs/_downloads/870680567a5bf1e4697356b416e302b4/opt_matmul_auto_tensorcore.ipynb
index e8fc00c..227b021 100644
--- a/docs/_downloads/870680567a5bf1e4697356b416e302b4/opt_matmul_auto_tensorcore.ipynb
+++ b/docs/_downloads/870680567a5bf1e4697356b416e302b4/opt_matmul_auto_tensorcore.ipynb
@@ -69,7 +69,7 @@
       },
       "outputs": [],
       "source": [
-        "# check whether the gpu has tensorcore\nif not tvm.gpu(0).exist or not tvm.runtime.enabled(\"cuda\"):\n    raise Exception(\"skip building this tutorial because cuda is not enabled..\")\n\nctx = tvm.gpu()\nif not nvcc.have_tensorcore(ctx.compute_version):\n    raise Exception(\"the gpu has no tensorcore, skipping...\")\n\nM, N, L = 512, 32, 512\ndtype = \"float16\"\nlayout = \"NN\"\nif len(sys.argv) >= 4:\n    M, N, L = int(sys.argv[1]), int(sys.argv[2]), int(sys.argv[3])\nif le [...]
... 574166 lines suppressed ...