You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tvm.apache.org by tq...@apache.org on 2020/07/03 21:20:29 UTC

[incubator-tvm-site] branch asf-site updated: Docs build at Fri Jul 3 14:20:16 PDT 2020

This is an automated email from the ASF dual-hosted git repository.

tqchen pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/incubator-tvm-site.git


The following commit(s) were added to refs/heads/asf-site by this push:
     new 4bef74e  Docs build at Fri Jul  3 14:20:16 PDT 2020
4bef74e is described below

commit 4bef74eb15b033438ff5057f88e22814edc2384c
Author: tqchen <tq...@octoml.ai>
AuthorDate: Fri Jul 3 14:20:16 2020 -0700

    Docs build at Fri Jul  3 14:20:16 PDT 2020
---
 .../deploy_sparse.py                               |  352 ++
 .../intrin_math.ipynb                              |    4 +-
 .../deploy_sparse.ipynb                            |  187 +
 .../opt_conv_tensorcore.ipynb                      |    2 +-
 .../micro_tflite.ipynb                             |  157 +
 .../opt_conv_tensorcore.py                         |    8 +-
 .../intrin_math.py                                 |   12 +-
 .../micro_tflite.py                                |  218 +
 docs/_images/sphx_glr_deploy_sparse_thumb.png      |  Bin 0 -> 26786 bytes
 docs/_images/sphx_glr_micro_tflite_thumb.png       |  Bin 0 -> 26786 bytes
 docs/_sources/api/python/topi.rst.txt              |    2 +
 docs/_sources/contribute/code_review.rst.txt       |   47 +-
 docs/_sources/contribute/release_process.rst.txt   |    9 +-
 docs/_sources/dev/inferbound.rst.txt               |    6 +-
 docs/_sources/frontend/tensorflow.rst.txt          |    4 +
 docs/_sources/langref/relay_op.rst.txt             |    1 +
 docs/_sources/langref/relay_pattern.rst.txt        |   53 +-
 .../tutorials/autotvm/sg_execution_times.rst.txt   |   16 +-
 .../tutorials/autotvm/tune_conv2d_cuda.rst.txt     |   42 +-
 .../tutorials/autotvm/tune_simple_template.rst.txt |   20 +-
 .../tutorials/cross_compilation_and_rpc.rst.txt    |    2 +-
 .../tutorials/dev/low_level_custom_pass.rst.txt    |   20 +-
 .../tutorials/dev/sg_execution_times.rst.txt       |    6 +-
 .../frontend/deploy_model_on_android.rst.txt       |    2 +-
 .../tutorials/frontend/deploy_prequantized.rst.txt |    2 +-
 .../frontend/deploy_prequantized_tflite.rst.txt    |    4 +-
 .../tutorials/frontend/deploy_sparse.rst.txt       |  442 ++
 .../tutorials/frontend/deploy_ssd_gluoncv.rst.txt  |  133 +-
 docs/_sources/tutorials/frontend/from_onnx.rst.txt |    4 +-
 .../tutorials/frontend/from_tensorflow.rst.txt     |    4 +-
 .../tutorials/frontend/sg_execution_times.rst.txt  |   37 +-
 .../tutorials/frontend/using_external_lib.rst.txt  |    1 -
 docs/_sources/tutorials/index.rst.txt              |   54 +
 .../tutorials/language/intrin_math.rst.txt         |   12 +-
 docs/_sources/tutorials/language/reduction.rst.txt |   74 +-
 docs/_sources/tutorials/language/scan.rst.txt      |   52 +-
 .../tutorials/language/schedule_primitives.rst.txt |  136 +-
 .../tutorials/language/sg_execution_times.rst.txt  |   18 +-
 docs/_sources/tutorials/language/tensorize.rst.txt |   46 +-
 .../tutorials/language/tuple_inputs.rst.txt        |   50 +-
 docs/_sources/tutorials/micro/micro_tflite.rst.txt |  283 ++
 .../tutorials/micro/sg_execution_times.rst.txt     |   10 +
 .../tutorials/optimize/opt_conv_cuda.rst.txt       |    2 +-
 .../tutorials/optimize/opt_conv_tensorcore.rst.txt |   54 +-
 docs/_sources/tutorials/optimize/opt_gemm.rst.txt  |  114 +-
 .../tutorials/optimize/sg_execution_times.rst.txt  |   10 +-
 docs/_sources/tutorials/relay_quick_start.rst.txt  |    6 +-
 docs/_sources/tutorials/sg_execution_times.rst.txt |    8 +-
 docs/_sources/tutorials/topi/intro_topi.rst.txt    |  312 +-
 .../tutorials/topi/sg_execution_times.rst.txt      |    4 +-
 docs/_sources/vta/install.rst.txt                  |   17 +-
 .../tutorials/autotvm/sg_execution_times.rst.txt   |    4 +-
 .../vta/tutorials/autotvm/tune_relay_vta.rst.txt   |   24 +-
 .../frontend/deploy_classification.rst.txt         |    4 +-
 .../tutorials/frontend/deploy_detection.rst.txt    |    2 +-
 .../tutorials/frontend/sg_execution_times.rst.txt  |    6 +-
 .../_sources/vta/tutorials/matrix_multiply.rst.txt |   78 +-
 .../vta/tutorials/optimize/convolution_opt.rst.txt |  146 +-
 .../tutorials/optimize/matrix_multiply_opt.rst.txt |  120 +-
 .../tutorials/optimize/sg_execution_times.rst.txt  |    6 +-
 .../vta/tutorials/sg_execution_times.rst.txt       |    6 +-
 .../_sources/vta/tutorials/vta_get_started.rst.txt |   42 +-
 docs/api/doxygen/algorithm_8h.html                 |    2 +-
 docs/api/doxygen/algorithm_8h__incl.svg            | 1390 +++---
 docs/api/doxygen/analyzer_8h.html                  |    2 +-
 docs/api/doxygen/analyzer_8h__incl.svg             | 1066 ++--
 docs/api/doxygen/analyzer_8h_source.html           |    6 +-
 docs/api/doxygen/annotated.html                    |  722 +--
 docs/api/doxygen/annotation_8h.html                |    2 +-
 docs/api/doxygen/annotation_8h__incl.svg           |  928 ++--
 docs/api/doxygen/array__utils_8h.html              |    2 +-
 docs/api/doxygen/array__utils_8h__incl.svg         | 2017 ++++----
 docs/api/doxygen/attr__registry__map_8h.html       |    7 +-
 .../doxygen/attr__registry__map_8h__dep__incl.svg  |  786 ++-
 docs/api/doxygen/attr__registry__map_8h__incl.svg  |  566 ++-
 .../api/doxygen/attr__registry__map_8h_source.html |   23 +-
 docs/api/doxygen/autodiff_8h.html                  |    2 +-
 docs/api/doxygen/autodiff_8h__incl.svg             | 1744 +++----
 docs/api/doxygen/base_8h.html                      |    2 +-
 docs/api/doxygen/base_8h__dep__incl.svg            |    2 +-
 docs/api/doxygen/base_8h__incl.svg                 | 1030 ++--
 docs/api/doxygen/base_8h_source.html               |    6 +-
 docs/api/doxygen/batch__matmul_8h.html             |    2 +-
 docs/api/doxygen/batch__matmul_8h__incl.svg        | 2048 ++++----
 docs/api/doxygen/bias__add_8h.html                 |    2 +-
 docs/api/doxygen/bias__add_8h__incl.svg            | 1975 ++++----
 docs/api/doxygen/bitserial_8h.html                 |    2 +-
 docs/api/doxygen/bitserial_8h__incl.svg            | 1112 +++--
 docs/api/doxygen/bound_8h.html                     |    2 +-
 docs/api/doxygen/bound_8h__dep__incl.svg           |    2 +-
 docs/api/doxygen/bound_8h__incl.svg                | 1078 ++---
 docs/api/doxygen/broadcast_8h.html                 |    2 +-
 docs/api/doxygen/broadcast_8h__incl.svg            | 1931 ++++----
 docs/api/doxygen/broadcast_8h_source.html          |    2 +-
 docs/api/doxygen/buffer_8h.html                    |    2 +-
 docs/api/doxygen/buffer_8h__incl.svg               |  902 ++--
 docs/api/doxygen/buffer_8h_source.html             |   12 +-
 docs/api/doxygen/builtin_8h.html                   |  317 ++
 docs/api/doxygen/builtin_8h__dep__incl.svg         |  161 +
 docs/api/doxygen/builtin_8h__incl.svg              | 1353 ++++++
 docs/api/doxygen/builtin_8h_source.html            |  170 +
 docs/api/doxygen/c__runtime__api_8h.html           |    2 +-
 docs/api/doxygen/c__runtime__api_8h__dep__incl.svg | 1219 +++--
 docs/api/doxygen/c__runtime__api_8h_source.html    |    2 +-
 docs/api/doxygen/classes.html                      |  281 +-
 docs/api/doxygen/classtvm_1_1BaseAttrsNode.html    |   10 +-
 docs/api/doxygen/classtvm_1_1BaseExprNode.html     |    2 +-
 .../classtvm_1_1BaseExprNode__inherit__graph.svg   | 1310 +++--
 .../doxygen/classtvm_1_1DenseMapNode-members.html  |  130 +
 docs/api/doxygen/classtvm_1_1DenseMapNode.html     |  574 +++
 .../classtvm_1_1DenseMapNode__coll__graph.svg      |   96 +
 .../classtvm_1_1DenseMapNode__inherit__graph.svg   |   78 +
 .../classtvm_1_1DictAttrsNode__coll__graph.svg     |    4 +-
 .../classtvm_1_1FloatImmNode__coll__graph.svg      |    2 +-
 .../classtvm_1_1IRModuleNode__coll__graph.svg      |    8 +-
 .../classtvm_1_1IntImmNode__coll__graph.svg        |    2 +-
 docs/api/doxygen/classtvm_1_1Map-members.html      |   31 +-
 docs/api/doxygen/classtvm_1_1Map.html              |  168 +-
 docs/api/doxygen/classtvm_1_1MapNode-members.html  |   24 +-
 docs/api/doxygen/classtvm_1_1MapNode.html          |  633 ++-
 .../classtvm_1_1MapNode_1_1iterator-members.html   |  124 +
 .../doxygen/classtvm_1_1MapNode_1_1iterator.html   |  600 +++
 ...lasstvm_1_1MapNode_1_1iterator__coll__graph.svg |   79 +
 .../doxygen/classtvm_1_1MapNode__coll__graph.svg   |   53 +-
 .../classtvm_1_1MapNode__inherit__graph.svg        |  103 +-
 ...ml => classtvm_1_1Map_1_1iterator-members.html} |   34 +-
 docs/api/doxygen/classtvm_1_1Map_1_1iterator.html  |  457 ++
 .../classtvm_1_1Map_1_1iterator__coll__graph.svg   |   29 +
 docs/api/doxygen/classtvm_1_1Map__coll__graph.svg  |   10 +-
 .../doxygen/classtvm_1_1Map__inherit__graph.svg    |   10 +-
 docs/api/doxygen/classtvm_1_1PrimExprNode.html     |    2 +-
 .../classtvm_1_1PrimExprNode__coll__graph.svg      |    2 +-
 .../classtvm_1_1PrimExprNode__inherit__graph.svg   | 1060 ++--
 .../classtvm_1_1PrimTypeNode__coll__graph.svg      |    2 +-
 docs/api/doxygen/classtvm_1_1Range-members.html    |    2 +-
 docs/api/doxygen/classtvm_1_1Range.html            |   10 +-
 .../api/doxygen/classtvm_1_1Range__coll__graph.svg |    2 +-
 .../doxygen/classtvm_1_1Range__inherit__graph.svg  |    2 +-
 .../doxygen/classtvm_1_1SmallMapNode-members.html  |  135 +
 docs/api/doxygen/classtvm_1_1SmallMapNode.html     |  573 +++
 .../classtvm_1_1SmallMapNode__coll__graph.svg      |  100 +
 .../classtvm_1_1SmallMapNode__inherit__graph.svg   |  100 +
 docs/api/doxygen/classtvm_1_1Target-members.html   |   15 +-
 docs/api/doxygen/classtvm_1_1Target.html           |   61 +-
 ...bers.html => classtvm_1_1TargetId-members.html} |   21 +-
 ...vm_1_1Target.html => classtvm_1_1TargetId.html} |  192 +-
 ...ml => classtvm_1_1TargetIdAttrMap-members.html} |   21 +-
 docs/api/doxygen/classtvm_1_1TargetIdAttrMap.html  |  210 +
 .../classtvm_1_1TargetIdAttrMap__coll__graph.svg   |   65 +
 ...classtvm_1_1TargetIdAttrMap__inherit__graph.svg |   44 +
 ....html => classtvm_1_1TargetIdNode-members.html} |   35 +-
 docs/api/doxygen/classtvm_1_1TargetIdNode.html     |  468 ++
 .../classtvm_1_1TargetIdNode__coll__graph.svg      |  151 +
 .../classtvm_1_1TargetIdNode__inherit__graph.svg   |   41 +
 ...l => classtvm_1_1TargetIdRegEntry-members.html} |   25 +-
 docs/api/doxygen/classtvm_1_1TargetIdRegEntry.html |  479 ++
 .../classtvm_1_1TargetIdRegEntry__coll__graph.svg  |   28 +
 .../doxygen/classtvm_1_1TargetId__coll__graph.svg  |   40 +
 .../classtvm_1_1TargetId__inherit__graph.svg       |   40 +
 .../doxygen/classtvm_1_1TargetNode-members.html    |   20 +-
 docs/api/doxygen/classtvm_1_1TargetNode.html       |  233 +-
 .../classtvm_1_1TargetNode__coll__graph.svg        |  240 +-
 .../classtvm_1_1TargetNode__inherit__graph.svg     |   57 +-
 .../doxygen/classtvm_1_1Target__coll__graph.svg    |   41 +-
 .../doxygen/classtvm_1_1Target__inherit__graph.svg |   41 +-
 .../classtvm_1_1TensorTypeNode__coll__graph.svg    |    2 +-
 .../classtvm_1_1arith_1_1Analyzer-members.html     |    6 +-
 .../api/doxygen/classtvm_1_1arith_1_1Analyzer.html |   36 +-
 ...vm_1_1arith_1_1CanonicalSimplifier-members.html |    2 +-
 .../classtvm_1_1arith_1_1CanonicalSimplifier.html  |   12 +-
 ..._1_1arith_1_1ConstIntBoundAnalyzer-members.html |    4 +-
 ...classtvm_1_1arith_1_1ConstIntBoundAnalyzer.html |   24 +-
 ...1_1arith_1_1IntConstraintsNode__coll__graph.svg |    4 +-
 ...1_1IntConstraintsTransformNode__coll__graph.svg |    4 +-
 .../classtvm_1_1arith_1_1IntSet-members.html       |   38 +-
 docs/api/doxygen/classtvm_1_1arith_1_1IntSet.html  |  350 +-
 .../classtvm_1_1arith_1_1IntSet__coll__graph.svg   |   72 +-
 ...classtvm_1_1arith_1_1IntSet__inherit__graph.svg |   72 +-
 ...tvm_1_1arith_1_1ModularSetAnalyzer-members.html |    2 +-
 .../classtvm_1_1arith_1_1ModularSetAnalyzer.html   |   12 +-
 ...stvm_1_1arith_1_1RewriteSimplifier-members.html |    2 +-
 .../classtvm_1_1arith_1_1RewriteSimplifier.html    |   12 +-
 .../api/doxygen/classtvm_1_1relay_1_1CallNode.html |    2 +-
 ..._1relay_1_1DataTypePatternNode__coll__graph.svg |    2 +-
 ...relay_1_1InterpreterClosureObj__coll__graph.svg |    4 +-
 .../classtvm_1_1runtime_1_1Array-members.html      |    1 +
 docs/api/doxygen/classtvm_1_1runtime_1_1Array.html |   16 +
 .../classtvm_1_1runtime_1_1DataType-members.html   |   22 +-
 .../doxygen/classtvm_1_1runtime_1_1DataType.html   |   31 +-
 ...lasstvm_1_1runtime_1_1DataType__coll__graph.svg |    2 +-
 ...classtvm_1_1runtime_1_1IterAdapter-members.html |    4 +-
 .../classtvm_1_1runtime_1_1IterAdapter.html        |   16 +-
 .../api/doxygen/classtvm_1_1runtime_1_1Object.html |    2 +-
 .../doxygen/classtvm_1_1runtime_1_1ObjectPtr.html  |    2 +-
 .../classtvm_1_1runtime_1_1ObjectRef-members.html  |   25 +-
 .../doxygen/classtvm_1_1runtime_1_1ObjectRef.html  |   22 +
 .../classtvm_1_1runtime_1_1String-members.html     |    2 +-
 .../api/doxygen/classtvm_1_1runtime_1_1String.html |    8 +-
 ...1_1runtime_1_1vm_1_1VirtualMachine-members.html |    2 +-
 ...lasstvm_1_1runtime_1_1vm_1_1VirtualMachine.html |   12 +-
 ...untime_1_1vm_1_1VirtualMachine__coll__graph.svg |    4 +-
 ...tvm_1_1te_1_1BaseComputeOpNode__coll__graph.svg |    4 +-
 ...lasstvm_1_1te_1_1ComputeOpNode__coll__graph.svg |    4 +-
 ...classtvm_1_1te_1_1ExternOpNode__coll__graph.svg |    4 +-
 ...classtvm_1_1te_1_1HybridOpNode__coll__graph.svg |    4 +-
 ...lasstvm_1_1te_1_1OperationNode__coll__graph.svg |    4 +-
 ...tvm_1_1te_1_1PlaceholderOpNode__coll__graph.svg |    6 +-
 .../classtvm_1_1te_1_1ScanOpNode__coll__graph.svg  |    4 +-
 ...classtvm_1_1te_1_1ScheduleNode__coll__graph.svg |    4 +-
 .../classtvm_1_1te_1_1StageNode__coll__graph.svg   |    4 +-
 ...m_1_1te_1_1TensorComputeOpNode__coll__graph.svg |    4 +-
 .../classtvm_1_1te_1_1TensorNode__coll__graph.svg  |    2 +-
 .../classtvm_1_1tir_1_1AddNode__coll__graph.svg    |    2 +-
 ...lasstvm_1_1tir_1_1AllocateNode__coll__graph.svg |    2 +-
 .../classtvm_1_1tir_1_1AndNode__coll__graph.svg    |    2 +-
 .../classtvm_1_1tir_1_1AnyNode__coll__graph.svg    |    2 +-
 ...lasstvm_1_1tir_1_1BinaryOpNode__coll__graph.svg |    2 +-
 ...asstvm_1_1tir_1_1BroadcastNode__coll__graph.svg |    2 +-
 ...sstvm_1_1tir_1_1BufferLoadNode__coll__graph.svg |    2 +-
 .../classtvm_1_1tir_1_1BufferNode__coll__graph.svg |    2 +-
 .../doxygen/classtvm_1_1tir_1_1Call-members.html   |   15 +-
 docs/api/doxygen/classtvm_1_1tir_1_1Call.html      |   39 +-
 .../classtvm_1_1tir_1_1CallNode-members.html       |   36 +-
 docs/api/doxygen/classtvm_1_1tir_1_1CallNode.html  |  474 +-
 .../classtvm_1_1tir_1_1CallNode__coll__graph.svg   |  291 +-
 ...classtvm_1_1tir_1_1CallNode__inherit__graph.svg |  108 +-
 .../classtvm_1_1tir_1_1CastNode__coll__graph.svg   |    2 +-
 .../classtvm_1_1tir_1_1CmpOpNode__coll__graph.svg  |    2 +-
 .../classtvm_1_1tir_1_1DivNode__coll__graph.svg    |    2 +-
 .../classtvm_1_1tir_1_1EQNode__coll__graph.svg     |    2 +-
 ...lasstvm_1_1tir_1_1FloorDivNode__coll__graph.svg |    2 +-
 ...lasstvm_1_1tir_1_1FloorModNode__coll__graph.svg |    2 +-
 docs/api/doxygen/classtvm_1_1tir_1_1Free.html      |  189 -
 docs/api/doxygen/classtvm_1_1tir_1_1FreeNode.html  |  308 --
 .../classtvm_1_1tir_1_1FreeNode__coll__graph.svg   |  137 -
 ...classtvm_1_1tir_1_1FreeNode__inherit__graph.svg |   62 -
 .../classtvm_1_1tir_1_1Free__coll__graph.svg       |   56 -
 .../classtvm_1_1tir_1_1Free__inherit__graph.svg    |   56 -
 .../classtvm_1_1tir_1_1GENode__coll__graph.svg     |    2 +-
 .../classtvm_1_1tir_1_1GTNode__coll__graph.svg     |    2 +-
 ...classtvm_1_1tir_1_1IterVarNode__coll__graph.svg |    2 +-
 .../classtvm_1_1tir_1_1LTNode__coll__graph.svg     |    2 +-
 .../classtvm_1_1tir_1_1LetNode__coll__graph.svg    |    2 +-
 .../classtvm_1_1tir_1_1LoadNode__coll__graph.svg   |    2 +-
 .../classtvm_1_1tir_1_1MaxNode__coll__graph.svg    |    2 +-
 .../classtvm_1_1tir_1_1MinNode__coll__graph.svg    |    2 +-
 .../classtvm_1_1tir_1_1ModNode__coll__graph.svg    |    2 +-
 .../classtvm_1_1tir_1_1MulNode__coll__graph.svg    |    2 +-
 .../classtvm_1_1tir_1_1NENode__coll__graph.svg     |    2 +-
 .../classtvm_1_1tir_1_1NotNode__coll__graph.svg    |    2 +-
 .../classtvm_1_1tir_1_1OrNode__coll__graph.svg     |    2 +-
 .../classtvm_1_1tir_1_1PrimFunc-members.html       |    2 +-
 docs/api/doxygen/classtvm_1_1tir_1_1PrimFunc.html  |   10 +-
 .../doxygen/classtvm_1_1tir_1_1PrimFuncNode.html   |    3 +-
 ...lasstvm_1_1tir_1_1PrimFuncNode__coll__graph.svg |    4 +-
 ...tvm_1_1tir_1_1ProducerLoadNode__coll__graph.svg |    2 +-
 .../classtvm_1_1tir_1_1RampNode__coll__graph.svg   |    2 +-
 .../classtvm_1_1tir_1_1ReduceNode__coll__graph.svg |    2 +-
 .../classtvm_1_1tir_1_1SelectNode__coll__graph.svg |    2 +-
 ...classtvm_1_1tir_1_1ShuffleNode__coll__graph.svg |    2 +-
 ...classtvm_1_1tir_1_1SizeVarNode__coll__graph.svg |    2 +-
 docs/api/doxygen/classtvm_1_1tir_1_1Stmt.html      |    2 +-
 ...classtvm_1_1tir_1_1StmtExprMutator-members.html |   13 +-
 .../classtvm_1_1tir_1_1StmtExprMutator.html        |    2 -
 ...stvm_1_1tir_1_1StmtExprMutator__coll__graph.svg |    2 +-
 ...m_1_1tir_1_1StmtExprMutator__inherit__graph.svg |    2 +-
 ...classtvm_1_1tir_1_1StmtExprVisitor-members.html |   13 +-
 .../classtvm_1_1tir_1_1StmtExprVisitor.html        |    6 +-
 ...stvm_1_1tir_1_1StmtExprVisitor__coll__graph.svg |   86 +-
 ...m_1_1tir_1_1StmtExprVisitor__inherit__graph.svg |   86 +-
 ...tmt_01_6n_00_01Args_8_8_8args_08_4-members.html |   17 +-
 ...onst_01Stmt_01_6n_00_01Args_8_8_8args_08_4.html |   38 -
 ...01_6n_00_01Args_8_8_8args_08_4__coll__graph.svg |    2 +-
 .../classtvm_1_1tir_1_1StmtMutator-members.html    |   13 +-
 .../doxygen/classtvm_1_1tir_1_1StmtMutator.html    |   26 -
 ...classtvm_1_1tir_1_1StmtMutator__coll__graph.svg |    2 +-
 ...sstvm_1_1tir_1_1StmtMutator__inherit__graph.svg |    2 +-
 docs/api/doxygen/classtvm_1_1tir_1_1StmtNode.html  |    2 +-
 ...classtvm_1_1tir_1_1StmtNode__inherit__graph.svg |  273 +-
 .../classtvm_1_1tir_1_1StmtVisitor-members.html    |   13 +-
 .../doxygen/classtvm_1_1tir_1_1StmtVisitor.html    |   30 +-
 ...classtvm_1_1tir_1_1StmtVisitor__coll__graph.svg |   44 +-
 ...sstvm_1_1tir_1_1StmtVisitor__inherit__graph.svg |   48 +-
 .../classtvm_1_1tir_1_1Stmt__inherit__graph.svg    |  207 +-
 ...asstvm_1_1tir_1_1StringImmNode__coll__graph.svg |    2 +-
 .../classtvm_1_1tir_1_1SubNode__coll__graph.svg    |    2 +-
 .../classtvm_1_1tir_1_1VarNode__coll__graph.svg    |    2 +-
 ..._1transform_1_1PassContextNode__coll__graph.svg |    4 +-
 docs/api/doxygen/codegen_8h.html                   |    2 +-
 docs/api/doxygen/codegen_8h__incl.svg              | 1679 +++----
 docs/api/doxygen/codegen_8h_source.html            |    2 +-
 docs/api/doxygen/constant__utils_8h.html           |    2 +-
 docs/api/doxygen/constant__utils_8h__incl.svg      | 2298 ++++-----
 docs/api/doxygen/constant__utils_8h_source.html    |    2 +-
 docs/api/doxygen/cublas_8h.html                    |    2 +-
 docs/api/doxygen/cublas_8h__incl.svg               | 2109 ++++----
 docs/api/doxygen/cuda_2dense_8h.html               |    2 +-
 docs/api/doxygen/cuda_2dense_8h__incl.svg          | 2035 ++++----
 docs/api/doxygen/cuda_2dense_8h_source.html        |   13 +-
 docs/api/doxygen/cuda_2injective_8h.html           |    2 +-
 docs/api/doxygen/cuda_2injective_8h__incl.svg      | 1976 ++++----
 docs/api/doxygen/cuda_2injective_8h_source.html    |    9 +-
 docs/api/doxygen/cuda_2normalization_8h.html       |    2 +-
 docs/api/doxygen/cuda_2normalization_8h__incl.svg  | 1972 ++++----
 .../api/doxygen/cuda_2normalization_8h_source.html |    4 +-
 docs/api/doxygen/cuda_2pooling_8h.html             |    2 +-
 docs/api/doxygen/cuda_2pooling_8h__incl.svg        | 1996 ++++----
 docs/api/doxygen/cuda_2pooling_8h_source.html      |   11 +-
 docs/api/doxygen/cuda_2reduction_8h.html           |    2 +-
 docs/api/doxygen/cuda_2reduction_8h__incl.svg      | 1976 ++++----
 docs/api/doxygen/cuda_2reduction_8h_source.html    |   16 +-
 docs/api/doxygen/cuda_2softmax_8h.html             |    2 +-
 docs/api/doxygen/cuda_2softmax_8h__incl.svg        | 1976 ++++----
 docs/api/doxygen/cuda_2softmax_8h_source.html      |    8 +-
 docs/api/doxygen/data__layout_8h.html              |    2 +-
 docs/api/doxygen/data__layout_8h__dep__incl.svg    |    2 +-
 docs/api/doxygen/data__layout_8h__incl.svg         | 1686 ++++---
 docs/api/doxygen/data__layout_8h_source.html       |   10 +-
 docs/api/doxygen/data__type_8h.html                |    2 +-
 docs/api/doxygen/data__type_8h__dep__incl.svg      | 1384 +++---
 docs/api/doxygen/data__type_8h_source.html         |   78 +-
 docs/api/doxygen/dataflow__matcher_8h.html         |    2 +-
 docs/api/doxygen/dataflow__matcher_8h__incl.svg    | 1298 ++---
 docs/api/doxygen/dataflow__matcher_8h_source.html  |    4 +-
 docs/api/doxygen/dataflow__pattern_8h.html         |    2 +-
 docs/api/doxygen/dataflow__pattern_8h__incl.svg    | 1788 +++----
 docs/api/doxygen/dataflow__pattern_8h_source.html  |    6 +-
 .../api/doxygen/dataflow__pattern__functor_8h.html |    2 +-
 .../dataflow__pattern__functor_8h__incl.svg        | 1318 ++---
 docs/api/doxygen/debug_8h.html                     |    2 +-
 docs/api/doxygen/debug_8h__incl.svg                |  956 ++--
 docs/api/doxygen/detail_2broadcast_8h.html         |    2 +-
 docs/api/doxygen/detail_2broadcast_8h__incl.svg    | 2017 ++++----
 docs/api/doxygen/detail_2broadcast_8h_source.html  |    4 +-
 docs/api/doxygen/detail_2extern_8h.html            |    3 +-
 docs/api/doxygen/detail_2extern_8h__incl.svg       | 2076 ++++----
 docs/api/doxygen/detail_2extern_8h_source.html     |   23 +-
 docs/api/doxygen/device__api_8h.html               |    4 +-
 docs/api/doxygen/device__api_8h_source.html        |   24 +-
 docs/api/doxygen/device__copy_8h.html              |    2 +-
 docs/api/doxygen/device__copy_8h__incl.svg         |  928 ++--
 docs/api/doxygen/dilate_8h.html                    |    2 +-
 docs/api/doxygen/dilate_8h__incl.svg               | 2015 ++++----
 docs/api/doxygen/dilate_8h_source.html             |    6 +-
 docs/api/doxygen/dir_000005_000007.html            |    2 +-
 docs/api/doxygen/dir_000005_000018.html            |    2 +-
 docs/api/doxygen/dir_000005_000019.html            |    2 +-
 docs/api/doxygen/dir_000007_000004.html            |    2 +-
 docs/api/doxygen/dir_000007_000008.html            |    2 +-
 docs/api/doxygen/dir_000011_000000.html            |    2 +-
 docs/api/doxygen/dir_000012_000000.html            |    2 +-
 docs/api/doxygen/dir_000018_000004.html            |    2 +-
 docs/api/doxygen/dir_000018_000008.html            |    2 +-
 docs/api/doxygen/dir_000018_000013.html            |    2 +-
 docs/api/doxygen/dir_000018_000017.html            |    2 +-
 docs/api/doxygen/dir_000020_000000.html            |    2 +-
 .../dir_02be2c9d68e402f80df60bd528724ee5_dep.svg   |   12 +-
 .../dir_14640941691eabc830844bef8114fc0c_dep.svg   |    4 +-
 .../dir_194ecda214f05a38134392ac6a69b970_dep.svg   |    4 +-
 .../dir_501163e255a572c2b8f8622055dcc830_dep.svg   |    4 +-
 .../dir_59fae77b9344561cec492d4c84fe16f0_dep.svg   |    4 +-
 .../dir_5da96592f3a7c442b838b075c58254c2.html      |    3 +
 .../dir_5da96592f3a7c442b838b075c58254c2_dep.svg   |   24 +-
 .../dir_63946bee875c6d52bce55e72a67a86ad.html      |    4 +-
 .../dir_63946bee875c6d52bce55e72a67a86ad_dep.svg   |   20 +-
 .../dir_72c2f11201cd7636dc7624de0754daa5.html      |    6 +
 .../dir_72c2f11201cd7636dc7624de0754daa5_dep.svg   |    8 +-
 .../dir_82fbc0b750acd2d16bf657eba2735040_dep.svg   |    4 +-
 .../dir_85f3d6180da4edc5eb489febd20e807a_dep.svg   |    4 +-
 .../dir_9028d5d964ed0471973ff1d6dc49c294_dep.svg   |    4 +-
 .../dir_919d148e3bc0d2ada10274b170555243_dep.svg   |    4 +-
 .../dir_9a72de697ed8769045f2ae88dc0db9c6_dep.svg   |    4 +-
 .../dir_a99f4dc995763900c0e382b7dd5d0038_dep.svg   |    4 +-
 .../dir_b4c7d8e826c599ba55146c099a14beb5_dep.svg   |   24 +-
 .../dir_cf1b2a4a187708c24d9394e639e6417e_dep.svg   |    4 +-
 .../dir_e79ffd3285e304ad4c501fa62028ed74_dep.svg   |    4 +-
 .../dir_f97d855a3173728370e632aa77170e34_dep.svg   |    4 +-
 docs/api/doxygen/driver__api_8h.html               |    2 +-
 docs/api/doxygen/driver__api_8h__incl.svg          | 1572 +++---
 docs/api/doxygen/elemwise_8h.html                  |    3 +-
 docs/api/doxygen/elemwise_8h__incl.svg             | 2099 ++++----
 docs/api/doxygen/elemwise_8h_source.html           |  104 +-
 docs/api/doxygen/env__func_8h.html                 |    4 +-
 docs/api/doxygen/env__func_8h__dep__incl.svg       |  847 +++-
 docs/api/doxygen/env__func_8h__incl.svg            |  654 ++-
 docs/api/doxygen/env__func_8h_source.html          |    2 +-
 docs/api/doxygen/error_8h.html                     |    4 +-
 docs/api/doxygen/error_8h__dep__incl.svg           |  729 +--
 docs/api/doxygen/error_8h__incl.svg                | 1090 +++--
 docs/api/doxygen/feature_8h.html                   |    2 +-
 docs/api/doxygen/feature_8h__incl.svg              | 1341 +++--
 docs/api/doxygen/files.html                        |   25 +-
 docs/api/doxygen/flatten_8h.html                   |    2 +-
 docs/api/doxygen/flatten_8h__incl.svg              | 2023 ++++----
 docs/api/doxygen/functions.html                    |    5 +-
 docs/api/doxygen/functions_0x7e.html               |    6 +
 docs/api/doxygen/functions_a.html                  |   20 +-
 docs/api/doxygen/functions_b.html                  |   22 +-
 docs/api/doxygen/functions_c.html                  |   63 +-
 docs/api/doxygen/functions_d.html                  |   25 +-
 docs/api/doxygen/functions_e.html                  |   30 +-
 docs/api/doxygen/functions_enum.html               |    3 -
 docs/api/doxygen/functions_eval.html               |   19 +-
 docs/api/doxygen/functions_f.html                  |   20 +-
 docs/api/doxygen/functions_func.html               |    1 -
 docs/api/doxygen/functions_func_0x7e.html          |    7 +-
 docs/api/doxygen/functions_func_a.html             |   18 +-
 docs/api/doxygen/functions_func_b.html             |   12 +-
 docs/api/doxygen/functions_func_c.html             |   50 +-
 docs/api/doxygen/functions_func_d.html             |    1 -
 docs/api/doxygen/functions_func_e.html             |   23 +-
 docs/api/doxygen/functions_func_f.html             |   20 +-
 docs/api/doxygen/functions_func_g.html             |   15 +-
 docs/api/doxygen/functions_func_h.html             |    1 -
 docs/api/doxygen/functions_func_i.html             |   49 +-
 docs/api/doxygen/functions_func_j.html             |    1 -
 docs/api/doxygen/functions_func_k.html             |  140 -
 docs/api/doxygen/functions_func_l.html             |   10 +-
 docs/api/doxygen/functions_func_m.html             |   10 +-
 docs/api/doxygen/functions_func_n.html             |    5 +-
 docs/api/doxygen/functions_func_o.html             |   56 +-
 docs/api/doxygen/functions_func_p.html             |   14 +-
 docs/api/doxygen/functions_func_r.html             |    7 +-
 docs/api/doxygen/functions_func_s.html             |   26 +-
 docs/api/doxygen/functions_func_t.html             |   20 +-
 docs/api/doxygen/functions_func_u.html             |    9 +-
 docs/api/doxygen/functions_func_v.html             |   19 +-
 docs/api/doxygen/functions_func_w.html             |    1 -
 docs/api/doxygen/functions_g.html                  |   12 +
 docs/api/doxygen/functions_i.html                  |   70 +-
 docs/api/doxygen/functions_k.html                  |   24 +-
 docs/api/doxygen/functions_l.html                  |   15 +-
 docs/api/doxygen/functions_m.html                  |   28 +-
 docs/api/doxygen/functions_n.html                  |    8 +-
 docs/api/doxygen/functions_o.html                  |   52 +-
 docs/api/doxygen/functions_p.html                  |   29 +-
 docs/api/doxygen/functions_r.html                  |   21 +-
 docs/api/doxygen/functions_rela.html               |   48 +-
 docs/api/doxygen/functions_s.html                  |   46 +-
 docs/api/doxygen/functions_t.html                  |   60 +-
 docs/api/doxygen/functions_type.html               |   57 +-
 docs/api/doxygen/functions_u.html                  |    8 +-
 docs/api/doxygen/functions_v.html                  |   41 +-
 docs/api/doxygen/functions_vars.html               |    5 +-
 docs/api/doxygen/functions_vars_a.html             |    3 +-
 docs/api/doxygen/functions_vars_b.html             |   13 -
 docs/api/doxygen/functions_vars_c.html             |    3 -
 docs/api/doxygen/functions_vars_d.html             |   14 +-
 docs/api/doxygen/functions_vars_f.html             |    3 +
 docs/api/doxygen/functions_vars_i.html             |   18 +-
 docs/api/doxygen/functions_vars_k.html             |   10 +-
 docs/api/doxygen/functions_vars_l.html             |    6 -
 docs/api/doxygen/functions_vars_m.html             |    5 +-
 docs/api/doxygen/functions_vars_n.html             |    4 +-
 docs/api/doxygen/functions_vars_o.html             |    4 +-
 docs/api/doxygen/functions_vars_p.html             |    6 -
 docs/api/doxygen/functions_vars_r.html             |    3 -
 docs/api/doxygen/functions_vars_s.html             |   18 +-
 docs/api/doxygen/functions_vars_t.html             |   22 +-
 docs/api/doxygen/functions_vars_v.html             |    4 +-
 docs/api/doxygen/functor_8h.html                   |    2 +-
 docs/api/doxygen/functor_8h__dep__incl.svg         | 1206 ++---
 docs/api/doxygen/fuse_8h.html                      |    2 +-
 docs/api/doxygen/fuse_8h__incl.svg                 | 2017 ++++----
 docs/api/doxygen/generic_2default_8h.html          |    2 +-
 docs/api/doxygen/generic_2default_8h__incl.svg     | 1976 ++++----
 docs/api/doxygen/generic_2default_8h_source.html   |    6 +-
 docs/api/doxygen/generic_2extern_8h.html           |    2 +-
 docs/api/doxygen/generic_2extern_8h__incl.svg      | 2018 ++++----
 docs/api/doxygen/generic_2extern_8h_source.html    |    6 +-
 docs/api/doxygen/generic_2injective_8h.html        |    2 +-
 docs/api/doxygen/generic_2injective_8h__incl.svg   | 1976 ++++----
 docs/api/doxygen/generic_2injective_8h_source.html |    6 +-
 docs/api/doxygen/generic__func_8h.html             |    2 +-
 docs/api/doxygen/generic__func_8h__dep__incl.svg   |    2 +-
 docs/api/doxygen/generic__func_8h__incl.svg        | 1414 +++---
 docs/api/doxygen/generic__func_8h_source.html      |    2 +-
 docs/api/doxygen/globals.html                      |   31 +-
 docs/api/doxygen/globals_defs.html                 |   38 +-
 docs/api/doxygen/hierarchy.html                    | 1494 +++---
 docs/api/doxygen/image_8h.html                     |    2 +-
 docs/api/doxygen/image_8h__incl.svg                | 1112 +++--
 docs/api/doxygen/image_8h_source.html              |    2 +-
 .../doxygen/include_2tvm_2ir_2transform_8h.html    |    6 +-
 .../include_2tvm_2ir_2transform_8h__dep__incl.svg  |  681 +--
 .../include_2tvm_2ir_2transform_8h__incl.svg       | 1150 +++--
 .../include_2tvm_2ir_2transform_8h_source.html     |   12 +-
 .../doxygen/include_2tvm_2relay_2attrs_2nn_8h.html |    5 +-
 .../include_2tvm_2relay_2attrs_2nn_8h__incl.svg    | 1110 +++--
 .../include_2tvm_2relay_2attrs_2nn_8h_source.html  |  506 +-
 .../include_2tvm_2relay_2attrs_2transform_8h.html  |    5 +-
 ...lude_2tvm_2relay_2attrs_2transform_8h__incl.svg | 1390 +++---
 ...de_2tvm_2relay_2attrs_2transform_8h_source.html |  125 +-
 .../include_2tvm_2relay_2qnn_2transform_8h.html    |    2 +-
 ...nclude_2tvm_2relay_2qnn_2transform_8h__incl.svg | 1244 +++--
 .../doxygen/include_2tvm_2relay_2transform_8h.html |    7 +-
 .../include_2tvm_2relay_2transform_8h__incl.svg    | 1424 +++---
 .../include_2tvm_2relay_2transform_8h_source.html  |    9 +-
 .../doxygen/include_2tvm_2tir_2transform_8h.html   |    5 +-
 .../include_2tvm_2tir_2transform_8h__incl.svg      | 1314 +++--
 .../include_2tvm_2tir_2transform_8h_source.html    |    5 +-
 docs/api/doxygen/inherit_graph_10.svg              |   38 +-
 docs/api/doxygen/inherit_graph_100.svg             |   17 +-
 docs/api/doxygen/inherit_graph_101.svg             |   14 +-
 docs/api/doxygen/inherit_graph_102.svg             |   14 +-
 docs/api/doxygen/inherit_graph_103.svg             |   15 +-
 docs/api/doxygen/inherit_graph_104.svg             |   16 +-
 docs/api/doxygen/inherit_graph_105.svg             |   54 +-
 docs/api/doxygen/inherit_graph_106.svg             |   19 +-
 docs/api/doxygen/inherit_graph_107.svg             |   17 +-
 docs/api/doxygen/inherit_graph_108.svg             |   14 +-
 docs/api/doxygen/inherit_graph_109.svg             |   19 +-
 docs/api/doxygen/inherit_graph_11.svg              |   25 +-
 docs/api/doxygen/inherit_graph_110.svg             |   17 +-
 docs/api/doxygen/inherit_graph_111.svg             |   12 +-
 docs/api/doxygen/inherit_graph_112.svg             |   54 +-
 docs/api/doxygen/inherit_graph_113.svg             |   18 +-
 docs/api/doxygen/inherit_graph_114.svg             |   15 +-
 docs/api/doxygen/inherit_graph_115.svg             |   14 +-
 docs/api/doxygen/inherit_graph_116.svg             |   18 +-
 docs/api/doxygen/inherit_graph_117.svg             |   18 +-
 docs/api/doxygen/inherit_graph_118.svg             |   12 +-
 docs/api/doxygen/inherit_graph_119.svg             |   12 +-
 docs/api/doxygen/inherit_graph_12.svg              |   23 +-
 docs/api/doxygen/inherit_graph_120.svg             |   12 +-
 docs/api/doxygen/inherit_graph_121.svg             |   15 +-
 docs/api/doxygen/inherit_graph_122.svg             |   17 +-
 docs/api/doxygen/inherit_graph_123.svg             |   12 +-
 docs/api/doxygen/inherit_graph_124.svg             |   14 +-
 docs/api/doxygen/inherit_graph_125.svg             |   14 +-
 docs/api/doxygen/inherit_graph_126.svg             |   15 +-
 docs/api/doxygen/inherit_graph_127.svg             |   68 +-
 docs/api/doxygen/inherit_graph_128.svg             |   68 +-
 docs/api/doxygen/inherit_graph_129.svg             |   18 +-
 docs/api/doxygen/inherit_graph_130.svg             |   14 +-
 docs/api/doxygen/inherit_graph_131.svg             |   16 +-
 docs/api/doxygen/inherit_graph_132.svg             |   15 +-
 docs/api/doxygen/inherit_graph_133.svg             |   27 +-
 docs/api/doxygen/inherit_graph_134.svg             |   27 +-
 docs/api/doxygen/inherit_graph_135.svg             |   15 +-
 docs/api/doxygen/inherit_graph_136.svg             |   68 +-
 docs/api/doxygen/inherit_graph_137.svg             |   68 +-
 docs/api/doxygen/inherit_graph_138.svg             |   18 +-
 ...inherit_graph_130.svg => inherit_graph_139.svg} |    0
 ...inherit_graph_131.svg => inherit_graph_140.svg} |    0
 ...inherit_graph_132.svg => inherit_graph_141.svg} |    0
 ...inherit_graph_133.svg => inherit_graph_142.svg} |    0
 ...inherit_graph_134.svg => inherit_graph_143.svg} |    0
 ...inherit_graph_135.svg => inherit_graph_144.svg} |    0
 ...inherit_graph_136.svg => inherit_graph_145.svg} |    0
 ...inherit_graph_137.svg => inherit_graph_146.svg} |    0
 ...inherit_graph_138.svg => inherit_graph_147.svg} |    0
 docs/api/doxygen/inherit_graph_15.svg              |   15 +-
 docs/api/doxygen/inherit_graph_16.svg              |   40 +-
 docs/api/doxygen/inherit_graph_17.svg              |   37 +-
 docs/api/doxygen/inherit_graph_18.svg              |   25 +-
 docs/api/doxygen/inherit_graph_19.svg              |   12 +-
 docs/api/doxygen/inherit_graph_20.svg              |   12 +-
 docs/api/doxygen/inherit_graph_21.svg              |   12 +-
 docs/api/doxygen/inherit_graph_22.svg              |   15 +-
 docs/api/doxygen/inherit_graph_23.svg              |   14 +-
 docs/api/doxygen/inherit_graph_24.svg              |   15 +-
 docs/api/doxygen/inherit_graph_25.svg              |   12 +-
 docs/api/doxygen/inherit_graph_26.svg              |   15 +-
 docs/api/doxygen/inherit_graph_27.svg              |   15 +-
 docs/api/doxygen/inherit_graph_28.svg              |   15 +-
 docs/api/doxygen/inherit_graph_29.svg              |   14 +-
 docs/api/doxygen/inherit_graph_30.svg              |    4 +-
 docs/api/doxygen/inherit_graph_31.svg              |   14 +-
 docs/api/doxygen/inherit_graph_32.svg              |    4 +-
 docs/api/doxygen/inherit_graph_33.svg              |   54 +-
 docs/api/doxygen/inherit_graph_34.svg              |   54 +-
 docs/api/doxygen/inherit_graph_35.svg              |    4 +-
 docs/api/doxygen/inherit_graph_36.svg              |   14 +-
 docs/api/doxygen/inherit_graph_37.svg              |   27 +-
 docs/api/doxygen/inherit_graph_38.svg              |   27 +-
 docs/api/doxygen/inherit_graph_39.svg              |   17 +-
 docs/api/doxygen/inherit_graph_40.svg              |   14 +-
 docs/api/doxygen/inherit_graph_41.svg              |   17 +-
 docs/api/doxygen/inherit_graph_42.svg              |   17 +-
 docs/api/doxygen/inherit_graph_43.svg              |   14 +-
 docs/api/doxygen/inherit_graph_44.svg              |   14 +-
 docs/api/doxygen/inherit_graph_45.svg              |    4 +-
 docs/api/doxygen/inherit_graph_46.svg              |    4 +-
 docs/api/doxygen/inherit_graph_47.svg              |   15 +-
 docs/api/doxygen/inherit_graph_48.svg              |    4 +-
 docs/api/doxygen/inherit_graph_49.svg              |    4 +-
 docs/api/doxygen/inherit_graph_5.svg               | 5018 +------------------
 docs/api/doxygen/inherit_graph_50.svg              |   12 +-
 docs/api/doxygen/inherit_graph_51.svg              |   15 +-
 docs/api/doxygen/inherit_graph_52.svg              |   14 +-
 docs/api/doxygen/inherit_graph_53.svg              | 2319 +--------
 docs/api/doxygen/inherit_graph_54.svg              |   12 +-
 docs/api/doxygen/inherit_graph_55.svg              |   16 +-
 docs/api/doxygen/inherit_graph_56.svg              |   15 +-
 docs/api/doxygen/inherit_graph_57.svg              | 2317 ++++++++-
 docs/api/doxygen/inherit_graph_58.svg              |   12 +-
 docs/api/doxygen/inherit_graph_59.svg              |   12 +-
 docs/api/doxygen/inherit_graph_6.svg               |   12 +-
 docs/api/doxygen/inherit_graph_60.svg              |   17 +-
 docs/api/doxygen/inherit_graph_61.svg              |   15 +-
 docs/api/doxygen/inherit_graph_62.svg              |   15 +-
 docs/api/doxygen/inherit_graph_63.svg              |   27 +-
 docs/api/doxygen/inherit_graph_64.svg              |   40 +-
 docs/api/doxygen/inherit_graph_65.svg              |   14 +-
 docs/api/doxygen/inherit_graph_66.svg              |   27 +-
 docs/api/doxygen/inherit_graph_67.svg              |   32 +-
 docs/api/doxygen/inherit_graph_68.svg              |   15 +-
 docs/api/doxygen/inherit_graph_69.svg              |   15 +-
 docs/api/doxygen/inherit_graph_7.svg               |   12 +-
 docs/api/doxygen/inherit_graph_70.svg              |   40 +-
 docs/api/doxygen/inherit_graph_71.svg              |   28 +-
 docs/api/doxygen/inherit_graph_72.svg              |   15 +-
 docs/api/doxygen/inherit_graph_73.svg              |   25 +-
 docs/api/doxygen/inherit_graph_74.svg              |   28 +-
 docs/api/doxygen/inherit_graph_75.svg              |   14 +-
 docs/api/doxygen/inherit_graph_76.svg              |   28 +-
 docs/api/doxygen/inherit_graph_77.svg              |   12 +-
 docs/api/doxygen/inherit_graph_78.svg              |   14 +-
 docs/api/doxygen/inherit_graph_79.svg              |   15 +-
 docs/api/doxygen/inherit_graph_8.svg               |   12 +-
 docs/api/doxygen/inherit_graph_80.svg              |   12 +-
 docs/api/doxygen/inherit_graph_81.svg              |   14 +-
 docs/api/doxygen/inherit_graph_82.svg              | 5116 +++++++++++++++++++-
 docs/api/doxygen/inherit_graph_83.svg              |  203 +-
 docs/api/doxygen/inherit_graph_84.svg              |   12 +-
 docs/api/doxygen/inherit_graph_85.svg              |   14 +-
 docs/api/doxygen/inherit_graph_86.svg              |   28 +-
 docs/api/doxygen/inherit_graph_87.svg              |  202 +-
 docs/api/doxygen/inherit_graph_88.svg              |  132 +-
 docs/api/doxygen/inherit_graph_89.svg              |   15 +-
 docs/api/doxygen/inherit_graph_9.svg               |   37 +-
 docs/api/doxygen/inherit_graph_90.svg              |   15 +-
 docs/api/doxygen/inherit_graph_91.svg              |   14 +-
 docs/api/doxygen/inherit_graph_92.svg              |   12 +-
 docs/api/doxygen/inherit_graph_93.svg              |   16 +-
 docs/api/doxygen/inherit_graph_94.svg              |  133 +-
 docs/api/doxygen/inherit_graph_95.svg              |   14 +-
 docs/api/doxygen/inherit_graph_96.svg              |   14 +-
 docs/api/doxygen/inherit_graph_97.svg              |   17 +-
 docs/api/doxygen/inherit_graph_98.svg              |   12 +-
 docs/api/doxygen/inherit_graph_99.svg              |   17 +-
 docs/api/doxygen/inherits.html                     |  268 +-
 docs/api/doxygen/int__set_8h.html                  |    2 +-
 docs/api/doxygen/int__set_8h__dep__incl.svg        |    2 +-
 docs/api/doxygen/int__set_8h__incl.svg             | 1008 ++--
 docs/api/doxygen/int__set_8h_source.html           |   10 +-
 docs/api/doxygen/int__solver_8h.html               |    2 +-
 docs/api/doxygen/int__solver_8h__incl.svg          |  984 ++--
 docs/api/doxygen/int__solver_8h_source.html        |    4 +-
 docs/api/doxygen/interpreter_8h.html               |    2 +-
 docs/api/doxygen/interpreter_8h__incl.svg          | 1960 ++++----
 docs/api/doxygen/interpreter_8h_source.html        |    6 +-
 docs/api/doxygen/ir_2adt_8h.html                   |    4 +-
 docs/api/doxygen/ir_2adt_8h__dep__incl.svg         | 1115 ++---
 docs/api/doxygen/ir_2adt_8h__incl.svg              |  886 ++--
 docs/api/doxygen/ir_2adt_8h_source.html            |    4 +-
 docs/api/doxygen/ir_2attrs_8h.html                 |    6 +-
 docs/api/doxygen/ir_2attrs_8h__dep__incl.svg       | 1239 +++--
 docs/api/doxygen/ir_2attrs_8h__incl.svg            |  908 ++--
 docs/api/doxygen/ir_2attrs_8h_source.html          |   16 +-
 docs/api/doxygen/ir_2expr_8h.html                  |    8 +-
 docs/api/doxygen/ir_2expr_8h__dep__incl.svg        |  762 +--
 docs/api/doxygen/ir_2expr_8h__incl.svg             |  832 ++--
 docs/api/doxygen/ir_2expr_8h_source.html           |   19 +-
 docs/api/doxygen/ir_2function_8h.html              |    4 +-
 docs/api/doxygen/ir_2function_8h__dep__incl.svg    | 1072 ++--
 docs/api/doxygen/ir_2function_8h__incl.svg         |  944 ++--
 docs/api/doxygen/ir_2function_8h_source.html       |    6 +-
 docs/api/doxygen/ir_2module_8h.html                |    4 +-
 docs/api/doxygen/ir_2module_8h__dep__incl.svg      | 1108 +++--
 docs/api/doxygen/ir_2module_8h__incl.svg           | 1044 ++--
 docs/api/doxygen/ir_2module_8h_source.html         |    6 +-
 docs/api/doxygen/ir_2op_8h.html                    |   79 +-
 docs/api/doxygen/ir_2op_8h__dep__incl.svg          |  795 ++-
 docs/api/doxygen/ir_2op_8h__incl.svg               | 1197 +++--
 docs/api/doxygen/ir_2op_8h_source.html             |   26 +-
 docs/api/doxygen/ir_2type_8h.html                  |    4 +-
 docs/api/doxygen/ir_2type_8h__dep__incl.svg        | 1597 +++---
 docs/api/doxygen/ir_2type_8h__incl.svg             |  794 ++-
 docs/api/doxygen/ir_2type_8h_source.html           |    6 +-
 docs/api/doxygen/local__response__norm_8h.html     |    2 +-
 .../api/doxygen/local__response__norm_8h__incl.svg | 2048 ++++----
 docs/api/doxygen/mapping_8h.html                   |    2 +-
 docs/api/doxygen/mapping_8h__incl.svg              | 2048 ++++----
 docs/api/doxygen/namespacemembers.html             |    6 +
 docs/api/doxygen/namespacemembers_b.html           |   13 +-
 docs/api/doxygen/namespacemembers_c.html           |   26 +-
 docs/api/doxygen/namespacemembers_enum.html        |    5 +-
 docs/api/doxygen/namespacemembers_eval.html        |   34 +-
 docs/api/doxygen/namespacemembers_f.html           |    5 +-
 docs/api/doxygen/namespacemembers_func.html        |    6 +
 docs/api/doxygen/namespacemembers_func_b.html      |   15 +-
 docs/api/doxygen/namespacemembers_func_c.html      |   21 +
 docs/api/doxygen/namespacemembers_func_f.html      |    7 +-
 docs/api/doxygen/namespacemembers_func_h.html      |    6 +-
 docs/api/doxygen/namespacemembers_func_i.html      |   15 +-
 docs/api/doxygen/namespacemembers_func_l.html      |   19 +-
 docs/api/doxygen/namespacemembers_func_m.html      |   11 +-
 docs/api/doxygen/namespacemembers_func_n.html      |    5 +-
 docs/api/doxygen/namespacemembers_func_p.html      |    4 +
 docs/api/doxygen/namespacemembers_func_r.html      |    3 +-
 docs/api/doxygen/namespacemembers_func_s.html      |   11 +-
 docs/api/doxygen/namespacemembers_func_t.html      |   88 +-
 docs/api/doxygen/namespacemembers_func_v.html      |    9 +
 docs/api/doxygen/namespacemembers_h.html           |    6 +-
 docs/api/doxygen/namespacemembers_i.html           |   15 +-
 docs/api/doxygen/namespacemembers_k.html           |   36 +-
 docs/api/doxygen/namespacemembers_l.html           |   15 +-
 docs/api/doxygen/namespacemembers_m.html           |   11 +-
 docs/api/doxygen/namespacemembers_n.html           |    3 +
 docs/api/doxygen/namespacemembers_p.html           |    6 +-
 docs/api/doxygen/namespacemembers_r.html           |    3 +-
 docs/api/doxygen/namespacemembers_s.html           |   11 +-
 docs/api/doxygen/namespacemembers_t.html           |  134 +-
 docs/api/doxygen/namespacemembers_type.html        |    9 +
 docs/api/doxygen/namespacemembers_v.html           |    9 +
 docs/api/doxygen/namespacemembers_vars.html        |   93 -
 docs/api/doxygen/namespaces.html                   |    2 +-
 docs/api/doxygen/namespacetopi.html                |  265 +-
 docs/api/doxygen/namespacetvm.html                 |  443 +-
 docs/api/doxygen/namespacetvm_1_1detail.html       |   12 +
 docs/api/doxygen/namespacetvm_1_1relay.html        |    6 +
 .../namespacetvm_1_1relay_1_1transform.html        |   28 +
 docs/api/doxygen/namespacetvm_1_1runtime.html      |   46 +-
 .../api/doxygen/namespacetvm_1_1runtime_1_1vm.html |    5 +-
 docs/api/doxygen/namespacetvm_1_1tir.html          |  276 +-
 .../doxygen/namespacetvm_1_1tir_1_1builtin.html    | 1241 +++++
 .../doxygen/namespacetvm_1_1tir_1_1intrinsic.html  |  737 ---
 .../doxygen/namespacetvm_1_1tir_1_1transform.html  |   21 +
 docs/api/doxygen/ndarray_8h.html                   |    2 +-
 docs/api/doxygen/ndarray_8h__dep__incl.svg         | 1555 +++---
 docs/api/doxygen/ndarray_8h_source.html            |   20 +-
 docs/api/doxygen/nn_2bnn_8h.html                   |    2 +-
 docs/api/doxygen/nn_2bnn_8h__incl.svg              | 2023 ++++----
 docs/api/doxygen/nn_2bnn_8h_source.html            |   10 +-
 docs/api/doxygen/nn_2dense_8h.html                 |    2 +-
 docs/api/doxygen/nn_2dense_8h__incl.svg            | 2048 ++++----
 docs/api/doxygen/nn_2pooling_8h.html               |    2 +-
 docs/api/doxygen/nn_2pooling_8h__incl.svg          | 2183 ++++-----
 docs/api/doxygen/nn_2pooling_8h_source.html        |    8 +-
 docs/api/doxygen/nn_2softmax_8h.html               |    2 +-
 docs/api/doxygen/nn_2softmax_8h__incl.svg          | 2050 ++++----
 docs/api/doxygen/nn_2softmax_8h_source.html        |    8 +-
 docs/api/doxygen/node_2container_8h.html           |  126 +-
 docs/api/doxygen/node_2container_8h__dep__incl.svg | 1472 +++---
 docs/api/doxygen/node_2container_8h__incl.svg      |  478 +-
 docs/api/doxygen/node_2container_8h_source.html    |  158 +-
 docs/api/doxygen/node_8h.html                      |    4 +-
 docs/api/doxygen/node_8h__dep__incl.svg            | 1586 +++---
 docs/api/doxygen/node_8h__incl.svg                 |  736 ++-
 docs/api/doxygen/node_8h_source.html               |    6 +-
 docs/api/doxygen/object_8h.html                    |   18 +-
 docs/api/doxygen/object_8h__dep__incl.svg          | 1621 ++++---
 docs/api/doxygen/object_8h_source.html             |  129 +-
 docs/api/doxygen/op__attr__types_8h__incl.svg      | 1491 ------
 docs/api/doxygen/op__attr__types_8h_source.html    |  132 -
 docs/api/doxygen/op__strategy_8h.html              |    4 +-
 docs/api/doxygen/op__strategy_8h__incl.svg         | 2238 ++++-----
 docs/api/doxygen/op__strategy_8h_source.html       |   12 +-
 docs/api/doxygen/operation_8h.html                 |    2 +-
 docs/api/doxygen/operation_8h__incl.svg            | 2035 ++++----
 docs/api/doxygen/operation_8h_source.html          |   16 +-
 docs/api/doxygen/packed__func_8h.html              |    2 +-
 docs/api/doxygen/packed__func_8h__dep__incl.svg    | 1351 +++---
 docs/api/doxygen/packed__func_8h_source.html       |   20 +-
 docs/api/doxygen/pad__utils_8h.html                |    2 +-
 docs/api/doxygen/pad__utils_8h__incl.svg           | 2045 ++++----
 docs/api/doxygen/pattern_8h.html                   |    2 +-
 docs/api/doxygen/pattern_8h__incl.svg              | 1012 ++--
 docs/api/doxygen/pattern__functor_8h.html          |    2 +-
 docs/api/doxygen/pattern__functor_8h__incl.svg     | 1731 ++++---
 docs/api/doxygen/ravel__unravel_8h.html            |    2 +-
 docs/api/doxygen/ravel__unravel_8h__incl.svg       | 2029 ++++----
 docs/api/doxygen/ravel__unravel_8h_source.html     |    2 +-
 docs/api/doxygen/reduce_8h.html                    |    2 +-
 docs/api/doxygen/reduce_8h__incl.svg               |  928 ++--
 docs/api/doxygen/reduction_8h.html                 |    2 +-
 docs/api/doxygen/reduction_8h__incl.svg            | 2041 ++++----
 docs/api/doxygen/reduction_8h_source.html          |   20 +-
 docs/api/doxygen/reflection_8h.html                |    8 +-
 docs/api/doxygen/reflection_8h__dep__incl.svg      | 1598 +++---
 docs/api/doxygen/reflection_8h__incl.svg           |  632 ++-
 docs/api/doxygen/reflection_8h_source.html         |    6 +-
 docs/api/doxygen/registry_8h.html                  |   73 +-
 docs/api/doxygen/registry_8h__dep__incl.svg        |  856 +++-
 docs/api/doxygen/registry_8h_source.html           |    2 +-
 docs/api/doxygen/relay_2adt_8h.html                |    2 +-
 docs/api/doxygen/relay_2adt_8h__incl.svg           | 1890 ++++----
 docs/api/doxygen/relay_2adt_8h_source.html         |    6 +-
 docs/api/doxygen/relay_2analysis_8h.html           |    2 +-
 docs/api/doxygen/relay_2analysis_8h__incl.svg      | 1465 +++---
 docs/api/doxygen/relay_2attrs_2memory_8h.html      |    2 +-
 docs/api/doxygen/relay_2attrs_2memory_8h__incl.svg | 1799 ++++---
 .../doxygen/relay_2attrs_2memory_8h_source.html    |    2 +-
 docs/api/doxygen/relay_2expr_8h.html               |    2 +-
 docs/api/doxygen/relay_2expr_8h__dep__incl.svg     |    2 +-
 docs/api/doxygen/relay_2expr_8h__incl.svg          | 1530 +++---
 docs/api/doxygen/relay_2expr_8h_source.html        |    6 +-
 docs/api/doxygen/relay_2expr__functor_8h.html      |    2 +-
 docs/api/doxygen/relay_2expr__functor_8h__incl.svg | 1755 ++++---
 docs/api/doxygen/relay_2function_8h.html           |    2 +-
 docs/api/doxygen/relay_2function_8h__incl.svg      | 1389 +++---
 docs/api/doxygen/relay_2function_8h_source.html    |    4 +-
 docs/api/doxygen/relay_2op_8h.html                 |    2 +-
 docs/api/doxygen/relay_2op_8h__incl.svg            | 1796 +++----
 ...ypes_8h.html => relay_2op__attr__types_8h.html} |    6 +-
 ...vg => relay_2op__attr__types_8h__dep__incl.svg} |    0
 .../doxygen/relay_2op__attr__types_8h__incl.svg    | 1510 ++++++
 .../doxygen/relay_2op__attr__types_8h_source.html  |  132 +
 docs/api/doxygen/relay_2qnn_2attrs_8h.html         |    2 +-
 docs/api/doxygen/relay_2qnn_2attrs_8h__incl.svg    |  928 ++--
 docs/api/doxygen/relay_2type_8h.html               |    2 +-
 docs/api/doxygen/relay_2type_8h__dep__incl.svg     |    2 +-
 docs/api/doxygen/relay_2type_8h__incl.svg          | 1474 +++---
 docs/api/doxygen/relay_2type_8h_source.html        |    4 +-
 docs/api/doxygen/reorg_8h.html                     |    2 +-
 docs/api/doxygen/reorg_8h__incl.svg                | 2145 ++++----
 docs/api/doxygen/repr__printer_8h.html             |    2 +-
 docs/api/doxygen/repr__printer_8h__dep__incl.svg   | 1541 +++---
 docs/api/doxygen/repr__printer_8h_source.html      |    4 +-
 docs/api/doxygen/rocblas_8h.html                   |    2 +-
 docs/api/doxygen/rocblas_8h__incl.svg              | 2060 ++++----
 docs/api/doxygen/rocm_2dense_8h.html               |    2 +-
 docs/api/doxygen/rocm_2dense_8h__incl.svg          | 1542 +++---
 docs/api/doxygen/rocm_2dense_8h_source.html        |   11 +-
 docs/api/doxygen/rocm_2injective_8h.html           |    2 +-
 docs/api/doxygen/rocm_2injective_8h__incl.svg      | 1982 ++++----
 docs/api/doxygen/rocm_2injective_8h_source.html    |    4 +-
 docs/api/doxygen/rocm_2normalization_8h.html       |    2 +-
 docs/api/doxygen/rocm_2normalization_8h__incl.svg  | 1870 +++----
 .../api/doxygen/rocm_2normalization_8h_source.html |    2 +-
 docs/api/doxygen/rocm_2pooling_8h.html             |    2 +-
 docs/api/doxygen/rocm_2pooling_8h__incl.svg        | 2028 ++++----
 docs/api/doxygen/rocm_2pooling_8h_source.html      |    4 +-
 docs/api/doxygen/rocm_2reduction_8h.html           |    2 +-
 docs/api/doxygen/rocm_2reduction_8h__incl.svg      | 1982 ++++----
 docs/api/doxygen/rocm_2reduction_8h_source.html    |    4 +-
 docs/api/doxygen/rocm_2softmax_8h.html             |    2 +-
 docs/api/doxygen/rocm_2softmax_8h__incl.svg        | 1982 ++++----
 docs/api/doxygen/rocm_2softmax_8h_source.html      |    4 +-
 docs/api/doxygen/runtime_2container_8h.html        |   11 +-
 .../doxygen/runtime_2container_8h__dep__incl.svg   | 1654 +++----
 docs/api/doxygen/runtime_2container_8h__incl.svg   |  373 +-
 docs/api/doxygen/runtime_2container_8h_source.html |  323 +-
 docs/api/doxygen/runtime_2memory_8h.html           |    2 +-
 docs/api/doxygen/runtime_2memory_8h__dep__incl.svg | 1712 ++++---
 docs/api/doxygen/runtime_2memory_8h_source.html    |   10 +-
 docs/api/doxygen/runtime_2module_8h.html           |    2 +-
 docs/api/doxygen/runtime_2module_8h__dep__incl.svg | 1349 +++---
 docs/api/doxygen/runtime_2module_8h_source.html    |   10 +-
 docs/api/doxygen/schedule_8h.html                  |    2 +-
 docs/api/doxygen/schedule_8h__dep__incl.svg        |    2 +-
 docs/api/doxygen/schedule_8h__incl.svg             | 1700 +++----
 docs/api/doxygen/schedule_8h_source.html           |    8 +-
 docs/api/doxygen/schedule__pass_8h.html            |    2 +-
 docs/api/doxygen/schedule__pass_8h__incl.svg       | 1835 +++----
 docs/api/doxygen/search/all_0.js                   |    6 +-
 docs/api/doxygen/search/all_1.js                   |   35 +-
 docs/api/doxygen/search/all_10.js                  |   22 +-
 docs/api/doxygen/search/all_12.js                  |   15 +-
 docs/api/doxygen/search/all_13.js                  |   46 +-
 docs/api/doxygen/search/all_14.js                  |  155 +-
 docs/api/doxygen/search/all_15.js                  |    3 +-
 docs/api/doxygen/search/all_16.js                  |   27 +-
 docs/api/doxygen/search/all_19.js                  |    2 +
 docs/api/doxygen/search/all_2.js                   |   26 +-
 docs/api/doxygen/search/all_3.js                   |   39 +-
 docs/api/doxygen/search/all_4.js                   |   19 +-
 docs/api/doxygen/search/all_5.js                   |   20 +-
 docs/api/doxygen/search/all_6.js                   |   21 +-
 docs/api/doxygen/search/all_7.js                   |    9 +-
 docs/api/doxygen/search/all_8.js                   |    6 +-
 docs/api/doxygen/search/all_9.js                   |   54 +-
 docs/api/doxygen/search/all_b.js                   |   47 +-
 docs/api/doxygen/search/all_c.js                   |    8 +-
 docs/api/doxygen/search/all_d.js                   |   20 +-
 docs/api/doxygen/search/all_e.js                   |    7 +-
 docs/api/doxygen/search/all_f.js                   |   31 +-
 docs/api/doxygen/search/classes_0.js               |    4 +
 docs/api/doxygen/search/classes_10.js              |    1 +
 docs/api/doxygen/search/classes_11.js              |    4 +
 docs/api/doxygen/search/classes_13.js              |    5 +-
 docs/api/doxygen/search/classes_2.js               |    1 +
 docs/api/doxygen/search/classes_3.js               |    1 +
 docs/api/doxygen/search/classes_5.js               |    2 -
 docs/api/doxygen/search/classes_7.js               |    2 +-
 docs/api/doxygen/search/classes_8.js               |    5 +
 docs/api/doxygen/search/classes_a.js               |    1 +
 docs/api/doxygen/search/classes_c.js               |    4 +-
 docs/api/doxygen/search/classes_d.js               |    1 +
 docs/api/doxygen/search/defines_0.js               |    5 +-
 docs/api/doxygen/search/defines_1.js               |    5 +-
 docs/api/doxygen/search/defines_2.js               |    4 +-
 docs/api/doxygen/search/defines_3.js               |    3 +-
 docs/api/doxygen/search/defines_4.js               |    8 +-
 docs/api/doxygen/search/defines_5.js               |    8 +-
 docs/api/doxygen/search/defines_6.js               |   53 +-
 .../search/{enumvalues_d.html => defines_7.html}   |    2 +-
 .../doxygen/search/{defines_6.js => defines_7.js}  |   11 +-
 .../search/{enumvalues_d.html => defines_8.html}   |    2 +-
 docs/api/doxygen/search/defines_8.js               |    4 +
 docs/api/doxygen/search/enums_2.js                 |    4 +-
 docs/api/doxygen/search/enums_9.js                 |    2 +-
 docs/api/doxygen/search/enumvalues_1.js            |   20 +-
 docs/api/doxygen/search/enumvalues_2.js            |   21 +-
 docs/api/doxygen/search/enumvalues_3.js            |    7 +-
 docs/api/doxygen/search/enumvalues_4.js            |  116 +-
 docs/api/doxygen/search/enumvalues_5.js            |  103 +-
 docs/api/doxygen/search/enumvalues_6.js            |    3 +-
 docs/api/doxygen/search/enumvalues_7.js            |    2 +-
 docs/api/doxygen/search/enumvalues_8.js            |    2 +-
 docs/api/doxygen/search/enumvalues_9.js            |    4 +-
 docs/api/doxygen/search/enumvalues_a.js            |    3 +-
 docs/api/doxygen/search/enumvalues_b.js            |    2 +-
 docs/api/doxygen/search/enumvalues_c.js            |    2 +-
 docs/api/doxygen/search/enumvalues_d.js            |    4 -
 docs/api/doxygen/search/files_1.js                 |    7 +-
 docs/api/doxygen/search/files_b.js                 |    3 +-
 docs/api/doxygen/search/files_f.js                 |    7 +-
 docs/api/doxygen/search/functions_1.js             |    9 +-
 docs/api/doxygen/search/functions_10.js            |    9 +-
 docs/api/doxygen/search/functions_11.js            |    6 +-
 docs/api/doxygen/search/functions_12.js            |   19 +-
 docs/api/doxygen/search/functions_13.js            |   38 +-
 docs/api/doxygen/search/functions_14.js            |    2 +-
 docs/api/doxygen/search/functions_15.js            |    9 +-
 docs/api/doxygen/search/functions_18.js            |    2 +
 docs/api/doxygen/search/functions_2.js             |   13 +-
 docs/api/doxygen/search/functions_3.js             |   29 +-
 docs/api/doxygen/search/functions_5.js             |    8 +-
 docs/api/doxygen/search/functions_6.js             |    8 +-
 docs/api/doxygen/search/functions_7.js             |    9 +-
 docs/api/doxygen/search/functions_8.js             |    4 +-
 docs/api/doxygen/search/functions_9.js             |   26 +-
 docs/api/doxygen/search/functions_b.js             |    1 -
 docs/api/doxygen/search/functions_c.js             |    7 +-
 docs/api/doxygen/search/functions_d.js             |   10 +-
 docs/api/doxygen/search/functions_e.js             |    3 +-
 docs/api/doxygen/search/functions_f.js             |   17 +-
 docs/api/doxygen/search/namespaces_2.js            |    4 +-
 docs/api/doxygen/search/related_0.js               |    4 +-
 docs/api/doxygen/search/related_2.js               |    4 +-
 docs/api/doxygen/search/related_6.js               |    2 +
 docs/api/doxygen/search/related_8.js               |    2 +-
 docs/api/doxygen/search/related_9.js               |    1 +
 docs/api/doxygen/search/related_a.js               |    1 +
 docs/api/doxygen/search/related_b.js               |    6 +-
 docs/api/doxygen/search/searchdata.js              |    6 +-
 docs/api/doxygen/search/typedefs_2.js              |    3 +-
 docs/api/doxygen/search/typedefs_3.js              |    2 +-
 docs/api/doxygen/search/typedefs_7.js              |    4 +-
 docs/api/doxygen/search/typedefs_8.js              |    4 +-
 docs/api/doxygen/search/typedefs_9.js              |    3 +-
 docs/api/doxygen/search/typedefs_a.js              |    9 +-
 docs/api/doxygen/search/typedefs_b.js              |   16 +-
 docs/api/doxygen/search/typedefs_c.js              |   15 +-
 docs/api/doxygen/search/typedefs_d.js              |   46 +-
 docs/api/doxygen/search/typedefs_e.js              |   44 +-
 .../search/{enumvalues_d.html => typedefs_f.html}  |    2 +-
 docs/api/doxygen/search/typedefs_f.js              |    4 +
 docs/api/doxygen/search/variables_0.js             |    4 +-
 docs/api/doxygen/search/variables_1.js             |    4 +-
 docs/api/doxygen/search/variables_10.js            |    1 -
 docs/api/doxygen/search/variables_11.js            |    6 +-
 docs/api/doxygen/search/variables_12.js            |   40 +-
 docs/api/doxygen/search/variables_14.js            |    3 +-
 docs/api/doxygen/search/variables_2.js             |    6 +-
 docs/api/doxygen/search/variables_3.js             |    1 -
 docs/api/doxygen/search/variables_4.js             |    8 +-
 docs/api/doxygen/search/variables_6.js             |    1 +
 docs/api/doxygen/search/variables_9.js             |    7 +-
 docs/api/doxygen/search/variables_a.js             |    4 +-
 docs/api/doxygen/search/variables_b.js             |    2 -
 docs/api/doxygen/search/variables_c.js             |    3 +-
 docs/api/doxygen/search/variables_d.js             |    4 +-
 docs/api/doxygen/search/variables_e.js             |    3 +-
 docs/api/doxygen/search/variables_f.js             |    2 -
 docs/api/doxygen/serializer_8h.html                |    2 +-
 docs/api/doxygen/serializer_8h__dep__incl.svg      | 1553 +++---
 docs/api/doxygen/span_8h.html                      |    4 +-
 docs/api/doxygen/span_8h__dep__incl.svg            | 1454 +++---
 docs/api/doxygen/span_8h__incl.svg                 |  756 ++-
 docs/api/doxygen/span_8h_source.html               |    4 +-
 docs/api/doxygen/stmt_8h.html                      |    8 +-
 docs/api/doxygen/stmt_8h__dep__incl.svg            |    2 +-
 docs/api/doxygen/stmt_8h__incl.svg                 | 1016 ++--
 docs/api/doxygen/stmt_8h_source.html               |  192 +-
 docs/api/doxygen/stmt__functor_8h.html             |    2 +-
 docs/api/doxygen/stmt__functor_8h__incl.svg        | 1110 +++--
 docs/api/doxygen/stmt__functor_8h_source.html      |   43 +-
 ...cttvm_1_1Map_1_1ValueConverter__coll__graph.svg |   23 -
 .../structtvm_1_1ObjectEqual__coll__graph.svg      |   22 -
 .../structtvm_1_1ObjectHash__coll__graph.svg       |   22 -
 ..._1_1TargetIdNode_1_1ValueTypeInfo-members.html} |   11 +-
 ...tructtvm_1_1TargetIdNode_1_1ValueTypeInfo.html} |   94 +-
 ...1TargetIdNode_1_1ValueTypeInfo__coll__graph.svg |  121 +
 ...structtvm_1_1detail_1_1ValueTypeInfoMaker.html} |   23 +-
 ..._1detail_1_1ValueTypeInfoMaker__coll__graph.svg |   24 +
 ...ttvm_1_1detail_1_1is__specialized-members.html} |    9 +-
 ...=> structtvm_1_1detail_1_1is__specialized.html} |   57 +-
 ...gs_8_8_8_01_4_00_01Container_01_4-members.html} |    8 +-
 ...r_3_01Args_8_8_8_01_4_00_01Container_01_4.html} |   52 +-
 ...8_8_8_01_4_00_01Container_01_4__coll__graph.svg |   38 +
 ..._8_01_4_00_01Container_01_4__inherit__graph.svg |   38 +
 ...m_1_1detail_1_1is__specialized__coll__graph.svg |   37 +
 ..._1detail_1_1is__specialized__inherit__graph.svg |   37 +
 ..._1_1relay_1_1AllocStorageAttrs__coll__graph.svg |    2 +-
 ...m_1_1relay_1_1AllocTensorAttrs__coll__graph.svg |    2 +-
 ...ucttvm_1_1relay_1_1ArangeAttrs__coll__graph.svg |    2 +-
 ...cttvm_1_1relay_1_1ArgsortAttrs__coll__graph.svg |    2 +-
 ..._1_1relay_1_1BinaryConv2DAttrs__coll__graph.svg |    2 +-
 ...m_1_1relay_1_1BinaryDenseAttrs__coll__graph.svg |    2 +-
 ...cttvm_1_1relay_1_1BitPackAttrs__coll__graph.svg |    2 +-
 ...tructtvm_1_1relay_1_1CastAttrs__coll__graph.svg |    2 +-
 ...ttvm_1_1relay_1_1CastHintAttrs__coll__graph.svg |    2 +-
 ...ucttvm_1_1relay_1_1Conv1DAttrs__coll__graph.svg |    2 +-
 ...1relay_1_1Conv1DTransposeAttrs__coll__graph.svg |    2 +-
 ...ucttvm_1_1relay_1_1Conv2DAttrs__coll__graph.svg |    2 +-
 ...1relay_1_1Conv2DTransposeAttrs__coll__graph.svg |    2 +-
 ..._1relay_1_1Conv2DWinogradAttrs__coll__graph.svg |    2 +-
 ...gradNNPACKWeightTransformAttrs__coll__graph.svg |    2 +-
 ...ucttvm_1_1relay_1_1Conv3DAttrs__coll__graph.svg |    2 +-
 ...1relay_1_1Conv3DTransposeAttrs__coll__graph.svg |    2 +-
 ..._1relay_1_1Conv3DWinogradAttrs__coll__graph.svg |    2 +-
 ...y_1_1ConvGemmWeightTransformAttrs-members.html} |   26 +-
 ..._1_1relay_1_1ConvGemmWeightTransformAttrs.html} |   58 +-
 ..._1ConvGemmWeightTransformAttrs__coll__graph.svg |   89 +
 ...onvGemmWeightTransformAttrs__inherit__graph.svg |   89 +
 ...1_1relay_1_1CropAndResizeAttrs__coll__graph.svg |    2 +-
 ...relay_1_1DeformableConv2DAttrs__coll__graph.svg |    2 +-
 ...ructtvm_1_1relay_1_1DenseAttrs__coll__graph.svg |    2 +-
 ...vm_1_1relay_1_1Dilation2DAttrs__coll__graph.svg |    2 +-
 ...ucttvm_1_1relay_1_1InitOpAttrs__coll__graph.svg |    2 +-
 ...ructtvm_1_1relay_1_1MeshgridAttrs-members.html} |   33 +-
 ...ml => structtvm_1_1relay_1_1MeshgridAttrs.html} |   60 +-
 ...ttvm_1_1relay_1_1MeshgridAttrs__coll__graph.svg |   87 +
 ...m_1_1relay_1_1MeshgridAttrs__inherit__graph.svg |   87 +
 ...m_1_1relay_1_1NdarraySizeAttrs__coll__graph.svg |    2 +-
 ...elay_1_1NonMaximumSuppressionAttrs-members.html |    2 +-
 ...tvm_1_1relay_1_1NonMaximumSuppressionAttrs.html |   10 +-
 ..._1_1NonMaximumSuppressionAttrs__coll__graph.svg |  110 +-
 ...ucttvm_1_1relay_1_1OneHotAttrs__coll__graph.svg |    2 +-
 ...structtvm_1_1relay_1_1ReshapeAttrs-members.html |    2 +-
 .../structtvm_1_1relay_1_1ReshapeAttrs.html        |    8 +-
 ...cttvm_1_1relay_1_1ReshapeAttrs__coll__graph.svg |   17 +-
 ...ttvm_1_1relay_1_1Resize3dAttrs__coll__graph.svg |    2 +-
 ...ucttvm_1_1relay_1_1ResizeAttrs__coll__graph.svg |    2 +-
 ...cttvm_1_1relay_1_1ShapeOfAttrs__coll__graph.svg |    2 +-
 ...tructtvm_1_1relay_1_1TopKAttrs__coll__graph.svg |    2 +-
 ...1relay_1_1qnn_1_1QuantizeAttrs__coll__graph.svg |    2 +-
 ...elay_1_1qnn_1_1RequantizeAttrs__coll__graph.svg |    2 +-
 ...ructtvm_1_1runtime_1_1ObjectEqual-members.html} |    8 +-
 ...ml => structtvm_1_1runtime_1_1ObjectEqual.html} |   45 +-
 ...ttvm_1_1runtime_1_1ObjectEqual__coll__graph.svg |   22 +
 ...tructtvm_1_1runtime_1_1ObjectHash-members.html} |    8 +-
 ...tml => structtvm_1_1runtime_1_1ObjectHash.html} |   42 +-
 ...cttvm_1_1runtime_1_1ObjectHash__coll__graph.svg |   22 +
 ...lueConverter_3_01tvm_1_1Bool_01_4-members.html} |    8 +-
 ...edFuncValueConverter_3_01tvm_1_1Bool_01_4.html} |   53 +-
 ...Converter_3_01tvm_1_1Bool_01_4__coll__graph.svg |   23 +
 ...FuncValueConverter_3_01tvm_1_1Integer_01_4.html |    4 +-
 .../structtvm_1_1runtime_1_1TypeIndex-members.html |    9 +-
 .../doxygen/structtvm_1_1runtime_1_1TypeIndex.html |    8 +-
 ...vm_1_1runtime_1_1vm_1_1Instruction-members.html |   17 +-
 .../structtvm_1_1runtime_1_1vm_1_1Instruction.html |   96 +-
 ..._1runtime_1_1vm_1_1Instruction__coll__graph.svg |    4 +-
 ...vm_1_1runtime_1_1vm_1_1VMFrame__coll__graph.svg |    4 +-
 .../structtvm_1_1tir_1_1LENode__coll__graph.svg    |    2 +-
 docs/api/doxygen/structural__equal_8h.html         |    4 +-
 .../doxygen/structural__equal_8h__dep__incl.svg    | 1155 +++--
 docs/api/doxygen/structural__equal_8h__incl.svg    |  540 +--
 docs/api/doxygen/structural__equal_8h_source.html  |    2 +-
 docs/api/doxygen/structural__hash_8h.html          |    4 +-
 .../api/doxygen/structural__hash_8h__dep__incl.svg | 1155 +++--
 docs/api/doxygen/structural__hash_8h__incl.svg     |  538 +-
 docs/api/doxygen/structural__hash_8h_source.html   |    8 +-
 docs/api/doxygen/target_8h.html                    |    3 +-
 docs/api/doxygen/target_8h__dep__incl.svg          |    2 +-
 docs/api/doxygen/target_8h__incl.svg               | 1680 +++----
 docs/api/doxygen/target_8h_source.html             |   39 +-
 docs/api/doxygen/target__id_8h.html                |  227 +
 ..._dep__incl.svg => target__id_8h__dep__incl.svg} |  328 +-
 docs/api/doxygen/target__id_8h__incl.svg           | 1285 +++++
 docs/api/doxygen/target__id_8h_source.html         |  143 +
 docs/api/doxygen/target__info_8h.html              |    2 +-
 docs/api/doxygen/target__info_8h__incl.svg         |  864 ++--
 docs/api/doxygen/target__info_8h_source.html       |    2 +-
 docs/api/doxygen/tensor_8h.html                    |    2 +-
 docs/api/doxygen/tensor_8h__dep__incl.svg          |    2 +-
 docs/api/doxygen/tensor_8h__incl.svg               | 1737 +++----
 docs/api/doxygen/tensor_8h_source.html             |   12 +-
 docs/api/doxygen/tensor__intrin_8h.html            |    2 +-
 docs/api/doxygen/tensor__intrin_8h__dep__incl.svg  |    2 +-
 docs/api/doxygen/tensor__intrin_8h__incl.svg       | 1734 +++----
 docs/api/doxygen/tensor__intrin_8h_source.html     |    2 +-
 docs/api/doxygen/tensor__type_8h.html              |    2 +-
 docs/api/doxygen/tensor__type_8h__dep__incl.svg    |    2 +-
 docs/api/doxygen/tensor__type_8h__incl.svg         |  864 ++--
 docs/api/doxygen/tensor__type_8h_source.html       |    6 +-
 docs/api/doxygen/tensor__utils_8h.html             |    2 +-
 docs/api/doxygen/tensor__utils_8h__incl.svg        | 2017 ++++----
 docs/api/doxygen/tensor__utils_8h_source.html      |    2 +-
 docs/api/doxygen/tir_2analysis_8h.html             |    9 +-
 docs/api/doxygen/tir_2analysis_8h__incl.svg        | 1842 ++++---
 docs/api/doxygen/tir_2analysis_8h_source.html      |   12 +-
 docs/api/doxygen/tir_2expr_8h.html                 |  133 +-
 docs/api/doxygen/tir_2expr_8h__dep__incl.svg       | 1118 +++--
 docs/api/doxygen/tir_2expr_8h__incl.svg            |  984 ++--
 docs/api/doxygen/tir_2expr_8h_source.html          |  161 +-
 docs/api/doxygen/tir_2expr__functor_8h.html        |    2 +-
 docs/api/doxygen/tir_2expr__functor_8h__incl.svg   |  994 ++--
 docs/api/doxygen/tir_2expr__functor_8h_source.html |    4 +-
 docs/api/doxygen/tir_2function_8h.html             |    2 +-
 docs/api/doxygen/tir_2function_8h__incl.svg        | 1154 +++--
 docs/api/doxygen/tir_2function_8h_source.html      |   27 +-
 docs/api/doxygen/tir_2op_8h.html                   |   50 +-
 docs/api/doxygen/tir_2op_8h__dep__incl.svg         |    2 +-
 docs/api/doxygen/tir_2op_8h__incl.svg              | 2116 ++++----
 docs/api/doxygen/tir_2op_8h_source.html            |  128 +-
 ...ay_2op_8h.html => tir_2op__attr__types_8h.html} |   84 +-
 .../doxygen/tir_2op__attr__types_8h__dep__incl.svg |  249 +
 docs/api/doxygen/tir_2op__attr__types_8h__incl.svg |  518 ++
 .../doxygen/tir_2op__attr__types_8h_source.html    |  113 +
 docs/api/doxygen/topi_2include_2topi_2nn_8h.html   |    2 +-
 .../doxygen/topi_2include_2topi_2nn_8h__incl.svg   | 2166 +++++----
 .../doxygen/topi_2include_2topi_2nn_8h_source.html |   10 +-
 .../doxygen/topi_2include_2topi_2transform_8h.html |    5 +-
 .../topi_2include_2topi_2transform_8h__incl.svg    | 2123 ++++----
 .../topi_2include_2topi_2transform_8h_source.html  |   36 +-
 docs/api/doxygen/type__functor_8h.html             |    2 +-
 docs/api/doxygen/type__functor_8h__incl.svg        | 1495 +++---
 docs/api/doxygen/type__functor_8h_source.html      |    2 +-
 docs/api/doxygen/type__relation_8h.html            |    4 +-
 docs/api/doxygen/type__relation_8h__dep__incl.svg  |  842 +++-
 docs/api/doxygen/type__relation_8h__incl.svg       | 1098 +++--
 docs/api/doxygen/type__relation_8h_source.html     |    2 +-
 docs/api/doxygen/util_8h.html                      |    2 +-
 docs/api/doxygen/util_8h__incl.svg                 |  862 ++--
 docs/api/doxygen/util_8h_source.html               |    4 +-
 docs/api/doxygen/var_8h.html                       |    9 +-
 docs/api/doxygen/var_8h__dep__incl.svg             | 1108 ++---
 docs/api/doxygen/var_8h__incl.svg                  |  874 ++--
 docs/api/doxygen/var_8h_source.html                |    8 +-
 docs/api/doxygen/vision_8h.html                    |    2 +-
 docs/api/doxygen/vision_8h__incl.svg               | 1112 +++--
 docs/api/doxygen/vision_8h_source.html             |   50 +-
 docs/api/doxygen/vm_8h.html                        |    3 +-
 docs/api/doxygen/vm_8h_source.html                 |  172 +-
 docs/api/doxygen/with_8h.html                      |    2 +-
 docs/api/doxygen/with_8h__dep__incl.svg            | 1138 ++---
 docs/api/doxygen/with_8h_source.html               |    2 +-
 docs/api/doxygen/x86_2bnn_8h.html                  |    2 +-
 docs/api/doxygen/x86_2bnn_8h__incl.svg             | 1897 ++++----
 docs/api/doxygen/x86_2bnn_8h_source.html           |    6 +-
 docs/api/doxygen/x86_2default_8h.html              |    2 +-
 docs/api/doxygen/x86_2default_8h__incl.svg         | 1976 ++++----
 docs/api/doxygen/x86_2default_8h_source.html       |    6 +-
 docs/api/doxygen/x86_2injective_8h.html            |    2 +-
 docs/api/doxygen/x86_2injective_8h__incl.svg       | 1897 ++++----
 docs/api/doxygen/x86_2injective_8h_source.html     |    6 +-
 docs/api/python/ir.html                            |    6 +-
 docs/api/python/ndarray.html                       |    4 +-
 docs/api/python/relay/dataflow_pattern.html        |    8 +-
 docs/api/python/relay/frontend.html                |    2 +-
 docs/api/python/relay/index.html                   |  152 +-
 docs/api/python/relay/nn.html                      |   65 +-
 docs/api/python/relay/testing.html                 |   30 +-
 docs/api/python/relay/transform.html               |   50 +-
 docs/api/python/relay/vision.html                  |    6 +-
 docs/api/python/target.html                        |   34 +-
 docs/api/python/tir.html                           |  178 +-
 docs/api/python/topi.html                          |   49 +-
 docs/api/typedoc/classes/bytestreamreader.html     |   12 +-
 docs/api/typedoc/classes/cachedcallstack.html      |   34 +-
 docs/api/typedoc/classes/dlcontext.html            |   10 +-
 docs/api/typedoc/classes/dldatatype.html           |   12 +-
 docs/api/typedoc/classes/environment.html          |   12 +-
 docs/api/typedoc/classes/ffilibrary.html           |   20 +-
 docs/api/typedoc/classes/graphruntime.html         |   16 +-
 docs/api/typedoc/classes/instance.html             |   40 +-
 docs/api/typedoc/classes/memory.html               |   34 +-
 docs/api/typedoc/classes/module.html               |   10 +-
 docs/api/typedoc/classes/ndarray.html              |   22 +-
 docs/api/typedoc/classes/packedfunccell.html       |    6 +-
 docs/api/typedoc/classes/rpcserver.html            |   14 +-
 docs/api/typedoc/classes/scalar.html               |    6 +-
 docs/api/typedoc/classes/webgpucontext.html        |   12 +-
 docs/api/typedoc/enums/argtypecode.html            |   30 +-
 docs/api/typedoc/enums/aynccallbackcode.html       |    4 +-
 docs/api/typedoc/enums/dldatatypecode.html         |    8 +-
 docs/api/typedoc/enums/rpcserverstate.html         |   12 +-
 docs/api/typedoc/enums/sizeof.html                 |   18 +-
 docs/api/typedoc/index.html                        |  114 +-
 docs/api/typedoc/interfaces/disposable.html        |    2 +-
 docs/api/typedoc/interfaces/functioninfo.html      |    6 +-
 docs/api/typedoc/interfaces/libraryprovider.html   |    4 +-
 docs/contribute/code_guide.html                    |    2 +-
 docs/contribute/code_review.html                   |   51 +-
 docs/contribute/committer_guide.html               |    2 +-
 docs/contribute/community.html                     |    2 +-
 docs/contribute/document.html                      |    2 +-
 docs/contribute/error_handling.html                |    2 +-
 docs/contribute/git_howto.html                     |    6 +-
 docs/contribute/index.html                         |    6 +-
 docs/contribute/pull_request.html                  |    2 +-
 docs/contribute/release_process.html               |   15 +-
 docs/dev/inferbound.html                           |    6 +-
 docs/faq.html                                      |    4 +-
 docs/frontend/tensorflow.html                      |    4 +
 docs/genindex.html                                 |   50 +-
 docs/langref/relay_op.html                         |   17 +-
 docs/langref/relay_pattern.html                    |   44 +-
 docs/objects.inv                                   |  Bin 14710 -> 14952 bytes
 docs/searchindex.js                                |    2 +-
 docs/tutorials/autotvm/sg_execution_times.html     |   14 +-
 docs/tutorials/autotvm/tune_conv2d_cuda.html       |   43 +-
 docs/tutorials/autotvm/tune_relay_arm.html         |    1 +
 docs/tutorials/autotvm/tune_relay_cuda.html        |    1 +
 docs/tutorials/autotvm/tune_relay_mobile_gpu.html  |    1 +
 docs/tutorials/autotvm/tune_relay_x86.html         |    1 +
 docs/tutorials/autotvm/tune_simple_template.html   |   21 +-
 docs/tutorials/cross_compilation_and_rpc.html      |    3 +-
 docs/tutorials/dev/low_level_custom_pass.html      |   21 +-
 docs/tutorials/dev/relay_pass_infra.html           |    1 +
 docs/tutorials/dev/sg_execution_times.html         |    6 +-
 docs/tutorials/frontend/build_gcn.html             |    6 +-
 .../frontend/deploy_model_on_android.html          |    4 +-
 docs/tutorials/frontend/deploy_model_on_rasp.html  |    2 +
 docs/tutorials/frontend/deploy_prequantized.html   |    4 +-
 .../frontend/deploy_prequantized_tflite.html       |    6 +-
 docs/tutorials/frontend/deploy_quantized.html      |    2 +
 docs/tutorials/frontend/deploy_sparse.html         |  626 +++
 docs/tutorials/frontend/deploy_ssd_gluoncv.html    |  137 +-
 docs/tutorials/frontend/from_caffe2.html           |    2 +
 docs/tutorials/frontend/from_coreml.html           |    2 +
 docs/tutorials/frontend/from_darknet.html          |    2 +
 docs/tutorials/frontend/from_keras.html            |    2 +
 docs/tutorials/frontend/from_mxnet.html            |    2 +
 docs/tutorials/frontend/from_onnx.html             |   10 +-
 docs/tutorials/frontend/from_pytorch.html          |    2 +
 docs/tutorials/frontend/from_tensorflow.html       |    6 +-
 docs/tutorials/frontend/from_tflite.html           |    2 +
 docs/tutorials/frontend/sg_execution_times.html    |   37 +-
 docs/tutorials/frontend/using_external_lib.html    |    3 +-
 docs/tutorials/index.html                          |  104 +-
 docs/tutorials/language/extern_op.html             |    1 +
 docs/tutorials/language/intrin_math.html           |   13 +-
 docs/tutorials/language/reduction.html             |   75 +-
 docs/tutorials/language/scan.html                  |   53 +-
 docs/tutorials/language/schedule_primitives.html   |  137 +-
 docs/tutorials/language/sg_execution_times.html    |   18 +-
 docs/tutorials/language/tedd.html                  |    5 +-
 docs/tutorials/language/tensorize.html             |   47 +-
 docs/tutorials/language/tuple_inputs.html          |   51 +-
 docs/tutorials/micro/micro_tflite.html             |  458 ++
 .../{topi => micro}/sg_execution_times.html        |    8 +-
 docs/tutorials/optimize/opt_conv_cuda.html         |    3 +-
 docs/tutorials/optimize/opt_conv_tensorcore.html   |   55 +-
 docs/tutorials/optimize/opt_gemm.html              |  115 +-
 .../optimize/opt_matmul_auto_tensorcore.html       |    1 +
 docs/tutorials/optimize/sg_execution_times.html    |   10 +-
 docs/tutorials/relay_quick_start.html              |  107 +-
 docs/tutorials/sg_execution_times.html             |    8 +-
 docs/tutorials/tensor_expr_get_started.html        |    1 +
 docs/tutorials/topi/intro_topi.html                |  317 +-
 docs/tutorials/topi/sg_execution_times.html        |    4 +-
 docs/vta/index.html                                |    4 +-
 docs/vta/install.html                              |   28 +-
 docs/vta/tutorials/autotvm/sg_execution_times.html |    4 +-
 docs/vta/tutorials/autotvm/tune_relay_vta.html     |  206 +-
 .../tutorials/frontend/deploy_classification.html  |   18 +-
 docs/vta/tutorials/frontend/deploy_detection.html  |    2 +-
 .../vta/tutorials/frontend/sg_execution_times.html |    6 +-
 docs/vta/tutorials/matrix_multiply.html            |   78 +-
 docs/vta/tutorials/optimize/convolution_opt.html   |  146 +-
 .../tutorials/optimize/matrix_multiply_opt.html    |  120 +-
 .../vta/tutorials/optimize/sg_execution_times.html |    6 +-
 docs/vta/tutorials/sg_execution_times.html         |    6 +-
 docs/vta/tutorials/vta_get_started.html            |   42 +-
 1280 files changed, 153073 insertions(+), 134075 deletions(-)

diff --git a/docs/_downloads/33a19782c8aaf9fc62e565c57df5caca/deploy_sparse.py b/docs/_downloads/33a19782c8aaf9fc62e565c57df5caca/deploy_sparse.py
new file mode 100644
index 0000000..87d5e45
--- /dev/null
+++ b/docs/_downloads/33a19782c8aaf9fc62e565c57df5caca/deploy_sparse.py
@@ -0,0 +1,352 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Deploy a Hugging Face Pruned Model on CPU
+=========================================
+**Author**: `Josh Fromm <https://github.com/jwfromm>`_
+
+This tutorial demonstrates how to take any pruned model, in this case `PruneBert
+from Hugging Face
+<https://huggingface.co/huggingface/prunebert-base-uncased-6-finepruned-w-distil-squad>`_,
+and use TVM to leverage the model's sparsity support to produce real speedups. Although
+the primary purpose of this tutorial is to realize speedups on already pruned
+models, it may also be useful to estimate how fast a model would be *if* it were
+pruned. To this end, we also provide a function that takes an unpruned model and
+replaces its weights
+with random and pruned weights at a specified sparsity. This may be a useful
+feature when trying to decide if a model is worth pruning or not.
+
+Before we get into the code, it's useful to discuss sparsity and pruning
+and dig into the two
+different types of sparsity: **structured** and **unstructured**.
+
+Pruning is a technique primarily used to reduce the parameter size of a model
+by replacing weight values with 0s. Although many methods exist for choosing which
+weights should be set to 0, the most straightforward is to pick the
+weights with the smallest magnitude. Typically, weights are pruned to a desired
+sparsity percentage. For example, a 95% sparse model would have only 5% of
+its weights non-zero. Pruning to very high sparsities often requires
+finetuning or full retraining as it tends to be a lossy approximation.
+Although parameter size benefits are quite easy to obtain from a pruned model
+through simple compression, leveraging sparsity to yield runtime speedups
+is more complicated.
+
+In structured sparsity weights are pruned with the goal of clustering
+pruned weights together. In other words, they are pruned using both their
+value and location. The benefit of bunching up pruned weights is that it allows
+an algorithm such as matrix multiplication to skip entire blocks. It turns out
+that some degree of *block sparsity* is very important to realizing significant
+speedups on most hardware available today. 
+This is because when loading memory in most CPUs or GPUs,
+it doesn't save any work to skip reading a single value at a time; instead, an entire
+chunk or tile is read in and executed using something like vectorized instructions.
+
+Unstructured sparse weights are those that are pruned only on the value of
+the original weights. They may appear to be scattered randomly throughout
+a tensor rather than in chunks like we'd see in block sparse weights.
+At low sparsities, unstructured pruning techniques are difficult to
+accelerate. However, at high sparsities many blocks of all 0 values
+will naturally appear, making it possible to accelerate.
+
+This tutorial interacts with both structured and unstructured sparsity.
+Hugging Face's PruneBert model is unstructured but 95% sparse, allowing us
+to apply TVM's block sparse optimizations to it, even if not optimally.
+When generating random sparse weights for an unpruned model, we do so with structured
+sparsity. A fun exercise is comparing the real speed of PruneBert with the block
+sparse speed using fake weights to see the benefit of structured sparsity.
+"""
+
+###############################################################################
+# Load Required Modules
+# ---------------------
+# Besides TVM, this tutorial requires scipy, the latest transformers, and
+# tensorflow 2.2+.
+import os
+import tvm
+import time
+import itertools
+import numpy as np
+import tensorflow as tf
+from tvm import relay
+from tvm.contrib import graph_runtime
+from tvm.relay import data_dep_optimization as ddo
+from tensorflow.python.framework.convert_to_constants import (
+    convert_variables_to_constants_v2,
+)
+import scipy.sparse as sp
+
+
+###############################################################################
+# Configure Settings
+# ------------------
+# Let's start by defining some parameters that define the type of model
+# and sparsity to run.
+
+# The name of the transformer model to download and run.
+name = "huggingface/prunebert-base-uncased-6-finepruned-w-distil-squad"
+# The number of input sequences in a batch.
+batch_size = 1
+# The length of each input sequence.
+seq_len = 128
+# TVM platform identifier. Although cuda is also supported, it requires
+# tuning that is outside the scope of this tutorial. Note that best
+# cpu performance can be achieved by setting -mcpu appropriately for
+# your specific machine.
+target = "llvm"
+# Which device to run on. Should be one of tvm.cpu() or tvm.gpu().
+ctx = tvm.cpu()
+# If true, then a sparse variant of the network will be run and
+# benchmarked.
+measure_sparse = True
+# The block size of structured sparsity to convert weight tensors
+# into. Changing this parameter may yield speedups for some platforms.
+bs_r = 1
+# For models besides PruneBert (which is 95% sparse), this parameter
+# determines how sparse the generated weights should be. The higher
+# the sparsity, the faster the result.
+sparsity = 0.85
+
+
+###############################################################################
+# Download and Convert Transformers Model
+# ---------------------------------------
+# Now we'll grab a model from the transformers module, download it,
+# convert it into a TensorFlow graphdef in preparation for converting that graphdef into
+# a relay graph that we can optimize and deploy.
+def load_keras_model(module, name, seq_len, batch_size, report_runtime=True):
+    model = module.from_pretrained(name)
+    dummy_input = tf.keras.Input(shape=[seq_len], batch_size=batch_size, dtype="int32")
+    dummy_out = model(dummy_input)  # Propagate shapes through the keras model.
+    if report_runtime:
+        np_input = np.random.uniform(
+            size=[batch_size, seq_len], low=0, high=seq_len
+        ).astype("int32")
+        start = time.time()
+        repeats = 50
+        for i in range(repeats):
+            np_out = model(np_input)
+        end = time.time()
+        print("Keras Runtime: %f ms." % (1000 * ((end - start) / repeats)))
+    return model
+
+
+def convert_to_graphdef(model, batch_size, seq_len):
+    model_func = tf.function(lambda x: model(x))
+    input_dict = model._saved_model_inputs_spec
+    input_spec = input_dict[list(input_dict.keys())[0]]
+    model_func = model_func.get_concrete_function(
+        tf.TensorSpec([batch_size, seq_len], input_spec.dtype)
+    )
+    frozen_func = convert_variables_to_constants_v2(model_func)
+    return frozen_func.graph.as_graph_def()
+
+
+def download_model(name, batch_size, seq_len):
+    import transformers
+
+    module = getattr(transformers, "TFBertForSequenceClassification")
+    model = load_keras_model(module, name=name, batch_size=batch_size, seq_len=seq_len)
+    return convert_to_graphdef(model, batch_size, seq_len)
+
+
+###############################################################################
+# Convert to Relay Graph
+# ----------------------
+# We now have all the tooling to get a transformers model in the right format
+# for relay conversion. Let's import it! In the following function we
+# save the imported graph in relay's json format so that we don't have
+# to reimport from tensorflow each time this script is run.
+def import_graphdef(
+    name,
+    batch_size,
+    seq_len,
+    save_relay=True,
+    relay_file="model.json",
+    relay_params="model.params",
+):
+    abs_path = os.path.dirname(os.path.abspath(__file__))
+    shape_dict = {"input_1": (batch_size, seq_len)}
+    relay_file = ("%s_%d_%d_%s" % (name, batch_size, seq_len, relay_file)).replace(
+        "/", "_"
+    )
+    relay_params = ("%s_%d_%d_%s" % (name, batch_size, seq_len, relay_params)).replace(
+        "/", "_"
+    )
+    if os.path.exists(os.path.join(abs_path, relay_file)) and os.path.exists(
+        os.path.join(abs_path, relay_params)
+    ):
+        with open(os.path.join(abs_path, relay_file), "r") as fi:
+            mod = tvm.ir.load_json(fi.read())
+        with open(os.path.join(abs_path, relay_params), "rb") as fi:
+            params = relay.load_param_dict(fi.read())
+    else:
+        graph_def = download_model(name, batch_size, seq_len)
+
+        mod, params = relay.frontend.from_tensorflow(graph_def, shape=shape_dict)
+
+        if save_relay:
+            with open(os.path.join(abs_path, relay_file), "w") as fo:
+                fo.write(tvm.ir.save_json(mod))
+            with open(os.path.join(abs_path, relay_params), "wb") as fo:
+                fo.write(relay.save_param_dict(params))
+
+    return mod, params, shape_dict
+
+
+###############################################################################
+# Run the Dense Graph
+# -------------------
+# Let's run the default version of the imported model. Note that even if
+# the weights are sparse, we won't see any speedup because we are using
+# regular dense matrix multiplications on these dense (but mostly zero)
+# tensors instead of sparse aware kernels.
+def run_relay_graph(mod, params, shape_dict, target, ctx):
+    with relay.build_config(opt_level=3):
+        graph, lib, params = relay.build(mod, target=target, params=params)
+    input_shape = shape_dict["input_1"]
+    dummy_data = np.random.uniform(size=input_shape, low=0, high=input_shape[1]).astype(
+        "int32"
+    )
+
+    m = graph_runtime.create(graph, lib, ctx)
+    m.set_input(0, dummy_data)
+    m.set_input(**params)
+    m.run()
+    tvm_output = m.get_output(0)
+
+    ftimer = m.module.time_evaluator("run", ctx, repeat=5, number=5)
+    prof_res = np.array(ftimer().results) * 1000
+    print(
+        "%-20s %-19s (%s)"
+        % ("Runtime:", "%.2f ms" % np.mean(prof_res), "%.2f ms" % np.std(prof_res))
+    )
+    return tvm_output
+
+
+def run_dense(mod, params, shape_dict, target, ctx):
+    print("Dense Model Benchmark:")
+    return run_relay_graph(mod, params, shape_dict, target, ctx)
+
+
+###############################################################################
+# Run the Sparse Graph
+# --------------------
+# Next we'll convert the graph into a sparse representation and generate
+# fake sparse weights if needed. Then we'll use the same benchmarking
+# script as dense to see how much faster we go! We apply a few relay passes
+# to the graph to get it leveraging sparsity. First we use
+# `simplify_fc_transpose` to fold transposes on the weights of dense layers
+# into the parameters. This makes it easier to convert matrix multiplies
+# to sparse versions. Next we apply `bsr_dense.convert` to identify all
+# weight matrices that can be sparse, and automatically replace them.
+# 
+# The `bsr_dense.convert` call below is doing the heavy lifting of identifying
+# which weights in the model can be made sparse by checking if they are
+# at least `sparsity_threshold` percent sparse. If so, it converts those
+# weights into *Block Sparse Row (BSR)* format. BSR is essentially
+# a representation that indexes into the nonzero chunks of the tensor,
+# making it easy for an algorithm to load those non-zero chunks and ignore
+# the rest of the tensor. Once the sparse weights are in BSR format,
+# `relay.transform.DenseToSparse` is applied to actually replace
+# `relay.dense` operations with `relay.sparse_dense` calls that can be
+# run faster.
+def random_bsr_matrix(M, N, BS_R, BS_C, density, dtype="float32"):
+    Y = np.zeros((M, N), dtype=dtype)
+    assert M % BS_R == 0
+    assert N % BS_C == 0
+    nnz = int(density * M * N)
+    num_blocks = int(nnz / (BS_R * BS_C)) + 1
+    candidate_blocks = np.asarray(
+        list(itertools.product(range(0, M, BS_R), range(0, N, BS_C)))
+    )
+    assert candidate_blocks.shape[0] == M // BS_R * N // BS_C
+    chosen_blocks = candidate_blocks[
+        np.random.choice(candidate_blocks.shape[0], size=num_blocks, replace=False)
+    ]
+    for i in range(len(chosen_blocks)):
+        r, c = chosen_blocks[i]
+        Y[r : r + BS_R, c : c + BS_C] = np.random.uniform(-0.1, 0.1, (BS_R, BS_C))
+    s = sp.bsr_matrix(Y, blocksize=(BS_R, BS_C))
+    assert s.data.shape == (num_blocks, BS_R, BS_C)
+    assert s.data.size >= nnz
+    assert s.indices.shape == (num_blocks,)
+    assert s.indptr.shape == (M // BS_R + 1,)
+    return s.todense()
+
+
+def random_sparse_bert_params(func, params, density, BS_R, BS_C):
+    def deepcopy(param_dic):
+        ret = {}
+        for k, v in param_dic.items():
+            ret[k] = tvm.nd.array(v.asnumpy())
+        return ret
+
+    new_params = deepcopy(params)
+    dense_weight_names = relay.analysis.sparse_dense._search_dense_op_weight(func)
+    for item in dense_weight_names:
+        name = str(item)
+        shape = new_params[name].shape
+        if shape[0] % BS_R == 0 and shape[1] % BS_C == 0:
+            new_w = random_bsr_matrix(shape[0], shape[1], BS_R, BS_C, density)
+            new_params[name] = tvm.nd.array(new_w)
+    return new_params
+
+
+def run_sparse(mod, params, shape_dict, target, ctx, bs_r, sparsity, gen_weights):
+    mod, params = ddo.simplify_fc_transpose.convert(mod["main"], params)
+    if gen_weights:
+        params = random_sparse_bert_params(
+            mod, params, BS_R=bs_r, BS_C=1, density=1 - sparsity
+        )
+    mod, params = ddo.bsr_dense.convert(mod, params, (bs_r, 1), sparsity_threshold=0.8)
+    print("Block Sparse Model with {blocksize}x1 blocks:".format(blocksize=bs_r))
+    return run_relay_graph(mod, params, shape_dict, target, ctx)
+
+
+###############################################################################
+# Run All the Code!
+# -----------------
+# And that's it! Now we'll simply call all the needed functions to benchmark
+# the model according to the set parameters. Note that to run this code
+# you'll need to uncomment the last line first.
+def benchmark():
+    mod, params, shape_dict = import_graphdef(name, batch_size, seq_len)
+    run_dense(mod, params, shape_dict, target, ctx)
+    if measure_sparse:
+        gen_weights = "prune" not in name
+        run_sparse(mod, params, shape_dict, target, ctx, bs_r, sparsity, gen_weights)
+
+
+# benchmark()
+
+###############################################################################
+# Sample Output
+# -------------
+# For reference, below is the output of the script when run on an AMD CPU;
+# it shows about a 2.5X speedup from using sparsity.
+
+# Dense Model Benchmark:
+# Cannot find config for target=llvm, workload=('dense_nopack.x86', ('TENSOR', (1, 768), 'float32'), ('TENSOR', (2, 768), 'float32'), None, 'float32'). A fallback configuration is used, which may bring great performance regression.
+# Cannot find config for target=llvm, workload=('dense_nopack.x86', ('TENSOR', (1, 768), 'float32'), ('TENSOR', (768, 768), 'float32'), None, 'float32'). A fallback configuration is used, which may bring great performance regression.
+# Cannot find config for target=llvm, workload=('dense_nopack.x86', ('TENSOR', (128, 3072), 'float32'), ('TENSOR', (768, 3072), 'float32'), None, 'float32'). A fallback configuration is used, which may bring great performance regression.
+# Cannot find config for target=llvm, workload=('dense_nopack.x86', ('TENSOR', (128, 768), 'float32'), ('TENSOR', (3072, 768), 'float32'), None, 'float32'). A fallback configuration is used, which may bring great performance regression.
+# Cannot find config for target=llvm, workload=('dense_nopack.x86', ('TENSOR', (128, 768), 'float32'), ('TENSOR', (768, 768), 'float32'), None, 'float32'). A fallback configuration is used, which may bring great performance regression.
+# Cannot find config for target=llvm, workload=('batch_matmul.x86', ('TENSOR', (12, 128, 128), 'float32'), ('TENSOR', (12, 64, 128), 'float32')). A fallback configuration is used, which may bring great performance regression.
+# Cannot find config for target=llvm, workload=('batch_matmul.x86', ('TENSOR', (12, 128, 64), 'float32'), ('TENSOR', (12, 128, 64), 'float32')). A fallback configuration is used, which may bring great performance regression.
+# Runtime:             165.26 ms           (12.83 ms)
+# Block Sparse Model with 1x1 blocks:
+# Runtime:             67.75 ms            (8.83 ms)
diff --git a/docs/_downloads/5df1a8bfe653027789c10728e74a65c0/intrin_math.ipynb b/docs/_downloads/5df1a8bfe653027789c10728e74a65c0/intrin_math.ipynb
index c8643d9..19c1b72 100644
--- a/docs/_downloads/5df1a8bfe653027789c10728e74a65c0/intrin_math.ipynb
+++ b/docs/_downloads/5df1a8bfe653027789c10728e74a65c0/intrin_math.ipynb
@@ -98,7 +98,7 @@
       },
       "outputs": [],
       "source": [
-        "def my_cuda_math_rule(op):\n    \"\"\"Customized CUDA intrinsic lowering rule\"\"\"\n    assert isinstance(op, tvm.tir.Call)\n    if op.dtype == \"float32\":\n        # call float function\n        return tvm.tir.call_pure_extern(\"float32\", \"%sf\" % op.name, op.args[0])\n    elif op.dtype == \"float64\":\n        # call double function\n        return tvm.tir.call_pure_extern(\"float32\", op.name, op.args[0])\n    else:\n        # cannot do translation, return self.\n         [...]
+        "def my_cuda_math_rule(op):\n    \"\"\"Customized CUDA intrinsic lowering rule\"\"\"\n    assert isinstance(op, tvm.tir.Call)\n    name = op.op.name\n    assert name.startswith(\"tir.\")\n    dispatch_name = name[4:]\n    if op.dtype == \"float32\":\n        # call float function\n        return tvm.tir.call_pure_extern(\"float32\", \"%sf\" % dispatch_name, op.args[0])\n    elif op.dtype == \"float64\":\n        # call double function\n        return tvm.tir.call_pure_extern(\"fl [...]
       ]
     },
     {
@@ -134,7 +134,7 @@
       },
       "outputs": [],
       "source": [
-        "def mylog(x):\n    \"\"\"customized log intrinsic function\"\"\"\n    return tvm.tir.call_pure_intrin(x.dtype, \"mylog\", x)\n\n\ndef my_cuda_mylog_rule(op):\n    \"\"\"CUDA lowering rule for log\"\"\"\n    if op.dtype == \"float32\":\n        return tvm.tir.call_pure_extern(\"float32\", \"logf\", op.args[0])\n    elif op.dtype == \"float64\":\n        return tvm.tir.call_pure_extern(\"float64\", \"log\", op.args[0])\n    else:\n        return op\n\n\ntvm.target.register_intrin_ [...]
+        "def mylog(x):\n    \"\"\"customized log intrinsic function\"\"\"\n    return tvm.tir.call_intrin(x.dtype, \"tir.mylog\", x)\n\n\ndef my_cuda_mylog_rule(op):\n    \"\"\"CUDA lowering rule for log\"\"\"\n    if op.dtype == \"float32\":\n        return tvm.tir.call_pure_extern(\"float32\", \"logf\", op.args[0])\n    elif op.dtype == \"float64\":\n        return tvm.tir.call_pure_extern(\"float64\", \"log\", op.args[0])\n    else:\n        return op\n\n# new op registration is trigg [...]
       ]
     },
     {
diff --git a/docs/_downloads/87b9e8307245d848689e4cdc3e6fa9bf/deploy_sparse.ipynb b/docs/_downloads/87b9e8307245d848689e4cdc3e6fa9bf/deploy_sparse.ipynb
new file mode 100644
index 0000000..d6157fe
--- /dev/null
+++ b/docs/_downloads/87b9e8307245d848689e4cdc3e6fa9bf/deploy_sparse.ipynb
@@ -0,0 +1,187 @@
+{
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "%matplotlib inline"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "\nDeploy a Hugging Face Pruned Model on CPU\n=========================================\n**Author**: `Josh Fromm <https://github.com/jwfromm>`_\n\nThis tutorial demonstrates how to take any pruned model, in this case `PruneBert\nfrom Hugging Face\n<https://huggingface.co/huggingface/prunebert-base-uncased-6-finepruned-w-distil-squad>`_,\nand use TVM to leverage the model's sparsity support to produce real speedups. Although\nthe primary purpose of this tutorial is to realize spee [...]
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Load Required Modules\n---------------------\nOther than TVM, scipy, the latest transformers, and\ntensorflow 2.2+ are required.\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "import os\nimport tvm\nimport time\nimport itertools\nimport numpy as np\nimport tensorflow as tf\nfrom tvm import relay\nfrom tvm.contrib import graph_runtime\nfrom tvm.relay import data_dep_optimization as ddo\nfrom tensorflow.python.framework.convert_to_constants import (\n    convert_variables_to_constants_v2,\n)\nimport scipy.sparse as sp"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Configure Settings\n------------------\nLet's start by defining some parameters that define the type of model\nand sparsity to run.\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "# The name of the transformer model to download and run.\nname = \"huggingface/prunebert-base-uncased-6-finepruned-w-distil-squad\"\n# The number of batches in an input.\nbatch_size = 1\n# The length of each input sequence.\nseq_len = 128\n# TVM platform identifier. Although cuda is also supported, it requires\n# tuning that is outside the scope of this tutorial. Note that best\n# cpu performance can be achieved by setting -mcpu appropriately for\n# your specific machine.\ntarge [...]
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Download and Convert Transformers Model\n---------------------------------------\nNow we'll grab a model from the transformers module, download it,\nconvert it into a TensorFlow graphdef in preparation for converting that graphdef into\na relay graph that we can optimize and deploy.\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "def load_keras_model(module, name, seq_len, batch_size, report_runtime=True):\n    model = module.from_pretrained(name)\n    dummy_input = tf.keras.Input(shape=[seq_len], batch_size=batch_size, dtype=\"int32\")\n    dummy_out = model(dummy_input)  # Propagate shapes through the keras model.\n    if report_runtime:\n        np_input = np.random.uniform(\n            size=[batch_size, seq_len], low=0, high=seq_len\n        ).astype(\"int32\")\n        start = time.time()\n         [...]
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Convert to Relay Graph\n----------------------\nWe now have all the tooling to get a transformers model in the right format\nfor relay conversion. Let's import it! In the following function we\nsave the imported graph in relay's json format so that we don't have\nto reimport from tensorflow each time this script is run.\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "def import_graphdef(\n    name,\n    batch_size,\n    seq_len,\n    save_relay=True,\n    relay_file=\"model.json\",\n    relay_params=\"model.params\",\n):\n    abs_path = os.path.dirname(os.path.abspath(__file__))\n    shape_dict = {\"input_1\": (batch_size, seq_len)}\n    relay_file = (\"%s_%d_%d_%s\" % (name, batch_size, seq_len, relay_file)).replace(\n        \"/\", \"_\"\n    )\n    relay_params = (\"%s_%d_%d_%s\" % (name, batch_size, seq_len, relay_params)).replace(\n     [...]
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Run the Dense Graph\n-------------------\nLet's run the default version of the imported model. Note that even if\nthe weights are sparse, we won't see any speedup because we are using\nregular dense matrix multiplications on these dense (but mostly zero)\ntensors instead of sparse aware kernels.\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "def run_relay_graph(mod, params, shape_dict, target, ctx):\n    with relay.build_config(opt_level=3):\n        graph, lib, params = relay.build(mod, target=target, params=params)\n    input_shape = shape_dict[\"input_1\"]\n    dummy_data = np.random.uniform(size=input_shape, low=0, high=input_shape[1]).astype(\n        \"int32\"\n    )\n\n    m = graph_runtime.create(graph, lib, ctx)\n    m.set_input(0, dummy_data)\n    m.set_input(**params)\n    m.run()\n    tvm_output = m.get_ [...]
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Run the Sparse Graph\n--------------------\nNext we'll convert the graph into a sparse representation and generate\nfake sparse weights if needed. Then we'll use the same benchmarking\nscript as dense to see how much faster we go! We apply a few relay passes\nto the graph to get it leveraging sparsity. First we use\n`simplify_fc_transpose` to use transposes on the weights of dense layers\ninto the parameters. This makes it easier to convert to matrix multiplies\nto sparse versio [...]
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "def random_bsr_matrix(M, N, BS_R, BS_C, density, dtype=\"float32\"):\n    Y = np.zeros((M, N), dtype=dtype)\n    assert M % BS_R == 0\n    assert N % BS_C == 0\n    nnz = int(density * M * N)\n    num_blocks = int(nnz / (BS_R * BS_C)) + 1\n    candidate_blocks = np.asarray(\n        list(itertools.product(range(0, M, BS_R), range(0, N, BS_C)))\n    )\n    assert candidate_blocks.shape[0] == M // BS_R * N // BS_C\n    chosen_blocks = candidate_blocks[\n        np.random.choice(ca [...]
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Run All the Code!\n-----------------\nAnd that's it! Now we'll simply call all the needed functions to benchmark\nthe model according to the set parameters. Note that to run this code\nyou'll need to uncomment the last line first.\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "def benchmark():\n    mod, params, shape_dict = import_graphdef(name, batch_size, seq_len)\n    run_dense(mod, params, shape_dict, target, ctx)\n    if measure_sparse:\n        gen_weights = \"prune\" not in name\n        run_sparse(mod, params, shape_dict, target, ctx, bs_r, sparsity, gen_weights)\n\n\n# benchmark()"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Sample Output\n-------------\nFor reference, below is the output of the script when run on an AMD CPU\nand shows about a 2.5X speedup from using sparsity.\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "# Dense Model Benchmark:\n# Cannot find config for target=llvm, workload=('dense_nopack.x86', ('TENSOR', (1, 768), 'float32'), ('TENSOR', (2, 768), 'float32'), None, 'float32'). A fallback configuration is used, which may bring great performance regression.\n# Cannot find config for target=llvm, workload=('dense_nopack.x86', ('TENSOR', (1, 768), 'float32'), ('TENSOR', (768, 768), 'float32'), None, 'float32'). A fallback configuration is used, which may bring great performance re [...]
+      ]
+    }
+  ],
+  "metadata": {
+    "kernelspec": {
+      "display_name": "Python 3",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.6.10"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}
\ No newline at end of file
diff --git a/docs/_downloads/9a7956336431664ba6d628347b08f5cb/opt_conv_tensorcore.ipynb b/docs/_downloads/9a7956336431664ba6d628347b08f5cb/opt_conv_tensorcore.ipynb
index e8ff100..a13a62c 100644
--- a/docs/_downloads/9a7956336431664ba6d628347b08f5cb/opt_conv_tensorcore.ipynb
+++ b/docs/_downloads/9a7956336431664ba6d628347b08f5cb/opt_conv_tensorcore.ipynb
@@ -76,7 +76,7 @@
       },
       "outputs": [],
       "source": [
-        "def intrin_wmma_load_matrix(scope):\n    n = 16\n    A = te.placeholder((n, n), name='A', dtype='float16')\n    BA = tvm.tir.decl_buffer(A.shape, A.dtype, scope='shared', data_alignment=32, offset_factor=256)\n    C = te.compute((n, n), lambda i, j: A[i, j], name='C')\n    BC = tvm.tir.decl_buffer(C.shape, C.dtype, scope=scope, data_alignment=32, offset_factor=256)\n\n    def intrin_func(ins, outs):\n        ib = tvm.tir.ir_builder.create()\n\n        BA = ins[0]\n        BC = o [...]
+        "def intrin_wmma_load_matrix(scope):\n    n = 16\n    A = te.placeholder((n, n), name='A', dtype='float16')\n    BA = tvm.tir.decl_buffer(A.shape, A.dtype, scope='shared', data_alignment=32, offset_factor=256)\n    C = te.compute((n, n), lambda i, j: A[i, j], name='C')\n    BC = tvm.tir.decl_buffer(C.shape, C.dtype, scope=scope, data_alignment=32, offset_factor=256)\n\n    def intrin_func(ins, outs):\n        ib = tvm.tir.ir_builder.create()\n\n        BA = ins[0]\n        BC = o [...]
       ]
     },
     {
diff --git a/docs/_downloads/cd8ac9c09164cc04dd9ecd131c536680/micro_tflite.ipynb b/docs/_downloads/cd8ac9c09164cc04dd9ecd131c536680/micro_tflite.ipynb
new file mode 100644
index 0000000..6937630
--- /dev/null
+++ b/docs/_downloads/cd8ac9c09164cc04dd9ecd131c536680/micro_tflite.ipynb
@@ -0,0 +1,157 @@
+{
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "%matplotlib inline"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "\nMicro TVM with TFLite Models\n============================\n**Author**: `Tom Gall <https://github.com/tom-gall>`_\n\nThis tutorial is an introduction to working with MicroTVM and a TFLite \nmodel with Relay.\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "# %%\n# Setup\n# -----\n#\n# To get started, TFLite package needs to be installed as prerequisite.\n#\n# install tflite\n#\n# .. code-block:: bash\n#\n#   pip install tflite=2.1.0 --user\n#\n# or you could generate TFLite package yourself. The steps are the following:\n#\n#   Get the flatc compiler.\n#   Please refer to https://github.com/google/flatbuffers for details\n#   and make sure it is properly installed.\n#\n# .. code-block:: bash\n#\n#   flatc --version\n#\n# Get the T [...]
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Using the buffer, transform into a tflite model python object\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "try:\n    import tflite\n    tflite_model = tflite.Model.GetRootAsModel(tflite_model_buf, 0)\nexcept AttributeError:\n    import tflite.Model\n    tflite_model = tflite.Model.Model.GetRootAsModel(tflite_model_buf, 0)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Print out the version of the model\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "version = tflite_model.Version()\nprint (\"Model Version: \" + str(version))"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Parse the python model object to convert it into a relay module\nand weights.\nIt is important to note that the input tensor name must match what\nis contained in the model.\n\nIf you are unsure what that might be, this can be discovered by using\nthe visualize.py script within the Tensorflow project.\nSee : How do I inspect a .tflite file? `<https://www.tensorflow.org/lite/guide/faq>`_\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "input_tensor = \"dense_4_input\"\ninput_shape = (1,)\ninput_dtype = \"float32\"\n\nmod, params = relay.frontend.from_tflite(tflite_model,\n                                         shape_dict={input_tensor: input_shape},\n                                         dtype_dict={input_tensor: input_dtype})\n\n# %%\n# Running on device\n# ----------------------------------------------\n#\n# Setup the device config which is what will be used to communicate\n# with the microcontroller (a [...]
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Next with the dev_config, we establish a micro session and create\na context\n\n.. code-block:: python\n\n  with micro.Session(dev_config) as sess:\n      ctx = tvm.micro_dev(0)\n\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Now we create a build config for relay, turning off two options\nand then calling relay.build which will result in a C source\nfile.\n\n.. code-block:: python\n\n  with tvm.transform.PassContext(opt_level=3, config={'tir.disable_vectorize': True},disabled_pass=['FuseOps']):\n      graph, c_mod, params = relay.build(mod, target=TARGET, params=params)\n\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "With the c_mod that is the handle to our C source code, we create a\nmicro module, followed by a compiled object which behind the scenes\nis linked to the microTVM runtime for running on the target board\n\n.. code-block:: python\n\n  micro_mod = micro.create_micro_mod(c_mod, dev_config)\n  mod = graph_runtime.create(graph, micro_mod, ctx)\n\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Pass the weights to get ready to perform inference\n\n.. code-block:: python\n\n  mod.set_input(**params)\n\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "The model consumes a single float32 value and returns a predicted\nsine value.\nTo pass the input value we construct a tvm.nd.array object\nwith a single contrived number as input. For this model values of\n0 to 2Pi are acceptable.\n\n.. code-block:: python\n\n  mod.set_input(input_tensor, tvm.nd.array(np.array([0.5], dtype=\"float32\")))\n\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Run the model on device\n\n.. code-block:: python\n\n  mod.run()\n\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Get output from the run and print\n\n.. code-block:: python\n\n  tvm_output = mod.get_output(0).asnumpy()\n  print(\"result is: \"+str(tvm_output))\n\n"
+      ]
+    }
+  ],
+  "metadata": {
+    "kernelspec": {
+      "display_name": "Python 3",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.6.10"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}
\ No newline at end of file
diff --git a/docs/_downloads/e3748c6e5b8a427385ff1afdf1562c3c/opt_conv_tensorcore.py b/docs/_downloads/e3748c6e5b8a427385ff1afdf1562c3c/opt_conv_tensorcore.py
index cd40a91..4b2823c 100644
--- a/docs/_downloads/e3748c6e5b8a427385ff1afdf1562c3c/opt_conv_tensorcore.py
+++ b/docs/_downloads/e3748c6e5b8a427385ff1afdf1562c3c/opt_conv_tensorcore.py
@@ -163,7 +163,7 @@ def intrin_wmma_load_matrix(scope):
 
         BA = ins[0]
         BC = outs[0]
-        ib.emit(tvm.tir.call_intrin('handle', 'tvm_load_matrix_sync',
+        ib.emit(tvm.tir.call_intrin('handle', 'tir.tvm_load_matrix_sync',
                                 BC.data, n, n, n, BC.elem_offset // 256,
                                 BA.access_ptr('r'), n, 'row_major'))
         return ib.get()
@@ -190,12 +190,12 @@ def intrin_wmma_gemm():
 
         def init():
             ib = tvm.tir.ir_builder.create()
-            ib.emit(tvm.tir.call_intrin('handle', 'tvm_fill_fragment', BC.data, n, n, n, BC.elem_offset // 256, 0.0))
+            ib.emit(tvm.tir.call_intrin('handle', 'tir.tvm_fill_fragment', BC.data, n, n, n, BC.elem_offset // 256, 0.0))
             return ib.get()
 
         def update():
             ib = tvm.tir.ir_builder.create()
-            ib.emit(tvm.tir.call_intrin('handle', 'tvm_mma_sync',
+            ib.emit(tvm.tir.call_intrin('handle', 'tir.tvm_mma_sync',
                                     BC.data, BC.elem_offset // 256,
                                     BA.data, BA.elem_offset // 256,
                                     BB.data, BB.elem_offset // 256,
@@ -218,7 +218,7 @@ def intrin_wmma_store_matrix():
         ib = tvm.tir.ir_builder.create()
         BA = ins[0]
         BC = outs[0]
-        ib.emit(tvm.tir.call_intrin('handle', 'tvm_store_matrix_sync',
+        ib.emit(tvm.tir.call_intrin('handle', 'tir.tvm_store_matrix_sync',
                                 BA.data, n, n, n, BA.elem_offset // 256,
                                 BC.access_ptr('w'), n, 'row_major'))
         return ib.get()
diff --git a/docs/_downloads/fb810a10b128e7d81f5eec1c5908e0f5/intrin_math.py b/docs/_downloads/fb810a10b128e7d81f5eec1c5908e0f5/intrin_math.py
index 146263d..4a4ff96 100644
--- a/docs/_downloads/fb810a10b128e7d81f5eec1c5908e0f5/intrin_math.py
+++ b/docs/_downloads/fb810a10b128e7d81f5eec1c5908e0f5/intrin_math.py
@@ -100,12 +100,15 @@ print(fopencl.imported_modules[0].get_source())
 def my_cuda_math_rule(op):
     """Customized CUDA intrinsic lowering rule"""
     assert isinstance(op, tvm.tir.Call)
+    name = op.op.name
+    assert name.startswith("tir.")
+    dispatch_name = name[4:]
     if op.dtype == "float32":
         # call float function
-        return tvm.tir.call_pure_extern("float32", "%sf" % op.name, op.args[0])
+        return tvm.tir.call_pure_extern("float32", "%sf" % dispatch_name, op.args[0])
     elif op.dtype == "float64":
         # call double function
-        return tvm.tir.call_pure_extern("float32", op.name, op.args[0])
+        return tvm.tir.call_pure_extern("float32", dispatch_name, op.args[0])
     else:
         # cannot do translation, return self.
         return op
@@ -132,7 +135,7 @@ print(fcuda.imported_modules[0].get_source())
 
 def mylog(x):
     """customized log intrinsic function"""
-    return tvm.tir.call_pure_intrin(x.dtype, "mylog", x)
+    return tvm.tir.call_intrin(x.dtype, "tir.mylog", x)
 
 
 def my_cuda_mylog_rule(op):
@@ -144,7 +147,8 @@ def my_cuda_mylog_rule(op):
     else:
         return op
 
-
+# new op registration is triggered by registering an attribute of the op
+tvm.ir.register_op_attr("tir.mylog", "TCallEffectKind", tvm.tir.CallEffectKind.Pure)
 tvm.target.register_intrin_rule("cuda", "mylog", my_cuda_mylog_rule, override=True)
 
 n = te.var("n")
diff --git a/docs/_downloads/fd012fa7b67f4e333acce1d25a8e62bc/micro_tflite.py b/docs/_downloads/fd012fa7b67f4e333acce1d25a8e62bc/micro_tflite.py
new file mode 100644
index 0000000..9838df7
--- /dev/null
+++ b/docs/_downloads/fd012fa7b67f4e333acce1d25a8e62bc/micro_tflite.py
@@ -0,0 +1,218 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Micro TVM with TFLite Models
+============================
+**Author**: `Tom Gall <https://github.com/tom-gall>`_
+
+This tutorial is an introduction to working with MicroTVM and a TFLite 
+model with Relay.
+"""
+
+# %%
+# Setup
+# -----
+#
+# To get started, TFLite package needs to be installed as prerequisite.
+#
+# install tflite
+#
+# .. code-block:: bash
+#
+#   pip install tflite==2.1.0 --user
+#
+# or you could generate TFLite package yourself. The steps are the following:
+#
+#   Get the flatc compiler.
+#   Please refer to https://github.com/google/flatbuffers for details
+#   and make sure it is properly installed.
+#
+# .. code-block:: bash
+#
+#   flatc --version
+#
+# Get the TFLite schema.
+#
+# .. code-block:: bash
+#
+#   wget https://raw.githubusercontent.com/tensorflow/tensorflow/r1.13/tensorflow/lite/schema/schema.fbs
+#
+# Generate TFLite package.
+#
+# .. code-block:: bash
+#
+#   flatc --python schema.fbs
+#
+# Add the current folder (which contains generated tflite module) to PYTHONPATH.
+#
+# .. code-block:: bash
+#
+#   export PYTHONPATH=${PYTHONPATH:+$PYTHONPATH:}$(pwd)
+#
+# To validate that the TFLite package was installed successfully, ``python -c "import tflite"``
+#
+# CMSIS needs to be downloaded and the CMSIS_ST_PATH environment variable set up.
+# This tutorial only supports the STM32F7xx series of boards.
+# Download from : https://www.st.com/en/embedded-software/stm32cubef7.html
+# After you've expanded the zip file:
+#
+# .. code-block:: bash
+#
+#   export CMSIS_ST_PATH=/path/to/STM32Cube_FW_F7_V1.16.0/Drivers/CMSIS
+
+# %%
+# Recreating your own Pre-Trained TFLite model
+# --------------------------------------------
+#
+# The tutorial downloads a pretrained TFLite model. When working with microcontrollers
+# you need to be mindful that these are highly resource-constrained devices; as such, standard
+# models like MobileNet may not fit into their modest memory.
+#
+# For this tutorial, we'll make use of one of the TF Micro example models.
+#
+# If you wish to replicate the training steps see:
+# https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/micro/examples/hello_world/train
+#
+#   .. note::
+#
+#     If you accidentally download the example pretrained model from:
+#     wget https://storage.googleapis.com/download.tensorflow.org/models/tflite/micro/hello_world_2020_04_13.zip
+#     this will fail due to an unimplemented opcode (114)
+
+import os
+import numpy as np
+import tvm
+import tvm.micro as micro
+from tvm.contrib.download import download_testdata
+from tvm.contrib import graph_runtime, util
+from tvm import relay
+
+# %%
+# Load and prepare the Pre-Trained Model
+# --------------------------------------
+#
+# Load the pretrained TFLite model from a file in your current
+# directory into a buffer
+
+model_url = 'https://people.linaro.org/~tom.gall/sine_model.tflite'
+model_file = 'sine_model.tflite'
+model_path = download_testdata(model_url, model_file, module='data')
+
+tflite_model_buf = open(model_path, "rb").read()
+
+######################################################################
+# Using the buffer, transform into a tflite model python object
+try:
+    import tflite
+    tflite_model = tflite.Model.GetRootAsModel(tflite_model_buf, 0)
+except AttributeError:
+    import tflite.Model
+    tflite_model = tflite.Model.Model.GetRootAsModel(tflite_model_buf, 0)
+
+######################################################################
+# Print out the version of the model
+version = tflite_model.Version()
+print ("Model Version: " + str(version))
+
+######################################################################
+# Parse the python model object to convert it into a relay module
+# and weights.
+# It is important to note that the input tensor name must match what
+# is contained in the model.
+#
+# If you are unsure what that might be, this can be discovered by using
+# the visualize.py script within the Tensorflow project.
+# See : How do I inspect a .tflite file? `<https://www.tensorflow.org/lite/guide/faq>`_
+ 
+input_tensor = "dense_4_input"
+input_shape = (1,)
+input_dtype = "float32"
+
+mod, params = relay.frontend.from_tflite(tflite_model,
+                                         shape_dict={input_tensor: input_shape},
+                                         dtype_dict={input_tensor: input_dtype})
+
+# %%
+# Running on device
+# ----------------------------------------------
+#
+# Setup the device config which is what will be used to communicate
+# with the microcontroller (a STM32F746 Discovery board)
+TARGET = 'c -device=micro_dev'
+dev_config = micro.device.arm.stm32f746xx.generate_config("127.0.0.1", 6666)
+
+######################################################################
+# Next with the dev_config, we establish a micro session and create
+# a context
+#
+# .. code-block:: python
+#
+#   with micro.Session(dev_config) as sess:
+#       ctx = tvm.micro_dev(0)
+
+######################################################################
+# Now we create a build config for relay, turning off two options
+# and then calling relay.build which will result in a C source
+# file.
+#
+# .. code-block:: python
+#
+#   with tvm.transform.PassContext(opt_level=3, config={'tir.disable_vectorize': True},disabled_pass=['FuseOps']):
+#       graph, c_mod, params = relay.build(mod, target=TARGET, params=params)
+
+######################################################################
+# With the c_mod that is the handle to our C source code, we create a
+# micro module, followed by a compiled object which behind the scenes
+# is linked to the microTVM runtime for running on the target board
+#
+# .. code-block:: python
+#
+#   micro_mod = micro.create_micro_mod(c_mod, dev_config)
+#   mod = graph_runtime.create(graph, micro_mod, ctx)
+
+######################################################################
+# Pass the weights to get ready to perform inference
+#
+# .. code-block:: python
+#
+#   mod.set_input(**params)
+
+######################################################################
+# The model consumes a single float32 value and returns a predicted
+# sine value.
+# To pass the input value we construct a tvm.nd.array object
+# with a single contrived number as input. For this model values of
+# 0 to 2Pi are acceptable.
+#
+# .. code-block:: python
+#
+#   mod.set_input(input_tensor, tvm.nd.array(np.array([0.5], dtype="float32")))
+
+######################################################################
+# Run the model on device
+#
+# .. code-block:: python
+#
+#   mod.run()
+
+######################################################################
+# Get output from the run and print
+#
+# .. code-block:: python
+#
+#   tvm_output = mod.get_output(0).asnumpy()
+#   print("result is: "+str(tvm_output))
diff --git a/docs/_images/sphx_glr_deploy_sparse_thumb.png b/docs/_images/sphx_glr_deploy_sparse_thumb.png
new file mode 100644
index 0000000..233f8e6
Binary files /dev/null and b/docs/_images/sphx_glr_deploy_sparse_thumb.png differ
diff --git a/docs/_images/sphx_glr_micro_tflite_thumb.png b/docs/_images/sphx_glr_micro_tflite_thumb.png
new file mode 100644
index 0000000..233f8e6
Binary files /dev/null and b/docs/_images/sphx_glr_micro_tflite_thumb.png differ
diff --git a/docs/_sources/api/python/topi.rst.txt b/docs/_sources/api/python/topi.rst.txt
index 53f2f3c..09c3318 100644
--- a/docs/_sources/api/python/topi.rst.txt
+++ b/docs/_sources/api/python/topi.rst.txt
@@ -104,6 +104,7 @@ List of operators
    topi.logical_not
    topi.logical_xor
    topi.arange
+   topi.meshgrid
    topi.stack
    topi.repeat
    topi.tile
@@ -187,6 +188,7 @@ topi
 .. autofunction:: topi.greater
 .. autofunction:: topi.less
 .. autofunction:: topi.arange
+.. autofunction:: topi.meshgrid
 .. autofunction:: topi.stack
 .. autofunction:: topi.repeat
 .. autofunction:: topi.tile
diff --git a/docs/_sources/contribute/code_review.rst.txt b/docs/_sources/contribute/code_review.rst.txt
index f6f2136..9b2ed0c 100644
--- a/docs/_sources/contribute/code_review.rst.txt
+++ b/docs/_sources/contribute/code_review.rst.txt
@@ -22,28 +22,28 @@ Perform Code Reviews
 
 This is a general guideline for code reviewers. First of all, while it is great to add new features to a project, we must also be aware that each line of code we introduce also brings **technical debt** that we may have to eventually pay.
 
-Open source code is maintained by a community with diverse backend, and it is even more important to bring clear, documented and maintainable code. Code reviews are shepherding process to spot potential problems, improve quality of the code. We should, however, not rely on code review process to get the code into a ready state. Contributors are encouraged to polish the code to a ready state before requesting reviews. This is especially expected for code owner and committer candidates.
+Open source code is maintained by a community with diverse background, and hence it is even more important to provide clear, documented and maintainable code. Code reviews are a shepherding process to spot potential problems, improve quality of the code. We should, however, not rely on the code review process to get the code into a ready state. Contributors are encouraged to polish the code to a ready state before requesting reviews. This is especially expected for code owner and committ [...]
 
-Here are some checklists for code reviews, it is also helpful reference for contributors
+Here are some checklists for code reviews, which are also a helpful reference for contributors.
 
 
 
 Hold the Highest Standard
 -------------------------
-The first rule for code reviewers is to always keep the highest standard, and do not approve code just to "be friendly". Good, informative critics each other learn and prevents technical debt in early stages.
+The first rule for code reviewers is to always keep the highest standard, and do not approve code just to "be friendly". Good, informative critiques help each other learn and prevent technical debt in early stages.
 
 Deliberate on API and Data Structures
 -------------------------------------
 A minimum and stable API is critical to the project’s life. A good API makes a huge difference. Always think very carefully about all the aspects including naming, argument definitions and behavior.
 
-When possible, pay more time and thoughts into the API design during code reviews.
-Remember, it is easier to improve code implementation, but it is extremely hard to change an API.
-We should do the same for data structures that are shared across modules(e.g. AST).
-When uncertain, start a conversation with more developers.
+When possible, pay more attention still to the proposed API design during code reviews.
+Remember, it is easier to improve code implementation, but it is extremely hard to change an API once accepted.
+We should treat data structures that are shared across modules(e.g. AST) in the same way.
+If/when uncertain, start a conversation with more developers before committing.
 
 Here are some useful principles for designing APIs:
 
-- Be consistent with existing well-known package’s APIs if the feature overlap.
+- Be consistent with existing well-known package’s APIs if the features overlap.
   For example, tensor operation APIs should always be consistent with the numpy API.
 - Be consistent with existing APIs in the same project.
   For example, we should use the same argument ordering across all the optimization passes,
@@ -51,49 +51,50 @@ Here are some useful principles for designing APIs:
 - Think about whether the API will change in the future.
   For example, we will have more options like loop_unrolling and device placement policy
   as we add more optimizations in build. We can package optimization knobs into a build
-  configuration object. So that the build API is stable over time.
-- Write down documents. Documents are mandatory for APIs and sometimes writing documents helps
-  us to think about whether we need clarification.
+  configuration object. In this way, the build API is stable over time, even though it may be enriched.
+- Write documentation. Documentation is mandatory for APIs and sometimes writing documents helps
+  us to think further about the design as well as whether we need to add further clarifications.
 - Minimum. Think about how many lines of code a user has to write to use the API.
   Remove layers of abstraction when possible.
 
 
 Ensure Test Coverage
 --------------------
-Each new change of features should introduce test cases, bug fixes should include regression tests that prevent the problem from happening again.
+Each new change of features should introduce test cases.
+Bug fixes should include regression tests that prevent the problem from happening again.
 
-Documentations are Mandatory
-----------------------------
-Documentation is usually a place we overlooked, new functions or change to a function should be directly updated in documents. A new feature is meaningless without documentation to make it accessible. See more at :ref:`doc_guide`
+Documentation is Mandatory
+---------------------------
+Documentation is often overlooked. When adding new functions or changing an existing function, the documentation should be directly updated. A new feature is meaningless without documentation to make it accessible. See more at :ref:`doc_guide`
 
 Minimum Dependency
 ------------------
-Always be cautious in introducing dependencies. While it is important to reuse code and not reinventing the wheel, dependencies can increase burden of users in deployment. A good design principle only depends on the part when a user actually use it.
+Always be cautious in introducing dependencies. While it is important to reuse code and avoid reinventing the wheel, dependencies can increase the burden of users in deployment. A good design principle is that a feature or function should only have a dependency if/when a user actually uses it.
 
 Ensure Readability
 ------------------
-While it is hard to implement a new feature, it is even harder to make others understand and maintain the code you wrote. It is common for a PMC or committer to not being able to understand certain contributions. In such case, a reviewer should say "I don’t understand" and ask the contributor to clarify. We highly encourage code comments which explain the code logic along with the code.
+While it is hard to implement a new feature, it is even harder to make others understand and maintain the code you wrote. It is common for a PMC or committer to not be
+able to understand certain contributions. In such case, a reviewer should say "I don’t understand" and ask the contributor to clarify. We highly encourage code comments which explain the code logic along with the code.
 
 Concise Implementation
 ----------------------
-Some basic principles applied here: favor vectorized array code over loops, is there existing API that solves the problem.
+Some basic principles applied here: favor vectorized array code over loops, use existing APIs that solve the problem.
 
 Document Lessons in Code Reviews
 --------------------------------
-When you find there are some common lessons that can be summarized in the guideline,
+When you find there are some common or recurring lessons that can be summarized,
 add it to the :ref:`code_guide`.
 It is always good to refer to the guideline document when requesting changes,
 so the lessons can be shared to all the community.
 
 Respect each other
 ------------------
-The code reviewers and contributors are paying the most precious currencies in the world -- time. We are volunteers in the community to spend the time to build good code, help each other, learn and have fun hacking.
+The code reviewers and contributors are paying the most precious currency in the world -- time. We are volunteers in the community to spend the time to build good code, help each other, learn and have fun hacking.
 
 Learn from other Code Reviews
 -----------------------------
-There can be multiple reviewers reviewing the same changes. Many cases other reviewers
-may spot things you did not find. Try to learn from other code reviews,
-when possible, document these lessons.
+There can be multiple reviewers reviewing the same changes. Many times other reviewers
+may spot things you did not find. Try to learn from other code reviews and, when possible, document these lessons.
 
 Approve and Request Changes Explicitly
 --------------------------------------
diff --git a/docs/_sources/contribute/release_process.rst.txt b/docs/_sources/contribute/release_process.rst.txt
index ceead3d..9fca35a 100644
--- a/docs/_sources/contribute/release_process.rst.txt
+++ b/docs/_sources/contribute/release_process.rst.txt
@@ -17,8 +17,8 @@
 
 .. _release_process:
 
-Apache TVM (incubator) Release Process
-======================================
+Apache TVM (incubating) Release Process
+=======================================
 
 The release manager role in TVM means you are responsible for a few different things:
 
@@ -57,7 +57,7 @@ You can skip this section if you have already uploaded your key.
 
 After generating the gpg key, you need to upload your key to a public key server. Please refer to https://www.apache.org/dev/openpgp.html#generate-key for details.
 
-If you want to do the release on another machine, you can transfer your gpg key to that machine via the gpg --export and gpg --import commands.
+If you want to do the release on another machine, you can transfer your gpg key to that machine via the :code:`gpg --export` and :code:`gpg --import` commands.
 
 The last step is to update the KEYS file with your code signing key https://www.apache.org/dev/openpgp.html#export-public-key. Check in the changes to the master branch.
 
@@ -80,6 +80,7 @@ Go to the GitHub repositories "releases" tab and click "Draft a new release",
 - Select the commit by clicking Target: branch > Recent commits > $commit_hash 
 - Copy and paste release note draft into the description box
 - Select "This is a pre-release"
+- Click "Publish release"
 
 Notice that one can still apply changes to the BRANCH after the cut, while the TAG is fixed. If any change is required for this release, a new TAG has to be created.
 
@@ -94,11 +95,13 @@ Create source code artifacts,
 .. code-block:: bash
 
 	git clone git@github.com:apache/incubator-tvm.git apache-tvm-src-v0.6.0.rc0-incubating
+	cd apache-tvm-src-v0.6.0.rc0-incubating
 	git checkout v0.6
 	git submodule update --init --recursive
 	git checkout v0.6.0.rc0
 	rm -rf .DS_Store
 	find . -name ".git*" -print0 | xargs -0 rm -rf
+	cd ..
 	brew install gnu-tar 
 	gtar -czvf apache-tvm-src-v0.6.0.rc0-incubating.tar.gz apache-tvm-src-v0.6.0.rc0-incubating
 
diff --git a/docs/_sources/dev/inferbound.rst.txt b/docs/_sources/dev/inferbound.rst.txt
index 6520732..63954ac 100644
--- a/docs/_sources/dev/inferbound.rst.txt
+++ b/docs/_sources/dev/inferbound.rst.txt
@@ -181,8 +181,8 @@ The Ranges of the inner and outer IterVars of the split are set based on the par
 
 .. code:: cpp
 
-   rmap[split->inner] = Range::make_by_min_extent(0, split->factor)
-   rmap[split->outer] = Range::make_by_min_extent(0, DivCeil(rmap[split->parent]->extent, split->factor))
+   rmap[split->inner] = Range::FromMinExtent(0, split->factor)
+   rmap[split->outer] = Range::FromMinExtent(0, DivCeil(rmap[split->parent]->extent, split->factor))
 
 There is an opportunity here to tighten the bounds produced by InferBound, when ``split->factor`` does not evenly divide the parent's extent. Suppose the parent's extent is 20, and the split factor is 16. Then on the second iteration of the outer loop, the inner loop only needs to perform 4 iterations, not 16. If PassDownDomain could set the extent of ``split->inner`` to ``min(split->factor, rmap[split->parent]->extent - (split->outer * split->factor))``, then the extent of the inner var [...]
 
@@ -190,7 +190,7 @@ For Fuse relations, the Range of the fused IterVar is set based on the known Ran
 
 .. code:: cpp
 
-   rmap[fuse->fused] = Range::make_by_min_extent(0, rmap[fuse->outer]->extent * rmap[fuse->inner]->extent)
+   rmap[fuse->fused] = Range::FromMinExtent(0, rmap[fuse->outer]->extent * rmap[fuse->inner]->extent)
 
 
 InferRootBound
diff --git a/docs/_sources/frontend/tensorflow.rst.txt b/docs/_sources/frontend/tensorflow.rst.txt
index a158db9..bca0fc1 100644
--- a/docs/_sources/frontend/tensorflow.rst.txt
+++ b/docs/_sources/frontend/tensorflow.rst.txt
@@ -79,6 +79,10 @@ The model should be exported with a number of transformations to prepare the mod
                 [
                     "remove_nodes(op=Identity, op=CheckNumerics, op=StopGradient)",
                     "sort_by_execution_order", # sort by execution order after each transform to ensure correct node ordering
+                    "remove_attribute(attribute_name=_XlaSeparateCompiledGradients)",
+                    "remove_attribute(attribute_name=_XlaCompile)",
+                    "remove_attribute(attribute_name=_XlaScope)",
+                    "sort_by_execution_order",
                     "remove_device",
                     "sort_by_execution_order",
                     "fold_batch_norms",
diff --git a/docs/_sources/langref/relay_op.rst.txt b/docs/_sources/langref/relay_op.rst.txt
index 86e0c0d..febe542 100644
--- a/docs/_sources/langref/relay_op.rst.txt
+++ b/docs/_sources/langref/relay_op.rst.txt
@@ -128,6 +128,7 @@ This level enables additional math and transform operators.
    tvm.relay.reinterpret
    tvm.relay.split
    tvm.relay.arange
+   tvm.relay.meshgrid
    tvm.relay.stack
    tvm.relay.repeat
    tvm.relay.tile
diff --git a/docs/_sources/langref/relay_pattern.rst.txt b/docs/_sources/langref/relay_pattern.rst.txt
index 962dcc6..6cacff2 100644
--- a/docs/_sources/langref/relay_pattern.rst.txt
+++ b/docs/_sources/langref/relay_pattern.rst.txt
@@ -80,6 +80,7 @@ Here is another example to match an op with a specific attribute:
         y = relay.var('y')
         assert not is_conv2d.match(relay.op.nn.conv2d(x, y))
 
+
 Matching an Optional Op
 ***********************
 
@@ -102,6 +103,36 @@ we can match the graph of conv2d+bias_add+relu or the graph of conv2d+bias_add.
         relu = relay.op.nn.relu(bias)
         assert pat.match(relu)
 
+
+Matching Types
+**************
+
+In addition to matching ops with attributes, we can also make a pattern to match their types, in terms of the shape and data type. Here are some examples:
+
+.. code-block:: python
+
+    def test_match_type():
+        # Match any op with float32
+        pat1 = has_dtype('float32')
+        x = relay.var('x', shape=(10, 10), dtype='float32')
+        assert pat1.match(x)
+
+        # Match any op with shape (10, 10)
+        pat2 = has_shape((10, 10))
+        x = relay.var('x', shape=(10, 10), dtype='float32')
+        assert pat2.match(x)
+
+        # Match conv2d+relu with a certain shape
+        conv2d = is_op('nn.conv2d')(wildcard(), wildcard())
+        pat3 = is_op('nn.relu')(conv2d).has_shape((1, 32, 28, 28))
+
+        x = relay.var('x', shape=(1, 3, 28, 28), dtype='float32')
+        w = relay.var('w', shape=(32, 3, 3, 3), dtype='float32')
+        conv2d = relay.nn.conv2d(x, w, strides=(1, 1), padding=(1, 1))
+        relu = relay.nn.relu(conv2d)
+        assert pat3.match(relu)
+
+
 Matching Non-Call Nodes
 ***********************
 
@@ -117,7 +148,7 @@ Since there are not call nodes, we need to use specific pattern nodes to match t
         tuple_pattern = is_tuple((wildcard(), wildcard(), wildcard()))
         assert tuple_pattern.match(relay.expr.Tuple((x,y,z)))
 
-The next example is matching a pattern of batch_norm -> get(0) -> relu:
+The next example is matching a pattern of batch_norm -> get(0) -> relu. Note that you can also use ``is_tuple_get_item(bn_node)`` to match a ``TupleGetItem`` node with any index.
 
 .. code-block:: python
 
@@ -186,6 +217,7 @@ The next example is matching function nodes with a specific attribute:
         f = relay.Function([x, y], x + y).with_attr("Composite", "add")
         assert pattern.match(f)
 
+
 Matching Diamonds and Post-Dominator Graphs
 *******************************************
 
@@ -228,6 +260,7 @@ The final example is matching diamonds with a post-dominator relationship. We em
         # Check
         assert diamond.match(out)
 
+
 Pattern Language Design
 =======================
 
@@ -238,14 +271,16 @@ The high level design is to introduce a language of patterns for now we propose
     Pattern ::= expr
             | *
             | pattern(pattern1, ... patternN)
-            | has_type(pattern, type)
-            | has_attr(pattern, attrs)
+            | has_type(type)
+            | has_dtype(type)
+            | has_shape(shape)
+            | has_attr(attrs)
             | is_var(name)
             | is_constant()
             | is_expr(expr)
             | is_op(op_name)
             | is_tuple()
-            | is_tuple_get_item()
+            | is_tuple_get_item(pattern, index = None)
             | pattern1 `|` pattern2
             | dominates(parent_pattern, path_pattern, child_pattern)
 
@@ -266,6 +301,16 @@ Type Pattern
 
 Check that the expression matched by the nested pattern has a particular type.
 
+DType Pattern
+*************
+
+Check that the expression matched by the nested pattern has a particular data type.
+
+Shape Pattern
+*************
+
+Check that the expression matched by the nested pattern has a particular output shape.
+
 Attribute Pattern
 *****************
 
diff --git a/docs/_sources/tutorials/autotvm/sg_execution_times.rst.txt b/docs/_sources/tutorials/autotvm/sg_execution_times.rst.txt
index bba1d45..09f15f8 100644
--- a/docs/_sources/tutorials/autotvm/sg_execution_times.rst.txt
+++ b/docs/_sources/tutorials/autotvm/sg_execution_times.rst.txt
@@ -5,11 +5,11 @@
 
 Computation times
 =================
-**00:53.012** total execution time for **tutorials_autotvm** files:
-
-- **00:28.544**: :ref:`sphx_glr_tutorials_autotvm_tune_simple_template.py` (``tune_simple_template.py``)
-- **00:23.866**: :ref:`sphx_glr_tutorials_autotvm_tune_conv2d_cuda.py` (``tune_conv2d_cuda.py``)
-- **00:00.169**: :ref:`sphx_glr_tutorials_autotvm_tune_relay_x86.py` (``tune_relay_x86.py``)
-- **00:00.147**: :ref:`sphx_glr_tutorials_autotvm_tune_relay_arm.py` (``tune_relay_arm.py``)
-- **00:00.144**: :ref:`sphx_glr_tutorials_autotvm_tune_relay_cuda.py` (``tune_relay_cuda.py``)
-- **00:00.143**: :ref:`sphx_glr_tutorials_autotvm_tune_relay_mobile_gpu.py` (``tune_relay_mobile_gpu.py``)
+**00:47.023** total execution time for **tutorials_autotvm** files:
+
+- **00:26.532**: :ref:`sphx_glr_tutorials_autotvm_tune_simple_template.py` (``tune_simple_template.py``)
+- **00:19.876**: :ref:`sphx_glr_tutorials_autotvm_tune_conv2d_cuda.py` (``tune_conv2d_cuda.py``)
+- **00:00.180**: :ref:`sphx_glr_tutorials_autotvm_tune_relay_x86.py` (``tune_relay_x86.py``)
+- **00:00.148**: :ref:`sphx_glr_tutorials_autotvm_tune_relay_mobile_gpu.py` (``tune_relay_mobile_gpu.py``)
+- **00:00.145**: :ref:`sphx_glr_tutorials_autotvm_tune_relay_cuda.py` (``tune_relay_cuda.py``)
+- **00:00.142**: :ref:`sphx_glr_tutorials_autotvm_tune_relay_arm.py` (``tune_relay_arm.py``)
diff --git a/docs/_sources/tutorials/autotvm/tune_conv2d_cuda.rst.txt b/docs/_sources/tutorials/autotvm/tune_conv2d_cuda.rst.txt
index 635e539..5bdc8b8 100644
--- a/docs/_sources/tutorials/autotvm/tune_conv2d_cuda.rst.txt
+++ b/docs/_sources/tutorials/autotvm/tune_conv2d_cuda.rst.txt
@@ -234,26 +234,26 @@ for this template
        7 unroll_explicit: OtherOption([0, 1]) len=2
     )
     Get devices for measurement successfully!
-    No: 1   GFLOPS: 0.00/0.00       result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f259437cd11]\n  [bt] (3) /workspace/build/libtvm.so(+0x4cc4d4) [0x7f25939fe4d4]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const+0x3e6) [0x7f25939fdf76]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::PrimFunc [...]
-    No: 2   GFLOPS: 0.00/0.00       result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f259437cd11]\n  [bt] (3) /workspace/build/libtvm.so(+0x4cc4d4) [0x7f25939fe4d4]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const+0x3e6) [0x7f25939fdf76]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::PrimFunc [...]
-    No: 3   GFLOPS: 0.00/0.00       result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f259437cd11]\n  [bt] (3) /workspace/build/libtvm.so(+0x4cc4d4) [0x7f25939fe4d4]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const+0x3e6) [0x7f25939fdf76]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::PrimFunc [...]
-    No: 4   GFLOPS: 0.00/0.00       result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f259437cd11]\n  [bt] (3) /workspace/build/libtvm.so(+0x4cc4d4) [0x7f25939fe4d4]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const+0x3e6) [0x7f25939fdf76]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::PrimFunc [...]
-    No: 5   GFLOPS: 0.00/0.00       result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f259437cd11]\n  [bt] (3) /workspace/build/libtvm.so(+0x4cc4d4) [0x7f25939fe4d4]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const+0x3e6) [0x7f25939fdf76]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::PrimFunc [...]
-    No: 6   GFLOPS: 0.00/0.00       result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f259437cd11]\n  [bt] (3) /workspace/build/libtvm.so(+0x4cc4d4) [0x7f25939fe4d4]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const+0x3e6) [0x7f25939fdf76]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::PrimFunc [...]
-    No: 7   GFLOPS: 0.00/0.00       result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f259437cd11]\n  [bt] (3) /workspace/build/libtvm.so(+0x4cc4d4) [0x7f25939fe4d4]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const+0x3e6) [0x7f25939fdf76]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::PrimFunc [...]
-    No: 8   GFLOPS: 0.00/0.00       result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f259437cd11]\n  [bt] (3) /workspace/build/libtvm.so(+0x4cc4d4) [0x7f25939fe4d4]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const+0x3e6) [0x7f25939fdf76]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::PrimFunc [...]
-    No: 9   GFLOPS: 0.00/0.00       result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f259437cd11]\n  [bt] (3) /workspace/build/libtvm.so(+0x4cc4d4) [0x7f25939fe4d4]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const+0x3e6) [0x7f25939fdf76]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::PrimFunc [...]
-    No: 10  GFLOPS: 0.00/0.00       result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f259437cd11]\n  [bt] (3) /workspace/build/libtvm.so(+0x4cc4d4) [0x7f25939fe4d4]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const+0x3e6) [0x7f25939fdf76]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::PrimFunc [...]
-    No: 11  GFLOPS: 0.00/0.00       result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f259437cd11]\n  [bt] (3) /workspace/build/libtvm.so(+0x4cc4d4) [0x7f25939fe4d4]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const+0x3e6) [0x7f25939fdf76]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::PrimFunc [...]
-    No: 12  GFLOPS: 63.43/63.43     result: MeasureResult(costs=(0.003649818875,), error_no=0, all_cost=1.636338472366333, timestamp=1592418257.054786)     [('tile_f', [-1, 1, 1, 1]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 16, 1]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 512), ('unroll_explicit', 1)],None,8533140
-    No: 13  GFLOPS: 0.00/63.43      result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f259437cd11]\n  [bt] (3) /workspace/build/libtvm.so(+0x4cc4d4) [0x7f25939fe4d4]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const+0x3e6) [0x7f25939fdf76]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::PrimFunc [...]
-    No: 14  GFLOPS: 0.00/63.43      result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f259437cd11]\n  [bt] (3) /workspace/build/libtvm.so(+0x4cc4d4) [0x7f25939fe4d4]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const+0x3e6) [0x7f25939fdf76]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::PrimFunc [...]
-    No: 15  GFLOPS: 0.00/63.43      result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f259437cd11]\n  [bt] (3) /workspace/build/libtvm.so(+0x4cc4d4) [0x7f25939fe4d4]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const+0x3e6) [0x7f25939fdf76]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::PrimFunc [...]
-    No: 16  GFLOPS: 0.00/63.43      result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f259437cd11]\n  [bt] (3) /workspace/build/libtvm.so(+0x4cc4d4) [0x7f25939fe4d4]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const+0x3e6) [0x7f25939fdf76]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::PrimFunc [...]
-    No: 17  GFLOPS: 0.00/63.43      result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f259437cd11]\n  [bt] (3) /workspace/build/libtvm.so(+0x4cc4d4) [0x7f25939fe4d4]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const+0x3e6) [0x7f25939fdf76]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::PrimFunc [...]
-    No: 18  GFLOPS: 0.00/63.43      result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f259437cd11]\n  [bt] (3) /workspace/build/libtvm.so(+0x4cc4d4) [0x7f25939fe4d4]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const+0x3e6) [0x7f25939fdf76]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::PrimFunc [...]
-    No: 19  GFLOPS: 777.37/777.37   result: MeasureResult(costs=(0.00029780138189845474,), error_no=0, all_cost=2.0230767726898193, timestamp=1592418261.583229)    [('tile_f', [-1, 2, 1, 1]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 32, 1]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,9699801
-    No: 20  GFLOPS: 6.95/777.37     result: MeasureResult(costs=(0.03328762325,), error_no=0, all_cost=1.7611923217773438, timestamp=1592418262.514915)     [('tile_f', [-1, 2, 1, 16]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 2, 32]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 1)],None,7502165
+    No: 1   GFLOPS: 0.00/0.00       result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f16252121a1]\n  [bt] (3) /workspace/build/libtvm.so(+0x4ed7d7) [0x7f16248497d7]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const+0x3e6) [0x7f1624849276]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::PrimFunc [...]
+    No: 2   GFLOPS: 0.00/0.00       result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f16252121a1]\n  [bt] (3) /workspace/build/libtvm.so(+0x4ed7d7) [0x7f16248497d7]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const+0x3e6) [0x7f1624849276]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::PrimFunc [...]
+    No: 3   GFLOPS: 0.00/0.00       result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f16252121a1]\n  [bt] (3) /workspace/build/libtvm.so(+0x4ed7d7) [0x7f16248497d7]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const+0x3e6) [0x7f1624849276]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::PrimFunc [...]
+    No: 4   GFLOPS: 0.00/0.00       result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f16252121a1]\n  [bt] (3) /workspace/build/libtvm.so(+0x4ed7d7) [0x7f16248497d7]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const+0x3e6) [0x7f1624849276]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::PrimFunc [...]
+    No: 5   GFLOPS: 0.00/0.00       result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f16252121a1]\n  [bt] (3) /workspace/build/libtvm.so(+0x4ed7d7) [0x7f16248497d7]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const+0x3e6) [0x7f1624849276]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::PrimFunc [...]
+    No: 6   GFLOPS: 0.00/0.00       result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f16252121a1]\n  [bt] (3) /workspace/build/libtvm.so(+0x4ed7d7) [0x7f16248497d7]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const+0x3e6) [0x7f1624849276]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::PrimFunc [...]
+    No: 7   GFLOPS: 0.00/0.00       result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f16252121a1]\n  [bt] (3) /workspace/build/libtvm.so(+0x4ed7d7) [0x7f16248497d7]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const+0x3e6) [0x7f1624849276]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::PrimFunc [...]
+    No: 8   GFLOPS: 0.00/0.00       result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f16252121a1]\n  [bt] (3) /workspace/build/libtvm.so(+0x4ed7d7) [0x7f16248497d7]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const+0x3e6) [0x7f1624849276]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::PrimFunc [...]
+    No: 9   GFLOPS: 0.00/0.00       result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f16252121a1]\n  [bt] (3) /workspace/build/libtvm.so(+0x4ed7d7) [0x7f16248497d7]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const+0x3e6) [0x7f1624849276]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::PrimFunc [...]
+    No: 10  GFLOPS: 0.00/0.00       result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f16252121a1]\n  [bt] (3) /workspace/build/libtvm.so(+0x4ed7d7) [0x7f16248497d7]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const+0x3e6) [0x7f1624849276]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::PrimFunc [...]
+    No: 11  GFLOPS: 0.00/0.00       result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f16252121a1]\n  [bt] (3) /workspace/build/libtvm.so(+0x4ed7d7) [0x7f16248497d7]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const+0x3e6) [0x7f1624849276]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::PrimFunc [...]
+    No: 12  GFLOPS: 71.32/71.32     result: MeasureResult(costs=(0.0032459606750000003,), error_no=0, all_cost=1.6292922496795654, timestamp=1593745237.4247222)    [('tile_f', [-1, 1, 1, 1]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 16, 1]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 512), ('unroll_explicit', 1)],None,8533140
+    No: 13  GFLOPS: 0.00/71.32      result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f16252121a1]\n  [bt] (3) /workspace/build/libtvm.so(+0x4ed7d7) [0x7f16248497d7]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const+0x3e6) [0x7f1624849276]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::PrimFunc [...]
+    No: 14  GFLOPS: 0.00/71.32      result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f16252121a1]\n  [bt] (3) /workspace/build/libtvm.so(+0x4ed7d7) [0x7f16248497d7]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const+0x3e6) [0x7f1624849276]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::PrimFunc [...]
+    No: 15  GFLOPS: 0.00/71.32      result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f16252121a1]\n  [bt] (3) /workspace/build/libtvm.so(+0x4ed7d7) [0x7f16248497d7]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const+0x3e6) [0x7f1624849276]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::PrimFunc [...]
+    No: 16  GFLOPS: 0.00/71.32      result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f16252121a1]\n  [bt] (3) /workspace/build/libtvm.so(+0x4ed7d7) [0x7f16248497d7]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const+0x3e6) [0x7f1624849276]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::PrimFunc [...]
+    No: 17  GFLOPS: 0.00/71.32      result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f16252121a1]\n  [bt] (3) /workspace/build/libtvm.so(+0x4ed7d7) [0x7f16248497d7]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const+0x3e6) [0x7f1624849276]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::PrimFunc [...]
+    No: 18  GFLOPS: 0.00/71.32      result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f16252121a1]\n  [bt] (3) /workspace/build/libtvm.so(+0x4ed7d7) [0x7f16248497d7]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const+0x3e6) [0x7f1624849276]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::PrimFunc [...]
+    No: 19  GFLOPS: 793.41/793.41   result: MeasureResult(costs=(0.0002917792207505519,), error_no=0, all_cost=2.0187714099884033, timestamp=1593745241.1444745)    [('tile_f', [-1, 2, 1, 1]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 32, 1]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,9699801
+    No: 20  GFLOPS: 6.99/793.41     result: MeasureResult(costs=(0.033099542,), error_no=0, all_cost=1.5997724533081055, timestamp=1593745241.9657192)      [('tile_f', [-1, 2, 1, 16]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 2, 32]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 1)],None,7502165
 
 
 
@@ -307,7 +307,7 @@ and measure running time.
 
     Best config:
     [('tile_f', [-1, 2, 1, 1]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 32, 1]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,9699801
-    Time cost of this operator: 0.000389
+    Time cost of this operator: 0.000355
 
 
 
diff --git a/docs/_sources/tutorials/autotvm/tune_simple_template.rst.txt b/docs/_sources/tutorials/autotvm/tune_simple_template.rst.txt
index eecf258..cba0e45 100644
--- a/docs/_sources/tutorials/autotvm/tune_simple_template.rst.txt
+++ b/docs/_sources/tutorials/autotvm/tune_simple_template.rst.txt
@@ -361,16 +361,16 @@ used to get the best config later.
  .. code-block:: none
 
     Get devices for measurement successfully!
-    No: 1   GFLOPS: 6.66/6.66       result: MeasureResult(costs=(0.040317947199999996,), error_no=0, all_cost=0.9689126014709473, timestamp=1592418223.5055163)     [('tile_y', [-1, 8]), ('tile_x', [-1, 8])],None,33
-    No: 2   GFLOPS: 2.20/6.66       result: MeasureResult(costs=(0.1219625674,), error_no=0, all_cost=2.2740604877471924, timestamp=1592418225.8873503)     [('tile_y', [-1, 8]), ('tile_x', [-1, 2])],None,13
-    No: 3   GFLOPS: 7.04/7.04       result: MeasureResult(costs=(0.038118197799999996,), error_no=0, all_cost=1.473351001739502, timestamp=1592418226.9196477)      [('tile_y', [-1, 16]), ('tile_x', [-1, 128])],None,74
-    No: 4   GFLOPS: 4.35/7.04       result: MeasureResult(costs=(0.061688380200000004,), error_no=0, all_cost=1.7359662055969238, timestamp=1592418228.323466)      [('tile_y', [-1, 16]), ('tile_x', [-1, 32])],None,54
-    No: 5   GFLOPS: 5.80/7.04       result: MeasureResult(costs=(0.046256228999999996,), error_no=0, all_cost=1.5193562507629395, timestamp=1592418229.4731617)     [('tile_y', [-1, 128]), ('tile_x', [-1, 8])],None,37
-    No: 6   GFLOPS: 6.15/7.04       result: MeasureResult(costs=(0.0436479732,), error_no=0, all_cost=1.2018964290618896, timestamp=1592418230.6016724)     [('tile_y', [-1, 64]), ('tile_x', [-1, 8])],None,36
-    No: 7   GFLOPS: 25.13/25.13     result: MeasureResult(costs=(0.0106810936,), error_no=0, all_cost=0.6128425598144531, timestamp=1592418231.2234533)     [('tile_y', [-1, 1]), ('tile_x', [-1, 128])],None,70
-    No: 8   GFLOPS: 21.21/25.13     result: MeasureResult(costs=(0.012655053199999999,), error_no=0, all_cost=0.5169622898101807, timestamp=1592418231.8622746)     [('tile_y', [-1, 4]), ('tile_x', [-1, 512])],None,92
-    No: 9   GFLOPS: 0.84/25.13      result: MeasureResult(costs=(0.3187542196,), error_no=0, all_cost=5.419588088989258, timestamp=1592418237.4022605)      [('tile_y', [-1, 512]), ('tile_x', [-1, 2])],None,19
-    No: 10  GFLOPS: 0.00/25.13      result: MeasureResult(costs=(RuntimeError('Traceback (most recent call last):\n  [bt] (5) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f259437cd11]\n  [bt] (4) /workspace/build/libtvm.so(+0xe791f2) [0x7f25943ab1f2]\n  [bt] (3) /workspace/build/libtvm.so(tvm::runtime::RPCWrappedFunc::operator()(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const+0x26b) [0x7f25943ae56b]\n  [bt] (2) /workspace/build/libtvm.so(tvm::runtime::RPCClientSession::CallF [...]
+    No: 1   GFLOPS: 6.78/6.78       result: MeasureResult(costs=(0.039591604,), error_no=0, all_cost=1.0488479137420654, timestamp=1593745207.5358753)      [('tile_y', [-1, 8]), ('tile_x', [-1, 8])],None,33
+    No: 2   GFLOPS: 2.24/6.78       result: MeasureResult(costs=(0.1196258708,), error_no=0, all_cost=2.235098361968994, timestamp=1593745209.7621841)      [('tile_y', [-1, 8]), ('tile_x', [-1, 2])],None,13
+    No: 3   GFLOPS: 7.30/7.30       result: MeasureResult(costs=(0.0367613626,), error_no=0, all_cost=1.3152332305908203, timestamp=1593745210.7636383)     [('tile_y', [-1, 16]), ('tile_x', [-1, 128])],None,74
+    No: 4   GFLOPS: 4.46/7.30       result: MeasureResult(costs=(0.060166502600000005,), error_no=0, all_cost=1.3689393997192383, timestamp=1593745212.0622144)     [('tile_y', [-1, 16]), ('tile_x', [-1, 32])],None,54
+    No: 5   GFLOPS: 5.89/7.30       result: MeasureResult(costs=(0.0455629548,), error_no=0, all_cost=1.023970127105713, timestamp=1593745213.1131737)      [('tile_y', [-1, 128]), ('tile_x', [-1, 8])],None,37
+    No: 6   GFLOPS: 6.05/7.30       result: MeasureResult(costs=(0.0443622662,), error_no=0, all_cost=1.5819377899169922, timestamp=1593745214.1444423)     [('tile_y', [-1, 64]), ('tile_x', [-1, 8])],None,36
+    No: 7   GFLOPS: 25.76/25.76     result: MeasureResult(costs=(0.010421797600000001,), error_no=0, all_cost=0.6426005363464355, timestamp=1593745214.6381452)     [('tile_y', [-1, 1]), ('tile_x', [-1, 128])],None,70
+    No: 8   GFLOPS: 21.44/25.76     result: MeasureResult(costs=(0.0125213934,), error_no=0, all_cost=0.611748218536377, timestamp=1593745215.1676252)      [('tile_y', [-1, 4]), ('tile_x', [-1, 512])],None,92
+    No: 9   GFLOPS: 0.88/25.76      result: MeasureResult(costs=(0.3052182662,), error_no=0, all_cost=5.2150187492370605, timestamp=1593745220.3343065)     [('tile_y', [-1, 512]), ('tile_x', [-1, 2])],None,19
+    No: 10  GFLOPS: 0.00/25.76      result: MeasureResult(costs=(RuntimeError('Traceback (most recent call last):\n  [bt] (5) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f16252121a1]\n  [bt] (4) /workspace/build/libtvm.so(+0xee55a2) [0x7f16252415a2]\n  [bt] (3) /workspace/build/libtvm.so(tvm::runtime::RPCWrappedFunc::operator()(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const+0x26b) [0x7f162524491b]\n  [bt] (2) /workspace/build/libtvm.so(tvm::runtime::RPCClientSession::CallF [...]
 
 
 
diff --git a/docs/_sources/tutorials/cross_compilation_and_rpc.rst.txt b/docs/_sources/tutorials/cross_compilation_and_rpc.rst.txt
index 0c50c9a..a8d1709 100644
--- a/docs/_sources/tutorials/cross_compilation_and_rpc.rst.txt
+++ b/docs/_sources/tutorials/cross_compilation_and_rpc.rst.txt
@@ -235,7 +235,7 @@ device and returns the measured cost. Network overhead is excluded.
 
  .. code-block:: none
 
-    1.723e-07 secs/op
+    1.693e-07 secs/op
 
 
 
diff --git a/docs/_sources/tutorials/dev/low_level_custom_pass.rst.txt b/docs/_sources/tutorials/dev/low_level_custom_pass.rst.txt
index 6925319..8ffdf2b 100644
--- a/docs/_sources/tutorials/dev/low_level_custom_pass.rst.txt
+++ b/docs/_sources/tutorials/dev/low_level_custom_pass.rst.txt
@@ -73,13 +73,13 @@ our customized lowering pass to manipulate the IR directly instead of using sche
  .. code-block:: none
 
     primfn(a_1: handle, b_1: handle, c_1: handle) -> ()
-      attr = {"tir.noalias": True, "global_symbol": "main"}
-      buffers = {a: Buffer(a_2: handle, float32, [128], []),
+      attr = {"global_symbol": "main", "tir.noalias": True}
+      buffers = {c: Buffer(c_2: handle, float32, [128], []),
                  b: Buffer(b_2: handle, float32, [128], []),
-                 c: Buffer(c_2: handle, float32, [128], [])}
-      buffer_map = {c_1: c, b_1: b, a_1: a} {
+                 a: Buffer(a_2: handle, float32, [128], [])}
+      buffer_map = {a_1: a, b_1: b, c_1: c} {
       for (i: int32, 0, 128) {
-        c_2[i] = ((float32*)a_2[i]) + (float32*)b_2[i]))
+        c_2[i] = ((float32*)a_2[i] + (float32*)b_2[i])
       }
     }
 
@@ -216,13 +216,13 @@ Thus, a good place to put this transformation pass is just after Phase 1.
  .. code-block:: none
 
     primfn(a_1: handle, b_1: handle, c_1: handle) -> ()
-      attr = {"tir.noalias": True, "global_symbol": "main"}
-      buffers = {a: Buffer(a_2: handle, float32, [128], []),
+      attr = {"global_symbol": "main", "tir.noalias": True}
+      buffers = {c: Buffer(c_2: handle, float32, [128], []),
                  b: Buffer(b_2: handle, float32, [128], []),
-                 c: Buffer(c_2: handle, float32, [128], [])}
-      buffer_map = {c_1: c, b_1: b, a_1: a} {
+                 a: Buffer(a_2: handle, float32, [128], [])}
+      buffer_map = {a_1: a, b_1: b, c_1: c} {
       for (i.outer: int32, 0, 16) {
-        c_2[ramp((i.outer*8), 1, 8)] = ((float32x8*)a_2[ramp((i.outer*8), 1, 8)]) + (float32x8*)b_2[ramp((i.outer*8), 1, 8)]))
+        c_2[ramp((i.outer*8), 1, 8)] = ((float32x8*)a_2[ramp((i.outer*8), 1, 8)] + (float32x8*)b_2[ramp((i.outer*8), 1, 8)])
       }
     }
 
diff --git a/docs/_sources/tutorials/dev/sg_execution_times.rst.txt b/docs/_sources/tutorials/dev/sg_execution_times.rst.txt
index afb89de..e2fb71b 100644
--- a/docs/_sources/tutorials/dev/sg_execution_times.rst.txt
+++ b/docs/_sources/tutorials/dev/sg_execution_times.rst.txt
@@ -5,7 +5,7 @@
 
 Computation times
 =================
-**00:00.472** total execution time for **tutorials_dev** files:
+**00:00.458** total execution time for **tutorials_dev** files:
 
-- **00:00.321**: :ref:`sphx_glr_tutorials_dev_relay_pass_infra.py` (``relay_pass_infra.py``)
-- **00:00.151**: :ref:`sphx_glr_tutorials_dev_low_level_custom_pass.py` (``low_level_custom_pass.py``)
+- **00:00.314**: :ref:`sphx_glr_tutorials_dev_relay_pass_infra.py` (``relay_pass_infra.py``)
+- **00:00.143**: :ref:`sphx_glr_tutorials_dev_low_level_custom_pass.py` (``low_level_custom_pass.py``)
diff --git a/docs/_sources/tutorials/frontend/deploy_model_on_android.rst.txt b/docs/_sources/tutorials/frontend/deploy_model_on_android.rst.txt
index 689fd88..1ad02a0 100644
--- a/docs/_sources/tutorials/frontend/deploy_model_on_android.rst.txt
+++ b/docs/_sources/tutorials/frontend/deploy_model_on_android.rst.txt
@@ -415,7 +415,7 @@ Execute on TVM
 
     TVM prediction top-1: tiger cat
     Evaluate inference time cost...
-    Mean inference time (std dev): 5.55 ms (0.04 ms)
+    Mean inference time (std dev): 4.31 ms (0.46 ms)
 
 
 
diff --git a/docs/_sources/tutorials/frontend/deploy_prequantized.rst.txt b/docs/_sources/tutorials/frontend/deploy_prequantized.rst.txt
index e600e83..8e07388 100644
--- a/docs/_sources/tutorials/frontend/deploy_prequantized.rst.txt
+++ b/docs/_sources/tutorials/frontend/deploy_prequantized.rst.txt
@@ -352,7 +352,7 @@ Here we give an example of how to measure performance of TVM compiled models.
 
  .. code-block:: none
 
-    Elapsed average ms: 14.098849819999998
+    Elapsed average ms: 13.114505149999998
 
 
 
diff --git a/docs/_sources/tutorials/frontend/deploy_prequantized_tflite.rst.txt b/docs/_sources/tutorials/frontend/deploy_prequantized_tflite.rst.txt
index 830791a..f84690e 100644
--- a/docs/_sources/tutorials/frontend/deploy_prequantized_tflite.rst.txt
+++ b/docs/_sources/tutorials/frontend/deploy_prequantized_tflite.rst.txt
@@ -361,7 +361,7 @@ Here we give an example of how to measure performance of TVM compiled models.
 
  .. code-block:: none
 
-    Elapsed average ms: 35.247464699999995
+    Elapsed average ms: 33.84403376999999
 
 
 
@@ -394,7 +394,7 @@ Here we give an example of how to measure performance of TVM compiled models.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 2 minutes  10.254 seconds)
+   **Total running time of the script:** ( 2 minutes  4.482 seconds)
 
 
 .. _sphx_glr_download_tutorials_frontend_deploy_prequantized_tflite.py:
diff --git a/docs/_sources/tutorials/frontend/deploy_sparse.rst.txt b/docs/_sources/tutorials/frontend/deploy_sparse.rst.txt
new file mode 100644
index 0000000..c5d2b43
--- /dev/null
+++ b/docs/_sources/tutorials/frontend/deploy_sparse.rst.txt
@@ -0,0 +1,442 @@
+.. note::
+    :class: sphx-glr-download-link-note
+
+    Click :ref:`here <sphx_glr_download_tutorials_frontend_deploy_sparse.py>` to download the full example code
+.. rst-class:: sphx-glr-example-title
+
+.. _sphx_glr_tutorials_frontend_deploy_sparse.py:
+
+
+Deploy a Hugging Face Pruned Model on CPU
+=========================================
+**Author**: `Josh Fromm <https://github.com/jwfromm>`_
+
+This tutorial demonstrates how to take any pruned model, in this case `PruneBert
+from Hugging Face
+<https://huggingface.co/huggingface/prunebert-base-uncased-6-finepruned-w-distil-squad>`_,
+and use TVM to leverage the model's sparsity support to produce real speedups. Although
+the primary purpose of this tutorial is to realize speedups on already pruned
+models, it may also be useful to estimate how fast a model would be *if* it were
+pruned. To this end, we also provide a function that takes an unpruned model and
+replaces its weights
+with random and pruned weights at a specified sparsity. This may be a useful
+feature when trying to decide if a model is worth pruning or not.
+
+Before we get into the code, it's useful to discuss sparsity and pruning
+and dig into the two
+different types of sparsity: **structured** and **unstructured**.
+
+Pruning is a technique primarily used to reduce the parameter size of a model
+by replacing weight values with 0s. Although many methods exist for choosing which
+weights should be set to 0, the most straightforward is to pick the
+weights with the smallest value. Typically, weights are pruned to a desired
+sparsity percentage. For example, a 95% sparse model would have only 5% of
+its weights non-zero. Pruning to very high sparsities often requires
+finetuning or full retraining as it tends to be a lossy approximation.
+Although parameter size benefits are quite easy to obtain from a pruned model
+through simple compression, leveraging sparsity to yield runtime speedups
+is more complicated.
+
+In structured sparsity, weights are pruned with the goal of clustering
+pruned weights together. In other words, they are pruned using both their
+value and location. The benefit of bunching up pruned weights is that it allows
+an algorithm such as matrix multiplication to skip entire blocks. It turns out
+that some degree of *block sparsity* is very important to realizing significant
+speedups on most hardware available today. 
+This is because when loading memory in most CPUs or GPUs,
+skipping a single value at a time doesn't save any work; instead, an entire
+chunk or tile is read in and processed using something like vectorized instructions.
+
+Unstructured sparse weights are those that are pruned only on the value of
+the original weights. They may appear to be scattered randomly throughout
+a tensor rather than in chunks like we'd see in block sparse weights.
+At low sparsities, unstructured pruning techniques are difficult to
+accelerate. However, at high sparsities many blocks of all 0 values
+will naturally appear, making it possible to accelerate.
+
+This tutorial interacts with both structured and unstructured sparsity.
+Hugging Face's PruneBert model is unstructured but 95% sparse, allowing us
+to apply TVM's block sparse optimizations to it, even if not optimally.
+When generating random sparse weights for an unpruned model, we do so with structured
+sparsity. A fun exercise is comparing the real speed of PruneBert with the block
+sparse speed using fake weights to see the benefit of structured sparsity.
+
+Load Required Modules
+---------------------
+Other than TVM, scipy, the latest transformers, and
+tensorflow 2.2+ are required.
+
+
+.. code-block:: default
+
+    import os
+    import tvm
+    import time
+    import itertools
+    import numpy as np
+    import tensorflow as tf
+    from tvm import relay
+    from tvm.contrib import graph_runtime
+    from tvm.relay import data_dep_optimization as ddo
+    from tensorflow.python.framework.convert_to_constants import (
+        convert_variables_to_constants_v2,
+    )
+    import scipy.sparse as sp
+
+
+
+
+
+
+
+
+Configure Settings
+------------------
+Let's start by defining some parameters that define the type of model
+and sparsity to run.
+
+
+.. code-block:: default
+
+
+    # The name of the transformer model to download and run.
+    name = "huggingface/prunebert-base-uncased-6-finepruned-w-distil-squad"
+    # The number of batches in an input.
+    batch_size = 1
+    # The length of each input sequence.
+    seq_len = 128
+    # TVM platform identifier. Although cuda is also supported, it requires
+    # tuning that is outside the scope of this tutorial. Note that best
+    # cpu performance can be achieved by setting -mcpu appropriately for
+    # your specific machine.
+    target = "llvm"
+    # Which device to run on. Should be one of tvm.cpu() or tvm.gpu().
+    ctx = tvm.cpu()
+    # If true, then a sparse variant of the network will be run and
+    # benchmarked.
+    measure_sparse = True
+    # The block size of structured sparsity to convert weight tensors
+    # into. Changing this parameter may yield speedups for some platforms.
+    bs_r = 1
+    # For models besides PruneBert (which is 95% sparse), this parameter
+    # determines how sparse the generated weights should be. The higher
+    # the sparsity, the faster the result.
+    sparsity = 0.85
+
+
+
+
+
+
+
+
+Download and Convert Transformers Model
+---------------------------------------
+Now we'll grab a model from the transformers module, download it,
+and convert it into a TensorFlow graphdef in preparation for converting that graphdef into
+a relay graph that we can optimize and deploy.
+
+
+.. code-block:: default
+
+    def load_keras_model(module, name, seq_len, batch_size, report_runtime=True):
+        model = module.from_pretrained(name)
+        dummy_input = tf.keras.Input(shape=[seq_len], batch_size=batch_size, dtype="int32")
+        dummy_out = model(dummy_input)  # Propagate shapes through the keras model.
+        if report_runtime:
+            np_input = np.random.uniform(
+                size=[batch_size, seq_len], low=0, high=seq_len
+            ).astype("int32")
+            start = time.time()
+            repeats = 50
+            for i in range(repeats):
+                np_out = model(np_input)
+            end = time.time()
+            print("Keras Runtime: %f ms." % (1000 * ((end - start) / repeats)))
+        return model
+
+
+    def convert_to_graphdef(model, batch_size, seq_len):
+        model_func = tf.function(lambda x: model(x))
+        input_dict = model._saved_model_inputs_spec
+        input_spec = input_dict[list(input_dict.keys())[0]]
+        model_func = model_func.get_concrete_function(
+            tf.TensorSpec([batch_size, seq_len], input_spec.dtype)
+        )
+        frozen_func = convert_variables_to_constants_v2(model_func)
+        return frozen_func.graph.as_graph_def()
+
+
+    def download_model(name, batch_size, seq_len):
+        import transformers
+
+        module = getattr(transformers, "TFBertForSequenceClassification")
+        model = load_keras_model(module, name=name, batch_size=batch_size, seq_len=seq_len)
+        return convert_to_graphdef(model, batch_size, seq_len)
+
+
+
+
+
+
+
+
+Convert to Relay Graph
+----------------------
+We now have all the tooling to get a transformers model in the right format
+for relay conversion. Let's import it! In the following function we
+save the imported graph in relay's json format so that we don't have
+to reimport from tensorflow each time this script is run.
+
+
+.. code-block:: default
+
+    def import_graphdef(
+        name,
+        batch_size,
+        seq_len,
+        save_relay=True,
+        relay_file="model.json",
+        relay_params="model.params",
+    ):
+        abs_path = os.path.dirname(os.path.abspath(__file__))
+        shape_dict = {"input_1": (batch_size, seq_len)}
+        relay_file = ("%s_%d_%d_%s" % (name, batch_size, seq_len, relay_file)).replace(
+            "/", "_"
+        )
+        relay_params = ("%s_%d_%d_%s" % (name, batch_size, seq_len, relay_params)).replace(
+            "/", "_"
+        )
+        if os.path.exists(os.path.join(abs_path, relay_file)) and os.path.exists(
+            os.path.join(abs_path, relay_params)
+        ):
+            with open(os.path.join(abs_path, relay_file), "r") as fi:
+                mod = tvm.ir.load_json(fi.read())
+            with open(os.path.join(abs_path, relay_params), "rb") as fi:
+                params = relay.load_param_dict(fi.read())
+        else:
+            graph_def = download_model(name, batch_size, seq_len)
+
+            mod, params = relay.frontend.from_tensorflow(graph_def, shape=shape_dict)
+
+            if save_relay:
+                with open(os.path.join(abs_path, relay_file), "w") as fo:
+                    fo.write(tvm.ir.save_json(mod))
+                with open(os.path.join(abs_path, relay_params), "wb") as fo:
+                    fo.write(relay.save_param_dict(params))
+
+        return mod, params, shape_dict
+
+
+
+
+
+
+
+
+Run the Dense Graph
+-------------------
+Let's run the default version of the imported model. Note that even if
+the weights are sparse, we won't see any speedup because we are using
+regular dense matrix multiplications on these dense (but mostly zero)
+tensors instead of sparse aware kernels.
+
+
+.. code-block:: default
+
+    def run_relay_graph(mod, params, shape_dict, target, ctx):
+        with relay.build_config(opt_level=3):
+            graph, lib, params = relay.build(mod, target=target, params=params)
+        input_shape = shape_dict["input_1"]
+        dummy_data = np.random.uniform(size=input_shape, low=0, high=input_shape[1]).astype(
+            "int32"
+        )
+
+        m = graph_runtime.create(graph, lib, ctx)
+        m.set_input(0, dummy_data)
+        m.set_input(**params)
+        m.run()
+        tvm_output = m.get_output(0)
+
+        ftimer = m.module.time_evaluator("run", ctx, repeat=5, number=5)
+        prof_res = np.array(ftimer().results) * 1000
+        print(
+            "%-20s %-19s (%s)"
+            % ("Runtime:", "%.2f ms" % np.mean(prof_res), "%.2f ms" % np.std(prof_res))
+        )
+        return tvm_output
+
+
+    def run_dense(mod, params, shape_dict, target, ctx):
+        print("Dense Model Benchmark:")
+        return run_relay_graph(mod, params, shape_dict, target, ctx)
+
+
+
+
+
+
+
+
+Run the Sparse Graph
+--------------------
+Next we'll convert the graph into a sparse representation and generate
+fake sparse weights if needed. Then we'll use the same benchmarking
+script as dense to see how much faster we go! We apply a few relay passes
+to the graph to get it leveraging sparsity. First we use
+`simplify_fc_transpose` to fold transposes on the weights of dense layers
+into the parameters. This makes it easier to convert matrix multiplies
+to sparse versions. Next we apply `bsr_dense.convert` to identify all
+weight matrices that can be sparse, and automatically replace them.
+
+The `bsr_dense.convert` call below is doing the heavy lifting of identifying
+which weights in the model can be made sparse by checking if they are
+at least `sparsity_threshold` percent sparse. If so, it converts those
+weights into *Block Sparse Row (BSR)* format. BSR is essentially
+a representation that indexes into the nonzero chunks of the tensor,
+making it easy for an algorithm to load those non-zero chunks and ignore
+the rest of the tensor. Once the sparse weights are in BSR format,
+`relay.transform.DenseToSparse` is applied to actually replace
+`relay.dense` operations with `relay.sparse_dense` calls that can be
+run faster.
+
+
+.. code-block:: default
+
+    def random_bsr_matrix(M, N, BS_R, BS_C, density, dtype="float32"):
+        Y = np.zeros((M, N), dtype=dtype)
+        assert M % BS_R == 0
+        assert N % BS_C == 0
+        nnz = int(density * M * N)
+        num_blocks = int(nnz / (BS_R * BS_C)) + 1
+        candidate_blocks = np.asarray(
+            list(itertools.product(range(0, M, BS_R), range(0, N, BS_C)))
+        )
+        assert candidate_blocks.shape[0] == M // BS_R * N // BS_C
+        chosen_blocks = candidate_blocks[
+            np.random.choice(candidate_blocks.shape[0], size=num_blocks, replace=False)
+        ]
+        for i in range(len(chosen_blocks)):
+            r, c = chosen_blocks[i]
+            Y[r : r + BS_R, c : c + BS_C] = np.random.uniform(-0.1, 0.1, (BS_R, BS_C))
+        s = sp.bsr_matrix(Y, blocksize=(BS_R, BS_C))
+        assert s.data.shape == (num_blocks, BS_R, BS_C)
+        assert s.data.size >= nnz
+        assert s.indices.shape == (num_blocks,)
+        assert s.indptr.shape == (M // BS_R + 1,)
+        return s.todense()
+
+
+    def random_sparse_bert_params(func, params, density, BS_R, BS_C):
+        """Return a copy of ``params`` with selected weights made block-sparse.
+
+        Each name returned by ``_search_dense_op_weight(func)`` whose 2-D shape
+        is divisible by (BS_R, BS_C) is replaced with a fresh
+        ``random_bsr_matrix`` of the same shape; other entries are copied
+        unchanged. The caller's ``params`` dict is not written to.
+        """
+        def deepcopy(param_dic):
+            # Round-trip through numpy to create new tvm.nd arrays in a new dict.
+            ret = {}
+            for k, v in param_dic.items():
+                ret[k] = tvm.nd.array(v.asnumpy())
+            return ret
+
+        new_params = deepcopy(params)
+        # NOTE(review): presumably yields the names of weights feeding dense
+        # ops in `func`; this is a private helper, so confirm against TVM.
+        dense_weight_names = relay.analysis.sparse_dense._search_dense_op_weight(func)
+        for item in dense_weight_names:
+            name = str(item)
+            shape = new_params[name].shape
+            # Skip weights that cannot be tiled exactly by the block size.
+            if shape[0] % BS_R == 0 and shape[1] % BS_C == 0:
+                new_w = random_bsr_matrix(shape[0], shape[1], BS_R, BS_C, density)
+                new_params[name] = tvm.nd.array(new_w)
+        return new_params
+
+
+    def run_sparse(mod, params, shape_dict, target, ctx, bs_r, sparsity, gen_weights):
+        """Convert the model to block-sparse form and run it.
+
+        Applies ``ddo.simplify_fc_transpose.convert`` to ``mod["main"]``,
+        optionally swaps in random sparse weights (blocks of bs_r x 1, density
+        ``1 - sparsity``), applies ``ddo.bsr_dense.convert`` with a
+        ``sparsity_threshold`` of 0.8, prints a heading, and returns the result
+        of ``run_relay_graph``.
+        """
+        # NOTE(review): `ddo` is defined outside this view; presumably TVM's
+        # relay data-dependent optimization module. Confirm against the imports.
+        mod, params = ddo.simplify_fc_transpose.convert(mod["main"], params)
+        if gen_weights:
+            # Replace the weights with randomly generated block-sparse ones.
+            params = random_sparse_bert_params(
+                mod, params, BS_R=bs_r, BS_C=1, density=1 - sparsity
+            )
+        mod, params = ddo.bsr_dense.convert(mod, params, (bs_r, 1), sparsity_threshold=0.8)
+        print("Block Sparse Model with {blocksize}x1 blocks:".format(blocksize=bs_r))
+        return run_relay_graph(mod, params, shape_dict, target, ctx)
+
+
+
+
+
+
+
+
+Run All the Code!
+-----------------
+And that's it! Now we'll simply call all the needed functions to benchmark
+the model according to the set parameters. Note that to run this code
+you'll need to uncomment the last line first.
+
+
+.. code-block:: default
+
+    def benchmark():
+        """Import the model, run it dense, then optionally run it sparse.
+
+        Reads the settings ``name``, ``batch_size``, ``seq_len``, ``target``,
+        ``ctx``, ``measure_sparse``, ``bs_r`` and ``sparsity`` from the
+        enclosing scope (defined outside this excerpt).
+        """
+        mod, params, shape_dict = import_graphdef(name, batch_size, seq_len)
+        run_dense(mod, params, shape_dict, target, ctx)
+        if measure_sparse:
+            # Random sparse weights are generated unless the model name
+            # contains "prune".
+            gen_weights = "prune" not in name
+            run_sparse(mod, params, shape_dict, target, ctx, bs_r, sparsity, gen_weights)
+
+
+    # benchmark()
+
+
+
+
+
+
+
+Sample Output
+-------------
+For reference, below is the output of the script when run on an AMD CPU;
+it shows about a 2.5X speedup from using sparsity.
+
+
+.. code-block:: default
+
+
+    # Dense Model Benchmark:
+    # Cannot find config for target=llvm, workload=('dense_nopack.x86', ('TENSOR', (1, 768), 'float32'), ('TENSOR', (2, 768), 'float32'), None, 'float32'). A fallback configuration is used, which may bring great performance regression.
+    # Cannot find config for target=llvm, workload=('dense_nopack.x86', ('TENSOR', (1, 768), 'float32'), ('TENSOR', (768, 768), 'float32'), None, 'float32'). A fallback configuration is used, which may bring great performance regression.
+    # Cannot find config for target=llvm, workload=('dense_nopack.x86', ('TENSOR', (128, 3072), 'float32'), ('TENSOR', (768, 3072), 'float32'), None, 'float32'). A fallback configuration is used, which may bring great performance regression.
+    # Cannot find config for target=llvm, workload=('dense_nopack.x86', ('TENSOR', (128, 768), 'float32'), ('TENSOR', (3072, 768), 'float32'), None, 'float32'). A fallback configuration is used, which may bring great performance regression.
+    # Cannot find config for target=llvm, workload=('dense_nopack.x86', ('TENSOR', (128, 768), 'float32'), ('TENSOR', (768, 768), 'float32'), None, 'float32'). A fallback configuration is used, which may bring great performance regression.
+    # Cannot find config for target=llvm, workload=('batch_matmul.x86', ('TENSOR', (12, 128, 128), 'float32'), ('TENSOR', (12, 64, 128), 'float32')). A fallback configuration is used, which may bring great performance regression.
+    # Cannot find config for target=llvm, workload=('batch_matmul.x86', ('TENSOR', (12, 128, 64), 'float32'), ('TENSOR', (12, 128, 64), 'float32')). A fallback configuration is used, which may bring great performance regression.
+    # Runtime:             165.26 ms           (12.83 ms)
+    # Block Sparse Model with 1x1 blocks:
+    # Runtime:             67.75 ms            (8.83 ms)
+
+
+
+
+
+
+
+.. _sphx_glr_download_tutorials_frontend_deploy_sparse.py:
+
+
+.. only :: html
+
+ .. container:: sphx-glr-footer
+    :class: sphx-glr-footer-example
+
+
+
+  .. container:: sphx-glr-download
+
+     :download:`Download Python source code: deploy_sparse.py <deploy_sparse.py>`
+
+
+
+  .. container:: sphx-glr-download
+
+     :download:`Download Jupyter notebook: deploy_sparse.ipynb <deploy_sparse.ipynb>`
+
+
+.. only:: html
+
+ .. rst-class:: sphx-glr-signature
+
+    `Gallery generated by Sphinx-Gallery <https://sphinx-gallery.github.io>`_
diff --git a/docs/_sources/tutorials/frontend/deploy_ssd_gluoncv.rst.txt b/docs/_sources/tutorials/frontend/deploy_ssd_gluoncv.rst.txt
index 9f616a0..e340eae 100644
--- a/docs/_sources/tutorials/frontend/deploy_ssd_gluoncv.rst.txt
+++ b/docs/_sources/tutorials/frontend/deploy_ssd_gluoncv.rst.txt
@@ -169,61 +169,6 @@ Create TVM runtime and do inference
 
  .. code-block:: none
 
-    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 3, 512, 512), 'float32'), ('TENSOR', (64, 3, 7, 7), 'float32'), (2, 2), (3, 3, 3, 3), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 64, 128, 128), 'float32'), ('TENSOR', (64, 64, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 64, 128, 128), 'float32'), ('TENSOR', (64, 64, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=cuda, workload=('conv2d_nchw_winograd.cuda', ('TENSOR', (1, 64, 128, 128), 'float32'), ('TENSOR', (64, 64, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 64, 128, 128), 'float32'), ('TENSOR', (256, 64, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 256, 128, 128), 'float32'), ('TENSOR', (64, 256, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 256, 128, 128), 'float32'), ('TENSOR', (128, 256, 1, 1), 'float32'), (2, 2), (0, 0, 0, 0), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 128, 64, 64), 'float32'), ('TENSOR', (128, 128, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=cuda, workload=('conv2d_nchw_winograd.cuda', ('TENSOR', (1, 128, 64, 64), 'float32'), ('TENSOR', (128, 128, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 128, 64, 64), 'float32'), ('TENSOR', (512, 128, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 256, 128, 128), 'float32'), ('TENSOR', (512, 256, 1, 1), 'float32'), (2, 2), (0, 0, 0, 0), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 512, 64, 64), 'float32'), ('TENSOR', (128, 512, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 512, 64, 64), 'float32'), ('TENSOR', (256, 512, 1, 1), 'float32'), (2, 2), (0, 0, 0, 0), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 256, 32, 32), 'float32'), ('TENSOR', (256, 256, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=cuda, workload=('conv2d_nchw_winograd.cuda', ('TENSOR', (1, 256, 32, 32), 'float32'), ('TENSOR', (256, 256, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 256, 32, 32), 'float32'), ('TENSOR', (1024, 256, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 512, 64, 64), 'float32'), ('TENSOR', (1024, 512, 1, 1), 'float32'), (2, 2), (0, 0, 0, 0), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 1024, 32, 32), 'float32'), ('TENSOR', (256, 1024, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 1024, 32, 32), 'float32'), ('TENSOR', (84, 1024, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=cuda, workload=('conv2d_nchw_winograd.cuda', ('TENSOR', (1, 1024, 32, 32), 'float32'), ('TENSOR', (84, 1024, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 1024, 32, 32), 'float32'), ('TENSOR', (512, 1024, 1, 1), 'float32'), (2, 2), (0, 0, 0, 0), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 512, 16, 16), 'float32'), ('TENSOR', (512, 512, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=cuda, workload=('conv2d_nchw_winograd.cuda', ('TENSOR', (1, 512, 16, 16), 'float32'), ('TENSOR', (512, 512, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 512, 16, 16), 'float32'), ('TENSOR', (2048, 512, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 1024, 32, 32), 'float32'), ('TENSOR', (2048, 1024, 1, 1), 'float32'), (2, 2), (0, 0, 0, 0), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 2048, 16, 16), 'float32'), ('TENSOR', (512, 2048, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 2048, 16, 16), 'float32'), ('TENSOR', (126, 2048, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=cuda, workload=('conv2d_nchw_winograd.cuda', ('TENSOR', (1, 2048, 16, 16), 'float32'), ('TENSOR', (126, 2048, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 512, 16, 16), 'float32'), ('TENSOR', (512, 512, 3, 3), 'float32'), (2, 2), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 512, 8, 8), 'float32'), ('TENSOR', (126, 512, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=cuda, workload=('conv2d_nchw_winograd.cuda', ('TENSOR', (1, 512, 8, 8), 'float32'), ('TENSOR', (126, 512, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 512, 8, 8), 'float32'), ('TENSOR', (512, 512, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 512, 8, 8), 'float32'), ('TENSOR', (512, 512, 3, 3), 'float32'), (2, 2), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 512, 4, 4), 'float32'), ('TENSOR', (126, 512, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=cuda, workload=('conv2d_nchw_winograd.cuda', ('TENSOR', (1, 512, 4, 4), 'float32'), ('TENSOR', (126, 512, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 512, 4, 4), 'float32'), ('TENSOR', (256, 512, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 256, 4, 4), 'float32'), ('TENSOR', (256, 256, 3, 3), 'float32'), (2, 2), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 256, 2, 2), 'float32'), ('TENSOR', (84, 256, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=cuda, workload=('conv2d_nchw_winograd.cuda', ('TENSOR', (1, 256, 2, 2), 'float32'), ('TENSOR', (84, 256, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 256, 2, 2), 'float32'), ('TENSOR', (256, 256, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 256, 2, 2), 'float32'), ('TENSOR', (256, 256, 3, 3), 'float32'), (2, 2), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 256, 1, 1), 'float32'), ('TENSOR', (84, 256, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=cuda, workload=('conv2d_nchw_winograd.cuda', ('TENSOR', (1, 256, 1, 1), 'float32'), ('TENSOR', (84, 256, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 1024, 32, 32), 'float32'), ('TENSOR', (16, 1024, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=cuda, workload=('conv2d_nchw_winograd.cuda', ('TENSOR', (1, 1024, 32, 32), 'float32'), ('TENSOR', (16, 1024, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 2048, 16, 16), 'float32'), ('TENSOR', (24, 2048, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=cuda, workload=('conv2d_nchw_winograd.cuda', ('TENSOR', (1, 2048, 16, 16), 'float32'), ('TENSOR', (24, 2048, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 512, 8, 8), 'float32'), ('TENSOR', (24, 512, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=cuda, workload=('conv2d_nchw_winograd.cuda', ('TENSOR', (1, 512, 8, 8), 'float32'), ('TENSOR', (24, 512, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 512, 4, 4), 'float32'), ('TENSOR', (24, 512, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=cuda, workload=('conv2d_nchw_winograd.cuda', ('TENSOR', (1, 512, 4, 4), 'float32'), ('TENSOR', (24, 512, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 256, 2, 2), 'float32'), ('TENSOR', (16, 256, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=cuda, workload=('conv2d_nchw_winograd.cuda', ('TENSOR', (1, 256, 2, 2), 'float32'), ('TENSOR', (16, 256, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 256, 1, 1), 'float32'), ('TENSOR', (16, 256, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=cuda, workload=('conv2d_nchw_winograd.cuda', ('TENSOR', (1, 256, 1, 1), 'float32'), ('TENSOR', (16, 256, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
     Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 3, 512, 512), 'float32'), ('TENSOR', (64, 3, 7, 7), 'float32'), (2, 2), (3, 3, 3, 3), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
     Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 64, 128, 128), 'float32'), ('TENSOR', (64, 64, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
     Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 64, 128, 128), 'float32'), ('TENSOR', (64, 64, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
@@ -263,6 +208,84 @@ Create TVM runtime and do inference
     Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 512, 4, 4), 'float32'), ('TENSOR', (24, 512, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
     Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 256, 2, 2), 'float32'), ('TENSOR', (16, 256, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
     Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 256, 1, 1), 'float32'), ('TENSOR', (16, 256, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 256, 32, 32, 4), 'float32'), ('TENSOR', (12, 256, 3, 3, 4, 7), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'NCHW4c', 'NCHW7c', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 32, 32, 32, 8), 'float32'), ('TENSOR', (128, 32, 1, 1, 8, 8), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'NCHW8c', 'NCHW8c', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 32, 32, 32, 8), 'float32'), ('TENSOR', (32, 32, 3, 3, 8, 8), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'NCHW8c', 'NCHW8c', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 128, 32, 32, 8), 'float32'), ('TENSOR', (32, 128, 1, 1, 8, 8), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'NCHW8c', 'NCHW8c', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 64, 64, 64, 8), 'float32'), ('TENSOR', (32, 64, 1, 1, 8, 8), 'float32'), (2, 2), (0, 0, 0, 0), (1, 1), 'NCHW8c', 'NCHW8c', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 16, 64, 64, 8), 'float32'), ('TENSOR', (64, 16, 1, 1, 8, 8), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'NCHW8c', 'NCHW8c', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 16, 64, 64, 8), 'float32'), ('TENSOR', (16, 16, 3, 3, 8, 8), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'NCHW8c', 'NCHW8c', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 64, 64, 64, 8), 'float32'), ('TENSOR', (16, 64, 1, 1, 8, 8), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'NCHW8c', 'NCHW8c', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 32, 128, 128, 8), 'float32'), ('TENSOR', (16, 32, 1, 1, 8, 8), 'float32'), (2, 2), (0, 0, 0, 0), (1, 1), 'NCHW8c', 'NCHW8c', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 8, 128, 128, 8), 'float32'), ('TENSOR', (32, 8, 1, 1, 8, 8), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'NCHW8c', 'NCHW8c', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 8, 128, 128, 8), 'float32'), ('TENSOR', (8, 8, 3, 3, 8, 8), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'NCHW8c', 'NCHW8c', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 32, 128, 128, 8), 'float32'), ('TENSOR', (8, 32, 1, 1, 8, 8), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'NCHW8c', 'NCHW8c', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 8, 128, 128, 8), 'float32'), ('TENSOR', (8, 8, 1, 1, 8, 8), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'NCHW8c', 'NCHW8c', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 1, 512, 512, 3), 'float32'), ('TENSOR', (8, 1, 7, 7, 3, 8), 'float32'), (2, 2), (3, 3, 3, 3), (1, 1), 'NCHW3c', 'NCHW8c', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 32, 128, 128, 8), 'float32'), ('TENSOR', (64, 32, 1, 1, 8, 8), 'float32'), (2, 2), (0, 0, 0, 0), (1, 1), 'NCHW8c', 'NCHW8c', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 64, 64, 64, 8), 'float32'), ('TENSOR', (128, 64, 1, 1, 8, 8), 'float32'), (2, 2), (0, 0, 0, 0), (1, 1), 'NCHW8c', 'NCHW8c', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 512, 16, 16, 4), 'float32'), ('TENSOR', (18, 512, 3, 3, 4, 7), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'NCHW4c', 'NCHW7c', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 64, 16, 16, 8), 'float32'), ('TENSOR', (256, 64, 1, 1, 8, 8), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'NCHW8c', 'NCHW8c', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 64, 16, 16, 8), 'float32'), ('TENSOR', (64, 64, 3, 3, 8, 8), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'NCHW8c', 'NCHW8c', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 256, 16, 16, 8), 'float32'), ('TENSOR', (64, 256, 1, 1, 8, 8), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'NCHW8c', 'NCHW8c', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 128, 32, 32, 8), 'float32'), ('TENSOR', (64, 128, 1, 1, 8, 8), 'float32'), (2, 2), (0, 0, 0, 0), (1, 1), 'NCHW8c', 'NCHW8c', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 128, 32, 32, 8), 'float32'), ('TENSOR', (256, 128, 1, 1, 8, 8), 'float32'), (2, 2), (0, 0, 0, 0), (1, 1), 'NCHW8c', 'NCHW8c', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 128, 8, 8, 4), 'float32'), ('TENSOR', (18, 128, 3, 3, 4, 7), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'NCHW4c', 'NCHW7c', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 64, 16, 16, 8), 'float32'), ('TENSOR', (64, 64, 3, 3, 8, 8), 'float32'), (2, 2), (1, 1, 1, 1), (1, 1), 'NCHW8c', 'NCHW8c', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 128, 4, 4, 4), 'float32'), ('TENSOR', (18, 128, 3, 3, 4, 7), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'NCHW4c', 'NCHW7c', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 64, 8, 8, 8), 'float32'), ('TENSOR', (64, 64, 3, 3, 8, 8), 'float32'), (2, 2), (1, 1, 1, 1), (1, 1), 'NCHW8c', 'NCHW8c', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 64, 8, 8, 8), 'float32'), ('TENSOR', (64, 64, 1, 1, 8, 8), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'NCHW8c', 'NCHW8c', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 64, 2, 2, 4), 'float32'), ('TENSOR', (12, 64, 3, 3, 4, 7), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'NCHW4c', 'NCHW7c', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 32, 4, 4, 8), 'float32'), ('TENSOR', (32, 32, 3, 3, 8, 8), 'float32'), (2, 2), (1, 1, 1, 1), (1, 1), 'NCHW8c', 'NCHW8c', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 64, 4, 4, 8), 'float32'), ('TENSOR', (32, 64, 1, 1, 8, 8), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'NCHW8c', 'NCHW8c', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 64, 1, 1, 4), 'float32'), ('TENSOR', (12, 64, 3, 3, 4, 7), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'NCHW4c', 'NCHW7c', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 32, 2, 2, 8), 'float32'), ('TENSOR', (32, 32, 3, 3, 8, 8), 'float32'), (2, 2), (1, 1, 1, 1), (1, 1), 'NCHW8c', 'NCHW8c', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 32, 2, 2, 8), 'float32'), ('TENSOR', (32, 32, 1, 1, 8, 8), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'NCHW8c', 'NCHW8c', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 128, 32, 32, 8), 'float32'), ('TENSOR', (2, 128, 3, 3, 8, 8), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'NCHW8c', 'NCHW8c', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 256, 16, 16, 8), 'float32'), ('TENSOR', (3, 256, 3, 3, 8, 8), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'NCHW8c', 'NCHW8c', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 64, 8, 8, 8), 'float32'), ('TENSOR', (3, 64, 3, 3, 8, 8), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'NCHW8c', 'NCHW8c', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 64, 4, 4, 8), 'float32'), ('TENSOR', (3, 64, 3, 3, 8, 8), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'NCHW8c', 'NCHW8c', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 32, 2, 2, 8), 'float32'), ('TENSOR', (2, 32, 3, 3, 8, 8), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'NCHW8c', 'NCHW8c', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 32, 1, 1, 8), 'float32'), ('TENSOR', (2, 32, 3, 3, 8, 8), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'NCHW8c', 'NCHW8c', 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 3, 512, 512), 'float32'), ('TENSOR', (64, 3, 7, 7), 'float32'), (2, 2), (3, 3, 3, 3), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 64, 128, 128), 'float32'), ('TENSOR', (64, 64, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 64, 128, 128), 'float32'), ('TENSOR', (64, 64, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 64, 128, 128), 'float32'), ('TENSOR', (256, 64, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 256, 128, 128), 'float32'), ('TENSOR', (64, 256, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 256, 128, 128), 'float32'), ('TENSOR', (128, 256, 1, 1), 'float32'), (2, 2), (0, 0, 0, 0), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 128, 64, 64), 'float32'), ('TENSOR', (128, 128, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 128, 64, 64), 'float32'), ('TENSOR', (512, 128, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 256, 128, 128), 'float32'), ('TENSOR', (512, 256, 1, 1), 'float32'), (2, 2), (0, 0, 0, 0), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 512, 64, 64), 'float32'), ('TENSOR', (128, 512, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 512, 64, 64), 'float32'), ('TENSOR', (256, 512, 1, 1), 'float32'), (2, 2), (0, 0, 0, 0), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 256, 32, 32), 'float32'), ('TENSOR', (256, 256, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 256, 32, 32), 'float32'), ('TENSOR', (1024, 256, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 512, 64, 64), 'float32'), ('TENSOR', (1024, 512, 1, 1), 'float32'), (2, 2), (0, 0, 0, 0), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 1024, 32, 32), 'float32'), ('TENSOR', (256, 1024, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 1024, 32, 32), 'float32'), ('TENSOR', (84, 1024, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 1024, 32, 32), 'float32'), ('TENSOR', (512, 1024, 1, 1), 'float32'), (2, 2), (0, 0, 0, 0), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 512, 16, 16), 'float32'), ('TENSOR', (512, 512, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 512, 16, 16), 'float32'), ('TENSOR', (2048, 512, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 1024, 32, 32), 'float32'), ('TENSOR', (2048, 1024, 1, 1), 'float32'), (2, 2), (0, 0, 0, 0), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 2048, 16, 16), 'float32'), ('TENSOR', (512, 2048, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 2048, 16, 16), 'float32'), ('TENSOR', (126, 2048, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 512, 16, 16), 'float32'), ('TENSOR', (512, 512, 3, 3), 'float32'), (2, 2), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 512, 8, 8), 'float32'), ('TENSOR', (126, 512, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 512, 8, 8), 'float32'), ('TENSOR', (512, 512, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 512, 8, 8), 'float32'), ('TENSOR', (512, 512, 3, 3), 'float32'), (2, 2), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 512, 4, 4), 'float32'), ('TENSOR', (126, 512, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 512, 4, 4), 'float32'), ('TENSOR', (256, 512, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 256, 4, 4), 'float32'), ('TENSOR', (256, 256, 3, 3), 'float32'), (2, 2), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 256, 2, 2), 'float32'), ('TENSOR', (84, 256, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 256, 2, 2), 'float32'), ('TENSOR', (256, 256, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 256, 2, 2), 'float32'), ('TENSOR', (256, 256, 3, 3), 'float32'), (2, 2), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 256, 1, 1), 'float32'), ('TENSOR', (84, 256, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 1024, 32, 32), 'float32'), ('TENSOR', (16, 1024, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 2048, 16, 16), 'float32'), ('TENSOR', (24, 2048, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 512, 8, 8), 'float32'), ('TENSOR', (24, 512, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 512, 4, 4), 'float32'), ('TENSOR', (24, 512, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 256, 2, 2), 'float32'), ('TENSOR', (16, 256, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
+    Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 256, 1, 1), 'float32'), ('TENSOR', (16, 256, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
 
 
 
diff --git a/docs/_sources/tutorials/frontend/from_onnx.rst.txt b/docs/_sources/tutorials/frontend/from_onnx.rst.txt
index e201470..c6c05d6 100644
--- a/docs/_sources/tutorials/frontend/from_onnx.rst.txt
+++ b/docs/_sources/tutorials/frontend/from_onnx.rst.txt
@@ -125,7 +125,7 @@ Compile the model with relay
 
  .. code-block:: none
 
-    /workspace/docs/../python/tvm/relay/frontend/onnx.py:2286: UserWarning: Mismatched attribute type in ' : kernel_shape'
+    /workspace/docs/../python/tvm/relay/frontend/onnx.py:2287: UserWarning: Mismatched attribute type in ' : kernel_shape'
 
     ==> Context: Bad node spec: input: "1" input: "2" output: "11" op_type: "Conv" attribute { name: "kernel_shape" ints: 5 ints: 5 } attribute { name: "strides" ints: 1 ints: 1 } attribute { name: "pads" ints: 2 ints: 2 ints: 2 ints: 2 } attribute { name: "dilations" ints: 1 ints: 1 } attribute { name: "group" i: 1 }
       warnings.warn(str(e))
@@ -151,7 +151,7 @@ Execute on TVM
 
  .. code-block:: none
 
-
    ...47%, 0.01 MB, 755 KB/s, 0 seconds passed
    ...94%, 0.02 MB, 1410 KB/s, 0 seconds passed
    ...100%, 0.02 MB, 2077 KB/s, 0 seconds passed
+
    ...47%, 0.01 MB, 47 KB/s, 0 seconds passed
    ...94%, 0.02 MB, 94 KB/s, 0 seconds passed
    ...100%, 0.02 MB, 141 KB/s, 0 seconds passed
     Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 32, 224, 224), 'float32'), ('TENSOR', (9, 32, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
     Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 64, 224, 224), 'float32'), ('TENSOR', (32, 64, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
     Cannot find config for target=llvm, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 1, 224, 224), 'float32'), ('TENSOR', (64, 1, 5, 5), 'float32'), (1, 1), (2, 2, 2, 2), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
diff --git a/docs/_sources/tutorials/frontend/from_tensorflow.rst.txt b/docs/_sources/tutorials/frontend/from_tensorflow.rst.txt
index 8ff019d..64ef6e7 100644
--- a/docs/_sources/tutorials/frontend/from_tensorflow.rst.txt
+++ b/docs/_sources/tutorials/frontend/from_tensorflow.rst.txt
@@ -197,9 +197,9 @@ Results:
 
     ANTLR runtime and generated code versions disagree: 4.8!=4.7.2
     ANTLR runtime and generated code versions disagree: 4.8!=4.7.2
-    /workspace/docs/../python/tvm/relay/frontend/tensorflow.py:2843: UserWarning: Ignore the passed shape. Shape in graphdef will be used for operator DecodeJpeg/contents.
+    /workspace/docs/../python/tvm/relay/frontend/tensorflow.py:2842: UserWarning: Ignore the passed shape. Shape in graphdef will be used for operator DecodeJpeg/contents.
       "will be used for operator %s." % node.name)
-    /workspace/docs/../python/tvm/relay/frontend/tensorflow.py:684: UserWarning: DecodeJpeg: It's a pass through, please handle preprocessing before input
+    /workspace/docs/../python/tvm/relay/frontend/tensorflow.py:683: UserWarning: DecodeJpeg: It's a pass through, please handle preprocessing before input
       warnings.warn("DecodeJpeg: It's a pass through, please handle preprocessing before input")
     Tensorflow protobuf imported to relay frontend.
 
diff --git a/docs/_sources/tutorials/frontend/sg_execution_times.rst.txt b/docs/_sources/tutorials/frontend/sg_execution_times.rst.txt
index 68534b2..051dbd5 100644
--- a/docs/_sources/tutorials/frontend/sg_execution_times.rst.txt
+++ b/docs/_sources/tutorials/frontend/sg_execution_times.rst.txt
@@ -5,22 +5,23 @@
 
 Computation times
 =================
-**05:38.916** total execution time for **tutorials_frontend** files:
+**05:27.339** total execution time for **tutorials_frontend** files:
 
-- **02:10.254**: :ref:`sphx_glr_tutorials_frontend_deploy_prequantized_tflite.py` (``deploy_prequantized_tflite.py``)
-- **00:30.124**: :ref:`sphx_glr_tutorials_frontend_deploy_ssd_gluoncv.py` (``deploy_ssd_gluoncv.py``)
-- **00:29.983**: :ref:`sphx_glr_tutorials_frontend_deploy_prequantized.py` (``deploy_prequantized.py``)
-- **00:28.080**: :ref:`sphx_glr_tutorials_frontend_from_tensorflow.py` (``from_tensorflow.py``)
-- **00:21.902**: :ref:`sphx_glr_tutorials_frontend_from_tflite.py` (``from_tflite.py``)
-- **00:18.913**: :ref:`sphx_glr_tutorials_frontend_deploy_quantized.py` (``deploy_quantized.py``)
-- **00:17.142**: :ref:`sphx_glr_tutorials_frontend_from_darknet.py` (``from_darknet.py``)
-- **00:10.471**: :ref:`sphx_glr_tutorials_frontend_from_caffe2.py` (``from_caffe2.py``)
-- **00:10.211**: :ref:`sphx_glr_tutorials_frontend_deploy_model_on_android.py` (``deploy_model_on_android.py``)
-- **00:08.790**: :ref:`sphx_glr_tutorials_frontend_from_keras.py` (``from_keras.py``)
-- **00:07.050**: :ref:`sphx_glr_tutorials_frontend_deploy_model_on_rasp.py` (``deploy_model_on_rasp.py``)
-- **00:06.472**: :ref:`sphx_glr_tutorials_frontend_from_pytorch.py` (``from_pytorch.py``)
-- **00:06.164**: :ref:`sphx_glr_tutorials_frontend_from_coreml.py` (``from_coreml.py``)
-- **00:04.943**: :ref:`sphx_glr_tutorials_frontend_from_mxnet.py` (``from_mxnet.py``)
-- **00:04.864**: :ref:`sphx_glr_tutorials_frontend_build_gcn.py` (``build_gcn.py``)
-- **00:02.175**: :ref:`sphx_glr_tutorials_frontend_using_external_lib.py` (``using_external_lib.py``)
-- **00:01.378**: :ref:`sphx_glr_tutorials_frontend_from_onnx.py` (``from_onnx.py``)
+- **02:04.482**: :ref:`sphx_glr_tutorials_frontend_deploy_prequantized_tflite.py` (``deploy_prequantized_tflite.py``)
+- **00:29.629**: :ref:`sphx_glr_tutorials_frontend_deploy_ssd_gluoncv.py` (``deploy_ssd_gluoncv.py``)
+- **00:29.048**: :ref:`sphx_glr_tutorials_frontend_deploy_prequantized.py` (``deploy_prequantized.py``)
+- **00:26.466**: :ref:`sphx_glr_tutorials_frontend_from_tensorflow.py` (``from_tensorflow.py``)
+- **00:20.677**: :ref:`sphx_glr_tutorials_frontend_from_tflite.py` (``from_tflite.py``)
+- **00:18.378**: :ref:`sphx_glr_tutorials_frontend_deploy_quantized.py` (``deploy_quantized.py``)
+- **00:16.755**: :ref:`sphx_glr_tutorials_frontend_from_darknet.py` (``from_darknet.py``)
+- **00:10.554**: :ref:`sphx_glr_tutorials_frontend_from_caffe2.py` (``from_caffe2.py``)
+- **00:09.594**: :ref:`sphx_glr_tutorials_frontend_deploy_model_on_android.py` (``deploy_model_on_android.py``)
+- **00:08.537**: :ref:`sphx_glr_tutorials_frontend_from_keras.py` (``from_keras.py``)
+- **00:06.934**: :ref:`sphx_glr_tutorials_frontend_from_pytorch.py` (``from_pytorch.py``)
+- **00:06.808**: :ref:`sphx_glr_tutorials_frontend_deploy_model_on_rasp.py` (``deploy_model_on_rasp.py``)
+- **00:06.118**: :ref:`sphx_glr_tutorials_frontend_from_coreml.py` (``from_coreml.py``)
+- **00:04.935**: :ref:`sphx_glr_tutorials_frontend_from_mxnet.py` (``from_mxnet.py``)
+- **00:04.635**: :ref:`sphx_glr_tutorials_frontend_build_gcn.py` (``build_gcn.py``)
+- **00:02.086**: :ref:`sphx_glr_tutorials_frontend_using_external_lib.py` (``using_external_lib.py``)
+- **00:01.550**: :ref:`sphx_glr_tutorials_frontend_from_onnx.py` (``from_onnx.py``)
+- **00:00.152**: :ref:`sphx_glr_tutorials_frontend_deploy_sparse.py` (``deploy_sparse.py``)
diff --git a/docs/_sources/tutorials/frontend/using_external_lib.rst.txt b/docs/_sources/tutorials/frontend/using_external_lib.rst.txt
index 8f4e4fe..1cd717a 100644
--- a/docs/_sources/tutorials/frontend/using_external_lib.rst.txt
+++ b/docs/_sources/tutorials/frontend/using_external_lib.rst.txt
@@ -114,7 +114,6 @@ By setting the logging level to DEBUG, the result of Relay graph compilation wil
     INFO:compile_engine:Use implementation injective.cpu for op negative
     INFO:compile_engine:Use implementation injective.cpu for op add
     WARNING:autotvm:Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 3, 224, 224), 'float32'), ('TENSOR', (16, 3, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
-    WARNING:autotvm:Cannot find config for target=cuda, workload=('conv2d_nchw_winograd.cuda', ('TENSOR', (1, 3, 224, 224), 'float32'), ('TENSOR', (16, 3, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
     INFO:compile_engine:Use implementation conv2d_nchw.cuda for op nn.conv2d
     INFO:compile_engine:Use implementation injective.cuda for op multiply
     INFO:compile_engine:Use implementation injective.cuda for op add
diff --git a/docs/_sources/tutorials/index.rst.txt b/docs/_sources/tutorials/index.rst.txt
index bdbda07..ec1dc46 100644
--- a/docs/_sources/tutorials/index.rst.txt
+++ b/docs/_sources/tutorials/index.rst.txt
@@ -423,6 +423,26 @@ Compile Deep Learning Models
    :hidden:
 
    /tutorials/frontend/build_gcn
+
+.. raw:: html
+
+    <div class="sphx-glr-thumbcontainer" tooltip="This tutorial demonstrates how to take any pruned model, in this case `PruneBert from Hugging F...">
+
+.. only:: html
+
+    .. figure:: /tutorials/frontend/images/thumb/sphx_glr_deploy_sparse_thumb.png
+
+        :ref:`sphx_glr_tutorials_frontend_deploy_sparse.py`
+
+.. raw:: html
+
+    </div>
+
+
+.. toctree::
+   :hidden:
+
+   /tutorials/frontend/deploy_sparse
 .. raw:: html
 
     <div style='clear:both'></div>
@@ -912,6 +932,40 @@ TOPI: TVM Operator Inventory
 
 
 
+.. _sphx_glr_tutorials_micro:
+
+.. _tutorial-micro:
+
+Micro TVM 
+---------
+
+
+
+.. raw:: html
+
+    <div class="sphx-glr-thumbcontainer" tooltip="This tutorial is an introduction to working with MicroTVM and a TFLite model with Relay.">
+
+.. only:: html
+
+    .. figure:: /tutorials/micro/images/thumb/sphx_glr_micro_tflite_thumb.png
+
+        :ref:`sphx_glr_tutorials_micro_micro_tflite.py`
+
+.. raw:: html
+
+    </div>
+
+
+.. toctree::
+   :hidden:
+
+   /tutorials/micro/micro_tflite
+.. raw:: html
+
+    <div style='clear:both'></div>
+
+
+
 .. only:: html
 
  .. rst-class:: sphx-glr-signature
diff --git a/docs/_sources/tutorials/language/intrin_math.rst.txt b/docs/_sources/tutorials/language/intrin_math.rst.txt
index e5d0a82..f7584fb 100644
--- a/docs/_sources/tutorials/language/intrin_math.rst.txt
+++ b/docs/_sources/tutorials/language/intrin_math.rst.txt
@@ -185,12 +185,15 @@ The following example customizes CUDA lowering rule for :code:`exp`.
     def my_cuda_math_rule(op):
         """Customized CUDA intrinsic lowering rule"""
         assert isinstance(op, tvm.tir.Call)
+        name = op.op.name
+        assert name.startswith("tir.")
+        dispatch_name = name[4:]
         if op.dtype == "float32":
             # call float function
-            return tvm.tir.call_pure_extern("float32", "%sf" % op.name, op.args[0])
+            return tvm.tir.call_pure_extern("float32", "%sf" % dispatch_name, op.args[0])
         elif op.dtype == "float64":
             # call double function
-            return tvm.tir.call_pure_extern("float32", op.name, op.args[0])
+            return tvm.tir.call_pure_extern("float64", dispatch_name, op.args[0])
         else:
             # cannot do translation, return self.
             return op
@@ -253,7 +256,7 @@ The following example adds an intrinsic :code:`mylog` to the system.
 
     def mylog(x):
         """customized log intrinsic function"""
-        return tvm.tir.call_pure_intrin(x.dtype, "mylog", x)
+        return tvm.tir.call_intrin(x.dtype, "tir.mylog", x)
 
 
     def my_cuda_mylog_rule(op):
@@ -265,7 +268,8 @@ The following example adds an intrinsic :code:`mylog` to the system.
         else:
             return op
 
-
+    # new op registration is triggered by registering an attribute of the op
+    tvm.ir.register_op_attr("tir.mylog", "TCallEffectKind", tvm.tir.CallEffectKind.Pure)
     tvm.target.register_intrin_rule("cuda", "mylog", my_cuda_mylog_rule, override=True)
 
     n = te.var("n")
diff --git a/docs/_sources/tutorials/language/reduction.rst.txt b/docs/_sources/tutorials/language/reduction.rst.txt
index 8165bf1..5938d1e 100644
--- a/docs/_sources/tutorials/language/reduction.rst.txt
+++ b/docs/_sources/tutorials/language/reduction.rst.txt
@@ -93,14 +93,14 @@ Before doing anything, let us print out the IR code of default schedule.
  .. code-block:: none
 
     primfn(A_1: handle, B_1: handle) -> ()
-      attr = {"tir.noalias": True, "global_symbol": "main"}
-      buffers = {A: Buffer(A_2: handle, float32, [n: int32, m: int32], [stride: int32, stride_1: int32], type="auto"),
-                 B: Buffer(B_2: handle, float32, [n], [stride_2: int32], type="auto")}
-      buffer_map = {B_1: B, A_1: A} {
+      attr = {"global_symbol": "main", "tir.noalias": True}
+      buffers = {B: Buffer(B_2: handle, float32, [n: int32], [stride: int32], type="auto"),
+                 A: Buffer(A_2: handle, float32, [n, m: int32], [stride_1: int32, stride_2: int32], type="auto")}
+      buffer_map = {A_1: A, B_1: B} {
       for (i: int32, 0, n) {
-        B_2[(i*stride_2)] = 0f32
+        B_2[(i*stride)] = 0f32
         for (k: int32, 0, m) {
-          B_2[(i*stride_2)] = ((float32*)B_2[(i*stride_2)]) + (float32*)A_2[((i*stride) + (k*stride_1))]))
+          B_2[(i*stride)] = ((float32*)B_2[(i*stride)] + (float32*)A_2[((i*stride_1) + (k*stride_2))])
         }
       }
     }
@@ -134,20 +134,20 @@ axis by different factors. The result is a nested reduction.
  .. code-block:: none
 
     primfn(A_1: handle, B_1: handle) -> ()
-      attr = {"tir.noalias": True, "global_symbol": "main"}
-      buffers = {A: Buffer(A_2: handle, float32, [n: int32, m: int32], [stride: int32, stride_1: int32], type="auto"),
-                 B: Buffer(B_2: handle, float32, [n], [stride_2: int32], type="auto")}
-      buffer_map = {B_1: B, A_1: A} {
+      attr = {"global_symbol": "main", "tir.noalias": True}
+      buffers = {B: Buffer(B_2: handle, float32, [n: int32], [stride: int32], type="auto"),
+                 A: Buffer(A_2: handle, float32, [n, m: int32], [stride_1: int32, stride_2: int32], type="auto")}
+      buffer_map = {A_1: A, B_1: B} {
       for (i.outer: int32, 0, floordiv((n + 31), 32)) {
         for (i.inner: int32, 0, 32) {
-          if @likely((((i.outer*32) + i.inner) < n), dtype=bool, type="pure_intrin") {
-            B_2[(((i.outer*32) + i.inner)*stride_2)] = 0f32
+          if @tir.likely((((i.outer*32) + i.inner) < n), dtype=bool) {
+            B_2[(((i.outer*32) + i.inner)*stride)] = 0f32
           }
           for (k.outer: int32, 0, floordiv((m + 15), 16)) {
             for (k.inner: int32, 0, 16) {
-              if @likely((((i.outer*32) + i.inner) < n), dtype=bool, type="pure_intrin") {
-                if @likely((((k.outer*16) + k.inner) < m), dtype=bool, type="pure_intrin") {
-                  B_2[(((i.outer*32) + i.inner)*stride_2)] = ((float32*)B_2[(((i.outer*32) + i.inner)*stride_2)]) + (float32*)A_2[((((i.outer*32) + i.inner)*stride) + (((k.outer*16) + k.inner)*stride_1))]))
+              if @tir.likely((((i.outer*32) + i.inner) < n), dtype=bool) {
+                if @tir.likely((((k.outer*16) + k.inner) < m), dtype=bool) {
+                  B_2[(((i.outer*32) + i.inner)*stride)] = ((float32*)B_2[(((i.outer*32) + i.inner)*stride)] + (float32*)A_2[((((i.outer*32) + i.inner)*stride_1) + (((k.outer*16) + k.inner)*stride_2))])
                 }
               }
             }
@@ -180,20 +180,20 @@ If we are building a GPU kernel, we can bind the rows of B to GPU threads.
  .. code-block:: none
 
     primfn(A_1: handle, B_1: handle) -> ()
-      attr = {"tir.noalias": True, "global_symbol": "main"}
-      buffers = {A: Buffer(A_2: handle, float32, [n: int32, m: int32], [stride: int32, stride_1: int32], type="auto"),
-                 B: Buffer(B_2: handle, float32, [n], [stride_2: int32], type="auto")}
-      buffer_map = {B_1: B, A_1: A} {
+      attr = {"global_symbol": "main", "tir.noalias": True}
+      buffers = {B: Buffer(B_2: handle, float32, [n: int32], [stride: int32], type="auto"),
+                 A: Buffer(A_2: handle, float32, [n, m: int32], [stride_1: int32, stride_2: int32], type="auto")}
+      buffer_map = {A_1: A, B_1: B} {
       attr [IterVar(blockIdx.x: int32, (nullptr), "ThreadIndex", "blockIdx.x")] "thread_extent" = floordiv((n + 31), 32);
       attr [IterVar(threadIdx.x: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 32 {
-        if @likely((((blockIdx.x*32) + threadIdx.x) < n), dtype=bool, type="pure_intrin") {
-          B_2[(((blockIdx.x*32) + threadIdx.x)*stride_2)] = 0f32
+        if @tir.likely((((blockIdx.x*32) + threadIdx.x) < n), dtype=bool) {
+          B_2[(((blockIdx.x*32) + threadIdx.x)*stride)] = 0f32
         }
         for (k.outer: int32, 0, floordiv((m + 15), 16)) {
           for (k.inner: int32, 0, 16) {
-            if @likely((((blockIdx.x*32) + threadIdx.x) < n), dtype=bool, type="pure_intrin") {
-              if @likely((((k.outer*16) + k.inner) < m), dtype=bool, type="pure_intrin") {
-                B_2[(((blockIdx.x*32) + threadIdx.x)*stride_2)] = ((float32*)B_2[(((blockIdx.x*32) + threadIdx.x)*stride_2)]) + (float32*)A_2[((((blockIdx.x*32) + threadIdx.x)*stride) + (((k.outer*16) + k.inner)*stride_1))]))
+            if @tir.likely((((blockIdx.x*32) + threadIdx.x) < n), dtype=bool) {
+              if @tir.likely((((k.outer*16) + k.inner) < m), dtype=bool) {
+                B_2[(((blockIdx.x*32) + threadIdx.x)*stride)] = ((float32*)B_2[(((blockIdx.x*32) + threadIdx.x)*stride)] + (float32*)A_2[((((blockIdx.x*32) + threadIdx.x)*stride_1) + (((k.outer*16) + k.inner)*stride_2))])
               }
             }
           }
@@ -236,26 +236,26 @@ result B.rf. The factored dimension becomes the first dimension of B.rf.
  .. code-block:: none
 
     primfn(A_1: handle, B_1: handle) -> ()
-      attr = {"tir.noalias": True, "global_symbol": "main"}
-      buffers = {A: Buffer(A_2: handle, float32, [n: int32, m: int32], [stride: int32, stride_1: int32], type="auto"),
-                 B: Buffer(B_2: handle, float32, [n], [stride_2: int32], type="auto")}
-      buffer_map = {B_1: B, A_1: A} {
+      attr = {"global_symbol": "main", "tir.noalias": True}
+      buffers = {B: Buffer(B_2: handle, float32, [n: int32], [stride: int32], type="auto"),
+                 A: Buffer(A_2: handle, float32, [n, m: int32], [stride_1: int32, stride_2: int32], type="auto")}
+      buffer_map = {A_1: A, B_1: B} {
       attr [B.rf: handle] "storage_scope" = "global";
       allocate(B.rf, float32, [(n*16)]) {
         for (k.inner: int32, 0, 16) {
           for (i: int32, 0, n) {
             B.rf[((k.inner*n) + i)] = 0f32
             for (k.outer: int32, 0, floordiv((m + 15), 16)) {
-              if @likely((((k.outer*16) + k.inner) < m), dtype=bool, type="pure_intrin") {
-                B.rf[((k.inner*n) + i)] = ((float32*)B.rf[((k.inner*n) + i)]) + (float32*)A_2[((i*stride) + (((k.outer*16) + k.inner)*stride_1))]))
+              if @tir.likely((((k.outer*16) + k.inner) < m), dtype=bool) {
+                B.rf[((k.inner*n) + i)] = ((float32*)B.rf[((k.inner*n) + i)] + (float32*)A_2[((i*stride_1) + (((k.outer*16) + k.inner)*stride_2))])
               }
             }
           }
         }
         for (ax0: int32, 0, n) {
-          B_2[(ax0*stride_2)] = 0f32
+          B_2[(ax0*stride)] = 0f32
           for (k.inner.v: int32, 0, 16) {
-            B_2[(ax0*stride_2)] = ((float32*)B_2[(ax0*stride_2)]) + (float32*)B.rf[((k.inner.v*n) + ax0)]))
+            B_2[(ax0*stride)] = ((float32*)B_2[(ax0*stride)] + (float32*)B.rf[((k.inner.v*n) + ax0)])
           }
         }
       }
@@ -411,17 +411,17 @@ Here is an example for 2D convolution with filter size = [3, 3] and strides = [1
  .. code-block:: none
 
     primfn(Input_1: handle, Filter_1: handle, Output_1: handle) -> ()
-      attr = {"tir.noalias": True, "global_symbol": "main"}
+      attr = {"global_symbol": "main", "tir.noalias": True}
       buffers = {Output: Buffer(Output_2: handle, float32, [(n: int32 - 2), (n - 2)], []),
-                 Input: Buffer(Input_2: handle, float32, [n, n], [stride: int32, stride_1: int32], type="auto"),
-                 Filter: Buffer(Filter_2: handle, float32, [3, 3], [])}
-      buffer_map = {Filter_1: Filter, Output_1: Output, Input_1: Input} {
+                 Filter: Buffer(Filter_2: handle, float32, [3, 3], []),
+                 Input: Buffer(Input_2: handle, float32, [n, n], [stride: int32, stride_1: int32], type="auto")}
+      buffer_map = {Input_1: Input, Filter_1: Filter, Output_1: Output} {
       for (i: int32, 0, (n - 2)) {
         for (j: int32, 0, (n - 2)) {
           Output_2[((i*(n - 2)) + j)] = 0f32
           for (di: int32, 0, 3) {
             for (dj: int32, 0, 3) {
-              Output_2[((i*(n - 2)) + j)] = ((float32*)Output_2[((i*(n - 2)) + j)]) + ((float32*)Input_2[(((i + di)*stride) + ((j + dj)*stride_1))])*(float32*)Filter_2[((di*3) + dj)])))
+              Output_2[((i*(n - 2)) + j)] = ((float32*)Output_2[((i*(n - 2)) + j)] + ((float32*)Input_2[(((i + di)*stride) + ((j + dj)*stride_1))]*(float32*)Filter_2[((di*3) + dj)]))
             }
           }
         }
diff --git a/docs/_sources/tutorials/language/scan.rst.txt b/docs/_sources/tutorials/language/scan.rst.txt
index a3dd3c2..f63c38a 100644
--- a/docs/_sources/tutorials/language/scan.rst.txt
+++ b/docs/_sources/tutorials/language/scan.rst.txt
@@ -99,20 +99,20 @@ To split on the time iteration, user can schedule on scan_op.scan_axis instead.
  .. code-block:: none
 
     primfn(X_1: handle, scan_1: handle) -> ()
-      attr = {"tir.noalias": True, "global_symbol": "main"}
-      buffers = {X: Buffer(X_2: handle, float32, [m: int32, n: int32], [stride: int32, stride_1: int32], type="auto"),
-                 scan: Buffer(scan_2: handle, float32, [m, n], [stride_2: int32, stride_3: int32], type="auto")}
-      buffer_map = {scan_1: scan, X_1: X} {
+      attr = {"global_symbol": "main", "tir.noalias": True}
+      buffers = {scan: Buffer(scan_2: handle, float32, [m: int32, n: int32], [stride: int32, stride_1: int32], type="auto"),
+                 X: Buffer(X_2: handle, float32, [m, n], [stride_2: int32, stride_3: int32], type="auto")}
+      buffer_map = {X_1: X, scan_1: scan} {
       attr [IterVar(blockIdx.x: int32, (nullptr), "ThreadIndex", "blockIdx.x")] "thread_extent" = floordiv((n + 255), 256);
       attr [IterVar(threadIdx.x: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 256;
-      if @likely((((blockIdx.x*256) + threadIdx.x) < n), dtype=bool, type="pure_intrin") {
-        scan_2[(((blockIdx.x*256) + threadIdx.x)*stride_3)] = (float32*)X_2[(((blockIdx.x*256) + threadIdx.x)*stride_1)])
+      if @tir.likely((((blockIdx.x*256) + threadIdx.x) < n), dtype=bool) {
+        scan_2[(((blockIdx.x*256) + threadIdx.x)*stride_1)] = (float32*)X_2[(((blockIdx.x*256) + threadIdx.x)*stride_3)]
       }
       for (scan.idx: int32, 0, (m - 1)) {
         attr [IterVar(blockIdx.x, (nullptr), "ThreadIndex", "blockIdx.x")] "thread_extent" = floordiv((n + 255), 256);
         attr [IterVar(threadIdx.x, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 256;
-        if @likely((((blockIdx.x*256) + threadIdx.x) < n), dtype=bool, type="pure_intrin") {
-          scan_2[(((scan.idx + 1)*stride_2) + (((blockIdx.x*256) + threadIdx.x)*stride_3))] = ((float32*)scan_2[((scan.idx*stride_2) + (((blockIdx.x*256) + threadIdx.x)*stride_3))]) + (float32*)X_2[(((scan.idx + 1)*stride) + (((blockIdx.x*256) + threadIdx.x)*stride_1))]))
+        if @tir.likely((((blockIdx.x*256) + threadIdx.x) < n), dtype=bool) {
+          scan_2[(((scan.idx + 1)*stride) + (((blockIdx.x*256) + threadIdx.x)*stride_1))] = ((float32*)scan_2[((scan.idx*stride) + (((blockIdx.x*256) + threadIdx.x)*stride_1))] + (float32*)X_2[(((scan.idx + 1)*stride_2) + (((blockIdx.x*256) + threadIdx.x)*stride_3))])
         }
       }
     }
@@ -198,25 +198,25 @@ the body of scan to be compute_at locations outside the scan loop.
  .. code-block:: none
 
     primfn(X_1: handle, scan_1: handle) -> ()
-      attr = {"tir.noalias": True, "global_symbol": "main"}
-      buffers = {X: Buffer(X_2: handle, float32, [m: int32, n: int32], [stride: int32, stride_1: int32], type="auto"),
-                 scan: Buffer(scan_2: handle, float32, [m, n], [stride_2: int32, stride_3: int32], type="auto")}
-      buffer_map = {scan_1: scan, X_1: X} {
+      attr = {"global_symbol": "main", "tir.noalias": True}
+      buffers = {scan: Buffer(scan_2: handle, float32, [m: int32, n: int32], [stride: int32, stride_1: int32], type="auto"),
+                 X: Buffer(X_2: handle, float32, [m, n], [stride_2: int32, stride_3: int32], type="auto")}
+      buffer_map = {X_1: X, scan_1: scan} {
       attr [s1: handle] "storage_scope" = "global";
       allocate(s1, float32, [32]) {
         for (i: int32, 0, n) {
-          scan_2[(i*stride_3)] = (float32*)X_2[(i*stride_1)])
+          scan_2[(i*stride_1)] = (float32*)X_2[(i*stride_3)]
         }
         for (scan.idx: int32, 0, (m - 1)) {
           for (i.outer: int32, 0, floordiv((n + 31), 32)) {
             for (i_1: int32, 0, 32) {
-              if @likely((((i.outer*32) + i_1) < n), dtype=bool, type="pure_intrin") {
-                s1[i_1] = ((float32*)scan_2[((scan.idx*stride_2) + (((i.outer*32) + i_1)*stride_3))])*2f32)
+              if @tir.likely((((i.outer*32) + i_1) < n), dtype=bool) {
+                s1[i_1] = ((float32*)scan_2[((scan.idx*stride) + (((i.outer*32) + i_1)*stride_1))]*2f32)
               }
             }
             for (i.inner: int32, 0, 32) {
-              if @likely((((i.outer*32) + i.inner) < n), dtype=bool, type="pure_intrin") {
-                scan_2[(((scan.idx + 1)*stride_2) + (((i.outer*32) + i.inner)*stride_3))] = ((float32*)s1[i.inner]) + (float32*)X_2[(((scan.idx + 1)*stride) + (((i.outer*32) + i.inner)*stride_1))]))
+              if @tir.likely((((i.outer*32) + i.inner) < n), dtype=bool) {
+                scan_2[(((scan.idx + 1)*stride) + (((i.outer*32) + i.inner)*stride_1))] = ((float32*)s1[i.inner] + (float32*)X_2[(((scan.idx + 1)*stride_2) + (((i.outer*32) + i.inner)*stride_3))])
               }
             }
           }
@@ -265,23 +265,23 @@ The following example demonstrates how we can build recurrence with two states.
  .. code-block:: none
 
     primfn(X_1: handle, scan.v0_1: handle, scan.v1_1: handle) -> ()
-      attr = {"tir.noalias": True, "global_symbol": "main"}
-      buffers = {X: Buffer(X_2: handle, float32, [m: int32, n: int32], [stride: int32, stride_1: int32], type="auto"),
-                 scan.v0: Buffer(scan.v0_2: handle, float32, [m, n], [stride_2: int32, stride_3: int32], type="auto"),
-                 scan.v1: Buffer(scan.v1_2: handle, float32, [m, l: int32], [stride_4: int32, stride_5: int32], type="auto")}
-      buffer_map = {scan.v1_1: scan.v1, scan.v0_1: scan.v0, X_1: X} {
+      attr = {"global_symbol": "main", "tir.noalias": True}
+      buffers = {scan.v1: Buffer(scan.v1_2: handle, float32, [m: int32, l: int32], [stride: int32, stride_1: int32], type="auto"),
+                 scan.v0: Buffer(scan.v0_2: handle, float32, [m, n: int32], [stride_2: int32, stride_3: int32], type="auto"),
+                 X: Buffer(X_2: handle, float32, [m, n], [stride_4: int32, stride_5: int32], type="auto")}
+      buffer_map = {X_1: X, scan.v0_1: scan.v0, scan.v1_1: scan.v1} {
       for (i: int32, 0, n) {
-        scan.v0_2[(i*stride_3)] = (float32*)X_2[(i*stride_1)])
+        scan.v0_2[(i*stride_3)] = (float32*)X_2[(i*stride_5)]
       }
       for (i_1: int32, 0, l) {
-        scan.v1_2[(i_1*stride_5)] = 0f32
+        scan.v1_2[(i_1*stride_1)] = 0f32
       }
       for (scan.idx: int32, 0, (m - 1)) {
         for (i_2: int32, 0, n) {
-          scan.v0_2[(((scan.idx + 1)*stride_2) + (i_2*stride_3))] = ((float32*)scan.v0_2[((scan.idx*stride_2) + (i_2*stride_3))]) + (float32*)X_2[(((scan.idx + 1)*stride) + (i_2*stride_1))]))
+          scan.v0_2[(((scan.idx + 1)*stride_2) + (i_2*stride_3))] = ((float32*)scan.v0_2[((scan.idx*stride_2) + (i_2*stride_3))] + (float32*)X_2[(((scan.idx + 1)*stride_4) + (i_2*stride_5))])
         }
         for (i_3: int32, 0, l) {
-          scan.v1_2[(((scan.idx + 1)*stride_4) + (i_3*stride_5))] = ((float32*)scan.v1_2[((scan.idx*stride_4) + (i_3*stride_5))]) + (float32*)scan.v0_2[(scan.idx*stride_2)]))
+          scan.v1_2[(((scan.idx + 1)*stride) + (i_3*stride_1))] = ((float32*)scan.v1_2[((scan.idx*stride) + (i_3*stride_1))] + (float32*)scan.v0_2[(scan.idx*stride_2)])
         }
       }
     }
diff --git a/docs/_sources/tutorials/language/schedule_primitives.rst.txt b/docs/_sources/tutorials/language/schedule_primitives.rst.txt
index 42072a3..52f46b2 100644
--- a/docs/_sources/tutorials/language/schedule_primitives.rst.txt
+++ b/docs/_sources/tutorials/language/schedule_primitives.rst.txt
@@ -84,14 +84,14 @@ schedule computes tensor in a serial manner in a row-major order.
  .. code-block:: none
 
     primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
-      attr = {"tir.noalias": True, "global_symbol": "main"}
-      buffers = {A: Buffer(A_2: handle, float32, [m: int32, n: int32], [stride: int32, stride_1: int32], type="auto"),
+      attr = {"global_symbol": "main", "tir.noalias": True}
+      buffers = {C: Buffer(C_2: handle, float32, [m: int32, n: int32], [stride: int32, stride_1: int32], type="auto"),
                  B: Buffer(B_2: handle, float32, [m, n], [stride_2: int32, stride_3: int32], type="auto"),
-                 C: Buffer(C_2: handle, float32, [m, n], [stride_4: int32, stride_5: int32], type="auto")}
-      buffer_map = {C_1: C, B_1: B, A_1: A} {
+                 A: Buffer(A_2: handle, float32, [m, n], [stride_4: int32, stride_5: int32], type="auto")}
+      buffer_map = {A_1: A, B_1: B, C_1: C} {
       for (i: int32, 0, m) {
         for (j: int32, 0, n) {
-          C_2[((i*stride_4) + (j*stride_5))] = ((float32*)A_2[((i*stride) + (j*stride_1))])*(float32*)B_2[((i*stride_2) + (j*stride_3))]))
+          C_2[((i*stride) + (j*stride_1))] = ((float32*)A_2[((i*stride_4) + (j*stride_5))]*(float32*)B_2[((i*stride_2) + (j*stride_3))])
         }
       }
     }
@@ -130,14 +130,14 @@ split
  .. code-block:: none
 
     primfn(A_1: handle, B_1: handle) -> ()
-      attr = {"tir.noalias": True, "global_symbol": "main"}
-      buffers = {A: Buffer(A_2: handle, float32, [m: int32], [stride: int32], type="auto"),
-                 B: Buffer(B_2: handle, float32, [m], [stride_1: int32], type="auto")}
-      buffer_map = {B_1: B, A_1: A} {
+      attr = {"global_symbol": "main", "tir.noalias": True}
+      buffers = {B: Buffer(B_2: handle, float32, [m: int32], [stride: int32], type="auto"),
+                 A: Buffer(A_2: handle, float32, [m], [stride_1: int32], type="auto")}
+      buffer_map = {A_1: A, B_1: B} {
       for (i.outer: int32, 0, floordiv((m + 31), 32)) {
         for (i.inner: int32, 0, 32) {
-          if @likely((((i.outer*32) + i.inner) < m), dtype=bool, type="pure_intrin") {
-            B_2[(((i.outer*32) + i.inner)*stride_1)] = ((float32*)A_2[(((i.outer*32) + i.inner)*stride)])*2f32)
+          if @tir.likely((((i.outer*32) + i.inner) < m), dtype=bool) {
+            B_2[(((i.outer*32) + i.inner)*stride)] = ((float32*)A_2[(((i.outer*32) + i.inner)*stride_1)]*2f32)
           }
         }
       }
@@ -171,14 +171,14 @@ contrary with :code:`factor`.
  .. code-block:: none
 
     primfn(A_1: handle, B_1: handle) -> ()
-      attr = {"tir.noalias": True, "global_symbol": "main"}
-      buffers = {A: Buffer(A_2: handle, float32, [m: int32], [stride: int32], type="auto"),
-                 B: Buffer(B_2: handle, float32, [m], [stride_1: int32], type="auto")}
-      buffer_map = {B_1: B, A_1: A} {
+      attr = {"global_symbol": "main", "tir.noalias": True}
+      buffers = {B: Buffer(B_2: handle, float32, [m: int32], [stride: int32], type="auto"),
+                 A: Buffer(A_2: handle, float32, [m], [stride_1: int32], type="auto")}
+      buffer_map = {A_1: A, B_1: B} {
       for (i.outer: int32, 0, 32) {
         for (i.inner: int32, 0, floordiv((m + 31), 32)) {
-          if @likely(((i.inner + (i.outer*floordiv((m + 31), 32))) < m), dtype=bool, type="pure_intrin") {
-            B_2[((i.inner + (i.outer*floordiv((m + 31), 32)))*stride_1)] = (float32*)A_2[((i.inner + (i.outer*floordiv((m + 31), 32)))*stride)])
+          if @tir.likely(((i.inner + (i.outer*floordiv((m + 31), 32))) < m), dtype=bool) {
+            B_2[((i.inner + (i.outer*floordiv((m + 31), 32)))*stride)] = (float32*)A_2[((i.inner + (i.outer*floordiv((m + 31), 32)))*stride_1)]
           }
         }
       }
@@ -214,17 +214,17 @@ axes.
  .. code-block:: none
 
     primfn(A_1: handle, B_1: handle) -> ()
-      attr = {"tir.noalias": True, "global_symbol": "main"}
-      buffers = {A: Buffer(A_2: handle, float32, [m: int32, n: int32], [stride: int32, stride_1: int32], type="auto"),
-                 B: Buffer(B_2: handle, float32, [m, n], [stride_2: int32, stride_3: int32], type="auto")}
-      buffer_map = {B_1: B, A_1: A} {
+      attr = {"global_symbol": "main", "tir.noalias": True}
+      buffers = {B: Buffer(B_2: handle, float32, [m: int32, n: int32], [stride: int32, stride_1: int32], type="auto"),
+                 A: Buffer(A_2: handle, float32, [m, n], [stride_2: int32, stride_3: int32], type="auto")}
+      buffer_map = {A_1: A, B_1: B} {
       for (i.outer: int32, 0, floordiv((m + 9), 10)) {
         for (j.outer: int32, 0, floordiv((n + 4), 5)) {
           for (i.inner: int32, 0, 10) {
             for (j.inner: int32, 0, 5) {
-              if @likely((((i.outer*10) + i.inner) < m), dtype=bool, type="pure_intrin") {
-                if @likely((((j.outer*5) + j.inner) < n), dtype=bool, type="pure_intrin") {
-                  B_2[((((i.outer*10) + i.inner)*stride_2) + (((j.outer*5) + j.inner)*stride_3))] = (float32*)A_2[((((i.outer*10) + i.inner)*stride) + (((j.outer*5) + j.inner)*stride_1))])
+              if @tir.likely((((i.outer*10) + i.inner) < m), dtype=bool) {
+                if @tir.likely((((j.outer*5) + j.inner) < n), dtype=bool) {
+                  B_2[((((i.outer*10) + i.inner)*stride) + (((j.outer*5) + j.inner)*stride_1))] = (float32*)A_2[((((i.outer*10) + i.inner)*stride_2) + (((j.outer*5) + j.inner)*stride_3))]
                 }
               }
             }
@@ -265,16 +265,16 @@ fuse
  .. code-block:: none
 
     primfn(A_1: handle, B_1: handle) -> ()
-      attr = {"tir.noalias": True, "global_symbol": "main"}
-      buffers = {A: Buffer(A_2: handle, float32, [m: int32, n: int32], [stride: int32, stride_1: int32], type="auto"),
-                 B: Buffer(B_2: handle, float32, [m, n], [stride_2: int32, stride_3: int32], type="auto")}
-      buffer_map = {B_1: B, A_1: A} {
+      attr = {"global_symbol": "main", "tir.noalias": True}
+      buffers = {B: Buffer(B_2: handle, float32, [m: int32, n: int32], [stride: int32, stride_1: int32], type="auto"),
+                 A: Buffer(A_2: handle, float32, [m, n], [stride_2: int32, stride_3: int32], type="auto")}
+      buffer_map = {A_1: A, B_1: B} {
       for (i.outer: int32, 0, floordiv((m + 9), 10)) {
         for (j.outer: int32, 0, floordiv((n + 4), 5)) {
           for (i.inner.j.inner.fused: int32, 0, 50) {
-            if @likely((((i.outer*10) + floordiv(i.inner.j.inner.fused, 5)) < m), dtype=bool, type="pure_intrin") {
-              if @likely((((j.outer*5) + floormod(i.inner.j.inner.fused, 5)) < n), dtype=bool, type="pure_intrin") {
-                B_2[((((i.outer*10) + floordiv(i.inner.j.inner.fused, 5))*stride_2) + (((j.outer*5) + floormod(i.inner.j.inner.fused, 5))*stride_3))] = (float32*)A_2[((((i.outer*10) + floordiv(i.inner.j.inner.fused, 5))*stride) + (((j.outer*5) + floormod(i.inner.j.inner.fused, 5))*stride_1))])
+            if @tir.likely((((i.outer*10) + floordiv(i.inner.j.inner.fused, 5)) < m), dtype=bool) {
+              if @tir.likely((((j.outer*5) + floormod(i.inner.j.inner.fused, 5)) < n), dtype=bool) {
+                B_2[((((i.outer*10) + floordiv(i.inner.j.inner.fused, 5))*stride) + (((j.outer*5) + floormod(i.inner.j.inner.fused, 5))*stride_1))] = (float32*)A_2[((((i.outer*10) + floordiv(i.inner.j.inner.fused, 5))*stride_2) + (((j.outer*5) + floormod(i.inner.j.inner.fused, 5))*stride_3))]
               }
             }
           }
@@ -314,17 +314,17 @@ reorder
  .. code-block:: none
 
     primfn(A_1: handle, B_1: handle) -> ()
-      attr = {"tir.noalias": True, "global_symbol": "main"}
-      buffers = {A: Buffer(A_2: handle, float32, [m: int32, n: int32], [stride: int32, stride_1: int32], type="auto"),
-                 B: Buffer(B_2: handle, float32, [m, n], [stride_2: int32, stride_3: int32], type="auto")}
-      buffer_map = {B_1: B, A_1: A} {
+      attr = {"global_symbol": "main", "tir.noalias": True}
+      buffers = {B: Buffer(B_2: handle, float32, [m: int32, n: int32], [stride: int32, stride_1: int32], type="auto"),
+                 A: Buffer(A_2: handle, float32, [m, n], [stride_2: int32, stride_3: int32], type="auto")}
+      buffer_map = {A_1: A, B_1: B} {
       for (i.inner: int32, 0, 10) {
         for (j.outer: int32, 0, floordiv((n + 4), 5)) {
           for (i.outer: int32, 0, floordiv((m + 9), 10)) {
             for (j.inner: int32, 0, 5) {
-              if @likely((((i.outer*10) + i.inner) < m), dtype=bool, type="pure_intrin") {
-                if @likely((((j.outer*5) + j.inner) < n), dtype=bool, type="pure_intrin") {
-                  B_2[((((i.outer*10) + i.inner)*stride_2) + (((j.outer*5) + j.inner)*stride_3))] = (float32*)A_2[((((i.outer*10) + i.inner)*stride) + (((j.outer*5) + j.inner)*stride_1))])
+              if @tir.likely((((i.outer*10) + i.inner) < m), dtype=bool) {
+                if @tir.likely((((j.outer*5) + j.inner) < n), dtype=bool) {
+                  B_2[((((i.outer*10) + i.inner)*stride) + (((j.outer*5) + j.inner)*stride_1))] = (float32*)A_2[((((i.outer*10) + i.inner)*stride_2) + (((j.outer*5) + j.inner)*stride_3))]
                 }
               }
             }
@@ -365,14 +365,14 @@ in gpu programming.
  .. code-block:: none
 
     primfn(A_1: handle, B_1: handle) -> ()
-      attr = {"tir.noalias": True, "global_symbol": "main"}
-      buffers = {A: Buffer(A_2: handle, float32, [n: int32], [stride: int32], type="auto"),
-                 B: Buffer(B_2: handle, float32, [n], [stride_1: int32], type="auto")}
-      buffer_map = {B_1: B, A_1: A} {
+      attr = {"global_symbol": "main", "tir.noalias": True}
+      buffers = {B: Buffer(B_2: handle, float32, [n: int32], [stride: int32], type="auto"),
+                 A: Buffer(A_2: handle, float32, [n], [stride_1: int32], type="auto")}
+      buffer_map = {A_1: A, B_1: B} {
       attr [IterVar(blockIdx.x: int32, (nullptr), "ThreadIndex", "blockIdx.x")] "thread_extent" = floordiv((n + 63), 64);
       attr [IterVar(threadIdx.x: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-      if @likely((((blockIdx.x*64) + threadIdx.x) < n), dtype=bool, type="pure_intrin") {
-        B_2[(((blockIdx.x*64) + threadIdx.x)*stride_1)] = ((float32*)A_2[(((blockIdx.x*64) + threadIdx.x)*stride)])*2f32)
+      if @tir.likely((((blockIdx.x*64) + threadIdx.x) < n), dtype=bool) {
+        B_2[(((blockIdx.x*64) + threadIdx.x)*stride)] = ((float32*)A_2[(((blockIdx.x*64) + threadIdx.x)*stride_1)]*2f32)
       }
     }
 
@@ -406,16 +406,16 @@ tensors at the root separately by default.
  .. code-block:: none
 
     primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
-      attr = {"tir.noalias": True, "global_symbol": "main"}
-      buffers = {A: Buffer(A_2: handle, float32, [m: int32], [stride: int32], type="auto"),
-                 C: Buffer(C_2: handle, float32, [m], [stride_1: int32], type="auto"),
-                 B: Buffer(B_2: handle, float32, [m], [stride_2: int32], type="auto")}
-      buffer_map = {B_1: B, C_1: C, A_1: A} {
+      attr = {"global_symbol": "main", "tir.noalias": True}
+      buffers = {C: Buffer(C_2: handle, float32, [m: int32], [stride: int32], type="auto"),
+                 B: Buffer(B_2: handle, float32, [m], [stride_1: int32], type="auto"),
+                 A: Buffer(A_2: handle, float32, [m], [stride_2: int32], type="auto")}
+      buffer_map = {A_1: A, B_1: B, C_1: C} {
       for (i: int32, 0, m) {
-        B_2[(i*stride_2)] = ((float32*)A_2[(i*stride)]) + 1f32)
+        B_2[(i*stride_1)] = ((float32*)A_2[(i*stride_2)] + 1f32)
       }
       for (i_1: int32, 0, m) {
-        C_2[(i_1*stride_1)] = ((float32*)B_2[(i_1*stride_2)])*2f32)
+        C_2[(i_1*stride)] = ((float32*)B_2[(i_1*stride_1)]*2f32)
       }
     }
 
@@ -448,14 +448,14 @@ of computation of `C`.
  .. code-block:: none
 
     primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
-      attr = {"tir.noalias": True, "global_symbol": "main"}
-      buffers = {A: Buffer(A_2: handle, float32, [m: int32], [stride: int32], type="auto"),
+      attr = {"global_symbol": "main", "tir.noalias": True}
+      buffers = {C: Buffer(C_2: handle, float32, [m: int32], [stride: int32], type="auto"),
                  B: Buffer(B_2: handle, float32, [m], [stride_1: int32], type="auto"),
-                 C: Buffer(C_2: handle, float32, [m], [stride_2: int32], type="auto")}
-      buffer_map = {C_1: C, B_1: B, A_1: A} {
+                 A: Buffer(A_2: handle, float32, [m], [stride_2: int32], type="auto")}
+      buffer_map = {A_1: A, B_1: B, C_1: C} {
       for (i: int32, 0, m) {
-        B_2[(i*stride_1)] = ((float32*)A_2[(i*stride)]) + 1f32)
-        C_2[(i*stride_2)] = ((float32*)B_2[(i*stride_1)])*2f32)
+        B_2[(i*stride_1)] = ((float32*)A_2[(i*stride_2)] + 1f32)
+        C_2[(i*stride)] = ((float32*)B_2[(i*stride_1)]*2f32)
       }
     }
 
@@ -491,13 +491,13 @@ tensor is required.
  .. code-block:: none
 
     primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
-      attr = {"tir.noalias": True, "global_symbol": "main"}
+      attr = {"global_symbol": "main", "tir.noalias": True}
       buffers = {B: Buffer(B_2: handle, float32, [m: int32], [stride: int32], type="auto"),
-                 A: Buffer(A_2: handle, float32, [m], [stride_1: int32], type="auto"),
-                 C: Buffer(C_2: handle, float32, [m], [stride_2: int32], type="auto")}
-      buffer_map = {C_1: C, B_1: B, A_1: A} {
+                 C: Buffer(C_2: handle, float32, [m], [stride_1: int32], type="auto"),
+                 A: Buffer(A_2: handle, float32, [m], [stride_2: int32], type="auto")}
+      buffer_map = {A_1: A, B_1: B, C_1: C} {
       for (i: int32, 0, m) {
-        C_2[(i*stride_2)] = (((float32*)A_2[(i*stride_1)]) + 1f32)*2f32)
+        C_2[(i*stride_1)] = (((float32*)A_2[(i*stride_2)] + 1f32)*2f32)
       }
     }
 
@@ -532,16 +532,16 @@ compute_root
  .. code-block:: none
 
     primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
-      attr = {"tir.noalias": True, "global_symbol": "main"}
-      buffers = {A: Buffer(A_2: handle, float32, [m: int32], [stride: int32], type="auto"),
+      attr = {"global_symbol": "main", "tir.noalias": True}
+      buffers = {C: Buffer(C_2: handle, float32, [m: int32], [stride: int32], type="auto"),
                  B: Buffer(B_2: handle, float32, [m], [stride_1: int32], type="auto"),
-                 C: Buffer(C_2: handle, float32, [m], [stride_2: int32], type="auto")}
-      buffer_map = {C_1: C, B_1: B, A_1: A} {
+                 A: Buffer(A_2: handle, float32, [m], [stride_2: int32], type="auto")}
+      buffer_map = {A_1: A, B_1: B, C_1: C} {
       for (i: int32, 0, m) {
-        B_2[(i*stride_1)] = ((float32*)A_2[(i*stride)]) + 1f32)
+        B_2[(i*stride_1)] = ((float32*)A_2[(i*stride_2)] + 1f32)
       }
       for (i_1: int32, 0, m) {
-        C_2[(i_1*stride_2)] = ((float32*)B_2[(i_1*stride_1)])*2f32)
+        C_2[(i_1*stride)] = ((float32*)B_2[(i_1*stride_1)]*2f32)
       }
     }
 
diff --git a/docs/_sources/tutorials/language/sg_execution_times.rst.txt b/docs/_sources/tutorials/language/sg_execution_times.rst.txt
index 728d3e8..faff215 100644
--- a/docs/_sources/tutorials/language/sg_execution_times.rst.txt
+++ b/docs/_sources/tutorials/language/sg_execution_times.rst.txt
@@ -5,13 +5,13 @@
 
 Computation times
 =================
-**00:03.887** total execution time for **tutorials_language** files:
+**00:03.276** total execution time for **tutorials_language** files:
 
-- **00:01.442**: :ref:`sphx_glr_tutorials_language_intrin_math.py` (``intrin_math.py``)
-- **00:00.697**: :ref:`sphx_glr_tutorials_language_tensorize.py` (``tensorize.py``)
-- **00:00.505**: :ref:`sphx_glr_tutorials_language_reduction.py` (``reduction.py``)
-- **00:00.501**: :ref:`sphx_glr_tutorials_language_scan.py` (``scan.py``)
-- **00:00.234**: :ref:`sphx_glr_tutorials_language_extern_op.py` (``extern_op.py``)
-- **00:00.190**: :ref:`sphx_glr_tutorials_language_tedd.py` (``tedd.py``)
-- **00:00.169**: :ref:`sphx_glr_tutorials_language_schedule_primitives.py` (``schedule_primitives.py``)
-- **00:00.149**: :ref:`sphx_glr_tutorials_language_tuple_inputs.py` (``tuple_inputs.py``)
+- **00:01.150**: :ref:`sphx_glr_tutorials_language_intrin_math.py` (``intrin_math.py``)
+- **00:00.555**: :ref:`sphx_glr_tutorials_language_tensorize.py` (``tensorize.py``)
+- **00:00.450**: :ref:`sphx_glr_tutorials_language_reduction.py` (``reduction.py``)
+- **00:00.413**: :ref:`sphx_glr_tutorials_language_scan.py` (``scan.py``)
+- **00:00.226**: :ref:`sphx_glr_tutorials_language_extern_op.py` (``extern_op.py``)
+- **00:00.165**: :ref:`sphx_glr_tutorials_language_schedule_primitives.py` (``schedule_primitives.py``)
+- **00:00.164**: :ref:`sphx_glr_tutorials_language_tedd.py` (``tedd.py``)
+- **00:00.154**: :ref:`sphx_glr_tutorials_language_tuple_inputs.py` (``tuple_inputs.py``)
diff --git a/docs/_sources/tutorials/language/tensorize.rst.txt b/docs/_sources/tutorials/language/tensorize.rst.txt
index 0abc7ac..ed11121 100644
--- a/docs/_sources/tutorials/language/tensorize.rst.txt
+++ b/docs/_sources/tutorials/language/tensorize.rst.txt
@@ -70,16 +70,16 @@ The following lines describe the computation :code:`A * B^T` in TVM.
  .. code-block:: none
 
     primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
-      attr = {"tir.noalias": True, "global_symbol": "main"}
-      buffers = {B: Buffer(B_2: handle, float32, [512, 64], []),
-                 A: Buffer(A_2: handle, float32, [1024, 64], []),
-                 C: Buffer(C_2: handle, float32, [1024, 512], [])}
-      buffer_map = {C_1: C, B_1: B, A_1: A} {
+      attr = {"global_symbol": "main", "tir.noalias": True}
+      buffers = {C: Buffer(C_2: handle, float32, [1024, 512], []),
+                 B: Buffer(B_2: handle, float32, [512, 64], []),
+                 A: Buffer(A_2: handle, float32, [1024, 64], [])}
+      buffer_map = {A_1: A, B_1: B, C_1: C} {
       for (i: int32, 0, 1024) {
         for (j: int32, 0, 512) {
           C_2[((i*512) + j)] = 0f32
           for (k: int32, 0, 64) {
-            C_2[((i*512) + j)] = ((float32*)C_2[((i*512) + j)]) + ((float32*)A_2[((i*64) + k)])*(float32*)B_2[((j*64) + k)])))
+            C_2[((i*512) + j)] = ((float32*)C_2[((i*512) + j)] + ((float32*)A_2[((i*64) + k)]*(float32*)B_2[((j*64) + k)]))
           }
         }
       }
@@ -119,17 +119,17 @@ Thus we break down the matmul loops to make the innermost loops a (16x64) GEMV.
  .. code-block:: none
 
     primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
-      attr = {"tir.noalias": True, "global_symbol": "main"}
-      buffers = {A: Buffer(A_2: handle, float32, [1024, 64], []),
+      attr = {"global_symbol": "main", "tir.noalias": True}
+      buffers = {C: Buffer(C_2: handle, float32, [1024, 512], []),
                  B: Buffer(B_2: handle, float32, [512, 64], []),
-                 C: Buffer(C_2: handle, float32, [1024, 512], [])}
-      buffer_map = {C_1: C, B_1: B, A_1: A} {
+                 A: Buffer(A_2: handle, float32, [1024, 64], [])}
+      buffer_map = {A_1: A, B_1: B, C_1: C} {
       for (i: int32, 0, 1024) {
         for (j.outer: int32, 0, 32) {
           for (j.inner: int32, 0, 16) {
             C_2[(((i*512) + (j.outer*16)) + j.inner)] = 0f32
             for (k: int32, 0, 64) {
-              C_2[(((i*512) + (j.outer*16)) + j.inner)] = ((float32*)C_2[(((i*512) + (j.outer*16)) + j.inner)]) + ((float32*)A_2[((i*64) + k)])*(float32*)B_2[(((j.outer*1024) + (j.inner*64)) + k)])))
+              C_2[(((i*512) + (j.outer*16)) + j.inner)] = ((float32*)C_2[(((i*512) + (j.outer*16)) + j.inner)] + ((float32*)A_2[((i*64) + k)]*(float32*)B_2[(((j.outer*1024) + (j.inner*64)) + k)]))
             }
           }
         }
@@ -235,14 +235,14 @@ such placeholder can be put to let TVM automatically bind the inferred value for
  .. code-block:: none
 
     primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
-      attr = {"tir.noalias": True, "global_symbol": "main"}
-      buffers = {A: Buffer(A_2: handle, float32, [1024, 64], []),
+      attr = {"global_symbol": "main", "tir.noalias": True}
+      buffers = {C: Buffer(C_2: handle, float32, [1024, 512], []),
                  B: Buffer(B_2: handle, float32, [512, 64], []),
-                 C: Buffer(C_2: handle, float32, [1024, 512], [])}
-      buffer_map = {C_1: C, B_1: B, A_1: A} {
+                 A: Buffer(A_2: handle, float32, [1024, 64], [])}
+      buffer_map = {A_1: A, B_1: B, C_1: C} {
       for (i: int32, 0, 1024) {
         for (j.outer: int32, 0, 32) {
-          @gemv_update(@tvm_access_ptr(@type_annotation(, dtype=float32, type="pure_intrin"), C_2, ((i*512) + (j.outer*16)), 16, 2, dtype=handle, type="intrin"), @tvm_access_ptr(@type_annotation(, dtype=float32, type="pure_intrin"), A_2, (i*64), 64, 1, dtype=handle, type="intrin"), @tvm_access_ptr(@type_annotation(, dtype=float32, type="pure_intrin"), B_2, (j.outer*1024), 1024, 1, dtype=handle, type="intrin"), 16, 64, 64, dtype=int32, type="extern")
+          @tir.call_extern("gemv_update", @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), C_2, ((i*512) + (j.outer*16)), 16, 2, dtype=handle), @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), A_2, (i*64), 64, 1, dtype=handle), @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), B_2, (j.outer*1024), 1024, 1, dtype=handle), 16, 64, 64, dtype=int32)
         }
       }
     }
@@ -305,13 +305,13 @@ The importing needs to happen before the tensorized GEMV being executed.
  .. code-block:: none
 
     primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
-      attr = {"tir.noalias": True, "global_symbol": "main"}
-      buffers = {A: Buffer(A_2: handle, float32, [1024, 64], []),
+      attr = {"global_symbol": "main", "tir.noalias": True}
+      buffers = {C: Buffer(C_2: handle, float32, [1024, 512], []),
                  B: Buffer(B_2: handle, float32, [512, 64], []),
-                 C: Buffer(C_2: handle, float32, [1024, 512], [])}
-      buffer_map = {C_1: C, B_1: B, A_1: A} {
-      attr [IterVar(i: int32, (nullptr), "DataPar", "")] "pragma_import_llvm" = "; ModuleID = '/tmp/tmpcw7zd4dq/input0.cc'
-    source_filename = "/tmp/tmpcw7zd4dq/input0.cc"
+                 A: Buffer(A_2: handle, float32, [1024, 64], [])}
+      buffer_map = {A_1: A, B_1: B, C_1: C} {
+      attr [IterVar(i: int32, (nullptr), "DataPar", "")] "pragma_import_llvm" = "; ModuleID = '/tmp/tmpmel936f0/input0.cc'
+    source_filename = "/tmp/tmpmel936f0/input0.cc"
     target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
     target triple = "x86_64-pc-linux-gnu"
 
@@ -404,7 +404,7 @@ The importing needs to happen before the tensorized GEMV being executed.
     ";
       for (i, 0, 1024) {
         for (j.outer: int32, 0, 32) {
-          @gemv_update(@tvm_access_ptr(@type_annotation(, dtype=float32, type="pure_intrin"), C_2, ((i*512) + (j.outer*16)), 16, 2, dtype=handle, type="intrin"), @tvm_access_ptr(@type_annotation(, dtype=float32, type="pure_intrin"), A_2, (i*64), 64, 1, dtype=handle, type="intrin"), @tvm_access_ptr(@type_annotation(, dtype=float32, type="pure_intrin"), B_2, (j.outer*1024), 1024, 1, dtype=handle, type="intrin"), 16, 64, 64, dtype=int32, type="extern")
+          @tir.call_extern("gemv_update", @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), C_2, ((i*512) + (j.outer*16)), 16, 2, dtype=handle), @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), A_2, (i*64), 64, 1, dtype=handle), @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), B_2, (j.outer*1024), 1024, 1, dtype=handle), 16, 64, 64, dtype=int32)
         }
       }
     }
diff --git a/docs/_sources/tutorials/language/tuple_inputs.rst.txt b/docs/_sources/tutorials/language/tuple_inputs.rst.txt
index 1911840..0ed5d9c 100644
--- a/docs/_sources/tutorials/language/tuple_inputs.rst.txt
+++ b/docs/_sources/tutorials/language/tuple_inputs.rst.txt
@@ -63,16 +63,16 @@ together in the next schedule procedure.
  .. code-block:: none
 
     primfn(A0_1: handle, A1_1: handle, B.v0_1: handle, B.v1_1: handle) -> ()
-      attr = {"tir.noalias": True, "global_symbol": "main"}
-      buffers = {A1: Buffer(A1_2: handle, float32, [m: int32, n: int32], [stride: int32, stride_1: int32], type="auto"),
-                 B.v0: Buffer(B.v0_2: handle, float32, [m, n], [stride_2: int32, stride_3: int32], type="auto"),
-                 A0: Buffer(A0_2: handle, float32, [m, n], [stride_4: int32, stride_5: int32], type="auto"),
-                 B.v1: Buffer(B.v1_2: handle, float32, [m, n], [stride_6: int32, stride_7: int32], type="auto")}
-      buffer_map = {B.v1_1: B.v1, B.v0_1: B.v0, A1_1: A1, A0_1: A0} {
+      attr = {"global_symbol": "main", "tir.noalias": True}
+      buffers = {B.v0: Buffer(B.v0_2: handle, float32, [m: int32, n: int32], [stride: int32, stride_1: int32], type="auto"),
+                 A1: Buffer(A1_2: handle, float32, [m, n], [stride_2: int32, stride_3: int32], type="auto"),
+                 B.v1: Buffer(B.v1_2: handle, float32, [m, n], [stride_4: int32, stride_5: int32], type="auto"),
+                 A0: Buffer(A0_2: handle, float32, [m, n], [stride_6: int32, stride_7: int32], type="auto")}
+      buffer_map = {A0_1: A0, A1_1: A1, B.v0_1: B.v0, B.v1_1: B.v1} {
       for (i: int32, 0, m) {
         for (j: int32, 0, n) {
-          B.v0_2[((i*stride_2) + (j*stride_3))] = ((float32*)A0_2[((i*stride_4) + (j*stride_5))]) + 2f32)
-          B.v1_2[((i*stride_6) + (j*stride_7))] = ((float32*)A1_2[((i*stride) + (j*stride_1))])*3f32)
+          B.v0_2[((i*stride) + (j*stride_1))] = ((float32*)A0_2[((i*stride_6) + (j*stride_7))] + 2f32)
+          B.v1_2[((i*stride_4) + (j*stride_5))] = ((float32*)A1_2[((i*stride_2) + (j*stride_3))]*3f32)
         }
       }
     }
@@ -132,18 +132,18 @@ with :py:func:`te.comm_reducer` as below:
  .. code-block:: none
 
     primfn(idx_1: handle, val_1: handle, T.v0_1: handle, T.v1_1: handle) -> ()
-      attr = {"tir.noalias": True, "global_symbol": "main"}
-      buffers = {val: Buffer(val_2: handle, int32, [m: int32, n: int32], [stride: int32, stride_1: int32], type="auto"),
-                 T.v0: Buffer(T.v0_2: handle, int32, [m], [stride_2: int32], type="auto"),
-                 idx: Buffer(idx_2: handle, int32, [m, n], [stride_3: int32, stride_4: int32], type="auto"),
-                 T.v1: Buffer(T.v1_2: handle, int32, [m], [stride_5: int32], type="auto")}
-      buffer_map = {T.v1_1: T.v1, T.v0_1: T.v0, val_1: val, idx_1: idx} {
+      attr = {"global_symbol": "main", "tir.noalias": True}
+      buffers = {T.v1: Buffer(T.v1_2: handle, int32, [m: int32], [stride: int32], type="auto"),
+                 T.v0: Buffer(T.v0_2: handle, int32, [m], [stride_1: int32], type="auto"),
+                 val: Buffer(val_2: handle, int32, [m, n: int32], [stride_2: int32, stride_3: int32], type="auto"),
+                 idx: Buffer(idx_2: handle, int32, [m, n], [stride_4: int32, stride_5: int32], type="auto")}
+      buffer_map = {idx_1: idx, val_1: val, T.v0_1: T.v0, T.v1_1: T.v1} {
       for (i: int32, 0, m) {
-        T.v0_2[(i*stride_2)] = -1
-        T.v1_2[(i*stride_5)] = -2147483648
+        T.v0_2[(i*stride_1)] = -1
+        T.v1_2[(i*stride)] = -2147483648
         for (k: int32, 0, n) {
-          T.v0_2[(i*stride_2)] = @tvm_if_then_else(((int32*)val_2[((i*stride) + (k*stride_1))]) <= (int32*)T.v1_2[(i*stride_5)])), (int32*)T.v0_2[(i*stride_2)]), (int32*)idx_2[((i*stride_3) + (k*stride_4))]), dtype=int32, type="intrin")
-          T.v1_2[(i*stride_5)] = @tvm_if_then_else(((int32*)val_2[((i*stride) + (k*stride_1))]) <= (int32*)T.v1_2[(i*stride_5)])), (int32*)T.v1_2[(i*stride_5)]), (int32*)val_2[((i*stride) + (k*stride_1))]), dtype=int32, type="intrin")
+          T.v0_2[(i*stride_1)] = @tir.if_then_else(((int32*)val_2[((i*stride_2) + (k*stride_3))] <= (int32*)T.v1_2[(i*stride)]), (int32*)T.v0_2[(i*stride_1)], (int32*)idx_2[((i*stride_4) + (k*stride_5))], dtype=int32)
+          T.v1_2[(i*stride)] = @tir.if_then_else(((int32*)val_2[((i*stride_2) + (k*stride_3))] <= (int32*)T.v1_2[(i*stride)]), (int32*)T.v1_2[(i*stride)], (int32*)val_2[((i*stride_2) + (k*stride_3))], dtype=int32)
         }
       }
     }
@@ -190,22 +190,22 @@ in terms of operation.
  .. code-block:: none
 
     primfn(A0_1: handle, A1_1: handle, C_1: handle) -> ()
-      attr = {"tir.noalias": True, "global_symbol": "main"}
-      buffers = {A0: Buffer(A0_2: handle, float32, [m: int32, n: int32], [stride: int32, stride_1: int32], type="auto"),
+      attr = {"global_symbol": "main", "tir.noalias": True}
+      buffers = {C: Buffer(C_2: handle, float32, [m: int32, n: int32], [stride: int32, stride_1: int32], type="auto"),
                  A1: Buffer(A1_2: handle, float32, [m, n], [stride_2: int32, stride_3: int32], type="auto"),
-                 C: Buffer(C_2: handle, float32, [m, n], [stride_4: int32, stride_5: int32], type="auto")}
-      buffer_map = {C_1: C, A1_1: A1, A0_1: A0} {
+                 A0: Buffer(A0_2: handle, float32, [m, n], [stride_4: int32, stride_5: int32], type="auto")}
+      buffer_map = {A0_1: A0, A1_1: A1, C_1: C} {
       attr [B.v0: handle] "storage_scope" = "global";
       allocate(B.v0, float32, [n]);
       attr [B.v1: handle] "storage_scope" = "global";
       allocate(B.v1, float32, [n]);
       for (i: int32, 0, m) {
         for (j: int32, 0, n) {
-          B.v0[j] = ((float32*)A0_2[((i*stride) + (j*stride_1))]) + 2f32)
-          B.v1[j] = ((float32*)A0_2[((i*stride) + (j*stride_1))])*3f32)
+          B.v0[j] = ((float32*)A0_2[((i*stride_4) + (j*stride_5))] + 2f32)
+          B.v1[j] = ((float32*)A0_2[((i*stride_4) + (j*stride_5))]*3f32)
         }
         for (j_1: int32, 0, n) {
-          C_2[((i*stride_4) + (j_1*stride_5))] = ((float32*)A1_2[((i*stride_2) + (j_1*stride_3))]) + (float32*)B.v0[j_1]))
+          C_2[((i*stride) + (j_1*stride_1))] = ((float32*)A1_2[((i*stride_2) + (j_1*stride_3))] + (float32*)B.v0[j_1])
         }
       }
     }
diff --git a/docs/_sources/tutorials/micro/micro_tflite.rst.txt b/docs/_sources/tutorials/micro/micro_tflite.rst.txt
new file mode 100644
index 0000000..668be1b
--- /dev/null
+++ b/docs/_sources/tutorials/micro/micro_tflite.rst.txt
@@ -0,0 +1,283 @@
+.. note::
+    :class: sphx-glr-download-link-note
+
+    Click :ref:`here <sphx_glr_download_tutorials_micro_micro_tflite.py>` to download the full example code
+.. rst-class:: sphx-glr-example-title
+
+.. _sphx_glr_tutorials_micro_micro_tflite.py:
+
+
+Micro TVM with TFLite Models
+============================
+**Author**: `Tom Gall <https://github.com/tom-gall>`_
+
+This tutorial is an introduction to working with MicroTVM and a TFLite 
+model with Relay.
+
+
+.. code-block:: default
+
+
+    # %%
+    # Setup
+    # -----
+    #
+    # To get started, TFLite package needs to be installed as prerequisite.
+    #
+    # install tflite
+    #
+    # .. code-block:: bash
+    #
+    #   pip install tflite==2.1.0 --user
+    #
+    # or you could generate TFLite package yourself. The steps are the following:
+    #
+    #   Get the flatc compiler.
+    #   Please refer to https://github.com/google/flatbuffers for details
+    #   and make sure it is properly installed.
+    #
+    # .. code-block:: bash
+    #
+    #   flatc --version
+    #
+    # Get the TFLite schema.
+    #
+    # .. code-block:: bash
+    #
+    #   wget https://raw.githubusercontent.com/tensorflow/tensorflow/r1.13/tensorflow/lite/schema/schema.fbs
+    #
+    # Generate TFLite package.
+    #
+    # .. code-block:: bash
+    #
+    #   flatc --python schema.fbs
+    #
+    # Add the current folder (which contains generated tflite module) to PYTHONPATH.
+    #
+    # .. code-block:: bash
+    #
+    #   export PYTHONPATH=${PYTHONPATH:+$PYTHONPATH:}$(pwd)
+    #
+    # To validate that the TFLite package was installed successfully, run ``python -c "import tflite"``
+    #
+    # CMSIS needs to be downloaded and the CMSIS_ST_PATH environment variable set up.
+    # This tutorial only supports the STM32F7xx series of boards.
+    # Download from: https://www.st.com/en/embedded-software/stm32cubef7.html
+    # After you've expanded the zip file, set the environment variable:
+    #
+    # .. code-block:: bash
+    #
+    #   export CMSIS_ST_PATH=/path/to/STM32Cube_FW_F7_V1.16.0/Drivers/CMSIS
+
+    # %%
+    # Recreating your own Pre-Trained TFLite model
+    # --------------------------------------------
+    #
+    # The tutorial downloads a pretrained TFLite model. When working with microcontrollers,
+    # be mindful that these are highly resource-constrained devices, so standard
+    # models like MobileNet may not fit into their modest memory.
+    #
+    # For this tutorial, we'll make use of one of the TF Micro example models.
+    #
+    # If you wish to replicate the training steps see:
+    # https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/micro/examples/hello_world/train
+    #
+    #   .. note::
+    #
+    #     If you accidentally download the example pretrained model from:
+    #     wget https://storage.googleapis.com/download.tensorflow.org/models/tflite/micro/hello_world_2020_04_13.zip
+    #     this will fail due to an unimplemented opcode (114)
+
+    import os
+    import numpy as np
+    import tvm
+    import tvm.micro as micro
+    from tvm.contrib.download import download_testdata
+    from tvm.contrib import graph_runtime, util
+    from tvm import relay
+
+    # %%
+    # Load and prepare the Pre-Trained Model
+    # --------------------------------------
+    #
+    # Load the pretrained TFLite model from a file in your current
+    # directory into a buffer
+
+    model_url = 'https://people.linaro.org/~tom.gall/sine_model.tflite'
+    model_file = 'sine_model.tflite'
+    model_path = download_testdata(model_url, model_file, module='data')
+
+    tflite_model_buf = open(model_path, "rb").read()
+
+
+
+
+
+.. rst-class:: sphx-glr-script-out
+
+ Out:
+
+ .. code-block:: none
+
+    File /workspace/.tvm_test_data/data/sine_model.tflite exists, skip.
+
+
+
+Using the buffer, transform into a tflite model python object
+
+
+.. code-block:: default
+
+    try:
+        import tflite
+        tflite_model = tflite.Model.GetRootAsModel(tflite_model_buf, 0)
+    except AttributeError:
+        import tflite.Model
+        tflite_model = tflite.Model.Model.GetRootAsModel(tflite_model_buf, 0)
+
+
+
+
+
+
+
+Print out the version of the model
+
+
+.. code-block:: default
+
+    version = tflite_model.Version()
+    print ("Model Version: " + str(version))
+
+
+
+
+
+.. rst-class:: sphx-glr-script-out
+
+ Out:
+
+ .. code-block:: none
+
+    Model Version: 3
+
+
+
+Parse the python model object to convert it into a relay module
+and weights.
+It is important to note that the input tensor name must match what
+is contained in the model.
+
+If you are unsure what that might be, this can be discovered by using
+the visualize.py script within the Tensorflow project.
+See `How do I inspect a .tflite file? <https://www.tensorflow.org/lite/guide/faq>`_
+
+
+.. code-block:: default
+
+ 
+    input_tensor = "dense_4_input"
+    input_shape = (1,)
+    input_dtype = "float32"
+
+    mod, params = relay.frontend.from_tflite(tflite_model,
+                                             shape_dict={input_tensor: input_shape},
+                                             dtype_dict={input_tensor: input_dtype})
+
+    # %%
+    # Running on device
+    # ----------------------------------------------
+    #
+    # Setup the device config which is what will be used to communicate
+    # with the microcontroller (a STM32F746 Discovery board)
+    TARGET = 'c -device=micro_dev'
+    dev_config = micro.device.arm.stm32f746xx.generate_config("127.0.0.1", 6666)
+
+
+
+
+
+
+
+Next with the dev_config, we establish a micro session and create
+a context
+
+.. code-block:: python
+
+  with micro.Session(dev_config) as sess:
+      ctx = tvm.micro_dev(0)
+
+Now we create a build config for Relay, turning off two options,
+and then call relay.build, which will result in a C source
+file.
+
+.. code-block:: python
+
+  with tvm.transform.PassContext(opt_level=3, config={'tir.disable_vectorize': True},disabled_pass=['FuseOps']):
+      graph, c_mod, params = relay.build(mod, target=TARGET, params=params)
+
+With the c_mod that is the handle to our C source code, we create a
+micro module, followed by a compiled object which behind the scenes
+is linked to the microTVM runtime for running on the target board
+
+.. code-block:: python
+
+  micro_mod = micro.create_micro_mod(c_mod, dev_config)
+  mod = graph_runtime.create(graph, micro_mod, ctx)
+
+Pass the weights to get ready to perform inference
+
+.. code-block:: python
+
+  mod.set_input(**params)
+
+The model consumes a single float32 value and returns a predicted
+sine value.
+To pass the input value we construct a tvm.nd.array object
+with a single contrived number as input. For this model values of
+0 to 2Pi are acceptable.
+
+.. code-block:: python
+
+  mod.set_input(input_tensor, tvm.nd.array(np.array([0.5], dtype="float32")))
+
+Run the model on device
+
+.. code-block:: python
+
+  mod.run()
+
+Get output from the run and print
+
+.. code-block:: python
+
+  tvm_output = mod.get_output(0).asnumpy()
+  print("result is: "+str(tvm_output))
+
+
+.. _sphx_glr_download_tutorials_micro_micro_tflite.py:
+
+
+.. only :: html
+
+ .. container:: sphx-glr-footer
+    :class: sphx-glr-footer-example
+
+
+
+  .. container:: sphx-glr-download
+
+     :download:`Download Python source code: micro_tflite.py <micro_tflite.py>`
+
+
+
+  .. container:: sphx-glr-download
+
+     :download:`Download Jupyter notebook: micro_tflite.ipynb <micro_tflite.ipynb>`
+
+
+.. only:: html
+
+ .. rst-class:: sphx-glr-signature
+
+    `Gallery generated by Sphinx-Gallery <https://sphinx-gallery.github.io>`_
diff --git a/docs/_sources/tutorials/micro/sg_execution_times.rst.txt b/docs/_sources/tutorials/micro/sg_execution_times.rst.txt
new file mode 100644
index 0000000..b7fde98
--- /dev/null
+++ b/docs/_sources/tutorials/micro/sg_execution_times.rst.txt
@@ -0,0 +1,10 @@
+
+:orphan:
+
+.. _sphx_glr_tutorials_micro_sg_execution_times:
+
+Computation times
+=================
+**00:00.155** total execution time for **tutorials_micro** files:
+
+- **00:00.155**: :ref:`sphx_glr_tutorials_micro_micro_tflite.py` (``micro_tflite.py``)
diff --git a/docs/_sources/tutorials/optimize/opt_conv_cuda.rst.txt b/docs/_sources/tutorials/optimize/opt_conv_cuda.rst.txt
index 3210c23..e04350e 100644
--- a/docs/_sources/tutorials/optimize/opt_conv_cuda.rst.txt
+++ b/docs/_sources/tutorials/optimize/opt_conv_cuda.rst.txt
@@ -293,7 +293,7 @@ latency of convolution.
 
  .. code-block:: none
 
-    Convolution: 19.651037 ms
+    Convolution: 19.549104 ms
 
 
 
diff --git a/docs/_sources/tutorials/optimize/opt_conv_tensorcore.rst.txt b/docs/_sources/tutorials/optimize/opt_conv_tensorcore.rst.txt
index a768242..795f5c0 100644
--- a/docs/_sources/tutorials/optimize/opt_conv_tensorcore.rst.txt
+++ b/docs/_sources/tutorials/optimize/opt_conv_tensorcore.rst.txt
@@ -174,7 +174,7 @@ are both used in matrix multiplication, so we can just write following three int
 
             BA = ins[0]
             BC = outs[0]
-            ib.emit(tvm.tir.call_intrin('handle', 'tvm_load_matrix_sync',
+            ib.emit(tvm.tir.call_intrin('handle', 'tir.tvm_load_matrix_sync',
                                     BC.data, n, n, n, BC.elem_offset // 256,
                                     BA.access_ptr('r'), n, 'row_major'))
             return ib.get()
@@ -201,12 +201,12 @@ are both used in matrix multiplication, so we can just write following three int
 
             def init():
                 ib = tvm.tir.ir_builder.create()
-                ib.emit(tvm.tir.call_intrin('handle', 'tvm_fill_fragment', BC.data, n, n, n, BC.elem_offset // 256, 0.0))
+                ib.emit(tvm.tir.call_intrin('handle', 'tir.tvm_fill_fragment', BC.data, n, n, n, BC.elem_offset // 256, 0.0))
                 return ib.get()
 
             def update():
                 ib = tvm.tir.ir_builder.create()
-                ib.emit(tvm.tir.call_intrin('handle', 'tvm_mma_sync',
+                ib.emit(tvm.tir.call_intrin('handle', 'tir.tvm_mma_sync',
                                         BC.data, BC.elem_offset // 256,
                                         BA.data, BA.elem_offset // 256,
                                         BB.data, BB.elem_offset // 256,
@@ -229,7 +229,7 @@ are both used in matrix multiplication, so we can just write following three int
             ib = tvm.tir.ir_builder.create()
             BA = ins[0]
             BC = outs[0]
-            ib.emit(tvm.tir.call_intrin('handle', 'tvm_store_matrix_sync',
+            ib.emit(tvm.tir.call_intrin('handle', 'tir.tvm_store_matrix_sync',
                                     BA.data, n, n, n, BA.elem_offset // 256,
                                     BC.access_ptr('w'), n, 'row_major'))
             return ib.get()
@@ -341,11 +341,11 @@ one time.
  .. code-block:: none
 
     primfn(A_1: handle, W_1: handle, Conv_1: handle) -> ()
-      attr = {"tir.noalias": True, "global_symbol": "main"}
-      buffers = {A: Buffer(A_2: handle, float16, [16, 14, 14, 16, 16, 16], []),
+      attr = {"global_symbol": "main", "tir.noalias": True}
+      buffers = {Conv: Buffer(Conv_2: handle, float32, [16, 14, 14, 32, 16, 16], []),
                  W: Buffer(W_2: handle, float16, [3, 3, 16, 32, 16, 16], []),
-                 Conv: Buffer(Conv_2: handle, float32, [16, 14, 14, 32, 16, 16], [])}
-      buffer_map = {Conv_1: Conv, W_1: W, A_1: A} {
+                 A: Buffer(A_2: handle, float16, [16, 14, 14, 16, 16, 16], [])}
+      buffer_map = {A_1: A, W_1: W, Conv_1: Conv} {
       attr [IterVar(blockIdx.z: int32, (nullptr), "ThreadIndex", "blockIdx.z")] "thread_extent" = 196;
       attr [Conv.wmma.accumulator: handle] "storage_scope" = "wmma.accumulator";
       allocate(Conv.wmma.accumulator, float32, [2048]);
@@ -376,14 +376,14 @@ one time.
               for (ax3: int32, 0, 2) {
                 for (ax4.ax5.fused.outer: int32, 0, 8) {
                   attr [IterVar(threadIdx.x: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 32;
-                  Apad.shared[((((((threadIdx.y*3072) + (threadIdx.z*1536)) + (ax2*512)) + (ax3*256)) + (ax4.ax5.fused.outer*32)) + threadIdx.x)] = @tvm_if_then_else(((((1 <= (floordiv(blockIdx.z, 14) + kh)) && ((floordiv(blockIdx.z, 14) + kh) < 15)) && (1 <= (ax2 + floormod(blockIdx.z, 14)))) && ((ax2 + floormod(blockIdx.z, 14)) < 15)), (float16*)A_2[(((((((((((blockIdx.x*6422528) + (threadIdx.y*1605632)) + (threadIdx.z*802816)) + (kh*57344)) + (blockIdx.z*4096)) + (ax2*4096)) + (ic.out [...]
+                  Apad.shared[((((((threadIdx.y*3072) + (threadIdx.z*1536)) + (ax2*512)) + (ax3*256)) + (ax4.ax5.fused.outer*32)) + threadIdx.x)] = @tir.if_then_else(((((1 <= (floordiv(blockIdx.z, 14) + kh)) && ((floordiv(blockIdx.z, 14) + kh) < 15)) && (1 <= (ax2 + floormod(blockIdx.z, 14)))) && ((ax2 + floormod(blockIdx.z, 14)) < 15)), (float16*)A_2[(((((((((((blockIdx.x*6422528) + (threadIdx.y*1605632)) + (threadIdx.z*802816)) + (kh*57344)) + (blockIdx.z*4096)) + (ax2*4096)) + (ic.out [...]
                 }
               }
             }
             for (ax1: int32, 0, 3) {
               for (ax2_1: int32, 0, 2) {
                 attr [IterVar(threadIdx.x, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 32;
-                W.shared[ramp((((((ax1*4096) + (ax2_1*2048)) + (threadIdx.y*512)) + (threadIdx.z*256)) + (threadIdx.x*8)), 1, 8)] = (float16x8*)W_2[ramp(((((((((kh*393216) + (ax1*131072)) + (ic.outer*16384)) + (ax2_1*8192)) + (blockIdx.y*2048)) + (threadIdx.y*512)) + (threadIdx.z*256)) + (threadIdx.x*8)), 1, 8)])
+                W.shared[ramp((((((ax1*4096) + (ax2_1*2048)) + (threadIdx.y*512)) + (threadIdx.z*256)) + (threadIdx.x*8)), 1, 8)] = (float16x8*)W_2[ramp(((((((((kh*393216) + (ax1*131072)) + (ic.outer*16384)) + (ax2_1*8192)) + (blockIdx.y*2048)) + (threadIdx.y*512)) + (threadIdx.z*256)) + (threadIdx.x*8)), 1, 8)]
               }
             }
             for (ic.inner: int32, 0, 2) {
@@ -391,14 +391,14 @@ one time.
                 for (ax0: int32, 0, 2) {
                   for (ax4: int32, 0, 16) {
                     for (ax5: int32, 0, 16) {
-                      Apad.shared.wmma.matrix_a[(((ax0*256) + (ax4*16)) + ax5)] = (float16*)Apad.shared[((((((threadIdx.y*3072) + (ax0*1536)) + (kw*512)) + (ic.inner*256)) + (ax4*16)) + ax5)])
+                      Apad.shared.wmma.matrix_a[(((ax0*256) + (ax4*16)) + ax5)] = (float16*)Apad.shared[((((((threadIdx.y*3072) + (ax0*1536)) + (kw*512)) + (ic.inner*256)) + (ax4*16)) + ax5)]
                     }
                   }
                 }
                 for (ax3_1: int32, 0, 4) {
                   for (ax4_1: int32, 0, 16) {
                     for (ax5_1: int32, 0, 16) {
-                      W.shared.wmma.matrix_b[(((ax3_1*256) + (ax4_1*16)) + ax5_1)] = (float16*)W.shared[((((((kw*4096) + (ic.inner*2048)) + (threadIdx.z*1024)) + (ax3_1*256)) + (ax4_1*16)) + ax5_1)])
+                      W.shared.wmma.matrix_b[(((ax3_1*256) + (ax4_1*16)) + ax5_1)] = (float16*)W.shared[((((((kw*4096) + (ic.inner*2048)) + (threadIdx.z*1024)) + (ax3_1*256)) + (ax4_1*16)) + ax5_1)]
                     }
                   }
                 }
@@ -407,7 +407,7 @@ one time.
                     for (nn.c: int32, 0, 16) {
                       for (oo.c: int32, 0, 16) {
                         for (ii: int32, 0, 16) {
-                          Conv.wmma.accumulator[((((n.c*1024) + (o.c*256)) + (nn.c*16)) + oo.c)] = ((float32*)Conv.wmma.accumulator[((((n.c*1024) + (o.c*256)) + (nn.c*16)) + oo.c)]) + (cast(float32, (float16*)Apad.shared.wmma.matrix_a[(((n.c*256) + (nn.c*16)) + ii)]))*cast(float32, (float16*)W.shared.wmma.matrix_b[(((o.c*256) + (ii*16)) + oo.c)]))))
+                          Conv.wmma.accumulator[((((n.c*1024) + (o.c*256)) + (nn.c*16)) + oo.c)] = ((float32*)Conv.wmma.accumulator[((((n.c*1024) + (o.c*256)) + (nn.c*16)) + oo.c)] + (cast(float32, (float16*)Apad.shared.wmma.matrix_a[(((n.c*256) + (nn.c*16)) + ii)])*cast(float32, (float16*)W.shared.wmma.matrix_b[(((o.c*256) + (ii*16)) + oo.c)])))
                         }
                       }
                     }
@@ -421,7 +421,7 @@ one time.
           for (o.inner: int32, 0, 4) {
             for (nn: int32, 0, 16) {
               for (oo: int32, 0, 16) {
-                Conv_2[(((((((((blockIdx.x*12845056) + (threadIdx.y*3211264)) + (n.inner*1605632)) + (blockIdx.z*8192)) + (blockIdx.y*2048)) + (threadIdx.z*1024)) + (o.inner*256)) + (nn*16)) + oo)] = (float32*)Conv.wmma.accumulator[((((n.inner*1024) + (o.inner*256)) + (nn*16)) + oo)])
+                Conv_2[(((((((((blockIdx.x*12845056) + (threadIdx.y*3211264)) + (n.inner*1605632)) + (blockIdx.z*8192)) + (blockIdx.y*2048)) + (threadIdx.z*1024)) + (o.inner*256)) + (nn*16)) + oo)] = (float32*)Conv.wmma.accumulator[((((n.inner*1024) + (o.inner*256)) + (nn*16)) + oo)]
               }
             }
           }
@@ -459,11 +459,11 @@ by mapping the 2D convolution to tensor intrinsics
  .. code-block:: none
 
     primfn(A_1: handle, W_1: handle, Conv_1: handle) -> ()
-      attr = {"tir.noalias": True, "global_symbol": "main"}
-      buffers = {A: Buffer(A_2: handle, float16, [16, 14, 14, 16, 16, 16], []),
-                 Conv: Buffer(Conv_2: handle, float32, [16, 14, 14, 32, 16, 16], []),
-                 W: Buffer(W_2: handle, float16, [3, 3, 16, 32, 16, 16], [])}
-      buffer_map = {W_1: W, Conv_1: Conv, A_1: A} {
+      attr = {"global_symbol": "main", "tir.noalias": True}
+      buffers = {Conv: Buffer(Conv_2: handle, float32, [16, 14, 14, 32, 16, 16], []),
+                 W: Buffer(W_2: handle, float16, [3, 3, 16, 32, 16, 16], []),
+                 A: Buffer(A_2: handle, float16, [16, 14, 14, 16, 16, 16], [])}
+      buffer_map = {A_1: A, W_1: W, Conv_1: Conv} {
       attr [IterVar(blockIdx.z: int32, (nullptr), "ThreadIndex", "blockIdx.z")] "thread_extent" = 196;
       attr [Conv.wmma.accumulator: handle] "storage_scope" = "wmma.accumulator";
       allocate(Conv.wmma.accumulator, float32, [2048]);
@@ -481,7 +481,7 @@ by mapping the 2D convolution to tensor intrinsics
       attr [IterVar(threadIdx.z: int32, (nullptr), "ThreadIndex", "threadIdx.z")] "thread_extent" = 2 {
         for (n.c.init: int32, 0, 2) {
           for (o.c.init: int32, 0, 4) {
-            @tvm_fill_fragment(Conv.wmma.accumulator, 16, 16, 16, ((n.c.init*4) + o.c.init), 0f32, dtype=handle, type="intrin")
+            @tir.tvm_fill_fragment(Conv.wmma.accumulator, 16, 16, 16, ((n.c.init*4) + o.c.init), 0f32, dtype=handle)
           }
         }
         for (ic.outer: int32, 0, 8) {
@@ -490,27 +490,27 @@ by mapping the 2D convolution to tensor intrinsics
               for (ax3: int32, 0, 2) {
                 for (ax4.ax5.fused.outer: int32, 0, 8) {
                   attr [IterVar(threadIdx.x: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 32;
-                  Apad.shared[((((((threadIdx.y*3072) + (threadIdx.z*1536)) + (ax2*512)) + (ax3*256)) + (ax4.ax5.fused.outer*32)) + threadIdx.x)] = @tvm_if_then_else(((((1 <= (floordiv(blockIdx.z, 14) + kh)) && ((floordiv(blockIdx.z, 14) + kh) < 15)) && (1 <= (ax2 + floormod(blockIdx.z, 14)))) && ((ax2 + floormod(blockIdx.z, 14)) < 15)), (float16*)A_2[(((((((((((blockIdx.x*6422528) + (threadIdx.y*1605632)) + (threadIdx.z*802816)) + (kh*57344)) + (blockIdx.z*4096)) + (ax2*4096)) + (ic.out [...]
+                  Apad.shared[((((((threadIdx.y*3072) + (threadIdx.z*1536)) + (ax2*512)) + (ax3*256)) + (ax4.ax5.fused.outer*32)) + threadIdx.x)] = @tir.if_then_else(((((1 <= (floordiv(blockIdx.z, 14) + kh)) && ((floordiv(blockIdx.z, 14) + kh) < 15)) && (1 <= (ax2 + floormod(blockIdx.z, 14)))) && ((ax2 + floormod(blockIdx.z, 14)) < 15)), (float16*)A_2[(((((((((((blockIdx.x*6422528) + (threadIdx.y*1605632)) + (threadIdx.z*802816)) + (kh*57344)) + (blockIdx.z*4096)) + (ax2*4096)) + (ic.out [...]
                 }
               }
             }
             for (ax1: int32, 0, 3) {
               for (ax2_1: int32, 0, 2) {
                 attr [IterVar(threadIdx.x, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 32;
-                W.shared[ramp((((((ax1*4096) + (ax2_1*2048)) + (threadIdx.y*512)) + (threadIdx.z*256)) + (threadIdx.x*8)), 1, 8)] = (float16x8*)W_2[ramp(((((((((kh*393216) + (ax1*131072)) + (ic.outer*16384)) + (ax2_1*8192)) + (blockIdx.y*2048)) + (threadIdx.y*512)) + (threadIdx.z*256)) + (threadIdx.x*8)), 1, 8)])
+                W.shared[ramp((((((ax1*4096) + (ax2_1*2048)) + (threadIdx.y*512)) + (threadIdx.z*256)) + (threadIdx.x*8)), 1, 8)] = (float16x8*)W_2[ramp(((((((((kh*393216) + (ax1*131072)) + (ic.outer*16384)) + (ax2_1*8192)) + (blockIdx.y*2048)) + (threadIdx.y*512)) + (threadIdx.z*256)) + (threadIdx.x*8)), 1, 8)]
               }
             }
             for (ic.inner: int32, 0, 2) {
               for (kw: int32, 0, 3) {
                 for (ax0: int32, 0, 2) {
-                  @tvm_load_matrix_sync(Apad.shared.wmma.matrix_a, 16, 16, 16, ax0, @tvm_access_ptr(@type_annotation(, dtype=float16, type="pure_intrin"), Apad.shared, ((((threadIdx.y*3072) + (ax0*1536)) + (kw*512)) + (ic.inner*256)), 256, 1, dtype=handle, type="intrin"), 16, "row_major", dtype=handle, type="intrin")
+                  @tir.tvm_load_matrix_sync(Apad.shared.wmma.matrix_a, 16, 16, 16, ax0, @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float16), Apad.shared, ((((threadIdx.y*3072) + (ax0*1536)) + (kw*512)) + (ic.inner*256)), 256, 1, dtype=handle), 16, "row_major", dtype=handle)
                 }
                 for (ax3_1: int32, 0, 4) {
-                  @tvm_load_matrix_sync(W.shared.wmma.matrix_b, 16, 16, 16, ax3_1, @tvm_access_ptr(@type_annotation(, dtype=float16, type="pure_intrin"), W.shared, ((((kw*4096) + (ic.inner*2048)) + (threadIdx.z*1024)) + (ax3_1*256)), 256, 1, dtype=handle, type="intrin"), 16, "row_major", dtype=handle, type="intrin")
+                  @tir.tvm_load_matrix_sync(W.shared.wmma.matrix_b, 16, 16, 16, ax3_1, @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float16), W.shared, ((((kw*4096) + (ic.inner*2048)) + (threadIdx.z*1024)) + (ax3_1*256)), 256, 1, dtype=handle), 16, "row_major", dtype=handle)
                 }
                 for (n.c: int32, 0, 2) {
                   for (o.c: int32, 0, 4) {
-                    @tvm_mma_sync(Conv.wmma.accumulator, ((n.c*4) + o.c), Apad.shared.wmma.matrix_a, n.c, W.shared.wmma.matrix_b, o.c, Conv.wmma.accumulator, ((n.c*4) + o.c), dtype=handle, type="intrin")
+                    @tir.tvm_mma_sync(Conv.wmma.accumulator, ((n.c*4) + o.c), Apad.shared.wmma.matrix_a, n.c, W.shared.wmma.matrix_b, o.c, Conv.wmma.accumulator, ((n.c*4) + o.c), dtype=handle)
                   }
                 }
               }
@@ -519,7 +519,7 @@ by mapping the 2D convolution to tensor intrinsics
         }
         for (n.inner: int32, 0, 2) {
           for (o.inner: int32, 0, 4) {
-            @tvm_store_matrix_sync(Conv.wmma.accumulator, 16, 16, 16, ((n.inner*4) + o.inner), @tvm_access_ptr(@type_annotation(, dtype=float32, type="pure_intrin"), Conv_2, (((((((blockIdx.x*12845056) + (threadIdx.y*3211264)) + (n.inner*1605632)) + (blockIdx.z*8192)) + (blockIdx.y*2048)) + (threadIdx.z*1024)) + (o.inner*256)), 256, 2, dtype=handle, type="intrin"), 16, "row_major", dtype=handle, type="intrin")
+            @tir.tvm_store_matrix_sync(Conv.wmma.accumulator, 16, 16, 16, ((n.inner*4) + o.inner), @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), Conv_2, (((((((blockIdx.x*12845056) + (threadIdx.y*3211264)) + (n.inner*1605632)) + (blockIdx.z*8192)) + (blockIdx.y*2048)) + (threadIdx.z*1024)) + (o.inner*256)), 256, 2, dtype=handle), 16, "row_major", dtype=handle)
           }
         }
       }
@@ -563,7 +563,7 @@ be able to run on our build server
 
  .. code-block:: none
 
-    conv2d with tensor core: 6.328851 ms
+    conv2d with tensor core: 6.305341 ms
 
 
 
diff --git a/docs/_sources/tutorials/optimize/opt_gemm.rst.txt b/docs/_sources/tutorials/optimize/opt_gemm.rst.txt
index 04bac12..6d12cd1 100644
--- a/docs/_sources/tutorials/optimize/opt_gemm.rst.txt
+++ b/docs/_sources/tutorials/optimize/opt_gemm.rst.txt
@@ -118,8 +118,8 @@ Then we write a baseline implementation, the simplest way to write a matrix mult
 
  .. code-block:: none
 
-    Numpy running time: 0.007867
-    Baseline: 6.026107
+    Numpy running time: 0.007645
+    Baseline: 5.973757
 
 
 
@@ -143,16 +143,16 @@ Here is the generated IR using our baseline schedule.
  .. code-block:: none
 
     primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
-      attr = {"tir.noalias": True, "global_symbol": "main"}
-      buffers = {A: Buffer(A_2: handle, float32, [1024, 1024], []),
+      attr = {"global_symbol": "main", "tir.noalias": True}
+      buffers = {C: Buffer(C_2: handle, float32, [1024, 1024], []),
                  B: Buffer(B_2: handle, float32, [1024, 1024], []),
-                 C: Buffer(C_2: handle, float32, [1024, 1024], [])}
-      buffer_map = {C_1: C, B_1: B, A_1: A} {
+                 A: Buffer(A_2: handle, float32, [1024, 1024], [])}
+      buffer_map = {A_1: A, B_1: B, C_1: C} {
       for (x: int32, 0, 1024) {
         for (y: int32, 0, 1024) {
           C_2[((x*1024) + y)] = 0f32
           for (k: int32, 0, 1024) {
-            C_2[((x*1024) + y)] = ((float32*)C_2[((x*1024) + y)]) + ((float32*)A_2[((x*1024) + k)])*(float32*)B_2[((k*1024) + y)])))
+            C_2[((x*1024) + y)] = ((float32*)C_2[((x*1024) + y)] + ((float32*)A_2[((x*1024) + k)]*(float32*)B_2[((k*1024) + y)]))
           }
         }
       }
@@ -206,7 +206,7 @@ fill 32 * 32 * sizeof(float) which is 4KB in the cache whose total size is 32KB
 
  .. code-block:: none
 
-    Opt1: 0.124150
+    Opt1: 0.109398
 
 
 
@@ -229,11 +229,11 @@ Here is the generated IR after blocking.
  .. code-block:: none
 
     primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
-      attr = {"tir.noalias": True, "global_symbol": "main"}
-      buffers = {A: Buffer(A_2: handle, float32, [1024, 1024], []),
+      attr = {"global_symbol": "main", "tir.noalias": True}
+      buffers = {C: Buffer(C_2: handle, float32, [1024, 1024], []),
                  B: Buffer(B_2: handle, float32, [1024, 1024], []),
-                 C: Buffer(C_2: handle, float32, [1024, 1024], [])}
-      buffer_map = {C_1: C, B_1: B, A_1: A} {
+                 A: Buffer(A_2: handle, float32, [1024, 1024], [])}
+      buffer_map = {A_1: A, B_1: B, C_1: C} {
       for (x.outer: int32, 0, 32) {
         for (y.outer: int32, 0, 32) {
           for (x.inner.init: int32, 0, 32) {
@@ -245,7 +245,7 @@ Here is the generated IR after blocking.
             for (k.inner: int32, 0, 4) {
               for (x.inner: int32, 0, 32) {
                 for (y.inner: int32, 0, 32) {
-                  C_2[((((x.outer*32768) + (x.inner*1024)) + (y.outer*32)) + y.inner)] = ((float32*)C_2[((((x.outer*32768) + (x.inner*1024)) + (y.outer*32)) + y.inner)]) + ((float32*)A_2[((((x.outer*32768) + (x.inner*1024)) + (k.outer*4)) + k.inner)])*(float32*)B_2[((((k.outer*4096) + (k.inner*1024)) + (y.outer*32)) + y.inner)])))
+                  C_2[((((x.outer*32768) + (x.inner*1024)) + (y.outer*32)) + y.inner)] = ((float32*)C_2[((((x.outer*32768) + (x.inner*1024)) + (y.outer*32)) + y.inner)] + ((float32*)A_2[((((x.outer*32768) + (x.inner*1024)) + (k.outer*4)) + k.inner)]*(float32*)B_2[((((k.outer*4096) + (k.inner*1024)) + (y.outer*32)) + y.inner)]))
                 }
               }
             }
@@ -300,7 +300,7 @@ In this tutorial, we chose to vectorize the inner loop row data since it is cach
 
  .. code-block:: none
 
-    Opt2: 0.124186
+    Opt2: 0.118614
 
 
 
@@ -323,11 +323,11 @@ Here is the generated IR after vectorization.
  .. code-block:: none
 
     primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
-      attr = {"tir.noalias": True, "global_symbol": "main"}
-      buffers = {A: Buffer(A_2: handle, float32, [1024, 1024], []),
+      attr = {"global_symbol": "main", "tir.noalias": True}
+      buffers = {C: Buffer(C_2: handle, float32, [1024, 1024], []),
                  B: Buffer(B_2: handle, float32, [1024, 1024], []),
-                 C: Buffer(C_2: handle, float32, [1024, 1024], [])}
-      buffer_map = {C_1: C, B_1: B, A_1: A} {
+                 A: Buffer(A_2: handle, float32, [1024, 1024], [])}
+      buffer_map = {A_1: A, B_1: B, C_1: C} {
       for (x.outer: int32, 0, 32) {
         for (y.outer: int32, 0, 32) {
           for (x.inner.init: int32, 0, 32) {
@@ -336,7 +336,7 @@ Here is the generated IR after vectorization.
           for (k.outer: int32, 0, 256) {
             for (k.inner: int32, 0, 4) {
               for (x.inner: int32, 0, 32) {
-                C_2[ramp((((x.outer*32768) + (x.inner*1024)) + (y.outer*32)), 1, 32)] = ((float32x32*)C_2[ramp((((x.outer*32768) + (x.inner*1024)) + (y.outer*32)), 1, 32)]) + (broadcast((float32*)A_2[((((x.outer*32768) + (x.inner*1024)) + (k.outer*4)) + k.inner)]), 32)*(float32x32*)B_2[ramp((((k.outer*4096) + (k.inner*1024)) + (y.outer*32)), 1, 32)])))
+                C_2[ramp((((x.outer*32768) + (x.inner*1024)) + (y.outer*32)), 1, 32)] = ((float32x32*)C_2[ramp((((x.outer*32768) + (x.inner*1024)) + (y.outer*32)), 1, 32)] + (broadcast((float32*)A_2[((((x.outer*32768) + (x.inner*1024)) + (k.outer*4)) + k.inner)], 32)*(float32x32*)B_2[ramp((((k.outer*4096) + (k.inner*1024)) + (y.outer*32)), 1, 32)]))
               }
             }
           }
@@ -389,7 +389,7 @@ the access pattern for A matrix is more cache friendly.
 
  .. code-block:: none
 
-    Opt3: 0.069148
+    Opt3: 0.062121
 
 
 
@@ -412,11 +412,11 @@ Here is the generated IR after loop permutation.
  .. code-block:: none
 
     primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
-      attr = {"tir.noalias": True, "global_symbol": "main"}
-      buffers = {B: Buffer(B_2: handle, float32, [1024, 1024], []),
-                 A: Buffer(A_2: handle, float32, [1024, 1024], []),
-                 C: Buffer(C_2: handle, float32, [1024, 1024], [])}
-      buffer_map = {C_1: C, B_1: B, A_1: A} {
+      attr = {"global_symbol": "main", "tir.noalias": True}
+      buffers = {C: Buffer(C_2: handle, float32, [1024, 1024], []),
+                 B: Buffer(B_2: handle, float32, [1024, 1024], []),
+                 A: Buffer(A_2: handle, float32, [1024, 1024], [])}
+      buffer_map = {A_1: A, B_1: B, C_1: C} {
       for (x.outer: int32, 0, 32) {
         for (y.outer: int32, 0, 32) {
           for (x.inner.init: int32, 0, 32) {
@@ -425,7 +425,7 @@ Here is the generated IR after loop permutation.
           for (k.outer: int32, 0, 256) {
             for (x.inner: int32, 0, 32) {
               for (k.inner: int32, 0, 4) {
-                C_2[ramp((((x.outer*32768) + (x.inner*1024)) + (y.outer*32)), 1, 32)] = ((float32x32*)C_2[ramp((((x.outer*32768) + (x.inner*1024)) + (y.outer*32)), 1, 32)]) + (broadcast((float32*)A_2[((((x.outer*32768) + (x.inner*1024)) + (k.outer*4)) + k.inner)]), 32)*(float32x32*)B_2[ramp((((k.outer*4096) + (k.inner*1024)) + (y.outer*32)), 1, 32)])))
+                C_2[ramp((((x.outer*32768) + (x.inner*1024)) + (y.outer*32)), 1, 32)] = ((float32x32*)C_2[ramp((((x.outer*32768) + (x.inner*1024)) + (y.outer*32)), 1, 32)] + (broadcast((float32*)A_2[((((x.outer*32768) + (x.inner*1024)) + (k.outer*4)) + k.inner)], 32)*(float32x32*)B_2[ramp((((k.outer*4096) + (k.inner*1024)) + (y.outer*32)), 1, 32)]))
               }
             }
           }
@@ -497,7 +497,7 @@ the corresponding value from the packed array.
 
  .. code-block:: none
 
-    Opt4: 0.064161
+    Opt4: 0.063075
 
 
 
@@ -520,16 +520,16 @@ Here is the generated IR after array packing.
  .. code-block:: none
 
     primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
-      attr = {"tir.noalias": True, "global_symbol": "main"}
-      buffers = {A: Buffer(A_2: handle, float32, [1024, 1024], []),
-                 B: Buffer(B_2: handle, float32, [1024, 1024], []),
-                 C: Buffer(C_2: handle, float32, [1024, 1024], [])}
-      buffer_map = {C_1: C, B_1: B, A_1: A} {
+      attr = {"global_symbol": "main", "tir.noalias": True}
+      buffers = {B: Buffer(B_2: handle, float32, [1024, 1024], []),
+                 C: Buffer(C_2: handle, float32, [1024, 1024], []),
+                 A: Buffer(A_2: handle, float32, [1024, 1024], [])}
+      buffer_map = {A_1: A, B_1: B, C_1: C} {
       attr [packedB: handle] "storage_scope" = "global";
       allocate(packedB, float32x32, [32768]) {
         for (x: int32, 0, 32) "parallel" {
           for (y: int32, 0, 1024) {
-            packedB[ramp(((x*32768) + (y*32)), 1, 32)] = (float32x32*)B_2[ramp(((y*1024) + (x*32)), 1, 32)])
+            packedB[ramp(((x*32768) + (y*32)), 1, 32)] = (float32x32*)B_2[ramp(((y*1024) + (x*32)), 1, 32)]
           }
         }
         for (x.outer: int32, 0, 32) {
@@ -540,7 +540,7 @@ Here is the generated IR after array packing.
             for (k.outer: int32, 0, 256) {
               for (x.inner: int32, 0, 32) {
                 for (k.inner: int32, 0, 4) {
-                  C_2[ramp((((x.outer*32768) + (x.inner*1024)) + (y.outer*32)), 1, 32)] = ((float32x32*)C_2[ramp((((x.outer*32768) + (x.inner*1024)) + (y.outer*32)), 1, 32)]) + (broadcast((float32*)A_2[((((x.outer*32768) + (x.inner*1024)) + (k.outer*4)) + k.inner)]), 32)*(float32x32*)packedB[ramp((((y.outer*32768) + (k.outer*128)) + (k.inner*32)), 1, 32)])))
+                  C_2[ramp((((x.outer*32768) + (x.inner*1024)) + (y.outer*32)), 1, 32)] = ((float32x32*)C_2[ramp((((x.outer*32768) + (x.inner*1024)) + (y.outer*32)), 1, 32)] + (broadcast((float32*)A_2[((((x.outer*32768) + (x.inner*1024)) + (k.outer*4)) + k.inner)], 32)*(float32x32*)packedB[ramp((((y.outer*32768) + (k.outer*128)) + (k.inner*32)), 1, 32)]))
                 }
               }
             }
@@ -607,7 +607,7 @@ write to C when all the block results are ready.
 
  .. code-block:: none
 
-    Opt5: 0.071793
+    Opt5: 0.065756
 
 
 
@@ -630,18 +630,18 @@ Here is the generated IR after blocking.
  .. code-block:: none
 
     primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
-      attr = {"tir.noalias": True, "global_symbol": "main"}
-      buffers = {A: Buffer(A_2: handle, float32, [1024, 1024], []),
+      attr = {"global_symbol": "main", "tir.noalias": True}
+      buffers = {C: Buffer(C_2: handle, float32, [1024, 1024], []),
                  B: Buffer(B_2: handle, float32, [1024, 1024], []),
-                 C: Buffer(C_2: handle, float32, [1024, 1024], [])}
-      buffer_map = {C_1: C, B_1: B, A_1: A} {
+                 A: Buffer(A_2: handle, float32, [1024, 1024], [])}
+      buffer_map = {A_1: A, B_1: B, C_1: C} {
       attr [packedB: handle] "storage_scope" = "global";
       allocate(packedB, float32x32, [32768]);
       attr [C.global: handle] "storage_scope" = "global";
       allocate(C.global, float32, [1024]) {
         for (x: int32, 0, 32) "parallel" {
           for (y: int32, 0, 1024) {
-            packedB[ramp(((x*32768) + (y*32)), 1, 32)] = (float32x32*)B_2[ramp(((y*1024) + (x*32)), 1, 32)])
+            packedB[ramp(((x*32768) + (y*32)), 1, 32)] = (float32x32*)B_2[ramp(((y*1024) + (x*32)), 1, 32)]
           }
         }
         for (x.outer: int32, 0, 32) {
@@ -651,15 +651,15 @@ Here is the generated IR after blocking.
             }
             for (k.outer: int32, 0, 256) {
               for (x.c: int32, 0, 32) {
-                C.global[ramp((x.c*32), 1, 32)] = ((float32x32*)C.global[ramp((x.c*32), 1, 32)]) + (broadcast((float32*)A_2[(((x.outer*32768) + (x.c*1024)) + (k.outer*4))]), 32)*(float32x32*)packedB[ramp(((y.outer*32768) + (k.outer*128)), 1, 32)])))
-                C.global[ramp((x.c*32), 1, 32)] = ((float32x32*)C.global[ramp((x.c*32), 1, 32)]) + (broadcast((float32*)A_2[((((x.outer*32768) + (x.c*1024)) + (k.outer*4)) + 1)]), 32)*(float32x32*)packedB[ramp((((y.outer*32768) + (k.outer*128)) + 32), 1, 32)])))
-                C.global[ramp((x.c*32), 1, 32)] = ((float32x32*)C.global[ramp((x.c*32), 1, 32)]) + (broadcast((float32*)A_2[((((x.outer*32768) + (x.c*1024)) + (k.outer*4)) + 2)]), 32)*(float32x32*)packedB[ramp((((y.outer*32768) + (k.outer*128)) + 64), 1, 32)])))
-                C.global[ramp((x.c*32), 1, 32)] = ((float32x32*)C.global[ramp((x.c*32), 1, 32)]) + (broadcast((float32*)A_2[((((x.outer*32768) + (x.c*1024)) + (k.outer*4)) + 3)]), 32)*(float32x32*)packedB[ramp((((y.outer*32768) + (k.outer*128)) + 96), 1, 32)])))
+                C.global[ramp((x.c*32), 1, 32)] = ((float32x32*)C.global[ramp((x.c*32), 1, 32)] + (broadcast((float32*)A_2[(((x.outer*32768) + (x.c*1024)) + (k.outer*4))], 32)*(float32x32*)packedB[ramp(((y.outer*32768) + (k.outer*128)), 1, 32)]))
+                C.global[ramp((x.c*32), 1, 32)] = ((float32x32*)C.global[ramp((x.c*32), 1, 32)] + (broadcast((float32*)A_2[((((x.outer*32768) + (x.c*1024)) + (k.outer*4)) + 1)], 32)*(float32x32*)packedB[ramp((((y.outer*32768) + (k.outer*128)) + 32), 1, 32)]))
+                C.global[ramp((x.c*32), 1, 32)] = ((float32x32*)C.global[ramp((x.c*32), 1, 32)] + (broadcast((float32*)A_2[((((x.outer*32768) + (x.c*1024)) + (k.outer*4)) + 2)], 32)*(float32x32*)packedB[ramp((((y.outer*32768) + (k.outer*128)) + 64), 1, 32)]))
+                C.global[ramp((x.c*32), 1, 32)] = ((float32x32*)C.global[ramp((x.c*32), 1, 32)] + (broadcast((float32*)A_2[((((x.outer*32768) + (x.c*1024)) + (k.outer*4)) + 3)], 32)*(float32x32*)packedB[ramp((((y.outer*32768) + (k.outer*128)) + 96), 1, 32)]))
               }
             }
             for (x.inner: int32, 0, 32) {
               for (y.inner: int32, 0, 32) {
-                C_2[((((x.outer*32768) + (x.inner*1024)) + (y.outer*32)) + y.inner)] = (float32*)C.global[((x.inner*32) + y.inner)])
+                C_2[((((x.outer*32768) + (x.inner*1024)) + (y.outer*32)) + y.inner)] = (float32*)C.global[((x.inner*32) + y.inner)]
               }
             }
           }
@@ -723,7 +723,7 @@ Furthermore, we can also utilize multi-core processors to do the thread-level par
 
  .. code-block:: none
 
-    Opt6: 0.022614
+    Opt6: 0.016878
 
 
 
@@ -746,16 +746,16 @@ Here is the generated IR after parallelization.
  .. code-block:: none
 
     primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
-      attr = {"tir.noalias": True, "global_symbol": "main"}
-      buffers = {A: Buffer(A_2: handle, float32, [1024, 1024], []),
-                 C: Buffer(C_2: handle, float32, [1024, 1024], []),
-                 B: Buffer(B_2: handle, float32, [1024, 1024], [])}
-      buffer_map = {B_1: B, C_1: C, A_1: A} {
+      attr = {"global_symbol": "main", "tir.noalias": True}
+      buffers = {C: Buffer(C_2: handle, float32, [1024, 1024], []),
+                 B: Buffer(B_2: handle, float32, [1024, 1024], []),
+                 A: Buffer(A_2: handle, float32, [1024, 1024], [])}
+      buffer_map = {A_1: A, B_1: B, C_1: C} {
       attr [packedB: handle] "storage_scope" = "global";
       allocate(packedB, float32x32, [32768]) {
         for (x: int32, 0, 32) "parallel" {
           for (y: int32, 0, 1024) {
-            packedB[ramp(((x*32768) + (y*32)), 1, 32)] = (float32x32*)B_2[ramp(((y*1024) + (x*32)), 1, 32)])
+            packedB[ramp(((x*32768) + (y*32)), 1, 32)] = (float32x32*)B_2[ramp(((y*1024) + (x*32)), 1, 32)]
           }
         }
         for (x.outer: int32, 0, 32) "parallel" {
@@ -767,15 +767,15 @@ Here is the generated IR after parallelization.
             }
             for (k.outer: int32, 0, 256) {
               for (x.c: int32, 0, 32) {
-                C.global[ramp((x.c*32), 1, 32)] = ((float32x32*)C.global[ramp((x.c*32), 1, 32)]) + (broadcast((float32*)A_2[(((x.outer*32768) + (x.c*1024)) + (k.outer*4))]), 32)*(float32x32*)packedB[ramp(((y.outer*32768) + (k.outer*128)), 1, 32)])))
-                C.global[ramp((x.c*32), 1, 32)] = ((float32x32*)C.global[ramp((x.c*32), 1, 32)]) + (broadcast((float32*)A_2[((((x.outer*32768) + (x.c*1024)) + (k.outer*4)) + 1)]), 32)*(float32x32*)packedB[ramp((((y.outer*32768) + (k.outer*128)) + 32), 1, 32)])))
-                C.global[ramp((x.c*32), 1, 32)] = ((float32x32*)C.global[ramp((x.c*32), 1, 32)]) + (broadcast((float32*)A_2[((((x.outer*32768) + (x.c*1024)) + (k.outer*4)) + 2)]), 32)*(float32x32*)packedB[ramp((((y.outer*32768) + (k.outer*128)) + 64), 1, 32)])))
-                C.global[ramp((x.c*32), 1, 32)] = ((float32x32*)C.global[ramp((x.c*32), 1, 32)]) + (broadcast((float32*)A_2[((((x.outer*32768) + (x.c*1024)) + (k.outer*4)) + 3)]), 32)*(float32x32*)packedB[ramp((((y.outer*32768) + (k.outer*128)) + 96), 1, 32)])))
+                C.global[ramp((x.c*32), 1, 32)] = ((float32x32*)C.global[ramp((x.c*32), 1, 32)] + (broadcast((float32*)A_2[(((x.outer*32768) + (x.c*1024)) + (k.outer*4))], 32)*(float32x32*)packedB[ramp(((y.outer*32768) + (k.outer*128)), 1, 32)]))
+                C.global[ramp((x.c*32), 1, 32)] = ((float32x32*)C.global[ramp((x.c*32), 1, 32)] + (broadcast((float32*)A_2[((((x.outer*32768) + (x.c*1024)) + (k.outer*4)) + 1)], 32)*(float32x32*)packedB[ramp((((y.outer*32768) + (k.outer*128)) + 32), 1, 32)]))
+                C.global[ramp((x.c*32), 1, 32)] = ((float32x32*)C.global[ramp((x.c*32), 1, 32)] + (broadcast((float32*)A_2[((((x.outer*32768) + (x.c*1024)) + (k.outer*4)) + 2)], 32)*(float32x32*)packedB[ramp((((y.outer*32768) + (k.outer*128)) + 64), 1, 32)]))
+                C.global[ramp((x.c*32), 1, 32)] = ((float32x32*)C.global[ramp((x.c*32), 1, 32)] + (broadcast((float32*)A_2[((((x.outer*32768) + (x.c*1024)) + (k.outer*4)) + 3)], 32)*(float32x32*)packedB[ramp((((y.outer*32768) + (k.outer*128)) + 96), 1, 32)]))
               }
             }
             for (x.inner: int32, 0, 32) {
               for (y.inner: int32, 0, 32) {
-                C_2[((((x.outer*32768) + (x.inner*1024)) + (y.outer*32)) + y.inner)] = (float32*)C.global[((x.inner*32) + y.inner)])
+                C_2[((((x.outer*32768) + (x.inner*1024)) + (y.outer*32)) + y.inner)] = (float32*)C.global[((x.inner*32) + y.inner)]
               }
             }
           }
diff --git a/docs/_sources/tutorials/optimize/sg_execution_times.rst.txt b/docs/_sources/tutorials/optimize/sg_execution_times.rst.txt
index b044deb..8245160 100644
--- a/docs/_sources/tutorials/optimize/sg_execution_times.rst.txt
+++ b/docs/_sources/tutorials/optimize/sg_execution_times.rst.txt
@@ -5,9 +5,9 @@
 
 Computation times
 =================
-**00:28.349** total execution time for **tutorials_optimize** files:
+**00:27.059** total execution time for **tutorials_optimize** files:
 
-- **00:26.434**: :ref:`sphx_glr_tutorials_optimize_opt_gemm.py` (``opt_gemm.py``)
-- **00:01.002**: :ref:`sphx_glr_tutorials_optimize_opt_conv_tensorcore.py` (``opt_conv_tensorcore.py``)
-- **00:00.752**: :ref:`sphx_glr_tutorials_optimize_opt_conv_cuda.py` (``opt_conv_cuda.py``)
-- **00:00.161**: :ref:`sphx_glr_tutorials_optimize_opt_matmul_auto_tensorcore.py` (``opt_matmul_auto_tensorcore.py``)
+- **00:25.353**: :ref:`sphx_glr_tutorials_optimize_opt_gemm.py` (``opt_gemm.py``)
+- **00:00.888**: :ref:`sphx_glr_tutorials_optimize_opt_conv_tensorcore.py` (``opt_conv_tensorcore.py``)
+- **00:00.664**: :ref:`sphx_glr_tutorials_optimize_opt_conv_cuda.py` (``opt_conv_cuda.py``)
+- **00:00.155**: :ref:`sphx_glr_tutorials_optimize_opt_matmul_auto_tensorcore.py` (``opt_matmul_auto_tensorcore.py``)
diff --git a/docs/_sources/tutorials/relay_quick_start.rst.txt b/docs/_sources/tutorials/relay_quick_start.rst.txt
index a20c6cf..310518f 100644
--- a/docs/_sources/tutorials/relay_quick_start.rst.txt
+++ b/docs/_sources/tutorials/relay_quick_start.rst.txt
@@ -223,11 +223,7 @@ in this example. Then the machine code will be generated as the module library.
 
  .. code-block:: none
 
-
    ...1%, 0.01 MB, 98 KB/s, 0 seconds passed
    ...3%, 0.02 MB, 194 KB/s, 0 seconds passed
    ...5%, 0.02 MB, 290 KB/s, 0 seconds passed
    ...7%, 0.03 MB, 386 KB/s, 0 seconds passed
    ...9%, 0.04 MB, 481 KB/s, 0 seconds passed
    ...11%, 0.05 MB, 573 KB/s, 0 seconds passed
    ...13%, 0.05 MB, 667 KB/s, 0 seconds passed
    ...15%, 0.06 MB, 756 KB/s, 0 seconds passed
    ...17%, 0.07 MB, 849 KB/s, 0 seconds passed
    ...19%, 0.08 MB, 939 KB/s, 0 seconds passed
    ...21%, 0.09 MB, 1028 KB/s, 0 seconds passed
    ...23%, 0.09 MB, 1119 KB/s, 0 seconds passed
    ...25%, 0.10 MB, 1203 KB/s, 0 seconds passed
    ...27%, 0.11 MB, 1293 KB/s, 0 seconds passed
    ...29%, 0.12 MB, 1382 KB/s, 0 seconds passed
    ...31%, 0.12 MB, 1471 KB/s, 0 seconds passed
    ...33%, 0.13 MB, 1554 KB/s, 0 seconds passed
    ...35%, 0.14 MB, 1643 KB/s, 0 seconds passed
    ...37%, 0.15 MB, 1730 KB/s, 0 seconds passed
    ...39%, 0.16 MB, 1818 KB/s, 0 seconds passed
    ...41%, 0.16 MB, 1899 KB/s, 0 seconds passed
    ...43%, 0.17 MB, 1980 KB/s, 0 seconds passed
    ...45%, 0.18 MB, 2067 KB/s, 0 seconds passed
    ...47%, 0.19 MB, 2145 KB/s, 0 seconds passed
    ...49%, 0.20 MB, 2231 KB/s, 0 seconds passed
    ...51%, 0.20 MB, 2314 KB/s, 0 seconds passed
    ...53%, 0.21 MB, 2360 KB/s, 0 seconds passed
    ...55%, 0.22 MB, 2445 KB/s, 0 seconds passed
    ...57%, 0.23 MB, 2525 KB/s, 0 seconds passed
    ...59%, 0.23 MB, 2609 KB/s, 0 seconds passed
    ...61%, 0.24 MB, 2682 KB/s, 0 seconds passed
    ...63%, 0.25 MB, 2765 KB/s, 0 seconds passed
    ...65%, 0.26 MB, 2836 KB/s, 0 seconds passed
    ...67%, 0.27 MB, 2919 KB/s, 0 seconds passed
    ...69%, 0.27 MB, 2989 KB/s, 0 seconds passed
    ...71%, 0.28 MB, 3071 KB/s, 0 seconds passed
    ...73%, 0.29 MB, 3141 KB/s, 0 seconds passed
    ...75%, 0.30 MB, 3222 KB/s, 0 seconds passed
    ...77%, 0.30 MB, 3254 KB/s, 0 seconds passed
    ...79%, 0.31 MB, 3334 KB/s, 0 seconds passed
    ...81%, 0.32 MB, 3413 KB/s, 0 seconds passed
    ...83%, 0.33 MB, 3493 KB/s, 0 seconds passed
    ...85%, 0.34 MB, 3553 KB/s, 0 seconds passed
    ...87%, 0.34 MB, 3632 KB/s, 0 seconds passed
    ...89%, 0.35 MB, 3710 KB/s, 0 seconds passed
    ...91%, 0.36 MB, 3789 KB/s, 0 seconds passed
    ...93%, 0.37 MB, 3866 KB/s, 0 seconds passed
    ...95%, 0.38 MB, 3945 KB/s, 0 seconds passed
    ...97%, 0.38 MB, 4009 KB/s, 0 seconds passed
    ...99%, 0.39 MB, 4087 KB/s, 0 seconds passed
    ...100%, 0.40 MB, 4158 KB/s, 0 seconds passed
-    Cannot find config for target=cuda -model=unknown, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 64, 56, 56), 'float32'), ('TENSOR', (64, 64, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=cuda -model=unknown, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 128, 28, 28), 'float32'), ('TENSOR', (128, 128, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=cuda -model=unknown, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 256, 14, 14), 'float32'), ('TENSOR', (256, 256, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
-    Cannot find config for target=cuda -model=unknown, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 512, 7, 7), 'float32'), ('TENSOR', (512, 512, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
+
    ...1%, 0.01 MB, 37 KB/s, 0 seconds passed
    ...3%, 0.02 MB, 75 KB/s, 0 seconds passed
    ...5%, 0.02 MB, 113 KB/s, 0 seconds passed
    ...7%, 0.03 MB, 150 KB/s, 0 seconds passed
    ...9%, 0.04 MB, 187 KB/s, 0 seconds passed
    ...11%, 0.05 MB, 224 KB/s, 0 seconds passed
    ...13%, 0.05 MB, 261 KB/s, 0 seconds passed
    ...15%, 0.06 MB, 298 KB/s, 0 seconds passed
    ...17%, 0.07 MB, 335 KB/s, 0 seconds passed
    ...19%, 0.08 MB, 372 KB/s, 0 seconds passed
    ...21%, 0.09 MB, 409 KB/s, 0 seconds passed
    ...23%, 0.09 MB, 445 KB/s, 0 seconds passed
    ...25%, 0.10 MB, 481 KB/s, 0 seconds passed
    ...27%, 0.11 MB, 518 KB/s, 0 seconds passed
    ...29%, 0.12 MB, 554 KB/s, 0 seconds passed
    ...31%, 0.12 MB, 590 KB/s, 0 seconds passed
    ...33%, 0.13 MB, 626 KB/s, 0 seconds passed
    ...35%, 0.14 MB, 662 KB/s, 0 seconds passed
    ...37%, 0.15 MB, 698 KB/s, 0 seconds passed
    ...39%, 0.16 MB, 734 KB/s, 0 seconds passed
    ...41%, 0.16 MB, 770 KB/s, 0 seconds passed
    ...43%, 0.17 MB, 804 KB/s, 0 seconds passed
    ...45%, 0.18 MB, 840 KB/s, 0 seconds passed
    ...47%, 0.19 MB, 876 KB/s, 0 seconds passed
    ...49%, 0.20 MB, 912 KB/s, 0 seconds passed
    ...51%, 0.20 MB, 948 KB/s, 0 seconds passed
    ...53%, 0.21 MB, 981 KB/s, 0 seconds passed
    ...55%, 0.22 MB, 1017 KB/s, 0 seconds passed
    ...57%, 0.23 MB, 1052 KB/s, 0 seconds passed
    ...59%, 0.23 MB, 1087 KB/s, 0 seconds passed
    ...61%, 0.24 MB, 1121 KB/s, 0 seconds passed
    ...63%, 0.25 MB, 1157 KB/s, 0 seconds passed
    ...65%, 0.26 MB, 1192 KB/s, 0 seconds passed
    ...67%, 0.27 MB, 1228 KB/s, 0 seconds passed
    ...69%, 0.27 MB, 1262 KB/s, 0 seconds passed
    ...71%, 0.28 MB, 1297 KB/s, 0 seconds passed
    ...73%, 0.29 MB, 1332 KB/s, 0 seconds passed
    ...75%, 0.30 MB, 1367 KB/s, 0 seconds passed
    ...77%, 0.30 MB, 1399 KB/s, 0 seconds passed
    ...79%, 0.31 MB, 1434 KB/s, 0 seconds passed
    ...81%, 0.32 MB, 1469 KB/s, 0 seconds passed
    ...83%, 0.33 MB, 1504 KB/s, 0 seconds passed
    ...85%, 0.34 MB, 1539 KB/s, 0 seconds passed
    ...87%, 0.34 MB, 1575 KB/s, 0 seconds passed
    ...89%, 0.35 MB, 1606 KB/s, 0 seconds passed
    ...91%, 0.36 MB, 1641 KB/s, 0 seconds passed
    ...93%, 0.37 MB, 1676 KB/s, 0 seconds passed
    ...95%, 0.38 MB, 1711 KB/s, 0 seconds passed
    ...97%, 0.38 MB, 1745 KB/s, 0 seconds passed
    ...99%, 0.39 MB, 1780 KB/s, 0 seconds passed
    ...100%, 0.40 MB, 1814 KB/s, 0 seconds passed
     Cannot find config for target=cuda -model=unknown, workload=('dense_small_batch.cuda', ('TENSOR', (1, 512), 'float32'), ('TENSOR', (1000, 512), 'float32'), None, 'float32'). A fallback configuration is used, which may bring great performance regression.
 
 
diff --git a/docs/_sources/tutorials/sg_execution_times.rst.txt b/docs/_sources/tutorials/sg_execution_times.rst.txt
index 5a43a2a..c8197f8 100644
--- a/docs/_sources/tutorials/sg_execution_times.rst.txt
+++ b/docs/_sources/tutorials/sg_execution_times.rst.txt
@@ -5,8 +5,8 @@
 
 Computation times
 =================
-**00:09.016** total execution time for **tutorials** files:
+**00:09.684** total execution time for **tutorials** files:
 
-- **00:08.536**: :ref:`sphx_glr_tutorials_relay_quick_start.py` (``relay_quick_start.py``)
-- **00:00.342**: :ref:`sphx_glr_tutorials_tensor_expr_get_started.py` (``tensor_expr_get_started.py``)
-- **00:00.139**: :ref:`sphx_glr_tutorials_cross_compilation_and_rpc.py` (``cross_compilation_and_rpc.py``)
+- **00:09.247**: :ref:`sphx_glr_tutorials_relay_quick_start.py` (``relay_quick_start.py``)
+- **00:00.309**: :ref:`sphx_glr_tutorials_tensor_expr_get_started.py` (``tensor_expr_get_started.py``)
+- **00:00.129**: :ref:`sphx_glr_tutorials_cross_compilation_and_rpc.py` (``cross_compilation_and_rpc.py``)
diff --git a/docs/_sources/tutorials/topi/intro_topi.rst.txt b/docs/_sources/tutorials/topi/intro_topi.rst.txt
index 42e786c..1229000 100644
--- a/docs/_sources/tutorials/topi/intro_topi.rst.txt
+++ b/docs/_sources/tutorials/topi/intro_topi.rst.txt
@@ -73,7 +73,7 @@ and to examine the IR code in human readable format, we can do
  .. code-block:: none
 
     primfn(A_1: handle) -> ()
-      attr = {"tir.noalias": True, "global_symbol": "main"}
+      attr = {"global_symbol": "main", "tir.noalias": True}
       buffers = {A: Buffer(A_2: handle, float32, [n: int32, m: int32], [stride: int32, stride_1: int32], type="auto")}
       buffer_map = {A_1: A} {
       attr [B: handle] "storage_scope" = "global";
@@ -81,7 +81,7 @@ and to examine the IR code in human readable format, we can do
       for (i: int32, 0, n) {
         B[i] = 0f32
         for (k: int32, 0, m) {
-          B[i] = ((float32*)B[i]) + (float32*)A_2[((i*stride) + (k*stride_1))]))
+          B[i] = ((float32*)B[i] + (float32*)A_2[((i*stride) + (k*stride_1))])
         }
       }
     }
@@ -113,7 +113,7 @@ Fortunately, we can replace those two lines with simple :code:`topi.sum` much li
  .. code-block:: none
 
     primfn(A_1: handle) -> ()
-      attr = {"tir.noalias": True, "global_symbol": "main"}
+      attr = {"global_symbol": "main", "tir.noalias": True}
       buffers = {A: Buffer(A_2: handle, float32, [n: int32, m: int32], [stride: int32, stride_1: int32], type="auto")}
       buffer_map = {A_1: A} {
       attr [A_red: handle] "storage_scope" = "global";
@@ -121,7 +121,7 @@ Fortunately, we can replace those two lines with simple :code:`topi.sum` much li
       for (ax0: int32, 0, n) {
         A_red[ax0] = 0f32
         for (k1: int32, 0, m) {
-          A_red[ax0] = ((float32*)A_red[ax0]) + (float32*)A_2[((ax0*stride) + (k1*stride_1))]))
+          A_red[ax0] = ((float32*)A_red[ax0] + (float32*)A_2[((ax0*stride) + (k1*stride_1))])
         }
       }
     }
@@ -183,10 +183,10 @@ we can schedule the following series of operations ending with :code:`topi.sum`
  .. code-block:: none
 
     primfn(a_1: handle, b_1: handle) -> ()
-      attr = {"tir.noalias": True, "global_symbol": "main"}
-      buffers = {a: Buffer(a_2: handle, float32, [100, 10, 10], []),
-                 b: Buffer(b_2: handle, float32, [10, 10], [])}
-      buffer_map = {b_1: b, a_1: a} {
+      attr = {"global_symbol": "main", "tir.noalias": True}
+      buffers = {b: Buffer(b_2: handle, float32, [10, 10], []),
+                 a: Buffer(a_2: handle, float32, [100, 10, 10], [])}
+      buffer_map = {a_1: a, b_1: b} {
       attr [T_divide_red: handle] "storage_scope" = "global";
       allocate(T_divide_red, float32, [1]);
       attr [IterVar(threadIdx.x: int32, [0:1024], "ThreadIndex", "threadIdx.x")] "thread_extent" = 1024;
@@ -196,14 +196,14 @@ we can schedule the following series of operations ending with :code:`topi.sum`
       allocate(reduce_temp0, float32, [1]) {
         T_divide_red.rf[0] = 0f32
         for (k0.k1.fused.k2.fused.outer: int32, 0, 10) {
-          if @likely((((((k0.k1.fused.k2.fused.outer*1024) + threadIdx.x) < 10000) && (((k0.k1.fused.k2.fused.outer*1024) + threadIdx.x) < 10000)) && (((k0.k1.fused.k2.fused.outer*1024) + threadIdx.x) < 10000)), dtype=bool, type="pure_intrin") {
-            T_divide_red.rf[0] = ((float32*)T_divide_red.rf[0]) + ((((float32*)a_2[((k0.k1.fused.k2.fused.outer*1024) + threadIdx.x)]) + (float32*)b_2[floormod(((k0.k1.fused.k2.fused.outer*1024) + threadIdx.x), 100)])) + ((float32*)a_2[((k0.k1.fused.k2.fused.outer*1024) + threadIdx.x)])*(float32*)b_2[floormod(((k0.k1.fused.k2.fused.outer*1024) + threadIdx.x), 100)])))*0.5f32))
+          if @tir.likely((((((k0.k1.fused.k2.fused.outer*1024) + threadIdx.x) < 10000) && (((k0.k1.fused.k2.fused.outer*1024) + threadIdx.x) < 10000)) && (((k0.k1.fused.k2.fused.outer*1024) + threadIdx.x) < 10000)), dtype=bool) {
+            T_divide_red.rf[0] = ((float32*)T_divide_red.rf[0] + ((((float32*)a_2[((k0.k1.fused.k2.fused.outer*1024) + threadIdx.x)] + (float32*)b_2[floormod(((k0.k1.fused.k2.fused.outer*1024) + threadIdx.x), 100)]) + ((float32*)a_2[((k0.k1.fused.k2.fused.outer*1024) + threadIdx.x)]*(float32*)b_2[floormod(((k0.k1.fused.k2.fused.outer*1024) + threadIdx.x), 100)]))*0.5f32))
           }
         }
-        attr [meta[tir.CommReducer][0]] "reduce_scope" = @reinterpret(0u64, dtype=handle, type="pure_intrin");
-        @tvm_thread_allreduce(1u32, (float32*)T_divide_red.rf[0]), True, reduce_temp0, threadIdx.x, dtype=handle, type="intrin")
+        attr [meta[tir.CommReducer][0]] "reduce_scope" = @tir.reinterpret(0u64, dtype=handle);
+        @tir.tvm_thread_allreduce(1u32, (float32*)T_divide_red.rf[0], True, reduce_temp0, threadIdx.x, dtype=handle)
         if (threadIdx.x == 0) {
-          T_divide_red[0] = (float32*)reduce_temp0[0])
+          T_divide_red[0] = (float32*)reduce_temp0[0]
         }
       }
     }
@@ -230,7 +230,7 @@ As you can see, scheduled stages of computation have been accumulated and we can
 
  .. code-block:: none
 
-    [stage(a, 0x12858c960), stage(b, 0x13bb5b610), stage(T_add, 0xc0ddf7a0), stage(T_multiply, 0x14f9e45e0), stage(T_elemwise_sum, 0x13d37cad0), stage(T_divide, 0x11cc3d6e0), stage(T_divide_red.rf, 0x11cc374f0), stage(T_divide_red, 0x11c91e300)]
+    [stage(a, 0x1230f5830), stage(b, 0x14e175730), stage(T_add, 0x11b48e260), stage(T_multiply, 0xa6350d70), stage(T_elemwise_sum, 0x12868aef0), stage(T_divide, 0x14ac01630), stage(T_divide_red.rf, 0xc7c0ebb0), stage(T_divide_red, 0x128535490)]
 
 
 
@@ -280,7 +280,7 @@ TOPI also provides common neural nets operations such as _softmax_ with optimize
  .. code-block:: none
 
     primfn(tarray_1: handle) -> ()
-      attr = {"tir.noalias": True, "global_symbol": "main"}
+      attr = {"global_symbol": "main", "tir.noalias": True}
       buffers = {tarray: Buffer(tarray_2: handle, float32, [512, 512], [])}
       buffer_map = {tarray_1: tarray} {
       attr [T_softmax_norm: handle] "storage_scope" = "global";
@@ -299,23 +299,23 @@ TOPI also provides common neural nets operations such as _softmax_ with optimize
         attr [IterVar(threadIdx.x: int32, [0:32], "ThreadIndex", "threadIdx.x")] "thread_extent" = 32 {
           normal_reduce_temp0[0] = -3.40282e+38f32
           for (k.inner: int32, 0, 16) {
-            normal_reduce_temp0[0] = max((float32*)normal_reduce_temp0[0]), (float32*)tarray_2[(((blockIdx.x*512) + (threadIdx.x*16)) + k.inner)]))
+            normal_reduce_temp0[0] = max((float32*)normal_reduce_temp0[0], (float32*)tarray_2[(((blockIdx.x*512) + (threadIdx.x*16)) + k.inner)])
           }
-          attr [meta[tir.CommReducer][0]] "reduce_scope" = @reinterpret(0u64, dtype=handle, type="pure_intrin");
-          @tvm_thread_allreduce(1u32, (float32*)normal_reduce_temp0[0]), True, reduce_temp0, threadIdx.x, dtype=handle, type="intrin")
+          attr [meta[tir.CommReducer][0]] "reduce_scope" = @tir.reinterpret(0u64, dtype=handle);
+          @tir.tvm_thread_allreduce(1u32, (float32*)normal_reduce_temp0[0], True, reduce_temp0, threadIdx.x, dtype=handle)
           for (i1.inner.outer: int32, 0, 4) {
-            T_softmax_exp[ramp(((threadIdx.x*16) + (i1.inner.outer*4)), 1, 4)] = @exp(((float32x4*)tarray_2[ramp((((blockIdx.x*512) + (threadIdx.x*16)) + (i1.inner.outer*4)), 1, 4)]) - broadcast((float32*)reduce_temp0[0]), 4)), dtype=float32x4, type="pure_intrin")
+            T_softmax_exp[ramp(((threadIdx.x*16) + (i1.inner.outer*4)), 1, 4)] = @tir.exp(((float32x4*)tarray_2[ramp((((blockIdx.x*512) + (threadIdx.x*16)) + (i1.inner.outer*4)), 1, 4)] - broadcast((float32*)reduce_temp0[0], 4)), dtype=float32x4)
           }
         }
         attr [IterVar(threadIdx.x, [0:32], "ThreadIndex", "threadIdx.x")] "thread_extent" = 32 {
           normal_reduce_temp0_1[0] = 0f32
           for (k.inner_1: int32, 0, 16) {
-            normal_reduce_temp0_1[0] = ((float32*)normal_reduce_temp0_1[0]) + (float32*)T_softmax_exp[((threadIdx.x*16) + k.inner_1)]))
+            normal_reduce_temp0_1[0] = ((float32*)normal_reduce_temp0_1[0] + (float32*)T_softmax_exp[((threadIdx.x*16) + k.inner_1)])
           }
-          attr [meta[tir.CommReducer][1]] "reduce_scope" = @reinterpret(0u64, dtype=handle, type="pure_intrin");
-          @tvm_thread_allreduce(1u32, (float32*)normal_reduce_temp0_1[0]), True, reduce_temp0_1, threadIdx.x, dtype=handle, type="intrin")
+          attr [meta[tir.CommReducer][1]] "reduce_scope" = @tir.reinterpret(0u64, dtype=handle);
+          @tir.tvm_thread_allreduce(1u32, (float32*)normal_reduce_temp0_1[0], True, reduce_temp0_1, threadIdx.x, dtype=handle)
           for (i1.inner.outer_1: int32, 0, 4) {
-            T_softmax_norm[ramp((((blockIdx.x*512) + (threadIdx.x*16)) + (i1.inner.outer_1*4)), 1, 4)] = ((float32x4*)T_softmax_exp[ramp(((threadIdx.x*16) + (i1.inner.outer_1*4)), 1, 4)]) / broadcast((float32*)reduce_temp0_1[0]), 4))
+            T_softmax_norm[ramp((((blockIdx.x*512) + (threadIdx.x*16)) + (i1.inner.outer_1*4)), 1, 4)] = ((float32x4*)T_softmax_exp[ramp(((threadIdx.x*16) + (i1.inner.outer_1*4)), 1, 4)] / broadcast((float32*)reduce_temp0_1[0], 4))
           }
         }
       }
@@ -361,10 +361,10 @@ We can fuse :code:`topi.nn.conv2d` and :code:`topi.nn.relu` together.
  .. code-block:: none
 
     primfn(placeholder_2: handle, placeholder_3: handle) -> ()
-      attr = {"tir.noalias": True, "global_symbol": "main"}
-      buffers = {placeholder_1: Buffer(placeholder_4: handle, float32, [1, 3, 224, 224], []),
-                 placeholder: Buffer(placeholder_5: handle, float32, [10, 3, 5, 5], [])}
-      buffer_map = {placeholder_3: placeholder, placeholder_2: placeholder_1} {
+      attr = {"global_symbol": "main", "tir.noalias": True}
+      buffers = {placeholder_1: Buffer(placeholder_4: handle, float32, [10, 3, 5, 5], []),
+                 placeholder: Buffer(placeholder_5: handle, float32, [1, 3, 224, 224], [])}
+      buffer_map = {placeholder_2: placeholder, placeholder_3: placeholder_1} {
       attr [compute: handle] "storage_scope" = "global";
       allocate(compute, float32, [501760]);
       attr [IterVar(blockIdx.z: int32, (nullptr), "ThreadIndex", "blockIdx.z")] "thread_extent" = 5;
@@ -398,174 +398,174 @@ We can fuse :code:`topi.nn.conv2d` and :code:`topi.nn.relu` together.
             attr [IterVar(threadIdx.z_1: int32, (nullptr), "ThreadIndex", "threadIdx.z")] "thread_extent" = 1;
             attr [IterVar(threadIdx.y_1: int32, (nullptr), "ThreadIndex", "threadIdx.y")] "thread_extent" = 1;
             attr [IterVar(threadIdx.x_1: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 16 {
-              pad_temp.shared[(threadIdx.x_1*7)] = @tvm_if_then_else((((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)) && (2 <= ((blockIdx.x*112) + (threadIdx.x_1*7)))), (float32*)placeholder_4[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 450)]), 0f32, dtype=float32, type="pure_intrin")
-              pad_temp.shared[((threadIdx.x_1*7) + 1)] = @tvm_if_then_else((((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)) && (1 <= ((blockIdx.x*112) + (threadIdx.x_1*7)))), (float32*)placeholder_4[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 449)]), 0f32, dtype=float32, type="pure_intrin")
-              pad_temp.shared[((threadIdx.x_1*7) + 2)] = @tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), (float32*)placeholder_4[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 448)]), 0f32, dtype=float32, type="pure_intrin")
-              pad_temp.shared[((threadIdx.x_1*7) + 3)] = @tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), (float32*)placeholder_4[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 447)]), 0f32, dtype=float32, type="pure_intrin")
-              pad_temp.shared[((threadIdx.x_1*7) + 4)] = @tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), (float32*)placeholder_4[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 446)]), 0f32, dtype=float32, type="pure_intrin")
-              pad_temp.shared[((threadIdx.x_1*7) + 5)] = @tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), (float32*)placeholder_4[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 445)]), 0f32, dtype=float32, type="pure_intrin")
-              pad_temp.shared[((threadIdx.x_1*7) + 6)] = @tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), (float32*)placeholder_4[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 444)]), 0f32, dtype=float32, type="pure_intrin")
+              pad_temp.shared[(threadIdx.x_1*7)] = @tir.if_then_else((((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)) && (2 <= ((blockIdx.x*112) + (threadIdx.x_1*7)))), (float32*)placeholder_5[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 450)], 0f32, dtype=float32)
+              pad_temp.shared[((threadIdx.x_1*7) + 1)] = @tir.if_then_else((((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)) && (1 <= ((blockIdx.x*112) + (threadIdx.x_1*7)))), (float32*)placeholder_5[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 449)], 0f32, dtype=float32)
+              pad_temp.shared[((threadIdx.x_1*7) + 2)] = @tir.if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), (float32*)placeholder_5[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 448)], 0f32, dtype=float32)
+              pad_temp.shared[((threadIdx.x_1*7) + 3)] = @tir.if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), (float32*)placeholder_5[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 447)], 0f32, dtype=float32)
+              pad_temp.shared[((threadIdx.x_1*7) + 4)] = @tir.if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), (float32*)placeholder_5[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 446)], 0f32, dtype=float32)
+              pad_temp.shared[((threadIdx.x_1*7) + 5)] = @tir.if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), (float32*)placeholder_5[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 445)], 0f32, dtype=float32)
+              pad_temp.shared[((threadIdx.x_1*7) + 6)] = @tir.if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), (float32*)placeholder_5[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 444)], 0f32, dtype=float32)
             }
             attr [IterVar(threadIdx.z_2: int32, (nullptr), "ThreadIndex", "threadIdx.z")] "thread_extent" = 1;
             attr [IterVar(threadIdx.y_2: int32, (nullptr), "ThreadIndex", "threadIdx.y")] "thread_extent" = 1;
             attr [IterVar(threadIdx.x_2: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 16;
-            if @likely((threadIdx.x_2 < 2), dtype=bool, type="pure_intrin") {
-              placeholder.shared[threadIdx.x_2] = (float32*)placeholder_5[((((blockIdx.z*150) + (threadIdx.x_2*75)) + (rc.outer*25)) + (ry.outer*5))])
+            if @tir.likely((threadIdx.x_2 < 2), dtype=bool) {
+              placeholder.shared[threadIdx.x_2] = (float32*)placeholder_4[((((blockIdx.z*150) + (threadIdx.x_2*75)) + (rc.outer*25)) + (ry.outer*5))]
             }
-            compute_1[0] = ((float32*)compute_1[0]) + ((float32*)pad_temp.shared[threadIdx.x])*(float32*)placeholder.shared[0])))
-            compute_1[2] = ((float32*)compute_1[2]) + ((float32*)pad_temp.shared[(threadIdx.x + 16)])*(float32*)placeholder.shared[0])))
-            compute_1[4] = ((float32*)compute_1[4]) + ((float32*)pad_temp.shared[(threadIdx.x + 32)])*(float32*)placeholder.shared[0])))
-            compute_1[6] = ((float32*)compute_1[6]) + ((float32*)pad_temp.shared[(threadIdx.x + 48)])*(float32*)placeholder.shared[0])))
-            compute_1[8] = ((float32*)compute_1[8]) + ((float32*)pad_temp.shared[(threadIdx.x + 64)])*(float32*)placeholder.shared[0])))
-            compute_1[10] = ((float32*)compute_1[10]) + ((float32*)pad_temp.shared[(threadIdx.x + 80)])*(float32*)placeholder.shared[0])))
-            compute_1[12] = ((float32*)compute_1[12]) + ((float32*)pad_temp.shared[(threadIdx.x + 96)])*(float32*)placeholder.shared[0])))
-            compute_1[1] = ((float32*)compute_1[1]) + ((float32*)pad_temp.shared[threadIdx.x])*(float32*)placeholder.shared[1])))
-            compute_1[3] = ((float32*)compute_1[3]) + ((float32*)pad_temp.shared[(threadIdx.x + 16)])*(float32*)placeholder.shared[1])))
-            compute_1[5] = ((float32*)compute_1[5]) + ((float32*)pad_temp.shared[(threadIdx.x + 32)])*(float32*)placeholder.shared[1])))
-            compute_1[7] = ((float32*)compute_1[7]) + ((float32*)pad_temp.shared[(threadIdx.x + 48)])*(float32*)placeholder.shared[1])))
-            compute_1[9] = ((float32*)compute_1[9]) + ((float32*)pad_temp.shared[(threadIdx.x + 64)])*(float32*)placeholder.shared[1])))
-            compute_1[11] = ((float32*)compute_1[11]) + ((float32*)pad_temp.shared[(threadIdx.x + 80)])*(float32*)placeholder.shared[1])))
-            compute_1[13] = ((float32*)compute_1[13]) + ((float32*)pad_temp.shared[(threadIdx.x + 96)])*(float32*)placeholder.shared[1])))
+            compute_1[0] = ((float32*)compute_1[0] + ((float32*)pad_temp.shared[threadIdx.x]*(float32*)placeholder.shared[0]))
+            compute_1[2] = ((float32*)compute_1[2] + ((float32*)pad_temp.shared[(threadIdx.x + 16)]*(float32*)placeholder.shared[0]))
+            compute_1[4] = ((float32*)compute_1[4] + ((float32*)pad_temp.shared[(threadIdx.x + 32)]*(float32*)placeholder.shared[0]))
+            compute_1[6] = ((float32*)compute_1[6] + ((float32*)pad_temp.shared[(threadIdx.x + 48)]*(float32*)placeholder.shared[0]))
+            compute_1[8] = ((float32*)compute_1[8] + ((float32*)pad_temp.shared[(threadIdx.x + 64)]*(float32*)placeholder.shared[0]))
+            compute_1[10] = ((float32*)compute_1[10] + ((float32*)pad_temp.shared[(threadIdx.x + 80)]*(float32*)placeholder.shared[0]))
+            compute_1[12] = ((float32*)compute_1[12] + ((float32*)pad_temp.shared[(threadIdx.x + 96)]*(float32*)placeholder.shared[0]))
+            compute_1[1] = ((float32*)compute_1[1] + ((float32*)pad_temp.shared[threadIdx.x]*(float32*)placeholder.shared[1]))
+            compute_1[3] = ((float32*)compute_1[3] + ((float32*)pad_temp.shared[(threadIdx.x + 16)]*(float32*)placeholder.shared[1]))
+            compute_1[5] = ((float32*)compute_1[5] + ((float32*)pad_temp.shared[(threadIdx.x + 32)]*(float32*)placeholder.shared[1]))
+            compute_1[7] = ((float32*)compute_1[7] + ((float32*)pad_temp.shared[(threadIdx.x + 48)]*(float32*)placeholder.shared[1]))
+            compute_1[9] = ((float32*)compute_1[9] + ((float32*)pad_temp.shared[(threadIdx.x + 64)]*(float32*)placeholder.shared[1]))
+            compute_1[11] = ((float32*)compute_1[11] + ((float32*)pad_temp.shared[(threadIdx.x + 80)]*(float32*)placeholder.shared[1]))
+            compute_1[13] = ((float32*)compute_1[13] + ((float32*)pad_temp.shared[(threadIdx.x + 96)]*(float32*)placeholder.shared[1]))
             attr [IterVar(threadIdx.z_1, (nullptr), "ThreadIndex", "threadIdx.z")] "thread_extent" = 1;
             attr [IterVar(threadIdx.y_1, (nullptr), "ThreadIndex", "threadIdx.y")] "thread_extent" = 1;
             attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 16 {
-              pad_temp.shared[(threadIdx.x_1*7)] = @tvm_if_then_else((((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)) && (1 <= ((blockIdx.x*112) + (threadIdx.x_1*7)))), (float32*)placeholder_4[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 449)]), 0f32, dtype=float32, type="pure_intrin")
-              pad_temp.shared[((threadIdx.x_1*7) + 1)] = @tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), (float32*)placeholder_4[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 448)]), 0f32, dtype=float32, type="pure_intrin")
-              pad_temp.shared[((threadIdx.x_1*7) + 2)] = @tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), (float32*)placeholder_4[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 447)]), 0f32, dtype=float32, type="pure_intrin")
-              pad_temp.shared[((threadIdx.x_1*7) + 3)] = @tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), (float32*)placeholder_4[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 446)]), 0f32, dtype=float32, type="pure_intrin")
-              pad_temp.shared[((threadIdx.x_1*7) + 4)] = @tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), (float32*)placeholder_4[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 445)]), 0f32, dtype=float32, type="pure_intrin")
-              pad_temp.shared[((threadIdx.x_1*7) + 5)] = @tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), (float32*)placeholder_4[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 444)]), 0f32, dtype=float32, type="pure_intrin")
-              pad_temp.shared[((threadIdx.x_1*7) + 6)] = @tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), (float32*)placeholder_4[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 443)]), 0f32, dtype=float32, type="pure_intrin")
+              pad_temp.shared[(threadIdx.x_1*7)] = @tir.if_then_else((((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)) && (1 <= ((blockIdx.x*112) + (threadIdx.x_1*7)))), (float32*)placeholder_5[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 449)], 0f32, dtype=float32)
+              pad_temp.shared[((threadIdx.x_1*7) + 1)] = @tir.if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), (float32*)placeholder_5[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 448)], 0f32, dtype=float32)
+              pad_temp.shared[((threadIdx.x_1*7) + 2)] = @tir.if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), (float32*)placeholder_5[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 447)], 0f32, dtype=float32)
+              pad_temp.shared[((threadIdx.x_1*7) + 3)] = @tir.if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), (float32*)placeholder_5[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 446)], 0f32, dtype=float32)
+              pad_temp.shared[((threadIdx.x_1*7) + 4)] = @tir.if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), (float32*)placeholder_5[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 445)], 0f32, dtype=float32)
+              pad_temp.shared[((threadIdx.x_1*7) + 5)] = @tir.if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), (float32*)placeholder_5[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 444)], 0f32, dtype=float32)
+              pad_temp.shared[((threadIdx.x_1*7) + 6)] = @tir.if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), (float32*)placeholder_5[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 443)], 0f32, dtype=float32)
             }
             attr [IterVar(threadIdx.z_2, (nullptr), "ThreadIndex", "threadIdx.z")] "thread_extent" = 1;
             attr [IterVar(threadIdx.y_2, (nullptr), "ThreadIndex", "threadIdx.y")] "thread_extent" = 1;
             attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 16;
-            if @likely((threadIdx.x_2 < 2), dtype=bool, type="pure_intrin") {
-              placeholder.shared[threadIdx.x_2] = (float32*)placeholder_5[(((((blockIdx.z*150) + (threadIdx.x_2*75)) + (rc.outer*25)) + (ry.outer*5)) + 1)])
+            if @tir.likely((threadIdx.x_2 < 2), dtype=bool) {
+              placeholder.shared[threadIdx.x_2] = (float32*)placeholder_4[(((((blockIdx.z*150) + (threadIdx.x_2*75)) + (rc.outer*25)) + (ry.outer*5)) + 1)]
             }
-            compute_1[0] = ((float32*)compute_1[0]) + ((float32*)pad_temp.shared[threadIdx.x])*(float32*)placeholder.shared[0])))
-            compute_1[2] = ((float32*)compute_1[2]) + ((float32*)pad_temp.shared[(threadIdx.x + 16)])*(float32*)placeholder.shared[0])))
-            compute_1[4] = ((float32*)compute_1[4]) + ((float32*)pad_temp.shared[(threadIdx.x + 32)])*(float32*)placeholder.shared[0])))
-            compute_1[6] = ((float32*)compute_1[6]) + ((float32*)pad_temp.shared[(threadIdx.x + 48)])*(float32*)placeholder.shared[0])))
-            compute_1[8] = ((float32*)compute_1[8]) + ((float32*)pad_temp.shared[(threadIdx.x + 64)])*(float32*)placeholder.shared[0])))
-            compute_1[10] = ((float32*)compute_1[10]) + ((float32*)pad_temp.shared[(threadIdx.x + 80)])*(float32*)placeholder.shared[0])))
-            compute_1[12] = ((float32*)compute_1[12]) + ((float32*)pad_temp.shared[(threadIdx.x + 96)])*(float32*)placeholder.shared[0])))
-            compute_1[1] = ((float32*)compute_1[1]) + ((float32*)pad_temp.shared[threadIdx.x])*(float32*)placeholder.shared[1])))
-            compute_1[3] = ((float32*)compute_1[3]) + ((float32*)pad_temp.shared[(threadIdx.x + 16)])*(float32*)placeholder.shared[1])))
-            compute_1[5] = ((float32*)compute_1[5]) + ((float32*)pad_temp.shared[(threadIdx.x + 32)])*(float32*)placeholder.shared[1])))
-            compute_1[7] = ((float32*)compute_1[7]) + ((float32*)pad_temp.shared[(threadIdx.x + 48)])*(float32*)placeholder.shared[1])))
-            compute_1[9] = ((float32*)compute_1[9]) + ((float32*)pad_temp.shared[(threadIdx.x + 64)])*(float32*)placeholder.shared[1])))
-            compute_1[11] = ((float32*)compute_1[11]) + ((float32*)pad_temp.shared[(threadIdx.x + 80)])*(float32*)placeholder.shared[1])))
-            compute_1[13] = ((float32*)compute_1[13]) + ((float32*)pad_temp.shared[(threadIdx.x + 96)])*(float32*)placeholder.shared[1])))
+            compute_1[0] = ((float32*)compute_1[0] + ((float32*)pad_temp.shared[threadIdx.x]*(float32*)placeholder.shared[0]))
+            compute_1[2] = ((float32*)compute_1[2] + ((float32*)pad_temp.shared[(threadIdx.x + 16)]*(float32*)placeholder.shared[0]))
+            compute_1[4] = ((float32*)compute_1[4] + ((float32*)pad_temp.shared[(threadIdx.x + 32)]*(float32*)placeholder.shared[0]))
+            compute_1[6] = ((float32*)compute_1[6] + ((float32*)pad_temp.shared[(threadIdx.x + 48)]*(float32*)placeholder.shared[0]))
+            compute_1[8] = ((float32*)compute_1[8] + ((float32*)pad_temp.shared[(threadIdx.x + 64)]*(float32*)placeholder.shared[0]))
+            compute_1[10] = ((float32*)compute_1[10] + ((float32*)pad_temp.shared[(threadIdx.x + 80)]*(float32*)placeholder.shared[0]))
+            compute_1[12] = ((float32*)compute_1[12] + ((float32*)pad_temp.shared[(threadIdx.x + 96)]*(float32*)placeholder.shared[0]))
+            compute_1[1] = ((float32*)compute_1[1] + ((float32*)pad_temp.shared[threadIdx.x]*(float32*)placeholder.shared[1]))
+            compute_1[3] = ((float32*)compute_1[3] + ((float32*)pad_temp.shared[(threadIdx.x + 16)]*(float32*)placeholder.shared[1]))
+            compute_1[5] = ((float32*)compute_1[5] + ((float32*)pad_temp.shared[(threadIdx.x + 32)]*(float32*)placeholder.shared[1]))
+            compute_1[7] = ((float32*)compute_1[7] + ((float32*)pad_temp.shared[(threadIdx.x + 48)]*(float32*)placeholder.shared[1]))
+            compute_1[9] = ((float32*)compute_1[9] + ((float32*)pad_temp.shared[(threadIdx.x + 64)]*(float32*)placeholder.shared[1]))
+            compute_1[11] = ((float32*)compute_1[11] + ((float32*)pad_temp.shared[(threadIdx.x + 80)]*(float32*)placeholder.shared[1]))
+            compute_1[13] = ((float32*)compute_1[13] + ((float32*)pad_temp.shared[(threadIdx.x + 96)]*(float32*)placeholder.shared[1]))
             attr [IterVar(threadIdx.z_1, (nullptr), "ThreadIndex", "threadIdx.z")] "thread_extent" = 1;
             attr [IterVar(threadIdx.y_1, (nullptr), "ThreadIndex", "threadIdx.y")] "thread_extent" = 1;
             attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 16 {
-              pad_temp.shared[(threadIdx.x_1*7)] = @tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), (float32*)placeholder_4[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 448)]), 0f32, dtype=float32, type="pure_intrin")
-              pad_temp.shared[((threadIdx.x_1*7) + 1)] = @tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), (float32*)placeholder_4[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 447)]), 0f32, dtype=float32, type="pure_intrin")
-              pad_temp.shared[((threadIdx.x_1*7) + 2)] = @tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), (float32*)placeholder_4[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 446)]), 0f32, dtype=float32, type="pure_intrin")
-              pad_temp.shared[((threadIdx.x_1*7) + 3)] = @tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), (float32*)placeholder_4[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 445)]), 0f32, dtype=float32, type="pure_intrin")
-              pad_temp.shared[((threadIdx.x_1*7) + 4)] = @tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), (float32*)placeholder_4[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 444)]), 0f32, dtype=float32, type="pure_intrin")
-              pad_temp.shared[((threadIdx.x_1*7) + 5)] = @tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), (float32*)placeholder_4[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 443)]), 0f32, dtype=float32, type="pure_intrin")
-              pad_temp.shared[((threadIdx.x_1*7) + 6)] = @tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), (float32*)placeholder_4[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 442)]), 0f32, dtype=float32, type="pure_intrin")
+              pad_temp.shared[(threadIdx.x_1*7)] = @tir.if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), (float32*)placeholder_5[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 448)], 0f32, dtype=float32)
+              pad_temp.shared[((threadIdx.x_1*7) + 1)] = @tir.if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), (float32*)placeholder_5[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 447)], 0f32, dtype=float32)
+              pad_temp.shared[((threadIdx.x_1*7) + 2)] = @tir.if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), (float32*)placeholder_5[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 446)], 0f32, dtype=float32)
+              pad_temp.shared[((threadIdx.x_1*7) + 3)] = @tir.if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), (float32*)placeholder_5[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 445)], 0f32, dtype=float32)
+              pad_temp.shared[((threadIdx.x_1*7) + 4)] = @tir.if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), (float32*)placeholder_5[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 444)], 0f32, dtype=float32)
+              pad_temp.shared[((threadIdx.x_1*7) + 5)] = @tir.if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), (float32*)placeholder_5[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 443)], 0f32, dtype=float32)
+              pad_temp.shared[((threadIdx.x_1*7) + 6)] = @tir.if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), (float32*)placeholder_5[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 442)], 0f32, dtype=float32)
             }
             attr [IterVar(threadIdx.z_2, (nullptr), "ThreadIndex", "threadIdx.z")] "thread_extent" = 1;
             attr [IterVar(threadIdx.y_2, (nullptr), "ThreadIndex", "threadIdx.y")] "thread_extent" = 1;
             attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 16;
-            if @likely((threadIdx.x_2 < 2), dtype=bool, type="pure_intrin") {
-              placeholder.shared[threadIdx.x_2] = (float32*)placeholder_5[(((((blockIdx.z*150) + (threadIdx.x_2*75)) + (rc.outer*25)) + (ry.outer*5)) + 2)])
+            if @tir.likely((threadIdx.x_2 < 2), dtype=bool) {
+              placeholder.shared[threadIdx.x_2] = (float32*)placeholder_4[(((((blockIdx.z*150) + (threadIdx.x_2*75)) + (rc.outer*25)) + (ry.outer*5)) + 2)]
             }
-            compute_1[0] = ((float32*)compute_1[0]) + ((float32*)pad_temp.shared[threadIdx.x])*(float32*)placeholder.shared[0])))
-            compute_1[2] = ((float32*)compute_1[2]) + ((float32*)pad_temp.shared[(threadIdx.x + 16)])*(float32*)placeholder.shared[0])))
-            compute_1[4] = ((float32*)compute_1[4]) + ((float32*)pad_temp.shared[(threadIdx.x + 32)])*(float32*)placeholder.shared[0])))
-            compute_1[6] = ((float32*)compute_1[6]) + ((float32*)pad_temp.shared[(threadIdx.x + 48)])*(float32*)placeholder.shared[0])))
-            compute_1[8] = ((float32*)compute_1[8]) + ((float32*)pad_temp.shared[(threadIdx.x + 64)])*(float32*)placeholder.shared[0])))
-            compute_1[10] = ((float32*)compute_1[10]) + ((float32*)pad_temp.shared[(threadIdx.x + 80)])*(float32*)placeholder.shared[0])))
-            compute_1[12] = ((float32*)compute_1[12]) + ((float32*)pad_temp.shared[(threadIdx.x + 96)])*(float32*)placeholder.shared[0])))
-            compute_1[1] = ((float32*)compute_1[1]) + ((float32*)pad_temp.shared[threadIdx.x])*(float32*)placeholder.shared[1])))
-            compute_1[3] = ((float32*)compute_1[3]) + ((float32*)pad_temp.shared[(threadIdx.x + 16)])*(float32*)placeholder.shared[1])))
-            compute_1[5] = ((float32*)compute_1[5]) + ((float32*)pad_temp.shared[(threadIdx.x + 32)])*(float32*)placeholder.shared[1])))
-            compute_1[7] = ((float32*)compute_1[7]) + ((float32*)pad_temp.shared[(threadIdx.x + 48)])*(float32*)placeholder.shared[1])))
-            compute_1[9] = ((float32*)compute_1[9]) + ((float32*)pad_temp.shared[(threadIdx.x + 64)])*(float32*)placeholder.shared[1])))
-            compute_1[11] = ((float32*)compute_1[11]) + ((float32*)pad_temp.shared[(threadIdx.x + 80)])*(float32*)placeholder.shared[1])))
-            compute_1[13] = ((float32*)compute_1[13]) + ((float32*)pad_temp.shared[(threadIdx.x + 96)])*(float32*)placeholder.shared[1])))
+            compute_1[0] = ((float32*)compute_1[0] + ((float32*)pad_temp.shared[threadIdx.x]*(float32*)placeholder.shared[0]))
+            compute_1[2] = ((float32*)compute_1[2] + ((float32*)pad_temp.shared[(threadIdx.x + 16)]*(float32*)placeholder.shared[0]))
+            compute_1[4] = ((float32*)compute_1[4] + ((float32*)pad_temp.shared[(threadIdx.x + 32)]*(float32*)placeholder.shared[0]))
+            compute_1[6] = ((float32*)compute_1[6] + ((float32*)pad_temp.shared[(threadIdx.x + 48)]*(float32*)placeholder.shared[0]))
+            compute_1[8] = ((float32*)compute_1[8] + ((float32*)pad_temp.shared[(threadIdx.x + 64)]*(float32*)placeholder.shared[0]))
+            compute_1[10] = ((float32*)compute_1[10] + ((float32*)pad_temp.shared[(threadIdx.x + 80)]*(float32*)placeholder.shared[0]))
+            compute_1[12] = ((float32*)compute_1[12] + ((float32*)pad_temp.shared[(threadIdx.x + 96)]*(float32*)placeholder.shared[0]))
+            compute_1[1] = ((float32*)compute_1[1] + ((float32*)pad_temp.shared[threadIdx.x]*(float32*)placeholder.shared[1]))
+            compute_1[3] = ((float32*)compute_1[3] + ((float32*)pad_temp.shared[(threadIdx.x + 16)]*(float32*)placeholder.shared[1]))
+            compute_1[5] = ((float32*)compute_1[5] + ((float32*)pad_temp.shared[(threadIdx.x + 32)]*(float32*)placeholder.shared[1]))
+            compute_1[7] = ((float32*)compute_1[7] + ((float32*)pad_temp.shared[(threadIdx.x + 48)]*(float32*)placeholder.shared[1]))
+            compute_1[9] = ((float32*)compute_1[9] + ((float32*)pad_temp.shared[(threadIdx.x + 64)]*(float32*)placeholder.shared[1]))
+            compute_1[11] = ((float32*)compute_1[11] + ((float32*)pad_temp.shared[(threadIdx.x + 80)]*(float32*)placeholder.shared[1]))
+            compute_1[13] = ((float32*)compute_1[13] + ((float32*)pad_temp.shared[(threadIdx.x + 96)]*(float32*)placeholder.shared[1]))
             attr [IterVar(threadIdx.z_1, (nullptr), "ThreadIndex", "threadIdx.z")] "thread_extent" = 1;
             attr [IterVar(threadIdx.y_1, (nullptr), "ThreadIndex", "threadIdx.y")] "thread_extent" = 1;
             attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 16 {
-              pad_temp.shared[(threadIdx.x_1*7)] = @tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), (float32*)placeholder_4[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 447)]), 0f32, dtype=float32, type="pure_intrin")
-              pad_temp.shared[((threadIdx.x_1*7) + 1)] = @tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), (float32*)placeholder_4[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 446)]), 0f32, dtype=float32, type="pure_intrin")
-              pad_temp.shared[((threadIdx.x_1*7) + 2)] = @tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), (float32*)placeholder_4[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 445)]), 0f32, dtype=float32, type="pure_intrin")
-              pad_temp.shared[((threadIdx.x_1*7) + 3)] = @tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), (float32*)placeholder_4[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 444)]), 0f32, dtype=float32, type="pure_intrin")
-              pad_temp.shared[((threadIdx.x_1*7) + 4)] = @tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), (float32*)placeholder_4[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 443)]), 0f32, dtype=float32, type="pure_intrin")
-              pad_temp.shared[((threadIdx.x_1*7) + 5)] = @tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), (float32*)placeholder_4[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 442)]), 0f32, dtype=float32, type="pure_intrin")
-              pad_temp.shared[((threadIdx.x_1*7) + 6)] = @tvm_if_then_else((((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)) && (((blockIdx.x*112) + (threadIdx.x_1*7)) < 217)), (float32*)placeholder_4[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 441)]), 0f32, dtype=float32, type="pure_intrin")
+              pad_temp.shared[(threadIdx.x_1*7)] = @tir.if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), (float32*)placeholder_5[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 447)], 0f32, dtype=float32)
+              pad_temp.shared[((threadIdx.x_1*7) + 1)] = @tir.if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), (float32*)placeholder_5[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 446)], 0f32, dtype=float32)
+              pad_temp.shared[((threadIdx.x_1*7) + 2)] = @tir.if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), (float32*)placeholder_5[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 445)], 0f32, dtype=float32)
+              pad_temp.shared[((threadIdx.x_1*7) + 3)] = @tir.if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), (float32*)placeholder_5[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 444)], 0f32, dtype=float32)
+              pad_temp.shared[((threadIdx.x_1*7) + 4)] = @tir.if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), (float32*)placeholder_5[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 443)], 0f32, dtype=float32)
+              pad_temp.shared[((threadIdx.x_1*7) + 5)] = @tir.if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), (float32*)placeholder_5[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 442)], 0f32, dtype=float32)
+              pad_temp.shared[((threadIdx.x_1*7) + 6)] = @tir.if_then_else((((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)) && (((blockIdx.x*112) + (threadIdx.x_1*7)) < 217)), (float32*)placeholder_5[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 441)], 0f32, dtype=float32)
             }
             attr [IterVar(threadIdx.z_2, (nullptr), "ThreadIndex", "threadIdx.z")] "thread_extent" = 1;
             attr [IterVar(threadIdx.y_2, (nullptr), "ThreadIndex", "threadIdx.y")] "thread_extent" = 1;
             attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 16;
-            if @likely((threadIdx.x_2 < 2), dtype=bool, type="pure_intrin") {
-              placeholder.shared[threadIdx.x_2] = (float32*)placeholder_5[(((((blockIdx.z*150) + (threadIdx.x_2*75)) + (rc.outer*25)) + (ry.outer*5)) + 3)])
+            if @tir.likely((threadIdx.x_2 < 2), dtype=bool) {
+              placeholder.shared[threadIdx.x_2] = (float32*)placeholder_4[(((((blockIdx.z*150) + (threadIdx.x_2*75)) + (rc.outer*25)) + (ry.outer*5)) + 3)]
             }
-            compute_1[0] = ((float32*)compute_1[0]) + ((float32*)pad_temp.shared[threadIdx.x])*(float32*)placeholder.shared[0])))
-            compute_1[2] = ((float32*)compute_1[2]) + ((float32*)pad_temp.shared[(threadIdx.x + 16)])*(float32*)placeholder.shared[0])))
-            compute_1[4] = ((float32*)compute_1[4]) + ((float32*)pad_temp.shared[(threadIdx.x + 32)])*(float32*)placeholder.shared[0])))
-            compute_1[6] = ((float32*)compute_1[6]) + ((float32*)pad_temp.shared[(threadIdx.x + 48)])*(float32*)placeholder.shared[0])))
-            compute_1[8] = ((float32*)compute_1[8]) + ((float32*)pad_temp.shared[(threadIdx.x + 64)])*(float32*)placeholder.shared[0])))
-            compute_1[10] = ((float32*)compute_1[10]) + ((float32*)pad_temp.shared[(threadIdx.x + 80)])*(float32*)placeholder.shared[0])))
-            compute_1[12] = ((float32*)compute_1[12]) + ((float32*)pad_temp.shared[(threadIdx.x + 96)])*(float32*)placeholder.shared[0])))
-            compute_1[1] = ((float32*)compute_1[1]) + ((float32*)pad_temp.shared[threadIdx.x])*(float32*)placeholder.shared[1])))
-            compute_1[3] = ((float32*)compute_1[3]) + ((float32*)pad_temp.shared[(threadIdx.x + 16)])*(float32*)placeholder.shared[1])))
-            compute_1[5] = ((float32*)compute_1[5]) + ((float32*)pad_temp.shared[(threadIdx.x + 32)])*(float32*)placeholder.shared[1])))
-            compute_1[7] = ((float32*)compute_1[7]) + ((float32*)pad_temp.shared[(threadIdx.x + 48)])*(float32*)placeholder.shared[1])))
-            compute_1[9] = ((float32*)compute_1[9]) + ((float32*)pad_temp.shared[(threadIdx.x + 64)])*(float32*)placeholder.shared[1])))
-            compute_1[11] = ((float32*)compute_1[11]) + ((float32*)pad_temp.shared[(threadIdx.x + 80)])*(float32*)placeholder.shared[1])))
-            compute_1[13] = ((float32*)compute_1[13]) + ((float32*)pad_temp.shared[(threadIdx.x + 96)])*(float32*)placeholder.shared[1])))
+            compute_1[0] = ((float32*)compute_1[0] + ((float32*)pad_temp.shared[threadIdx.x]*(float32*)placeholder.shared[0]))
+            compute_1[2] = ((float32*)compute_1[2] + ((float32*)pad_temp.shared[(threadIdx.x + 16)]*(float32*)placeholder.shared[0]))
+            compute_1[4] = ((float32*)compute_1[4] + ((float32*)pad_temp.shared[(threadIdx.x + 32)]*(float32*)placeholder.shared[0]))
+            compute_1[6] = ((float32*)compute_1[6] + ((float32*)pad_temp.shared[(threadIdx.x + 48)]*(float32*)placeholder.shared[0]))
+            compute_1[8] = ((float32*)compute_1[8] + ((float32*)pad_temp.shared[(threadIdx.x + 64)]*(float32*)placeholder.shared[0]))
+            compute_1[10] = ((float32*)compute_1[10] + ((float32*)pad_temp.shared[(threadIdx.x + 80)]*(float32*)placeholder.shared[0]))
+            compute_1[12] = ((float32*)compute_1[12] + ((float32*)pad_temp.shared[(threadIdx.x + 96)]*(float32*)placeholder.shared[0]))
+            compute_1[1] = ((float32*)compute_1[1] + ((float32*)pad_temp.shared[threadIdx.x]*(float32*)placeholder.shared[1]))
+            compute_1[3] = ((float32*)compute_1[3] + ((float32*)pad_temp.shared[(threadIdx.x + 16)]*(float32*)placeholder.shared[1]))
+            compute_1[5] = ((float32*)compute_1[5] + ((float32*)pad_temp.shared[(threadIdx.x + 32)]*(float32*)placeholder.shared[1]))
+            compute_1[7] = ((float32*)compute_1[7] + ((float32*)pad_temp.shared[(threadIdx.x + 48)]*(float32*)placeholder.shared[1]))
+            compute_1[9] = ((float32*)compute_1[9] + ((float32*)pad_temp.shared[(threadIdx.x + 64)]*(float32*)placeholder.shared[1]))
+            compute_1[11] = ((float32*)compute_1[11] + ((float32*)pad_temp.shared[(threadIdx.x + 80)]*(float32*)placeholder.shared[1]))
+            compute_1[13] = ((float32*)compute_1[13] + ((float32*)pad_temp.shared[(threadIdx.x + 96)]*(float32*)placeholder.shared[1]))
             attr [IterVar(threadIdx.z_1, (nullptr), "ThreadIndex", "threadIdx.z")] "thread_extent" = 1;
             attr [IterVar(threadIdx.y_1, (nullptr), "ThreadIndex", "threadIdx.y")] "thread_extent" = 1;
             attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 16 {
-              pad_temp.shared[(threadIdx.x_1*7)] = @tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), (float32*)placeholder_4[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 446)]), 0f32, dtype=float32, type="pure_intrin")
-              pad_temp.shared[((threadIdx.x_1*7) + 1)] = @tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), (float32*)placeholder_4[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 445)]), 0f32, dtype=float32, type="pure_intrin")
-              pad_temp.shared[((threadIdx.x_1*7) + 2)] = @tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), (float32*)placeholder_4[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 444)]), 0f32, dtype=float32, type="pure_intrin")
-              pad_temp.shared[((threadIdx.x_1*7) + 3)] = @tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), (float32*)placeholder_4[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 443)]), 0f32, dtype=float32, type="pure_intrin")
-              pad_temp.shared[((threadIdx.x_1*7) + 4)] = @tvm_if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), (float32*)placeholder_4[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 442)]), 0f32, dtype=float32, type="pure_intrin")
-              pad_temp.shared[((threadIdx.x_1*7) + 5)] = @tvm_if_then_else((((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)) && (((blockIdx.x*112) + (threadIdx.x_1*7)) < 217)), (float32*)placeholder_4[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 441)]), 0f32, dtype=float32, type="pure_intrin")
-              pad_temp.shared[((threadIdx.x_1*7) + 6)] = @tvm_if_then_else((((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)) && (((blockIdx.x*112) + (threadIdx.x_1*7)) < 216)), (float32*)placeholder_4[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 440)]), 0f32, dtype=float32, type="pure_intrin")
+              pad_temp.shared[(threadIdx.x_1*7)] = @tir.if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), (float32*)placeholder_5[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 446)], 0f32, dtype=float32)
+              pad_temp.shared[((threadIdx.x_1*7) + 1)] = @tir.if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), (float32*)placeholder_5[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 445)], 0f32, dtype=float32)
+              pad_temp.shared[((threadIdx.x_1*7) + 2)] = @tir.if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), (float32*)placeholder_5[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 444)], 0f32, dtype=float32)
+              pad_temp.shared[((threadIdx.x_1*7) + 3)] = @tir.if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), (float32*)placeholder_5[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 443)], 0f32, dtype=float32)
+              pad_temp.shared[((threadIdx.x_1*7) + 4)] = @tir.if_then_else(((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)), (float32*)placeholder_5[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 442)], 0f32, dtype=float32)
+              pad_temp.shared[((threadIdx.x_1*7) + 5)] = @tir.if_then_else((((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)) && (((blockIdx.x*112) + (threadIdx.x_1*7)) < 217)), (float32*)placeholder_5[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 441)], 0f32, dtype=float32)
+              pad_temp.shared[((threadIdx.x_1*7) + 6)] = @tir.if_then_else((((2 <= (blockIdx.y + ry.outer)) && ((blockIdx.y + ry.outer) < 226)) && (((blockIdx.x*112) + (threadIdx.x_1*7)) < 216)), (float32*)placeholder_5[((((((rc.outer*50176) + (blockIdx.y*224)) + (ry.outer*224)) + (blockIdx.x*112)) + (threadIdx.x_1*7)) - 440)], 0f32, dtype=float32)
             }
             attr [IterVar(threadIdx.z_2, (nullptr), "ThreadIndex", "threadIdx.z")] "thread_extent" = 1;
             attr [IterVar(threadIdx.y_2, (nullptr), "ThreadIndex", "threadIdx.y")] "thread_extent" = 1;
             attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 16;
-            if @likely((threadIdx.x_2 < 2), dtype=bool, type="pure_intrin") {
-              placeholder.shared[threadIdx.x_2] = (float32*)placeholder_5[(((((blockIdx.z*150) + (threadIdx.x_2*75)) + (rc.outer*25)) + (ry.outer*5)) + 4)])
+            if @tir.likely((threadIdx.x_2 < 2), dtype=bool) {
+              placeholder.shared[threadIdx.x_2] = (float32*)placeholder_4[(((((blockIdx.z*150) + (threadIdx.x_2*75)) + (rc.outer*25)) + (ry.outer*5)) + 4)]
             }
-            compute_1[0] = ((float32*)compute_1[0]) + ((float32*)pad_temp.shared[threadIdx.x])*(float32*)placeholder.shared[0])))
-            compute_1[2] = ((float32*)compute_1[2]) + ((float32*)pad_temp.shared[(threadIdx.x + 16)])*(float32*)placeholder.shared[0])))
-            compute_1[4] = ((float32*)compute_1[4]) + ((float32*)pad_temp.shared[(threadIdx.x + 32)])*(float32*)placeholder.shared[0])))
-            compute_1[6] = ((float32*)compute_1[6]) + ((float32*)pad_temp.shared[(threadIdx.x + 48)])*(float32*)placeholder.shared[0])))
-            compute_1[8] = ((float32*)compute_1[8]) + ((float32*)pad_temp.shared[(threadIdx.x + 64)])*(float32*)placeholder.shared[0])))
-            compute_1[10] = ((float32*)compute_1[10]) + ((float32*)pad_temp.shared[(threadIdx.x + 80)])*(float32*)placeholder.shared[0])))
-            compute_1[12] = ((float32*)compute_1[12]) + ((float32*)pad_temp.shared[(threadIdx.x + 96)])*(float32*)placeholder.shared[0])))
-            compute_1[1] = ((float32*)compute_1[1]) + ((float32*)pad_temp.shared[threadIdx.x])*(float32*)placeholder.shared[1])))
-            compute_1[3] = ((float32*)compute_1[3]) + ((float32*)pad_temp.shared[(threadIdx.x + 16)])*(float32*)placeholder.shared[1])))
-            compute_1[5] = ((float32*)compute_1[5]) + ((float32*)pad_temp.shared[(threadIdx.x + 32)])*(float32*)placeholder.shared[1])))
-            compute_1[7] = ((float32*)compute_1[7]) + ((float32*)pad_temp.shared[(threadIdx.x + 48)])*(float32*)placeholder.shared[1])))
-            compute_1[9] = ((float32*)compute_1[9]) + ((float32*)pad_temp.shared[(threadIdx.x + 64)])*(float32*)placeholder.shared[1])))
-            compute_1[11] = ((float32*)compute_1[11]) + ((float32*)pad_temp.shared[(threadIdx.x + 80)])*(float32*)placeholder.shared[1])))
-            compute_1[13] = ((float32*)compute_1[13]) + ((float32*)pad_temp.shared[(threadIdx.x + 96)])*(float32*)placeholder.shared[1])))
+            compute_1[0] = ((float32*)compute_1[0] + ((float32*)pad_temp.shared[threadIdx.x]*(float32*)placeholder.shared[0]))
+            compute_1[2] = ((float32*)compute_1[2] + ((float32*)pad_temp.shared[(threadIdx.x + 16)]*(float32*)placeholder.shared[0]))
+            compute_1[4] = ((float32*)compute_1[4] + ((float32*)pad_temp.shared[(threadIdx.x + 32)]*(float32*)placeholder.shared[0]))
+            compute_1[6] = ((float32*)compute_1[6] + ((float32*)pad_temp.shared[(threadIdx.x + 48)]*(float32*)placeholder.shared[0]))
+            compute_1[8] = ((float32*)compute_1[8] + ((float32*)pad_temp.shared[(threadIdx.x + 64)]*(float32*)placeholder.shared[0]))
+            compute_1[10] = ((float32*)compute_1[10] + ((float32*)pad_temp.shared[(threadIdx.x + 80)]*(float32*)placeholder.shared[0]))
+            compute_1[12] = ((float32*)compute_1[12] + ((float32*)pad_temp.shared[(threadIdx.x + 96)]*(float32*)placeholder.shared[0]))
+            compute_1[1] = ((float32*)compute_1[1] + ((float32*)pad_temp.shared[threadIdx.x]*(float32*)placeholder.shared[1]))
+            compute_1[3] = ((float32*)compute_1[3] + ((float32*)pad_temp.shared[(threadIdx.x + 16)]*(float32*)placeholder.shared[1]))
+            compute_1[5] = ((float32*)compute_1[5] + ((float32*)pad_temp.shared[(threadIdx.x + 32)]*(float32*)placeholder.shared[1]))
+            compute_1[7] = ((float32*)compute_1[7] + ((float32*)pad_temp.shared[(threadIdx.x + 48)]*(float32*)placeholder.shared[1]))
+            compute_1[9] = ((float32*)compute_1[9] + ((float32*)pad_temp.shared[(threadIdx.x + 64)]*(float32*)placeholder.shared[1]))
+            compute_1[11] = ((float32*)compute_1[11] + ((float32*)pad_temp.shared[(threadIdx.x + 80)]*(float32*)placeholder.shared[1]))
+            compute_1[13] = ((float32*)compute_1[13] + ((float32*)pad_temp.shared[(threadIdx.x + 96)]*(float32*)placeholder.shared[1]))
           }
         }
-        compute[((((blockIdx.z*100352) + (blockIdx.y*224)) + (blockIdx.x*112)) + threadIdx.x)] = max((float32*)compute_1[0]), 0f32)
-        compute[(((((blockIdx.z*100352) + (blockIdx.y*224)) + (blockIdx.x*112)) + threadIdx.x) + 16)] = max((float32*)compute_1[2]), 0f32)
-        compute[(((((blockIdx.z*100352) + (blockIdx.y*224)) + (blockIdx.x*112)) + threadIdx.x) + 32)] = max((float32*)compute_1[4]), 0f32)
-        compute[(((((blockIdx.z*100352) + (blockIdx.y*224)) + (blockIdx.x*112)) + threadIdx.x) + 48)] = max((float32*)compute_1[6]), 0f32)
-        compute[(((((blockIdx.z*100352) + (blockIdx.y*224)) + (blockIdx.x*112)) + threadIdx.x) + 64)] = max((float32*)compute_1[8]), 0f32)
-        compute[(((((blockIdx.z*100352) + (blockIdx.y*224)) + (blockIdx.x*112)) + threadIdx.x) + 80)] = max((float32*)compute_1[10]), 0f32)
-        compute[(((((blockIdx.z*100352) + (blockIdx.y*224)) + (blockIdx.x*112)) + threadIdx.x) + 96)] = max((float32*)compute_1[12]), 0f32)
-        compute[(((((blockIdx.z*100352) + (blockIdx.y*224)) + (blockIdx.x*112)) + threadIdx.x) + 50176)] = max((float32*)compute_1[1]), 0f32)
-        compute[(((((blockIdx.z*100352) + (blockIdx.y*224)) + (blockIdx.x*112)) + threadIdx.x) + 50192)] = max((float32*)compute_1[3]), 0f32)
-        compute[(((((blockIdx.z*100352) + (blockIdx.y*224)) + (blockIdx.x*112)) + threadIdx.x) + 50208)] = max((float32*)compute_1[5]), 0f32)
-        compute[(((((blockIdx.z*100352) + (blockIdx.y*224)) + (blockIdx.x*112)) + threadIdx.x) + 50224)] = max((float32*)compute_1[7]), 0f32)
-        compute[(((((blockIdx.z*100352) + (blockIdx.y*224)) + (blockIdx.x*112)) + threadIdx.x) + 50240)] = max((float32*)compute_1[9]), 0f32)
-        compute[(((((blockIdx.z*100352) + (blockIdx.y*224)) + (blockIdx.x*112)) + threadIdx.x) + 50256)] = max((float32*)compute_1[11]), 0f32)
-        compute[(((((blockIdx.z*100352) + (blockIdx.y*224)) + (blockIdx.x*112)) + threadIdx.x) + 50272)] = max((float32*)compute_1[13]), 0f32)
+        compute[((((blockIdx.z*100352) + (blockIdx.y*224)) + (blockIdx.x*112)) + threadIdx.x)] = max((float32*)compute_1[0], 0f32)
+        compute[(((((blockIdx.z*100352) + (blockIdx.y*224)) + (blockIdx.x*112)) + threadIdx.x) + 16)] = max((float32*)compute_1[2], 0f32)
+        compute[(((((blockIdx.z*100352) + (blockIdx.y*224)) + (blockIdx.x*112)) + threadIdx.x) + 32)] = max((float32*)compute_1[4], 0f32)
+        compute[(((((blockIdx.z*100352) + (blockIdx.y*224)) + (blockIdx.x*112)) + threadIdx.x) + 48)] = max((float32*)compute_1[6], 0f32)
+        compute[(((((blockIdx.z*100352) + (blockIdx.y*224)) + (blockIdx.x*112)) + threadIdx.x) + 64)] = max((float32*)compute_1[8], 0f32)
+        compute[(((((blockIdx.z*100352) + (blockIdx.y*224)) + (blockIdx.x*112)) + threadIdx.x) + 80)] = max((float32*)compute_1[10], 0f32)
+        compute[(((((blockIdx.z*100352) + (blockIdx.y*224)) + (blockIdx.x*112)) + threadIdx.x) + 96)] = max((float32*)compute_1[12], 0f32)
+        compute[(((((blockIdx.z*100352) + (blockIdx.y*224)) + (blockIdx.x*112)) + threadIdx.x) + 50176)] = max((float32*)compute_1[1], 0f32)
+        compute[(((((blockIdx.z*100352) + (blockIdx.y*224)) + (blockIdx.x*112)) + threadIdx.x) + 50192)] = max((float32*)compute_1[3], 0f32)
+        compute[(((((blockIdx.z*100352) + (blockIdx.y*224)) + (blockIdx.x*112)) + threadIdx.x) + 50208)] = max((float32*)compute_1[5], 0f32)
+        compute[(((((blockIdx.z*100352) + (blockIdx.y*224)) + (blockIdx.x*112)) + threadIdx.x) + 50224)] = max((float32*)compute_1[7], 0f32)
+        compute[(((((blockIdx.z*100352) + (blockIdx.y*224)) + (blockIdx.x*112)) + threadIdx.x) + 50240)] = max((float32*)compute_1[9], 0f32)
+        compute[(((((blockIdx.z*100352) + (blockIdx.y*224)) + (blockIdx.x*112)) + threadIdx.x) + 50256)] = max((float32*)compute_1[11], 0f32)
+        compute[(((((blockIdx.z*100352) + (blockIdx.y*224)) + (blockIdx.x*112)) + threadIdx.x) + 50272)] = max((float32*)compute_1[13], 0f32)
       }
     }
 
diff --git a/docs/_sources/tutorials/topi/sg_execution_times.rst.txt b/docs/_sources/tutorials/topi/sg_execution_times.rst.txt
index fc5424b..c861977 100644
--- a/docs/_sources/tutorials/topi/sg_execution_times.rst.txt
+++ b/docs/_sources/tutorials/topi/sg_execution_times.rst.txt
@@ -5,6 +5,6 @@
 
 Computation times
 =================
-**00:00.536** total execution time for **tutorials_topi** files:
+**00:00.491** total execution time for **tutorials_topi** files:
 
-- **00:00.536**: :ref:`sphx_glr_tutorials_topi_intro_topi.py` (``intro_topi.py``)
+- **00:00.491**: :ref:`sphx_glr_tutorials_topi_intro_topi.py` (``intro_topi.py``)
diff --git a/docs/_sources/vta/install.rst.txt b/docs/_sources/vta/install.rst.txt
index b68fab7..fe6b468 100644
--- a/docs/_sources/vta/install.rst.txt
+++ b/docs/_sources/vta/install.rst.txt
@@ -20,15 +20,15 @@ VTA Installation Guide
 
 We present three installation guides, each extending on the previous one:
 
-1. `Simulator Installation`_
+1. `VTA Simulator Installation`_
 2. `Xilinx Pynq FPGA Setup`_
 3. `Intel DE10 FPGA Setup`_
 4. `Bitstream Generation with Xilinx Toolchains`_
 5. `Bitstream Generation with Intel Toolchains`_
 
 
-Simulator Installation
-----------------------
+VTA Simulator Installation
+--------------------------
 
 You need `TVM installed <https://tvm.apache.org/docs/install/index.html>`_ on your machine.
 For a quick and easy start, checkout the `Docker Guide <https://tvm.apache.org/docs/install/docker.html>`_.
@@ -199,6 +199,7 @@ In addition, you'll need to edit the ``vta_config.json`` file on the host to ind
 This time again, we will run the 2D convolution testbench.
 Beforehand, we need to program the Pynq board FPGA with a VTA bitstream, and build the VTA runtime via RPC.
 The following ``test_program_rpc.py`` script will perform two operations:
+
 * FPGA programming, by downloading a pre-compiled bitstream from a `VTA bitstream repository <https://github.com/uwsaml/vta-distro>`_ that matches the default ``vta_config.json`` configuration set by the host, and sending it over to the Pynq via RPC to program the Pynq's FPGA.
 * Runtime building on the Pynq, which needs to be run every time the ``vta_config.json`` configuration is modified. This ensures that the VTA software runtime that generates the accelerator's executable via just-in-time (JIT) compilation matches the specifications of the VTA design that is programmed on the FPGA. The build process takes about 30 seconds to complete so be patient!
 
@@ -226,14 +227,14 @@ You can also try out our `VTA programming tutorials <https://tvm.apache.org/docs
 Intel DE10 FPGA Setup
 ---------------------
 
-Similar to the PYNQ side setup steps, this third guide bring us the details on how can we setup up the Linux environment for Intel FPGA boards like DE10-Nano.
+Similar to the Pynq-side setup steps, this third guide details how to set up the Linux environment for Intel FPGA boards like the DE10-Nano.
 
 In terms of hardware components, you would need the `DE10-Nano Development Kit <https://www.terasic.com.tw/cgi-bin/page/archive.pl?Language=English&No=1046>`_, which can be acquired for $130, or $100 for academics from `Terasic <https://www.terasic.com.tw/>`_. A microSD card is delivered with the kit. Power cables and USB cables are included as well. However, an additional Ethernet cable is needed to connect the board to the LAN.
 
 The rest of this guide provides the steps to
 
 * Flash the microSD card with latest Angstrom Linux image
-* Cross compilation setup
+* Cross-compilation setup
 * Device-side RPC server setup and deployment
 
 DE10-Nano Board Setup
@@ -269,7 +270,7 @@ This would take a few minutes for your PC to write the whole file systems into t
 After this process completes, you are ready to unmount the SD card and insert it into your DE10-Nano board.
 Now you can connect the power cable and serial port to boot the Angstrom Linux.
 
-   **Note**: When boot up from the microSD card, you might notice the incompatibility of the linux kernel ``zImage`` in the microSD card.
+   **Note**: When booting from the microSD card, you might notice an incompatibility with the Linux kernel ``zImage`` on the microSD card.
    In this case, you might need to build the ``zImage`` file of your own from `socfpga-4.9.78-ltsi <https://github.com/altera-opensource/linux-socfpga/tree/socfpga-4.9.78-ltsi>`_ branch of the `linux-socfpga <https://github.com/altera-opensource/linux-socfpga>`_ repository.
    For a quick fix, you can also download a prebuilt version of the ``zImage`` file `from this link <https://raw.githubusercontent.com/liangfu/de10-nano-supplement/master/zImage>`_.
 
@@ -304,7 +305,7 @@ Specifically, to compile application executables for the system, you need to dow
 Bitstream Generation with Xilinx Toolchains
 -------------------------------------------
 
-If you're interested in generating the Xilinx FPGA bitstream on your own instead of using the pre-built VTA bistreams, follow the instructions below.
+If you're interested in generating the Xilinx FPGA bitstream on your own instead of using the pre-built VTA bitstreams, follow the instructions below.
 
 Xilinx Toolchain Installation
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -364,7 +365,7 @@ The last step is to update your ``~/.bashrc`` with the following lines. This wil
    export XILINX_VIVADO=${XILINX_PATH}/Vivado/2018.3
    export PATH=${XILINX_VIVADO}/bin:${PATH}
 
-HLS-based Custom VTA Bitstream Compilation for PYNQ
+HLS-based Custom VTA Bitstream Compilation for Pynq
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 High-level hardware parameters are listed in the VTA configuration file and can be customized by the user.
diff --git a/docs/_sources/vta/tutorials/autotvm/sg_execution_times.rst.txt b/docs/_sources/vta/tutorials/autotvm/sg_execution_times.rst.txt
index 3bae1f9..e671bc9 100644
--- a/docs/_sources/vta/tutorials/autotvm/sg_execution_times.rst.txt
+++ b/docs/_sources/vta/tutorials/autotvm/sg_execution_times.rst.txt
@@ -5,6 +5,6 @@
 
 Computation times
 =================
-**00:03.860** total execution time for **vta_tutorials_autotvm** files:
+**00:03.730** total execution time for **vta_tutorials_autotvm** files:
 
-- **00:03.860**: :ref:`sphx_glr_vta_tutorials_autotvm_tune_relay_vta.py` (``tune_relay_vta.py``)
+- **00:03.730**: :ref:`sphx_glr_vta_tutorials_autotvm_tune_relay_vta.py` (``tune_relay_vta.py``)
diff --git a/docs/_sources/vta/tutorials/autotvm/tune_relay_vta.rst.txt b/docs/_sources/vta/tutorials/autotvm/tune_relay_vta.rst.txt
index 8d33862..847eab9 100644
--- a/docs/_sources/vta/tutorials/autotvm/tune_relay_vta.rst.txt
+++ b/docs/_sources/vta/tutorials/autotvm/tune_relay_vta.rst.txt
@@ -481,29 +481,7 @@ Finally, we launch tuning jobs and evaluate the end-to-end performance.
  .. code-block:: none
 
     Extract tasks...
-
    ...1%, 0.01 MB, 40 KB/s, 0 seconds passed
    ...2%, 0.02 MB, 81 KB/s, 0 seconds passed
    ...3%, 0.02 MB, 122 KB/s, 0 seconds passed
    ...4%, 0.03 MB, 162 KB/s, 0 seconds passed
    ...5%, 0.04 MB, 202 KB/s, 0 seconds passed
    ...6%, 0.05 MB, 242 KB/s, 0 seconds passed
    ...7%, 0.05 MB, 282 KB/s, 0 seconds passed
    ...8%, 0.06 MB, 322 KB/s, 0 seconds passed
    ...9%, 0.07 MB, 362 KB/s, 0 seconds passed
    ...10%, 0.08 MB, 402 KB/s, 0 seconds passed
    ...11%, 0.09 MB, 441 KB/s, 0 seconds passed
    ...13%, 0.09 MB, 481 KB/s, 0 seconds passed
    ...14%, 0.10 MB, 520 KB/s, 0 seconds passed
    ...15%, 0.11 MB, 559 KB/s, 0 seconds passed
    ...16%, 0.12 MB, 598 KB/s, 0 seconds passed
    ...17%, 0.12 MB, 638 KB/s, 0 seconds passed
    ...18%, 0.13 MB, 677 KB/s, 0 seconds passed
    ...19%, 0.14 MB, 715 KB/s, 0 seconds passed
    ...20%, 0.15 MB, 755 KB/s, 0 seconds passed
    ...21%, 0.16 MB, 793 KB/s, 0 seconds passed
    ...22%, 0.16 MB, 832 KB/s, 0 seconds passed
 
    ...23%, 0.17 MB, 870 KB/s, 0 seconds passed
    ...25%, 0.18 MB, 909 KB/s, 0 seconds passed
    ...26%, 0.19 MB, 948 KB/s, 0 seconds passed
    ...27%, 0.20 MB, 986 KB/s, 0 seconds passed
    ...28%, 0.20 MB, 1022 KB/s, 0 seconds passed
    ...29%, 0.21 MB, 1061 KB/s, 0 seconds passed
    ...30%, 0.22 MB, 1099 KB/s, 0 seconds passed
    ...31%, 0.23 MB, 1136 KB/s, 0 seconds passed
    ...32%, 0.23 MB, 1175 KB/s, 0 seconds passed
    ...33%, 0.24 MB, 1213 KB/s, 0 seconds passed
    ...34%, 0.25 MB, 1251 KB/s, 0 seconds passed
    ...35%, 0.26 MB, 1287 KB/s, 0 seconds passed
    ...36%, 0.27 MB, 1325 KB/s, 0 seconds passed
    ...38%, 0.27 MB, 1362 KB/s, 0 seconds passed
    ...39%, 0.28 MB, 1400 KB/s, 0 seconds passed
    ...40%, 0.29 MB, 1438 KB/s, 0 seconds passed
    ...41%, 0.30 MB, 1475 KB/s, 0 seconds passed
    ...42%, 0.30 MB, 1512 KB/s, 0 seconds passed
    ...43%, 0.31 MB, 1550 KB/s, 0 seconds passed
    ...44%, 0.32 MB, 1584 KB/s, 0 seconds passed
    ...45%, 0.33 MB, 1622 KB/s, 0 seconds passed
    ...46%, 0.34 MB, 1659 KB/s, 0 seconds passed
    ...47%, 0.34 MB, 1697 KB/s, 0 seconds passed
    ...48%, 0.35 MB, 1734 KB/s, 0 seconds passed
    ...50%, 0.36 MB, 1771 KB/s, 0 seconds passed
    ...51%, 0.37 MB, 1807 KB/s, 0 seconds passed
    ...52%, 0.38 MB, 1845 KB/s, 0 seconds passed
    ...53%, 0.38 MB, 1879 KB/s, 0 seconds passed
    ...54%, 0.39 MB, 1916 KB/s, 0 seconds passed
    ...55%, 0.40 MB, 1953 KB/s, 0 seconds passed
    ...56%, 0.41 MB, 1991 KB/s, 0 seconds passed
    ...57%, 0.41 MB, 2028 KB/s, 0 seconds passed
    ...58%, 0.42 MB, 2065 KB/s, 0 seconds passed
    ...59%, 0.43 MB, 2099 KB/s, 0 seconds passed
    ...60%, 0.44 MB, 2137 KB/s, 0 seconds passed
    ...62%, 0.45 MB, 2173 KB/s, 0 seconds passed
    ...63%, 0.45 MB, 2210 KB/s, 0 seconds passed
    ...64%, 0.46 MB, 2246 KB/s, 0 seconds passed
    ...65%, 0.47 MB, 2283 KB/s, 0 seconds passed
    ...66%, 0.48 MB, 2317 KB/s, 0 seconds passed
    ...67%, 0.48 MB, 2354 KB/s, 0 seconds passed
    ...68%, 0.49 MB, 2389 KB/s, 0 seconds passed
    ...69%, 0.50 MB, 2426 KB/s, 0 seconds passed
    ...70%, 0.51 MB, 2462 KB/s, 0 seconds passed
    ...71%, 0.52 MB, 2499 KB/s, 0 seconds passed
    ...72%, 0.52 MB, 2531 KB/s, 0 seconds passed
    ...73%, 0.53 MB, 2568 KB/s, 0 seconds passed
    ...75%, 0.54 MB, 2604 KB/s, 0 seconds passed
    ...76%, 0.55 MB, 2640 KB/s, 0 seconds passed
    ...77%, 0.55 MB, 2669 KB/s, 0 seconds passed
    ...78%, 0.56 MB, 2706 KB/s, 0 seconds passed
    ...79%, 0.57 MB, 2741 KB/s, 0 seconds passed
    ...80%, 0.58 MB, 2778 KB/s, 0 seconds passed
    ...81%, 0.59 MB, 2814 KB/s, 0 seconds passed
    ...82%, 0.59 MB, 2850 KB/s, 0 seconds passed
    ...83%, 0.60 MB, 2886 KB/s, 0 seconds passed
    ...84%, 0.61 MB, 2922 KB/s, 0 seconds passed
    ...85%, 0.62 MB, 2957 KB/s, 0 seconds passed
    ...87%, 0.62 MB, 2993 KB/s, 0 seconds passed
    ...88%, 0.63 MB, 3026 KB/s, 0 seconds passed
    ...89%, 0.64 MB, 3062 KB/s, 0 seconds passed
    ...90%, 0.65 MB, 3095 KB/s, 0 seconds passed
    ...91%, 0.66 MB, 3131 KB/s, 0 seconds passed
    ...92%, 0.66 MB, 3163 KB/s, 0 seconds passed
    ...93%, 0.67 MB, 3199 KB/s, 0 seconds passed
    ...94%, 0.68 MB, 3234 KB/s, 0 seconds passed
    ...95%, 0.69 MB, 3270 KB/s, 0 seconds passed
    ...96%, 0.70 MB, 3305 KB/s, 0 seconds passed
    ...97%, 0.70 MB, 3341 KB/s, 0 seconds passed
    ...99%, 0.71 MB, 3376 KB/s, 0 seconds passed
    ...100%, 0.72 MB, 3409 KB/s, 0 seconds passed
-    Exception in thread Thread-9:
-    Traceback (most recent call last):
-      File "/usr/lib/python3.6/threading.py", line 916, in _bootstrap_inner
-        self.run()
-      File "/usr/lib/python3.6/threading.py", line 864, in run
-        self._target(*self._args, **self._kwargs)
-      File "/workspace/docs/../python/tvm/autotvm/task/relay_integration.py", line 48, in _lower
-        grc.codegen(mod["main"])
-      File "/workspace/docs/../python/tvm/relay/backend/graph_runtime_codegen.py", line 88, in codegen
-        arr = self._get_param_by_name(key)
-      File "tvm/_ffi/_cython/./packed_func.pxi", line 321, in tvm._ffi._cy3.core.PackedFuncBase.__call__
-      File "tvm/_ffi/_cython/./packed_func.pxi", line 256, in tvm._ffi._cy3.core.FuncCall
-      File "tvm/_ffi/_cython/./packed_func.pxi", line 245, in tvm._ffi._cy3.core.FuncCall3
-      File "tvm/_ffi/_cython/./base.pxi", line 160, in tvm._ffi._cy3.core.CALL
-    tvm._ffi.base.TVMError: Traceback (most recent call last):
-      [bt] (3) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f259437cd11]
-      [bt] (2) /workspace/build/libtvm.so(std::_Function_handler<void (tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*), tvm::relay::backend::GraphRuntimeCodegenModule::GetFunction(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, tvm::runtime::ObjectPtr<tvm::runtime::Object> const&)::{lambda(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*)#5}>::_M_invoke(std::_Any_data const&, tvm::runtime::TVMArgs&&, tvm::runtime::TVMRetValue*&&)+0x6f) [0x7f259 [...]
-      [bt] (1) /workspace/build/libtvm.so(tvm::runtime::TVMArgValue::operator std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >[abi:cxx11]() const+0x141) [0x7f259397c201]
-      [bt] (0) /workspace/build/libtvm.so(dmlc::LogMessageFatal::~LogMessageFatal()+0x67) [0x7f259396a047]
-      File "/workspace/include/tvm/runtime/packed_func.h", line 491
-    TVMError: Check failed: type_code_ == kTVMStr (8 vs. 11) : expected str but get Object
-
+
    ...1%, 0.01 MB, 37 KB/s, 0 seconds passed
    ...2%, 0.02 MB, 74 KB/s, 0 seconds passed
    ...3%, 0.02 MB, 111 KB/s, 0 seconds passed
    ...4%, 0.03 MB, 148 KB/s, 0 seconds passed
    ...5%, 0.04 MB, 184 KB/s, 0 seconds passed
    ...6%, 0.05 MB, 221 KB/s, 0 seconds passed
    ...7%, 0.05 MB, 257 KB/s, 0 seconds passed
    ...8%, 0.06 MB, 294 KB/s, 0 seconds passed
    ...9%, 0.07 MB, 329 KB/s, 0 seconds passed
    ...10%, 0.08 MB, 366 KB/s, 0 seconds passed
    ...11%, 0.09 MB, 402 KB/s, 0 seconds passed
    ...13%, 0.09 MB, 438 KB/s, 0 seconds passed
    ...14%, 0.10 MB, 474 KB/s, 0 seconds passed
    ...15%, 0.11 MB, 509 KB/s, 0 seconds passed
    ...16%, 0.12 MB, 545 KB/s, 0 seconds passed
    ...17%, 0.12 MB, 580 KB/s, 0 seconds passed
    ...18%, 0.13 MB, 616 KB/s, 0 seconds passed
    ...19%, 0.14 MB, 651 KB/s, 0 seconds passed
    ...20%, 0.15 MB, 687 KB/s, 0 seconds passed
    ...21%, 0.16 MB, 722 KB/s, 0 seconds passed
    ...22%, 0.16 MB, 757 KB/s, 0 seconds passed
 
    ...23%, 0.17 MB, 792 KB/s, 0 seconds passed
    ...25%, 0.18 MB, 827 KB/s, 0 seconds passed
    ...26%, 0.19 MB, 863 KB/s, 0 seconds passed
    ...27%, 0.20 MB, 897 KB/s, 0 seconds passed
    ...28%, 0.20 MB, 932 KB/s, 0 seconds passed
    ...29%, 0.21 MB, 967 KB/s, 0 seconds passed
    ...30%, 0.22 MB, 1002 KB/s, 0 seconds passed
    ...31%, 0.23 MB, 1037 KB/s, 0 seconds passed
    ...32%, 0.23 MB, 1071 KB/s, 0 seconds passed
    ...33%, 0.24 MB, 1106 KB/s, 0 seconds passed
    ...34%, 0.25 MB, 1139 KB/s, 0 seconds passed
    ...35%, 0.26 MB, 1174 KB/s, 0 seconds passed
    ...36%, 0.27 MB, 1208 KB/s, 0 seconds passed
    ...38%, 0.27 MB, 1243 KB/s, 0 seconds passed
    ...39%, 0.28 MB, 1278 KB/s, 0 seconds passed
    ...40%, 0.29 MB, 1313 KB/s, 0 seconds passed
    ...41%, 0.30 MB, 1344 KB/s, 0 seconds passed
    ...42%, 0.30 MB, 1379 KB/s, 0 seconds passed
    ...43%, 0.31 MB, 1413 KB/s, 0 seconds passed
    ...44%, 0.32 MB, 1447 KB/s, 0 seconds passed
    ...45%, 0.33 MB, 1
 479 KB/s, 0 seconds passed
    ...46%, 0.34 MB, 1514 KB/s, 0 seconds passed
    ...47%, 0.34 MB, 1548 KB/s, 0 seconds passed
    ...48%, 0.35 MB, 1583 KB/s, 0 seconds passed
    ...50%, 0.36 MB, 1616 KB/s, 0 seconds passed
    ...51%, 0.37 MB, 1650 KB/s, 0 seconds passed
    ...52%, 0.38 MB, 1682 KB/s, 0 seconds passed
    ...53%, 0.38 MB, 1716 KB/s, 0 seconds passed
    ...54%, 0.39 MB, 1749 KB/s, 0 seconds passed
    ...55%, 0.40 MB, 1783 KB/s, 0 seconds passed
    ...56%, 0.41 MB, 1817 KB/s, 0 seconds passed
    ...57%, 0.41 MB, 1851 KB/s, 0 seconds passed
    ...58%, 0.42 MB, 1882 KB/s, 0 seconds passed
    ...59%, 0.43 MB, 1917 KB/s, 0 seconds passed
    ...60%, 0.44 MB, 1949 KB/s, 0 seconds passed
    ...62%, 0.45 MB, 1983 KB/s, 0 seconds passed
    ...63%, 0.45 MB, 2015 KB/s, 0 seconds passed
    ...64%, 0.46 MB, 2049 KB/s, 0 seconds passed
    ...65%, 0.47 MB, 2080 KB/s, 0 seconds passed
    ...66%, 0.48 MB, 2113 KB/s, 0 seconds passed
    ...67%, 0.48 MB, 2146 KB/s, 0 secon
 ds passed
    ...68%, 0.49 MB, 2180 KB/s, 0 seconds passed
    ...69%, 0.50 MB, 2211 KB/s, 0 seconds passed
    ...70%, 0.51 MB, 2244 KB/s, 0 seconds passed
    ...71%, 0.52 MB, 2272 KB/s, 0 seconds passed
    ...72%, 0.52 MB, 2305 KB/s, 0 seconds passed
    ...73%, 0.53 MB, 2338 KB/s, 0 seconds passed
    ...75%, 0.54 MB, 2372 KB/s, 0 seconds passed
    ...76%, 0.55 MB, 2405 KB/s, 0 seconds passed
    ...77%, 0.55 MB, 2438 KB/s, 0 seconds passed
    ...78%, 0.56 MB, 2468 KB/s, 0 seconds passed
    ...79%, 0.57 MB, 2502 KB/s, 0 seconds passed
    ...80%, 0.58 MB, 2535 KB/s, 0 seconds passed
    ...81%, 0.59 MB, 2568 KB/s, 0 seconds passed
    ...82%, 0.59 MB, 2601 KB/s, 0 seconds passed
    ...83%, 0.60 MB, 2634 KB/s, 0 seconds passed
    ...84%, 0.61 MB, 2662 KB/s, 0 seconds passed
    ...85%, 0.62 MB, 2695 KB/s, 0 seconds passed
    ...87%, 0.62 MB, 2727 KB/s, 0 seconds passed
    ...88%, 0.63 MB, 2760 KB/s, 0 seconds passed
    ...89%, 0.64 MB, 2793 KB/s, 0 seconds passed
    ...
 90%, 0.65 MB, 2826 KB/s, 0 seconds passed
    ...91%, 0.66 MB, 2858 KB/s, 0 seconds passed
    ...92%, 0.66 MB, 2891 KB/s, 0 seconds passed
    ...93%, 0.67 MB, 2920 KB/s, 0 seconds passed
    ...94%, 0.68 MB, 2953 KB/s, 0 seconds passed
    ...95%, 0.69 MB, 2986 KB/s, 0 seconds passed
    ...96%, 0.70 MB, 3018 KB/s, 0 seconds passed
    ...97%, 0.70 MB, 3049 KB/s, 0 seconds passed
    ...99%, 0.71 MB, 3081 KB/s, 0 seconds passed
    ...100%, 0.72 MB, 3112 KB/s, 0 seconds passed
     Extracted 10 conv2d tasks:
     (1, 14, 14, 256, 512, 1, 1, 0, 0, 2, 2)
     (1, 28, 28, 128, 256, 1, 1, 0, 0, 2, 2)
diff --git a/docs/_sources/vta/tutorials/frontend/deploy_classification.rst.txt b/docs/_sources/vta/tutorials/frontend/deploy_classification.rst.txt
index f8ac6af..dca7412 100644
--- a/docs/_sources/vta/tutorials/frontend/deploy_classification.rst.txt
+++ b/docs/_sources/vta/tutorials/frontend/deploy_classification.rst.txt
@@ -243,8 +243,8 @@ The compilation steps are:
 
  .. code-block:: none
 
-
    ...12%, 0.01 MB, 36 KB/s, 0 seconds passed
    ...25%, 0.02 MB, 72 KB/s, 0 seconds passed
    ...38%, 0.02 MB, 109 KB/s, 0 seconds passed
    ...51%, 0.03 MB, 145 KB/s, 0 seconds passed
    ...64%, 0.04 MB, 181 KB/s, 0 seconds passed
    ...77%, 0.05 MB, 217 KB/s, 0 seconds passed
    ...90%, 0.05 MB, 253 KB/s, 0 seconds passed
    ...100%, 0.06 MB, 288 KB/s, 0 seconds passed
-    resnet18_v1 inference graph built in 4.19s!
+
    ...12%, 0.01 MB, 41 KB/s, 0 seconds passed
    ...25%, 0.02 MB, 82 KB/s, 0 seconds passed
    ...38%, 0.02 MB, 123 KB/s, 0 seconds passed
    ...51%, 0.03 MB, 164 KB/s, 0 seconds passed
    ...64%, 0.04 MB, 205 KB/s, 0 seconds passed
    ...77%, 0.05 MB, 245 KB/s, 0 seconds passed
    ...89%, 0.05 MB, 286 KB/s, 0 seconds passed
    ...100%, 0.06 MB, 326 KB/s, 0 seconds passed
+    resnet18_v1 inference graph built in 3.97s!
 
 
 
diff --git a/docs/_sources/vta/tutorials/frontend/deploy_detection.rst.txt b/docs/_sources/vta/tutorials/frontend/deploy_detection.rst.txt
index a84ad23..e995f53 100644
--- a/docs/_sources/vta/tutorials/frontend/deploy_detection.rst.txt
+++ b/docs/_sources/vta/tutorials/frontend/deploy_detection.rst.txt
@@ -315,7 +315,7 @@ The compilation steps are:
 
  .. code-block:: none
 
-    yolov3-tiny inference graph built in 5.53s!
+    yolov3-tiny inference graph built in 5.40s!
 
 
 
diff --git a/docs/_sources/vta/tutorials/frontend/sg_execution_times.rst.txt b/docs/_sources/vta/tutorials/frontend/sg_execution_times.rst.txt
index e00fd79..f4d2a0f 100644
--- a/docs/_sources/vta/tutorials/frontend/sg_execution_times.rst.txt
+++ b/docs/_sources/vta/tutorials/frontend/sg_execution_times.rst.txt
@@ -5,7 +5,7 @@
 
 Computation times
 =================
-**00:54.502** total execution time for **vta_tutorials_frontend** files:
+**00:53.024** total execution time for **vta_tutorials_frontend** files:
 
-- **00:32.900**: :ref:`sphx_glr_vta_tutorials_frontend_deploy_detection.py` (``deploy_detection.py``)
-- **00:21.602**: :ref:`sphx_glr_vta_tutorials_frontend_deploy_classification.py` (``deploy_classification.py``)
+- **00:31.870**: :ref:`sphx_glr_vta_tutorials_frontend_deploy_detection.py` (``deploy_detection.py``)
+- **00:21.153**: :ref:`sphx_glr_vta_tutorials_frontend_deploy_classification.py` (``deploy_classification.py``)
diff --git a/docs/_sources/vta/tutorials/matrix_multiply.rst.txt b/docs/_sources/vta/tutorials/matrix_multiply.rst.txt
index fa8d003..565e62e 100644
--- a/docs/_sources/vta/tutorials/matrix_multiply.rst.txt
+++ b/docs/_sources/vta/tutorials/matrix_multiply.rst.txt
@@ -303,11 +303,11 @@ After we construct the schedule, by default the schedule computes
  .. code-block:: none
 
     primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
-      attr = {"tir.noalias": True, "global_symbol": "main"}
+      attr = {"global_symbol": "main", "tir.noalias": True}
       buffers = {B: Buffer(B_2: handle, int8, [16, 16, 16, 16], []),
-                 A: Buffer(A_2: handle, int8, [1, 16, 1, 16], []),
-                 C: Buffer(C_2: handle, int8, [1, 16, 1, 16], [])}
-      buffer_map = {C_1: C, B_1: B, A_1: A} {
+                 C: Buffer(C_2: handle, int8, [1, 16, 1, 16], []),
+                 A: Buffer(A_2: handle, int8, [1, 16, 1, 16], [])}
+      buffer_map = {A_1: A, B_1: B, C_1: C} {
       attr [A_buf: handle] "storage_scope" = "global";
       allocate(A_buf, int8, [256]);
       attr [B_buf: handle] "storage_scope" = "global";
@@ -316,14 +316,14 @@ After we construct the schedule, by default the schedule computes
       allocate(C_buf, int32, [256]) {
         for (i1: int32, 0, 16) {
           for (i3: int32, 0, 16) {
-            A_buf[((i1*16) + i3)] = (int8*)A_2[((i1*16) + i3)])
+            A_buf[((i1*16) + i3)] = (int8*)A_2[((i1*16) + i3)]
           }
         }
         for (i0: int32, 0, 16) {
           for (i1_1: int32, 0, 16) {
             for (i2: int32, 0, 16) {
               for (i3_1: int32, 0, 16) {
-                B_buf[((((i0*4096) + (i1_1*256)) + (i2*16)) + i3_1)] = (int8*)B_2[((((i0*4096) + (i1_1*256)) + (i2*16)) + i3_1)])
+                B_buf[((((i0*4096) + (i1_1*256)) + (i2*16)) + i3_1)] = (int8*)B_2[((((i0*4096) + (i1_1*256)) + (i2*16)) + i3_1)]
               }
             }
           }
@@ -333,14 +333,14 @@ After we construct the schedule, by default the schedule computes
             C_buf[((co*16) + ci)] = 0
             for (ko: int32, 0, 16) {
               for (ki: int32, 0, 16) {
-                C_buf[((co*16) + ci)] = ((int32*)C_buf[((co*16) + ci)]) + (cast(int32, (int8*)A_buf[((ko*16) + ki)]))*cast(int32, (int8*)B_buf[((((co*4096) + (ko*256)) + (ci*16)) + ki)]))))
+                C_buf[((co*16) + ci)] = ((int32*)C_buf[((co*16) + ci)] + (cast(int32, (int8*)A_buf[((ko*16) + ki)])*cast(int32, (int8*)B_buf[((((co*4096) + (ko*256)) + (ci*16)) + ki)])))
               }
             }
           }
         }
         for (i1_2: int32, 0, 16) {
           for (i3_2: int32, 0, 16) {
-            C_2[((i1_2*16) + i3_2)] = cast(int8, (int32*)C_buf[((i1_2*16) + i3_2)]))
+            C_2[((i1_2*16) + i3_2)] = cast(int8, (int32*)C_buf[((i1_2*16) + i3_2)])
           }
         }
       }
@@ -449,11 +449,11 @@ moving the copy operations into the matrix multiplication loop.
  .. code-block:: none
 
     primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
-      attr = {"tir.noalias": True, "global_symbol": "main"}
-      buffers = {A: Buffer(A_2: handle, int8, [1, 16, 1, 16], []),
+      attr = {"global_symbol": "main", "tir.noalias": True}
+      buffers = {C: Buffer(C_2: handle, int8, [1, 16, 1, 16], []),
                  B: Buffer(B_2: handle, int8, [16, 16, 16, 16], []),
-                 C: Buffer(C_2: handle, int8, [1, 16, 1, 16], [])}
-      buffer_map = {C_1: C, B_1: B, A_1: A} {
+                 A: Buffer(A_2: handle, int8, [1, 16, 1, 16], [])}
+      buffer_map = {A_1: A, B_1: B, C_1: C} {
       attr [C_buf: handle] "storage_scope" = "local.acc_buffer";
       allocate(C_buf, int32, [256]);
       attr [A_buf: handle] "storage_scope" = "local.inp_buffer";
@@ -466,14 +466,14 @@ moving the copy operations into the matrix multiplication loop.
             for (ko: int32, 0, 16) {
               attr [IterVar(i0: int32, (nullptr), "DataPar", "")] "pragma_dma_copy" = 1;
               for (i3: int32, 0, 16) {
-                A_buf[i3] = (int8*)A_2[((ko*16) + i3)])
+                A_buf[i3] = (int8*)A_2[((ko*16) + i3)]
               }
               attr [IterVar(i0_1: int32, (nullptr), "DataPar", "")] "pragma_dma_copy" = 1;
               for (i3_1: int32, 0, 16) {
-                B_buf[i3_1] = (int8*)B_2[((((co*4096) + (ko*256)) + (ci*16)) + i3_1)])
+                B_buf[i3_1] = (int8*)B_2[((((co*4096) + (ko*256)) + (ci*16)) + i3_1)]
               }
               for (ki: int32, 0, 16) {
-                C_buf[((co*16) + ci)] = ((int32*)C_buf[((co*16) + ci)]) + (cast(int32, (int8*)A_buf[ki]))*cast(int32, (int8*)B_buf[ki]))))
+                C_buf[((co*16) + ci)] = ((int32*)C_buf[((co*16) + ci)] + (cast(int32, (int8*)A_buf[ki])*cast(int32, (int8*)B_buf[ki])))
               }
             }
           }
@@ -481,7 +481,7 @@ moving the copy operations into the matrix multiplication loop.
         attr [IterVar(i0_2: int32, (nullptr), "DataPar", "")] "pragma_dma_copy" = 1;
         for (i1: int32, 0, 16) {
           for (i3_2: int32, 0, 16) {
-            C_2[((i1*16) + i3_2)] = cast(int8, (int32*)C_buf[((i1*16) + i3_2)]))
+            C_2[((i1*16) + i3_2)] = cast(int8, (int32*)C_buf[((i1*16) + i3_2)])
           }
         }
       }
@@ -538,46 +538,46 @@ by the VTA runtime JIT compiler.
  .. code-block:: none
 
     primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
-      attr = {"tir.noalias": True, "global_symbol": "main"}
-      buffers = {B: Buffer(B_2: handle, int8, [16, 16, 16, 16], []),
-                 A: Buffer(A_2: handle, int8, [1, 16, 1, 16], []),
-                 C: Buffer(C_2: handle, int8, [1, 16, 1, 16], [])}
-      buffer_map = {C_1: C, B_1: B, A_1: A} {
+      attr = {"global_symbol": "main", "tir.noalias": True}
+      buffers = {C: Buffer(C_2: handle, int8, [1, 16, 1, 16], []),
+                 B: Buffer(B_2: handle, int8, [16, 16, 16, 16], []),
+                 A: Buffer(A_2: handle, int8, [1, 16, 1, 16], [])}
+      buffer_map = {A_1: A, B_1: B, C_1: C} {
       attr [C_buf: handle] "storage_scope" = "local.acc_buffer";
       attr [A_buf: handle] "storage_scope" = "local.inp_buffer";
       attr [B_buf: handle] "storage_scope" = "local.wgt_buffer" {
         attr [IterVar(vta: int32, (nullptr), "ThreadIndex", "vta")] "coproc_scope" = 2 {
           attr [IterVar(vta, (nullptr), "ThreadIndex", "vta")] "coproc_uop_scope" = "VTAPushGEMMOp" {
-            @VTAUopLoopBegin(16, 1, 0, 0, dtype=int32, type="extern")
-            @VTAUopPush(0, 1, 0, 0, 0, 0, 0, 0, dtype=int32, type="extern")
-            @VTAUopLoopEnd(, dtype=int32, type="extern")
+            @tir.call_extern("VTAUopLoopBegin", 16, 1, 0, 0, dtype=int32)
+            @tir.vta.uop_push(0, 1, 0, 0, 0, 0, 0, 0, dtype=int32)
+            @tir.call_extern("VTAUopLoopEnd", dtype=int32)
           }
-          @vta.coproc_dep_push(2, 1, dtype=int32, type="intrin")
+          @tir.vta.coproc_dep_push(2, 1, dtype=int32)
         }
         for (ko: int32, 0, 16) {
           attr [IterVar(vta, (nullptr), "ThreadIndex", "vta")] "coproc_scope" = 1 {
-            @vta.coproc_dep_pop(2, 1, dtype=int32, type="intrin")
-            @VTALoadBuffer2D(@tvm_thread_context(@VTATLSCommandHandle(, dtype=handle, type="extern"), dtype=handle, type="intrin"), A_2, ko, 1, 1, 1, 0, 0, 0, 0, 0, 2, dtype=int32, type="extern")
-            @VTALoadBuffer2D(@tvm_thread_context(@VTATLSCommandHandle(, dtype=handle, type="extern"), dtype=handle, type="intrin"), B_2, ko, 1, 16, 16, 0, 0, 0, 0, 0, 1, dtype=int32, type="extern")
-            @vta.coproc_dep_push(1, 2, dtype=int32, type="intrin")
+            @tir.vta.coproc_dep_pop(2, 1, dtype=int32)
+            @tir.call_extern("VTALoadBuffer2D", @tir.tvm_thread_context(@tir.vta.command_handle(, dtype=handle), dtype=handle), A_2, ko, 1, 1, 1, 0, 0, 0, 0, 0, 2, dtype=int32)
+            @tir.call_extern("VTALoadBuffer2D", @tir.tvm_thread_context(@tir.vta.command_handle(, dtype=handle), dtype=handle), B_2, ko, 1, 16, 16, 0, 0, 0, 0, 0, 1, dtype=int32)
+            @tir.vta.coproc_dep_push(1, 2, dtype=int32)
           }
           attr [IterVar(vta, (nullptr), "ThreadIndex", "vta")] "coproc_scope" = 2 {
-            @vta.coproc_dep_pop(1, 2, dtype=int32, type="intrin")
+            @tir.vta.coproc_dep_pop(1, 2, dtype=int32)
             attr [IterVar(vta, (nullptr), "ThreadIndex", "vta")] "coproc_uop_scope" = "VTAPushGEMMOp" {
-              @VTAUopLoopBegin(16, 1, 0, 1, dtype=int32, type="extern")
-              @VTAUopPush(0, 0, 0, 0, 0, 0, 0, 0, dtype=int32, type="extern")
-              @VTAUopLoopEnd(, dtype=int32, type="extern")
+              @tir.call_extern("VTAUopLoopBegin", 16, 1, 0, 1, dtype=int32)
+              @tir.vta.uop_push(0, 0, 0, 0, 0, 0, 0, 0, dtype=int32)
+              @tir.call_extern("VTAUopLoopEnd", dtype=int32)
             }
-            @vta.coproc_dep_push(2, 1, dtype=int32, type="intrin")
+            @tir.vta.coproc_dep_push(2, 1, dtype=int32)
           }
         }
-        @vta.coproc_dep_push(2, 3, dtype=int32, type="intrin")
-        @vta.coproc_dep_pop(2, 1, dtype=int32, type="intrin")
+        @tir.vta.coproc_dep_push(2, 3, dtype=int32)
+        @tir.vta.coproc_dep_pop(2, 1, dtype=int32)
         attr [IterVar(vta, (nullptr), "ThreadIndex", "vta")] "coproc_scope" = 3 {
-          @vta.coproc_dep_pop(2, 3, dtype=int32, type="intrin")
-          @VTAStoreBuffer2D(@tvm_thread_context(@VTATLSCommandHandle(, dtype=handle, type="extern"), dtype=handle, type="intrin"), 0, 4, C_2, 0, 16, 1, 16, dtype=int32, type="extern")
+          @tir.vta.coproc_dep_pop(2, 3, dtype=int32)
+          @tir.call_extern("VTAStoreBuffer2D", @tir.tvm_thread_context(@tir.vta.command_handle(, dtype=handle), dtype=handle), 0, 4, C_2, 0, 16, 1, 16, dtype=int32)
         }
-        @vta.coproc_sync(, dtype=int32, type="intrin")
+        @tir.vta.coproc_sync(, dtype=int32)
       }
     }
 
diff --git a/docs/_sources/vta/tutorials/optimize/convolution_opt.rst.txt b/docs/_sources/vta/tutorials/optimize/convolution_opt.rst.txt
index 0b6419d..808f397 100644
--- a/docs/_sources/vta/tutorials/optimize/convolution_opt.rst.txt
+++ b/docs/_sources/vta/tutorials/optimize/convolution_opt.rst.txt
@@ -256,11 +256,11 @@ Those include:
  .. code-block:: none
 
     primfn(data_1: handle, kernel_1: handle, res_1: handle) -> ()
-      attr = {"tir.noalias": True, "global_symbol": "main"}
-      buffers = {data: Buffer(data_2: handle, int8, [1, 16, 14, 14, 1, 16], []),
+      attr = {"global_symbol": "main", "tir.noalias": True}
+      buffers = {res: Buffer(res_2: handle, int8, [1, 16, 14, 14, 1, 16], []),
                  kernel: Buffer(kernel_2: handle, int8, [16, 16, 3, 3, 16, 16], []),
-                 res: Buffer(res_2: handle, int8, [1, 16, 14, 14, 1, 16], [])}
-      buffer_map = {res_1: res, kernel_1: kernel, data_1: data} {
+                 data: Buffer(data_2: handle, int8, [1, 16, 14, 14, 1, 16], [])}
+      buffer_map = {data_1: data, kernel_1: kernel, res_1: res} {
       attr [data_buf: handle] "storage_scope" = "global";
       allocate(data_buf, int8, [65536]);
       attr [kernel_buf: handle] "storage_scope" = "global";
@@ -271,7 +271,7 @@ Those include:
           for (i2: int32, 0, 16) {
             for (i3: int32, 0, 16) {
               for (i5: int32, 0, 16) {
-                data_buf[((((i1*4096) + (i2*256)) + (i3*16)) + i5)] = @tvm_if_then_else(((((1 <= i2) && (i2 < 15)) && (1 <= i3)) && (i3 < 15)), (int8*)data_2[(((((i1*3136) + (i2*224)) + (i3*16)) + i5) - 240)]), 0i8, dtype=int8, type="pure_intrin")
+                data_buf[((((i1*4096) + (i2*256)) + (i3*16)) + i5)] = @tir.if_then_else(((((1 <= i2) && (i2 < 15)) && (1 <= i3)) && (i3 < 15)), (int8*)data_2[(((((i1*3136) + (i2*224)) + (i3*16)) + i5) - 240)], 0i8, dtype=int8)
               }
             }
           }
@@ -282,7 +282,7 @@ Those include:
               for (i3_1: int32, 0, 3) {
                 for (i4: int32, 0, 16) {
                   for (i5_1: int32, 0, 16) {
-                    kernel_buf[((((((i0*36864) + (i1_1*2304)) + (i2_1*768)) + (i3_1*256)) + (i4*16)) + i5_1)] = (int8*)kernel_2[((((((i0*36864) + (i1_1*2304)) + (i2_1*768)) + (i3_1*256)) + (i4*16)) + i5_1)])
+                    kernel_buf[((((((i0*36864) + (i1_1*2304)) + (i2_1*768)) + (i3_1*256)) + (i4*16)) + i5_1)] = (int8*)kernel_2[((((((i0*36864) + (i1_1*2304)) + (i2_1*768)) + (i3_1*256)) + (i4*16)) + i5_1)]
                   }
                 }
               }
@@ -298,7 +298,7 @@ Those include:
                   for (dy: int32, 0, 3) {
                     for (dx: int32, 0, 3) {
                       for (ic_tns: int32, 0, 16) {
-                        res_conv[((((co*3136) + (i*224)) + (j*16)) + ci)] = ((int32*)res_conv[((((co*3136) + (i*224)) + (j*16)) + ci)]) + (cast(int32, (int8*)data_buf[((((((ic*4096) + (i*256)) + (dy*256)) + (j*16)) + (dx*16)) + ic_tns)]))*cast(int32, (int8*)kernel_buf[((((((co*36864) + (ic*2304)) + (dy*768)) + (dx*256)) + (ci*16)) + ic_tns)]))))
+                        res_conv[((((co*3136) + (i*224)) + (j*16)) + ci)] = ((int32*)res_conv[((((co*3136) + (i*224)) + (j*16)) + ci)] + (cast(int32, (int8*)data_buf[((((((ic*4096) + (i*256)) + (dy*256)) + (j*16)) + (dx*16)) + ic_tns)])*cast(int32, (int8*)kernel_buf[((((((co*36864) + (ic*2304)) + (dy*768)) + (dx*256)) + (ci*16)) + ic_tns)])))
                       }
                     }
                   }
@@ -311,7 +311,7 @@ Those include:
           for (i2_2: int32, 0, 14) {
             for (i3_2: int32, 0, 14) {
               for (i5_2: int32, 0, 16) {
-                res_conv[((((i1_2*3136) + (i2_2*224)) + (i3_2*16)) + i5_2)] = @shift_right((int32*)res_conv[((((i1_2*3136) + (i2_2*224)) + (i3_2*16)) + i5_2)]), 8, dtype=int32, type="pure_intrin")
+                res_conv[((((i1_2*3136) + (i2_2*224)) + (i3_2*16)) + i5_2)] = @tir.shift_right((int32*)res_conv[((((i1_2*3136) + (i2_2*224)) + (i3_2*16)) + i5_2)], 8, dtype=int32)
               }
             }
           }
@@ -320,7 +320,7 @@ Those include:
           for (i2_3: int32, 0, 14) {
             for (i3_3: int32, 0, 14) {
               for (i5_3: int32, 0, 16) {
-                res_conv[((((i1_3*3136) + (i2_3*224)) + (i3_3*16)) + i5_3)] = max((int32*)res_conv[((((i1_3*3136) + (i2_3*224)) + (i3_3*16)) + i5_3)]), 0)
+                res_conv[((((i1_3*3136) + (i2_3*224)) + (i3_3*16)) + i5_3)] = max((int32*)res_conv[((((i1_3*3136) + (i2_3*224)) + (i3_3*16)) + i5_3)], 0)
               }
             }
           }
@@ -329,7 +329,7 @@ Those include:
           for (i2_4: int32, 0, 14) {
             for (i3_4: int32, 0, 14) {
               for (i5_4: int32, 0, 16) {
-                res_conv[((((i1_4*3136) + (i2_4*224)) + (i3_4*16)) + i5_4)] = min((int32*)res_conv[((((i1_4*3136) + (i2_4*224)) + (i3_4*16)) + i5_4)]), 127)
+                res_conv[((((i1_4*3136) + (i2_4*224)) + (i3_4*16)) + i5_4)] = min((int32*)res_conv[((((i1_4*3136) + (i2_4*224)) + (i3_4*16)) + i5_4)], 127)
               }
             }
           }
@@ -338,7 +338,7 @@ Those include:
           for (i2_5: int32, 0, 14) {
             for (i3_5: int32, 0, 14) {
               for (i5_5: int32, 0, 16) {
-                res_2[((((i1_5*3136) + (i2_5*224)) + (i3_5*16)) + i5_5)] = cast(int8, (int32*)res_conv[((((i1_5*3136) + (i2_5*224)) + (i3_5*16)) + i5_5)]))
+                res_2[((((i1_5*3136) + (i2_5*224)) + (i3_5*16)) + i5_5)] = cast(int8, (int32*)res_conv[((((i1_5*3136) + (i2_5*224)) + (i3_5*16)) + i5_5)])
               }
             }
           }
@@ -452,11 +452,11 @@ below.
  .. code-block:: none
 
     primfn(data_1: handle, kernel_1: handle, res_1: handle) -> ()
-      attr = {"tir.noalias": True, "global_symbol": "main"}
-      buffers = {data: Buffer(data_2: handle, int8, [1, 16, 14, 14, 1, 16], []),
+      attr = {"global_symbol": "main", "tir.noalias": True}
+      buffers = {res: Buffer(res_2: handle, int8, [1, 16, 14, 14, 1, 16], []),
                  kernel: Buffer(kernel_2: handle, int8, [16, 16, 3, 3, 16, 16], []),
-                 res: Buffer(res_2: handle, int8, [1, 16, 14, 14, 1, 16], [])}
-      buffer_map = {res_1: res, kernel_1: kernel, data_1: data} {
+                 data: Buffer(data_2: handle, int8, [1, 16, 14, 14, 1, 16], [])}
+      buffer_map = {data_1: data, kernel_1: kernel, res_1: res} {
       attr [data_buf: handle] "storage_scope" = "global";
       allocate(data_buf, int8, [65536]);
       attr [kernel_buf: handle] "storage_scope" = "global";
@@ -467,7 +467,7 @@ below.
           for (i2: int32, 0, 16) {
             for (i3: int32, 0, 16) {
               for (i5: int32, 0, 16) {
-                data_buf[((((i1*4096) + (i2*256)) + (i3*16)) + i5)] = @tvm_if_then_else(((((1 <= i2) && (i2 < 15)) && (1 <= i3)) && (i3 < 15)), (int8*)data_2[(((((i1*3136) + (i2*224)) + (i3*16)) + i5) - 240)]), 0i8, dtype=int8, type="pure_intrin")
+                data_buf[((((i1*4096) + (i2*256)) + (i3*16)) + i5)] = @tir.if_then_else(((((1 <= i2) && (i2 < 15)) && (1 <= i3)) && (i3 < 15)), (int8*)data_2[(((((i1*3136) + (i2*224)) + (i3*16)) + i5) - 240)], 0i8, dtype=int8)
               }
             }
           }
@@ -478,7 +478,7 @@ below.
               for (i3_1: int32, 0, 3) {
                 for (i4: int32, 0, 16) {
                   for (i5_1: int32, 0, 16) {
-                    kernel_buf[((((((i0*36864) + (i1_1*2304)) + (i2_1*768)) + (i3_1*256)) + (i4*16)) + i5_1)] = (int8*)kernel_2[((((((i0*36864) + (i1_1*2304)) + (i2_1*768)) + (i3_1*256)) + (i4*16)) + i5_1)])
+                    kernel_buf[((((((i0*36864) + (i1_1*2304)) + (i2_1*768)) + (i3_1*256)) + (i4*16)) + i5_1)] = (int8*)kernel_2[((((((i0*36864) + (i1_1*2304)) + (i2_1*768)) + (i3_1*256)) + (i4*16)) + i5_1)]
                   }
                 }
               }
@@ -504,8 +504,8 @@ below.
                     for (j: int32, 0, 14) {
                       for (ci: int32, 0, 16) {
                         for (ic_tns: int32, 0, 16) {
-                          res_conv[((((co*1568) + (i*224)) + (j*16)) + ci)] = ((int32*)res_conv[((((co*1568) + (i*224)) + (j*16)) + ci)]) + (cast(int32, (int8*)data_buf[(((((((ic.outer*4096) + (i2.outer*1792)) + (i*256)) + (dy*256)) + (j*16)) + (dx*16)) + ic_tns)]))*cast(int32, (int8*)kernel_buf[((((((co*36864) + (ic.outer*2304)) + (dy*768)) + (dx*256)) + (ci*16)) + ic_tns)]))))
-                          res_conv[(((((co*1568) + (i*224)) + (j*16)) + ci) + 12544)] = ((int32*)res_conv[(((((co*1568) + (i*224)) + (j*16)) + ci) + 12544)]) + (cast(int32, (int8*)data_buf[(((((((ic.outer*4096) + (i2.outer*1792)) + (i*256)) + (dy*256)) + (j*16)) + (dx*16)) + ic_tns)]))*cast(int32, (int8*)kernel_buf[(((((((co*36864) + (ic.outer*2304)) + (dy*768)) + (dx*256)) + (ci*16)) + ic_tns) + 294912)]))))
+                          res_conv[((((co*1568) + (i*224)) + (j*16)) + ci)] = ((int32*)res_conv[((((co*1568) + (i*224)) + (j*16)) + ci)] + (cast(int32, (int8*)data_buf[(((((((ic.outer*4096) + (i2.outer*1792)) + (i*256)) + (dy*256)) + (j*16)) + (dx*16)) + ic_tns)])*cast(int32, (int8*)kernel_buf[((((((co*36864) + (ic.outer*2304)) + (dy*768)) + (dx*256)) + (ci*16)) + ic_tns)])))
+                          res_conv[(((((co*1568) + (i*224)) + (j*16)) + ci) + 12544)] = ((int32*)res_conv[(((((co*1568) + (i*224)) + (j*16)) + ci) + 12544)] + (cast(int32, (int8*)data_buf[(((((((ic.outer*4096) + (i2.outer*1792)) + (i*256)) + (dy*256)) + (j*16)) + (dx*16)) + ic_tns)])*cast(int32, (int8*)kernel_buf[(((((((co*36864) + (ic.outer*2304)) + (dy*768)) + (dx*256)) + (ci*16)) + ic_tns) + 294912)])))
                         }
                       }
                     }
@@ -518,8 +518,8 @@ below.
             for (i2_2: int32, 0, 7) {
               for (i3_2: int32, 0, 14) {
                 for (i5_2: int32, 0, 16) {
-                  res_conv[((((i1_2*1568) + (i2_2*224)) + (i3_2*16)) + i5_2)] = @shift_right((int32*)res_conv[((((i1_2*1568) + (i2_2*224)) + (i3_2*16)) + i5_2)]), 8, dtype=int32, type="pure_intrin")
-                  res_conv[(((((i1_2*1568) + (i2_2*224)) + (i3_2*16)) + i5_2) + 12544)] = @shift_right((int32*)res_conv[(((((i1_2*1568) + (i2_2*224)) + (i3_2*16)) + i5_2) + 12544)]), 8, dtype=int32, type="pure_intrin")
+                  res_conv[((((i1_2*1568) + (i2_2*224)) + (i3_2*16)) + i5_2)] = @tir.shift_right((int32*)res_conv[((((i1_2*1568) + (i2_2*224)) + (i3_2*16)) + i5_2)], 8, dtype=int32)
+                  res_conv[(((((i1_2*1568) + (i2_2*224)) + (i3_2*16)) + i5_2) + 12544)] = @tir.shift_right((int32*)res_conv[(((((i1_2*1568) + (i2_2*224)) + (i3_2*16)) + i5_2) + 12544)], 8, dtype=int32)
                 }
               }
             }
@@ -528,8 +528,8 @@ below.
             for (i2_3: int32, 0, 7) {
               for (i3_3: int32, 0, 14) {
                 for (i5_3: int32, 0, 16) {
-                  res_conv[((((i1_3*1568) + (i2_3*224)) + (i3_3*16)) + i5_3)] = max((int32*)res_conv[((((i1_3*1568) + (i2_3*224)) + (i3_3*16)) + i5_3)]), 0)
-                  res_conv[(((((i1_3*1568) + (i2_3*224)) + (i3_3*16)) + i5_3) + 12544)] = max((int32*)res_conv[(((((i1_3*1568) + (i2_3*224)) + (i3_3*16)) + i5_3) + 12544)]), 0)
+                  res_conv[((((i1_3*1568) + (i2_3*224)) + (i3_3*16)) + i5_3)] = max((int32*)res_conv[((((i1_3*1568) + (i2_3*224)) + (i3_3*16)) + i5_3)], 0)
+                  res_conv[(((((i1_3*1568) + (i2_3*224)) + (i3_3*16)) + i5_3) + 12544)] = max((int32*)res_conv[(((((i1_3*1568) + (i2_3*224)) + (i3_3*16)) + i5_3) + 12544)], 0)
                 }
               }
             }
@@ -538,8 +538,8 @@ below.
             for (i2_4: int32, 0, 7) {
               for (i3_4: int32, 0, 14) {
                 for (i5_4: int32, 0, 16) {
-                  res_conv[((((i1_4*1568) + (i2_4*224)) + (i3_4*16)) + i5_4)] = min((int32*)res_conv[((((i1_4*1568) + (i2_4*224)) + (i3_4*16)) + i5_4)]), 127)
-                  res_conv[(((((i1_4*1568) + (i2_4*224)) + (i3_4*16)) + i5_4) + 12544)] = min((int32*)res_conv[(((((i1_4*1568) + (i2_4*224)) + (i3_4*16)) + i5_4) + 12544)]), 127)
+                  res_conv[((((i1_4*1568) + (i2_4*224)) + (i3_4*16)) + i5_4)] = min((int32*)res_conv[((((i1_4*1568) + (i2_4*224)) + (i3_4*16)) + i5_4)], 127)
+                  res_conv[(((((i1_4*1568) + (i2_4*224)) + (i3_4*16)) + i5_4) + 12544)] = min((int32*)res_conv[(((((i1_4*1568) + (i2_4*224)) + (i3_4*16)) + i5_4) + 12544)], 127)
                 }
               }
             }
@@ -548,8 +548,8 @@ below.
             for (i2.inner: int32, 0, 7) {
               for (i3.inner: int32, 0, 14) {
                 for (i5_5: int32, 0, 16) {
-                  res_2[(((((i1.inner*3136) + (i2.outer*1568)) + (i2.inner*224)) + (i3.inner*16)) + i5_5)] = cast(int8, (int32*)res_conv[((((i1.inner*1568) + (i2.inner*224)) + (i3.inner*16)) + i5_5)]))
-                  res_2[((((((i1.inner*3136) + (i2.outer*1568)) + (i2.inner*224)) + (i3.inner*16)) + i5_5) + 25088)] = cast(int8, (int32*)res_conv[(((((i1.inner*1568) + (i2.inner*224)) + (i3.inner*16)) + i5_5) + 12544)]))
+                  res_2[(((((i1.inner*3136) + (i2.outer*1568)) + (i2.inner*224)) + (i3.inner*16)) + i5_5)] = cast(int8, (int32*)res_conv[((((i1.inner*1568) + (i2.inner*224)) + (i3.inner*16)) + i5_5)])
+                  res_2[((((((i1.inner*3136) + (i2.outer*1568)) + (i2.inner*224)) + (i3.inner*16)) + i5_5) + 25088)] = cast(int8, (int32*)res_conv[(((((i1.inner*1568) + (i2.inner*224)) + (i3.inner*16)) + i5_5) + 12544)])
                 }
               }
             }
@@ -635,104 +635,104 @@ and mapping the shift, and clipping computation to the vector ALU.
  .. code-block:: none
 
     primfn(data_1: handle, kernel_1: handle, res_1: handle) -> ()
-      attr = {"tir.noalias": True, "global_symbol": "main"}
+      attr = {"global_symbol": "main", "tir.noalias": True}
       buffers = {res: Buffer(res_2: handle, int8, [1, 16, 14, 14, 1, 16], []),
-                 data: Buffer(data_2: handle, int8, [1, 16, 14, 14, 1, 16], []),
-                 kernel: Buffer(kernel_2: handle, int8, [16, 16, 3, 3, 16, 16], [])}
-      buffer_map = {kernel_1: kernel, res_1: res, data_1: data} {
+                 kernel: Buffer(kernel_2: handle, int8, [16, 16, 3, 3, 16, 16], []),
+                 data: Buffer(data_2: handle, int8, [1, 16, 14, 14, 1, 16], [])}
+      buffer_map = {data_1: data, kernel_1: kernel, res_1: res} {
       attr [res_conv: handle] "storage_scope" = "local.acc_buffer";
       attr [data_buf: handle] "storage_scope" = "local.inp_buffer";
       attr [kernel_buf: handle] "storage_scope" = "local.wgt_buffer" {
-        @vta.coproc_dep_push(3, 2, dtype=int32, type="intrin")
-        @vta.coproc_dep_push(3, 2, dtype=int32, type="intrin")
+        @tir.vta.coproc_dep_push(3, 2, dtype=int32)
+        @tir.vta.coproc_dep_push(3, 2, dtype=int32)
         for (i2.outer: int32, 0, 2) {
           for (cthread.s: int32, 0, 2) {
             attr [IterVar(vta: int32, (nullptr), "ThreadIndex", "vta")] "coproc_scope" = 2 {
-              @vta.coproc_dep_pop(3, 2, dtype=int32, type="intrin")
+              @tir.vta.coproc_dep_pop(3, 2, dtype=int32)
               attr [IterVar(vta, (nullptr), "ThreadIndex", "vta")] "coproc_uop_scope" = "VTAPushGEMMOp" {
-                @VTAUopLoopBegin(8, 98, 0, 0, dtype=int32, type="extern")
-                @VTAUopLoopBegin(7, 14, 0, 0, dtype=int32, type="extern")
+                @tir.call_extern("VTAUopLoopBegin", 8, 98, 0, 0, dtype=int32)
+                @tir.call_extern("VTAUopLoopBegin", 7, 14, 0, 0, dtype=int32)
                 for (j.init: int32, 0, 14) {
-                  @VTAUopPush(0, 1, ((cthread.s*784) + j.init), 0, 0, 0, 0, 0, dtype=int32, type="extern")
+                  @tir.vta.uop_push(0, 1, ((cthread.s*784) + j.init), 0, 0, 0, 0, 0, dtype=int32)
                 }
-                @VTAUopLoopEnd(, dtype=int32, type="extern")
-                @VTAUopLoopEnd(, dtype=int32, type="extern")
+                @tir.call_extern("VTAUopLoopEnd", dtype=int32)
+                @tir.call_extern("VTAUopLoopEnd", dtype=int32)
               }
-              @vta.coproc_dep_push(2, 1, dtype=int32, type="intrin")
+              @tir.vta.coproc_dep_push(2, 1, dtype=int32)
             }
           }
           for (ic.outer: int32, 0, 16) {
             attr [IterVar(vta, (nullptr), "ThreadIndex", "vta")] "coproc_scope" = 1 {
-              @vta.coproc_dep_pop(2, 1, dtype=int32, type="intrin")
-              @VTALoadBuffer2D(@tvm_thread_context(@VTATLSCommandHandle(, dtype=handle, type="extern"), dtype=handle, type="intrin"), data_2, ((((ic.outer*196) + (i2.outer*98)) + (max((1 - (i2.outer*7)), 0)*14)) - 14), 14, ((9 - max((1 - (i2.outer*7)), 0)) - max(((i2.outer*7) - 6), 0)), 14, 1, max((1 - (i2.outer*7)), 0), 1, max(((i2.outer*7) - 6), 0), 0, 2, dtype=int32, type="extern")
-              @VTALoadBuffer2D(@tvm_thread_context(@VTATLSCommandHandle(, dtype=handle, type="extern"), dtype=handle, type="intrin"), kernel_2, (ic.outer*9), 9, 8, 144, 0, 0, 0, 0, 0, 1, dtype=int32, type="extern")
-              @vta.coproc_dep_push(1, 2, dtype=int32, type="intrin")
+              @tir.vta.coproc_dep_pop(2, 1, dtype=int32)
+              @tir.call_extern("VTALoadBuffer2D", @tir.tvm_thread_context(@tir.vta.command_handle(, dtype=handle), dtype=handle), data_2, ((((ic.outer*196) + (i2.outer*98)) + (max((1 - (i2.outer*7)), 0)*14)) - 14), 14, ((9 - max((1 - (i2.outer*7)), 0)) - max(((i2.outer*7) - 6), 0)), 14, 1, max((1 - (i2.outer*7)), 0), 1, max(((i2.outer*7) - 6), 0), 0, 2, dtype=int32)
+              @tir.call_extern("VTALoadBuffer2D", @tir.tvm_thread_context(@tir.vta.command_handle(, dtype=handle), dtype=handle), kernel_2, (ic.outer*9), 9, 8, 144, 0, 0, 0, 0, 0, 1, dtype=int32)
+              @tir.vta.coproc_dep_push(1, 2, dtype=int32)
             }
             attr [IterVar(vta, (nullptr), "ThreadIndex", "vta")] "coproc_scope" = 1 {
-              @vta.coproc_dep_pop(2, 1, dtype=int32, type="intrin")
-              @VTALoadBuffer2D(@tvm_thread_context(@VTATLSCommandHandle(, dtype=handle, type="extern"), dtype=handle, type="intrin"), data_2, ((((ic.outer*196) + (i2.outer*98)) + (max((1 - (i2.outer*7)), 0)*14)) - 14), 14, ((9 - max((1 - (i2.outer*7)), 0)) - max(((i2.outer*7) - 6), 0)), 14, 1, max((1 - (i2.outer*7)), 0), 1, max(((i2.outer*7) - 6), 0), 144, 2, dtype=int32, type="extern")
-              @VTALoadBuffer2D(@tvm_thread_context(@VTATLSCommandHandle(, dtype=handle, type="extern"), dtype=handle, type="intrin"), kernel_2, ((ic.outer*9) + 1152), 9, 8, 144, 0, 0, 0, 0, 72, 1, dtype=int32, type="extern")
-              @vta.coproc_dep_push(1, 2, dtype=int32, type="intrin")
+              @tir.vta.coproc_dep_pop(2, 1, dtype=int32)
+              @tir.call_extern("VTALoadBuffer2D", @tir.tvm_thread_context(@tir.vta.command_handle(, dtype=handle), dtype=handle), data_2, ((((ic.outer*196) + (i2.outer*98)) + (max((1 - (i2.outer*7)), 0)*14)) - 14), 14, ((9 - max((1 - (i2.outer*7)), 0)) - max(((i2.outer*7) - 6), 0)), 14, 1, max((1 - (i2.outer*7)), 0), 1, max(((i2.outer*7) - 6), 0), 144, 2, dtype=int32)
+              @tir.call_extern("VTALoadBuffer2D", @tir.tvm_thread_context(@tir.vta.command_handle(, dtype=handle), dtype=handle), kernel_2, ((ic.outer*9) + 1152), 9, 8, 144, 0, 0, 0, 0, 72, 1, dtype=int32)
+              @tir.vta.coproc_dep_push(1, 2, dtype=int32)
             }
             for (cthread.s_1: int32, 0, 2) {
               attr [IterVar(vta, (nullptr), "ThreadIndex", "vta")] "coproc_scope" = 2 {
-                @vta.coproc_dep_pop(1, 2, dtype=int32, type="intrin")
+                @tir.vta.coproc_dep_pop(1, 2, dtype=int32)
                 attr [IterVar(vta, (nullptr), "ThreadIndex", "vta")] "coproc_uop_scope" = "VTAPushGEMMOp" {
-                  @VTAUopLoopBegin(8, 98, 0, 9, dtype=int32, type="extern")
-                  @VTAUopLoopBegin(7, 14, 16, 0, dtype=int32, type="extern")
+                  @tir.call_extern("VTAUopLoopBegin", 8, 98, 0, 9, dtype=int32)
+                  @tir.call_extern("VTAUopLoopBegin", 7, 14, 16, 0, dtype=int32)
                   for (dy: int32, 0, 3) {
                     for (dx: int32, 0, 3) {
                       for (j: int32, 0, 14) {
-                        @VTAUopPush(0, 0, ((cthread.s_1*784) + j), ((((cthread.s_1*144) + (dy*16)) + j) + dx), (((cthread.s_1*72) + (dy*3)) + dx), 0, 0, 0, dtype=int32, type="extern")
+                        @tir.vta.uop_push(0, 0, ((cthread.s_1*784) + j), ((((cthread.s_1*144) + (dy*16)) + j) + dx), (((cthread.s_1*72) + (dy*3)) + dx), 0, 0, 0, dtype=int32)
                       }
                     }
                   }
-                  @VTAUopLoopEnd(, dtype=int32, type="extern")
-                  @VTAUopLoopEnd(, dtype=int32, type="extern")
+                  @tir.call_extern("VTAUopLoopEnd", dtype=int32)
+                  @tir.call_extern("VTAUopLoopEnd", dtype=int32)
                 }
-                @vta.coproc_dep_push(2, 1, dtype=int32, type="intrin")
+                @tir.vta.coproc_dep_push(2, 1, dtype=int32)
               }
             }
           }
-          @vta.coproc_dep_pop(2, 1, dtype=int32, type="intrin")
-          @vta.coproc_dep_pop(2, 1, dtype=int32, type="intrin")
+          @tir.vta.coproc_dep_pop(2, 1, dtype=int32)
+          @tir.vta.coproc_dep_pop(2, 1, dtype=int32)
           for (cthread.s_2: int32, 0, 2) {
             attr [IterVar(vta, (nullptr), "ThreadIndex", "vta")] "coproc_scope" = 2 {
               attr [IterVar(vta, (nullptr), "ThreadIndex", "vta")] "coproc_uop_scope" = "VTAPushALUOp" {
-                @VTAUopLoopBegin(784, 1, 1, 0, dtype=int32, type="extern")
-                @VTAUopPush(1, 0, (cthread.s_2*784), (cthread.s_2*784), 0, 3, 1, 8, dtype=int32, type="extern")
-                @VTAUopLoopEnd(, dtype=int32, type="extern")
+                @tir.call_extern("VTAUopLoopBegin", 784, 1, 1, 0, dtype=int32)
+                @tir.vta.uop_push(1, 0, (cthread.s_2*784), (cthread.s_2*784), 0, 3, 1, 8, dtype=int32)
+                @tir.call_extern("VTAUopLoopEnd", dtype=int32)
               }
               attr [IterVar(vta, (nullptr), "ThreadIndex", "vta")] "coproc_uop_scope" = "VTAPushALUOp" {
-                @VTAUopLoopBegin(784, 1, 1, 0, dtype=int32, type="extern")
-                @VTAUopPush(1, 0, (cthread.s_2*784), (cthread.s_2*784), 0, 1, 1, 0, dtype=int32, type="extern")
-                @VTAUopLoopEnd(, dtype=int32, type="extern")
+                @tir.call_extern("VTAUopLoopBegin", 784, 1, 1, 0, dtype=int32)
+                @tir.vta.uop_push(1, 0, (cthread.s_2*784), (cthread.s_2*784), 0, 1, 1, 0, dtype=int32)
+                @tir.call_extern("VTAUopLoopEnd", dtype=int32)
               }
               attr [IterVar(vta, (nullptr), "ThreadIndex", "vta")] "coproc_uop_scope" = "VTAPushALUOp" {
-                @VTAUopLoopBegin(784, 1, 1, 0, dtype=int32, type="extern")
-                @VTAUopPush(1, 0, (cthread.s_2*784), (cthread.s_2*784), 0, 0, 1, 127, dtype=int32, type="extern")
-                @VTAUopLoopEnd(, dtype=int32, type="extern")
+                @tir.call_extern("VTAUopLoopBegin", 784, 1, 1, 0, dtype=int32)
+                @tir.vta.uop_push(1, 0, (cthread.s_2*784), (cthread.s_2*784), 0, 0, 1, 127, dtype=int32)
+                @tir.call_extern("VTAUopLoopEnd", dtype=int32)
               }
-              @vta.coproc_dep_push(2, 3, dtype=int32, type="intrin")
+              @tir.vta.coproc_dep_push(2, 3, dtype=int32)
             }
           }
           for (cthread.s_3: int32, 0, 2) {
             attr [IterVar(vta, (nullptr), "ThreadIndex", "vta")] "coproc_scope" = 3 {
-              @vta.coproc_dep_pop(2, 3, dtype=int32, type="intrin")
+              @tir.vta.coproc_dep_pop(2, 3, dtype=int32)
               for (i1.inner: int32, 0, 8) {
                 for (i2.inner: int32, 0, 7) {
                   for (i3.inner: int32, 0, 14) {
-                    @VTAStoreBuffer2D(@tvm_thread_context(@VTATLSCommandHandle(, dtype=handle, type="extern"), dtype=handle, type="intrin"), ((((cthread.s_3*784) + (i1.inner*98)) + (i2.inner*14)) + i3.inner), 4, res_2, (((((cthread.s_3*1568) + (i1.inner*196)) + (i2.outer*98)) + (i2.inner*14)) + i3.inner), 1, 1, 1, dtype=int32, type="extern")
+                    @tir.call_extern("VTAStoreBuffer2D", @tir.tvm_thread_context(@tir.vta.command_handle(, dtype=handle), dtype=handle), ((((cthread.s_3*784) + (i1.inner*98)) + (i2.inner*14)) + i3.inner), 4, res_2, (((((cthread.s_3*1568) + (i1.inner*196)) + (i2.outer*98)) + (i2.inner*14)) + i3.inner), 1, 1, 1, dtype=int32)
                   }
                 }
               }
-              @vta.coproc_dep_push(3, 2, dtype=int32, type="intrin")
+              @tir.vta.coproc_dep_push(3, 2, dtype=int32)
             }
           }
         }
-        @vta.coproc_dep_pop(3, 2, dtype=int32, type="intrin")
-        @vta.coproc_dep_pop(3, 2, dtype=int32, type="intrin")
-        @vta.coproc_sync(, dtype=int32, type="intrin")
+        @tir.vta.coproc_dep_pop(3, 2, dtype=int32)
+        @tir.vta.coproc_dep_pop(3, 2, dtype=int32)
+        @tir.vta.coproc_sync(, dtype=int32)
       }
     }
 
diff --git a/docs/_sources/vta/tutorials/optimize/matrix_multiply_opt.rst.txt b/docs/_sources/vta/tutorials/optimize/matrix_multiply_opt.rst.txt
index 815b29e..0510a2b 100644
--- a/docs/_sources/vta/tutorials/optimize/matrix_multiply_opt.rst.txt
+++ b/docs/_sources/vta/tutorials/optimize/matrix_multiply_opt.rst.txt
@@ -201,11 +201,11 @@ Those include:
  .. code-block:: none
 
     primfn(data_1: handle, weight_1: handle, res_1: handle) -> ()
-      attr = {"tir.noalias": True, "global_symbol": "main"}
-      buffers = {data: Buffer(data_2: handle, int8, [1, 64, 1, 16], []),
+      attr = {"global_symbol": "main", "tir.noalias": True}
+      buffers = {res: Buffer(res_2: handle, int8, [1, 64, 1, 16], []),
                  weight: Buffer(weight_2: handle, int8, [64, 64, 16, 16], []),
-                 res: Buffer(res_2: handle, int8, [1, 64, 1, 16], [])}
-      buffer_map = {res_1: res, weight_1: weight, data_1: data} {
+                 data: Buffer(data_2: handle, int8, [1, 64, 1, 16], [])}
+      buffer_map = {data_1: data, weight_1: weight, res_1: res} {
       attr [data_buf: handle] "storage_scope" = "global";
       allocate(data_buf, int8, [1024]);
       attr [weight_buf: handle] "storage_scope" = "global";
@@ -214,14 +214,14 @@ Those include:
       allocate(res_gem, int32, [1024]) {
         for (i1: int32, 0, 64) {
           for (i3: int32, 0, 16) {
-            data_buf[((i1*16) + i3)] = (int8*)data_2[((i1*16) + i3)])
+            data_buf[((i1*16) + i3)] = (int8*)data_2[((i1*16) + i3)]
           }
         }
         for (i0: int32, 0, 64) {
           for (i1_1: int32, 0, 64) {
             for (i2: int32, 0, 16) {
               for (i3_1: int32, 0, 16) {
-                weight_buf[((((i0*16384) + (i1_1*256)) + (i2*16)) + i3_1)] = (int8*)weight_2[((((i0*16384) + (i1_1*256)) + (i2*16)) + i3_1)])
+                weight_buf[((((i0*16384) + (i1_1*256)) + (i2*16)) + i3_1)] = (int8*)weight_2[((((i0*16384) + (i1_1*256)) + (i2*16)) + i3_1)]
               }
             }
           }
@@ -231,29 +231,29 @@ Those include:
             res_gem[((co*16) + ci)] = 0
             for (ic: int32, 0, 64) {
               for (ic_tns: int32, 0, 16) {
-                res_gem[((co*16) + ci)] = ((int32*)res_gem[((co*16) + ci)]) + (cast(int32, (int8*)data_buf[((ic*16) + ic_tns)]))*cast(int32, (int8*)weight_buf[((((co*16384) + (ic*256)) + (ci*16)) + ic_tns)]))))
+                res_gem[((co*16) + ci)] = ((int32*)res_gem[((co*16) + ci)] + (cast(int32, (int8*)data_buf[((ic*16) + ic_tns)])*cast(int32, (int8*)weight_buf[((((co*16384) + (ic*256)) + (ci*16)) + ic_tns)])))
               }
             }
           }
         }
         for (i1_2: int32, 0, 64) {
           for (i3_2: int32, 0, 16) {
-            res_gem[((i1_2*16) + i3_2)] = @shift_right((int32*)res_gem[((i1_2*16) + i3_2)]), 8, dtype=int32, type="pure_intrin")
+            res_gem[((i1_2*16) + i3_2)] = @tir.shift_right((int32*)res_gem[((i1_2*16) + i3_2)], 8, dtype=int32)
           }
         }
         for (i1_3: int32, 0, 64) {
           for (i3_3: int32, 0, 16) {
-            res_gem[((i1_3*16) + i3_3)] = max((int32*)res_gem[((i1_3*16) + i3_3)]), 0)
+            res_gem[((i1_3*16) + i3_3)] = max((int32*)res_gem[((i1_3*16) + i3_3)], 0)
           }
         }
         for (i1_4: int32, 0, 64) {
           for (i3_4: int32, 0, 16) {
-            res_gem[((i1_4*16) + i3_4)] = min((int32*)res_gem[((i1_4*16) + i3_4)]), 127)
+            res_gem[((i1_4*16) + i3_4)] = min((int32*)res_gem[((i1_4*16) + i3_4)], 127)
           }
         }
         for (i1_5: int32, 0, 64) {
           for (i3_5: int32, 0, 16) {
-            res_2[((i1_5*16) + i3_5)] = cast(int8, (int32*)res_gem[((i1_5*16) + i3_5)]))
+            res_2[((i1_5*16) + i3_5)] = cast(int8, (int32*)res_gem[((i1_5*16) + i3_5)])
           }
         }
       }
@@ -363,11 +363,11 @@ below:
  .. code-block:: none
 
     primfn(data_1: handle, weight_1: handle, res_1: handle) -> ()
-      attr = {"tir.noalias": True, "global_symbol": "main"}
-      buffers = {data: Buffer(data_2: handle, int8, [1, 64, 1, 16], []),
-                 weight: Buffer(weight_2: handle, int8, [64, 64, 16, 16], []),
-                 res: Buffer(res_2: handle, int8, [1, 64, 1, 16], [])}
-      buffer_map = {res_1: res, weight_1: weight, data_1: data} {
+      attr = {"global_symbol": "main", "tir.noalias": True}
+      buffers = {weight: Buffer(weight_2: handle, int8, [64, 64, 16, 16], []),
+                 res: Buffer(res_2: handle, int8, [1, 64, 1, 16], []),
+                 data: Buffer(data_2: handle, int8, [1, 64, 1, 16], [])}
+      buffer_map = {data_1: data, weight_1: weight, res_1: res} {
       attr [data_buf: handle] "storage_scope" = "global";
       allocate(data_buf, int8, [1024]);
       attr [weight_buf: handle] "storage_scope" = "global";
@@ -376,14 +376,14 @@ below:
       allocate(res_gem, int32, [256]) {
         for (i1: int32, 0, 64) {
           for (i3: int32, 0, 16) {
-            data_buf[((i1*16) + i3)] = (int8*)data_2[((i1*16) + i3)])
+            data_buf[((i1*16) + i3)] = (int8*)data_2[((i1*16) + i3)]
           }
         }
         for (i0: int32, 0, 64) {
           for (i1_1: int32, 0, 64) {
             for (i2: int32, 0, 16) {
               for (i3_1: int32, 0, 16) {
-                weight_buf[((((i0*16384) + (i1_1*256)) + (i2*16)) + i3_1)] = (int8*)weight_2[((((i0*16384) + (i1_1*256)) + (i2*16)) + i3_1)])
+                weight_buf[((((i0*16384) + (i1_1*256)) + (i2*16)) + i3_1)] = (int8*)weight_2[((((i0*16384) + (i1_1*256)) + (i2*16)) + i3_1)]
               }
             }
           }
@@ -399,7 +399,7 @@ below:
               for (ic.inner: int32, 0, 16) {
                 for (ci: int32, 0, 16) {
                   for (ic_tns: int32, 0, 16) {
-                    res_gem[((co*16) + ci)] = ((int32*)res_gem[((co*16) + ci)]) + (cast(int32, (int8*)data_buf[(((ic.outer*256) + (ic.inner*16)) + ic_tns)]))*cast(int32, (int8*)weight_buf[((((((i1.outer*262144) + (co*16384)) + (ic.outer*4096)) + (ic.inner*256)) + (ci*16)) + ic_tns)]))))
+                    res_gem[((co*16) + ci)] = ((int32*)res_gem[((co*16) + ci)] + (cast(int32, (int8*)data_buf[(((ic.outer*256) + (ic.inner*16)) + ic_tns)])*cast(int32, (int8*)weight_buf[((((((i1.outer*262144) + (co*16384)) + (ic.outer*4096)) + (ic.inner*256)) + (ci*16)) + ic_tns)])))
                   }
                 }
               }
@@ -407,22 +407,22 @@ below:
           }
           for (i1_2: int32, 0, 16) {
             for (i3_2: int32, 0, 16) {
-              res_gem[((i1_2*16) + i3_2)] = @shift_right((int32*)res_gem[((i1_2*16) + i3_2)]), 8, dtype=int32, type="pure_intrin")
+              res_gem[((i1_2*16) + i3_2)] = @tir.shift_right((int32*)res_gem[((i1_2*16) + i3_2)], 8, dtype=int32)
             }
           }
           for (i1_3: int32, 0, 16) {
             for (i3_3: int32, 0, 16) {
-              res_gem[((i1_3*16) + i3_3)] = max((int32*)res_gem[((i1_3*16) + i3_3)]), 0)
+              res_gem[((i1_3*16) + i3_3)] = max((int32*)res_gem[((i1_3*16) + i3_3)], 0)
             }
           }
           for (i1_4: int32, 0, 16) {
             for (i3_4: int32, 0, 16) {
-              res_gem[((i1_4*16) + i3_4)] = min((int32*)res_gem[((i1_4*16) + i3_4)]), 127)
+              res_gem[((i1_4*16) + i3_4)] = min((int32*)res_gem[((i1_4*16) + i3_4)], 127)
             }
           }
           for (i1.inner: int32, 0, 16) {
             for (i3_5: int32, 0, 16) {
-              res_2[(((i1.outer*256) + (i1.inner*16)) + i3_5)] = cast(int8, (int32*)res_gem[((i1.inner*16) + i3_5)]))
+              res_2[(((i1.outer*256) + (i1.inner*16)) + i3_5)] = cast(int8, (int32*)res_gem[((i1.inner*16) + i3_5)])
             }
           }
         }
@@ -506,73 +506,73 @@ and mapping the shift, and clipping computation to the vector ALU.
  .. code-block:: none
 
     primfn(data_1: handle, weight_1: handle, res_1: handle) -> ()
-      attr = {"tir.noalias": True, "global_symbol": "main"}
-      buffers = {data: Buffer(data_2: handle, int8, [1, 64, 1, 16], []),
+      attr = {"global_symbol": "main", "tir.noalias": True}
+      buffers = {res: Buffer(res_2: handle, int8, [1, 64, 1, 16], []),
                  weight: Buffer(weight_2: handle, int8, [64, 64, 16, 16], []),
-                 res: Buffer(res_2: handle, int8, [1, 64, 1, 16], [])}
-      buffer_map = {res_1: res, weight_1: weight, data_1: data} {
+                 data: Buffer(data_2: handle, int8, [1, 64, 1, 16], [])}
+      buffer_map = {data_1: data, weight_1: weight, res_1: res} {
       attr [res_gem: handle] "storage_scope" = "local.acc_buffer";
       attr [data_buf: handle] "storage_scope" = "local.inp_buffer";
       attr [weight_buf: handle] "storage_scope" = "local.wgt_buffer" {
-        @vta.coproc_dep_push(3, 2, dtype=int32, type="intrin")
+        @tir.vta.coproc_dep_push(3, 2, dtype=int32)
         for (i1.outer: int32, 0, 4) {
           attr [IterVar(vta: int32, (nullptr), "ThreadIndex", "vta")] "coproc_scope" = 2 {
-            @vta.coproc_dep_pop(3, 2, dtype=int32, type="intrin")
+            @tir.vta.coproc_dep_pop(3, 2, dtype=int32)
             attr [IterVar(vta, (nullptr), "ThreadIndex", "vta")] "coproc_uop_scope" = "VTAPushGEMMOp" {
-              @VTAUopLoopBegin(16, 1, 0, 0, dtype=int32, type="extern")
-              @VTAUopPush(0, 1, 0, 0, 0, 0, 0, 0, dtype=int32, type="extern")
-              @VTAUopLoopEnd(, dtype=int32, type="extern")
+              @tir.call_extern("VTAUopLoopBegin", 16, 1, 0, 0, dtype=int32)
+              @tir.vta.uop_push(0, 1, 0, 0, 0, 0, 0, 0, dtype=int32)
+              @tir.call_extern("VTAUopLoopEnd", dtype=int32)
             }
-            @vta.coproc_dep_push(2, 1, dtype=int32, type="intrin")
+            @tir.vta.coproc_dep_push(2, 1, dtype=int32)
           }
           for (ic.outer: int32, 0, 4) {
             attr [IterVar(vta, (nullptr), "ThreadIndex", "vta")] "coproc_scope" = 1 {
-              @vta.coproc_dep_pop(2, 1, dtype=int32, type="intrin")
-              @VTALoadBuffer2D(@tvm_thread_context(@VTATLSCommandHandle(, dtype=handle, type="extern"), dtype=handle, type="intrin"), data_2, (ic.outer*16), 16, 1, 16, 0, 0, 0, 0, 0, 2, dtype=int32, type="extern")
-              @VTALoadBuffer2D(@tvm_thread_context(@VTATLSCommandHandle(, dtype=handle, type="extern"), dtype=handle, type="intrin"), weight_2, ((i1.outer*1024) + (ic.outer*16)), 16, 16, 64, 0, 0, 0, 0, 0, 1, dtype=int32, type="extern")
-              @vta.coproc_dep_push(1, 2, dtype=int32, type="intrin")
+              @tir.vta.coproc_dep_pop(2, 1, dtype=int32)
+              @tir.call_extern("VTALoadBuffer2D", @tir.tvm_thread_context(@tir.vta.command_handle(, dtype=handle), dtype=handle), data_2, (ic.outer*16), 16, 1, 16, 0, 0, 0, 0, 0, 2, dtype=int32)
+              @tir.call_extern("VTALoadBuffer2D", @tir.tvm_thread_context(@tir.vta.command_handle(, dtype=handle), dtype=handle), weight_2, ((i1.outer*1024) + (ic.outer*16)), 16, 16, 64, 0, 0, 0, 0, 0, 1, dtype=int32)
+              @tir.vta.coproc_dep_push(1, 2, dtype=int32)
             }
             attr [IterVar(vta, (nullptr), "ThreadIndex", "vta")] "coproc_scope" = 2 {
-              @vta.coproc_dep_pop(1, 2, dtype=int32, type="intrin")
+              @tir.vta.coproc_dep_pop(1, 2, dtype=int32)
               attr [IterVar(vta, (nullptr), "ThreadIndex", "vta")] "coproc_uop_scope" = "VTAPushGEMMOp" {
-                @VTAUopLoopBegin(16, 1, 0, 16, dtype=int32, type="extern")
-                @VTAUopLoopBegin(16, 0, 1, 1, dtype=int32, type="extern")
-                @VTAUopPush(0, 0, 0, 0, 0, 0, 0, 0, dtype=int32, type="extern")
-                @VTAUopLoopEnd(, dtype=int32, type="extern")
-                @VTAUopLoopEnd(, dtype=int32, type="extern")
+                @tir.call_extern("VTAUopLoopBegin", 16, 1, 0, 16, dtype=int32)
+                @tir.call_extern("VTAUopLoopBegin", 16, 0, 1, 1, dtype=int32)
+                @tir.vta.uop_push(0, 0, 0, 0, 0, 0, 0, 0, dtype=int32)
+                @tir.call_extern("VTAUopLoopEnd", dtype=int32)
+                @tir.call_extern("VTAUopLoopEnd", dtype=int32)
               }
-              @vta.coproc_dep_push(2, 1, dtype=int32, type="intrin")
+              @tir.vta.coproc_dep_push(2, 1, dtype=int32)
             }
           }
-          @vta.coproc_dep_pop(2, 1, dtype=int32, type="intrin")
+          @tir.vta.coproc_dep_pop(2, 1, dtype=int32)
           attr [IterVar(vta, (nullptr), "ThreadIndex", "vta")] "coproc_scope" = 2 {
             attr [IterVar(vta, (nullptr), "ThreadIndex", "vta")] "coproc_uop_scope" = "VTAPushALUOp" {
-              @VTAUopLoopBegin(16, 1, 1, 0, dtype=int32, type="extern")
-              @VTAUopPush(1, 0, 0, 0, 0, 3, 1, 8, dtype=int32, type="extern")
-              @VTAUopLoopEnd(, dtype=int32, type="extern")
+              @tir.call_extern("VTAUopLoopBegin", 16, 1, 1, 0, dtype=int32)
+              @tir.vta.uop_push(1, 0, 0, 0, 0, 3, 1, 8, dtype=int32)
+              @tir.call_extern("VTAUopLoopEnd", dtype=int32)
             }
             attr [IterVar(vta, (nullptr), "ThreadIndex", "vta")] "coproc_uop_scope" = "VTAPushALUOp" {
-              @VTAUopLoopBegin(16, 1, 1, 0, dtype=int32, type="extern")
-              @VTAUopPush(1, 0, 0, 0, 0, 1, 1, 0, dtype=int32, type="extern")
-              @VTAUopLoopEnd(, dtype=int32, type="extern")
+              @tir.call_extern("VTAUopLoopBegin", 16, 1, 1, 0, dtype=int32)
+              @tir.vta.uop_push(1, 0, 0, 0, 0, 1, 1, 0, dtype=int32)
+              @tir.call_extern("VTAUopLoopEnd", dtype=int32)
             }
             attr [IterVar(vta, (nullptr), "ThreadIndex", "vta")] "coproc_uop_scope" = "VTAPushALUOp" {
-              @VTAUopLoopBegin(16, 1, 1, 0, dtype=int32, type="extern")
-              @VTAUopPush(1, 0, 0, 0, 0, 0, 1, 127, dtype=int32, type="extern")
-              @VTAUopLoopEnd(, dtype=int32, type="extern")
+              @tir.call_extern("VTAUopLoopBegin", 16, 1, 1, 0, dtype=int32)
+              @tir.vta.uop_push(1, 0, 0, 0, 0, 0, 1, 127, dtype=int32)
+              @tir.call_extern("VTAUopLoopEnd", dtype=int32)
             }
-            @vta.coproc_dep_push(2, 3, dtype=int32, type="intrin")
+            @tir.vta.coproc_dep_push(2, 3, dtype=int32)
           }
           attr [IterVar(vta, (nullptr), "ThreadIndex", "vta")] "coproc_scope" = 3 {
-            @vta.coproc_dep_pop(2, 3, dtype=int32, type="intrin")
+            @tir.vta.coproc_dep_pop(2, 3, dtype=int32)
             for (i1.inner: int32, 0, 16) {
-              @VTAStoreBuffer2D(@tvm_thread_context(@VTATLSCommandHandle(, dtype=handle, type="extern"), dtype=handle, type="intrin"), i1.inner, 4, res_2, ((i1.outer*16) + i1.inner), 1, 1, 1, dtype=int32, type="extern")
+              @tir.call_extern("VTAStoreBuffer2D", @tir.tvm_thread_context(@tir.vta.command_handle(, dtype=handle), dtype=handle), i1.inner, 4, res_2, ((i1.outer*16) + i1.inner), 1, 1, 1, dtype=int32)
             }
-            @vta.coproc_dep_push(3, 2, dtype=int32, type="intrin")
+            @tir.vta.coproc_dep_push(3, 2, dtype=int32)
           }
         }
-        @vta.coproc_sync(, dtype=int32, type="intrin")
-        @vta.coproc_dep_pop(3, 2, dtype=int32, type="intrin")
+        @tir.vta.coproc_sync(, dtype=int32)
+        @tir.vta.coproc_dep_pop(3, 2, dtype=int32)
       }
     }
 
diff --git a/docs/_sources/vta/tutorials/optimize/sg_execution_times.rst.txt b/docs/_sources/vta/tutorials/optimize/sg_execution_times.rst.txt
index ec433ab..6f79a02 100644
--- a/docs/_sources/vta/tutorials/optimize/sg_execution_times.rst.txt
+++ b/docs/_sources/vta/tutorials/optimize/sg_execution_times.rst.txt
@@ -5,7 +5,7 @@
 
 Computation times
 =================
-**00:03.090** total execution time for **vta_tutorials_optimize** files:
+**00:02.952** total execution time for **vta_tutorials_optimize** files:
 
-- **00:02.674**: :ref:`sphx_glr_vta_tutorials_optimize_convolution_opt.py` (``convolution_opt.py``)
-- **00:00.416**: :ref:`sphx_glr_vta_tutorials_optimize_matrix_multiply_opt.py` (``matrix_multiply_opt.py``)
+- **00:02.600**: :ref:`sphx_glr_vta_tutorials_optimize_convolution_opt.py` (``convolution_opt.py``)
+- **00:00.352**: :ref:`sphx_glr_vta_tutorials_optimize_matrix_multiply_opt.py` (``matrix_multiply_opt.py``)
diff --git a/docs/_sources/vta/tutorials/sg_execution_times.rst.txt b/docs/_sources/vta/tutorials/sg_execution_times.rst.txt
index 0a03c0e..8a25e95 100644
--- a/docs/_sources/vta/tutorials/sg_execution_times.rst.txt
+++ b/docs/_sources/vta/tutorials/sg_execution_times.rst.txt
@@ -5,7 +5,7 @@
 
 Computation times
 =================
-**00:00.766** total execution time for **vta_tutorials** files:
+**00:00.647** total execution time for **vta_tutorials** files:
 
-- **00:00.389**: :ref:`sphx_glr_vta_tutorials_matrix_multiply.py` (``matrix_multiply.py``)
-- **00:00.377**: :ref:`sphx_glr_vta_tutorials_vta_get_started.py` (``vta_get_started.py``)
+- **00:00.326**: :ref:`sphx_glr_vta_tutorials_matrix_multiply.py` (``matrix_multiply.py``)
+- **00:00.320**: :ref:`sphx_glr_vta_tutorials_vta_get_started.py` (``vta_get_started.py``)
diff --git a/docs/_sources/vta/tutorials/vta_get_started.rst.txt b/docs/_sources/vta/tutorials/vta_get_started.rst.txt
index a34b485..70fb2f3 100644
--- a/docs/_sources/vta/tutorials/vta_get_started.rst.txt
+++ b/docs/_sources/vta/tutorials/vta_get_started.rst.txt
@@ -299,33 +299,33 @@ After we construct the schedule, by default the schedule computes
  .. code-block:: none
 
     primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
-      attr = {"tir.noalias": True, "global_symbol": "main"}
-      buffers = {A: Buffer(A_2: handle, int32, [1, 64, 1, 16], []),
+      attr = {"global_symbol": "main", "tir.noalias": True}
+      buffers = {C: Buffer(C_2: handle, int8, [1, 64, 1, 16], []),
                  B: Buffer(B_2: handle, int32, [1, 64, 1, 16], []),
-                 C: Buffer(C_2: handle, int8, [1, 64, 1, 16], [])}
-      buffer_map = {C_1: C, B_1: B, A_1: A} {
+                 A: Buffer(A_2: handle, int32, [1, 64, 1, 16], [])}
+      buffer_map = {A_1: A, B_1: B, C_1: C} {
       attr [A_buf: handle] "storage_scope" = "global";
       allocate(A_buf, int32, [1024]);
       attr [B_buf: handle] "storage_scope" = "global";
       allocate(B_buf, int32, [1024]) {
         for (i1: int32, 0, 64) {
           for (i3: int32, 0, 16) {
-            A_buf[((i1*16) + i3)] = (int32*)A_2[((i1*16) + i3)])
+            A_buf[((i1*16) + i3)] = (int32*)A_2[((i1*16) + i3)]
           }
         }
         for (i1_1: int32, 0, 64) {
           for (i3_1: int32, 0, 16) {
-            B_buf[((i1_1*16) + i3_1)] = (int32*)B_2[((i1_1*16) + i3_1)])
+            B_buf[((i1_1*16) + i3_1)] = (int32*)B_2[((i1_1*16) + i3_1)]
           }
         }
         for (i1_2: int32, 0, 64) {
           for (i3_2: int32, 0, 16) {
-            A_buf[((i1_2*16) + i3_2)] = ((int32*)A_buf[((i1_2*16) + i3_2)]) + (int32*)B_buf[((i1_2*16) + i3_2)]))
+            A_buf[((i1_2*16) + i3_2)] = ((int32*)A_buf[((i1_2*16) + i3_2)] + (int32*)B_buf[((i1_2*16) + i3_2)])
           }
         }
         for (i1_3: int32, 0, 64) {
           for (i3_3: int32, 0, 16) {
-            C_2[((i1_3*16) + i3_3)] = cast(int8, (int32*)A_buf[((i1_3*16) + i3_3)]))
+            C_2[((i1_3*16) + i3_3)] = cast(int8, (int32*)A_buf[((i1_3*16) + i3_3)])
           }
         }
       }
@@ -422,27 +422,27 @@ with an :code:`env.alu` pragma.
  .. code-block:: none
 
     primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
-      attr = {"tir.noalias": True, "global_symbol": "main"}
-      buffers = {A: Buffer(A_2: handle, int32, [1, 64, 1, 16], []),
+      attr = {"global_symbol": "main", "tir.noalias": True}
+      buffers = {C: Buffer(C_2: handle, int8, [1, 64, 1, 16], []),
                  B: Buffer(B_2: handle, int32, [1, 64, 1, 16], []),
-                 C: Buffer(C_2: handle, int8, [1, 64, 1, 16], [])}
-      buffer_map = {C_1: C, B_1: B, A_1: A} {
+                 A: Buffer(A_2: handle, int32, [1, 64, 1, 16], [])}
+      buffer_map = {A_1: A, B_1: B, C_1: C} {
       attr [A_buf: handle] "storage_scope" = "local.acc_buffer" {
         attr [IterVar(vta: int32, (nullptr), "ThreadIndex", "vta")] "coproc_scope" = 2 {
-          @VTALoadBuffer2D(@tvm_thread_context(@VTATLSCommandHandle(, dtype=handle, type="extern"), dtype=handle, type="intrin"), A_2, 0, 64, 1, 64, 0, 0, 0, 0, 0, 3, dtype=int32, type="extern")
-          @VTALoadBuffer2D(@tvm_thread_context(@VTATLSCommandHandle(, dtype=handle, type="extern"), dtype=handle, type="intrin"), B_2, 0, 64, 1, 64, 0, 0, 0, 0, 64, 3, dtype=int32, type="extern")
+          @tir.call_extern("VTALoadBuffer2D", @tir.tvm_thread_context(@tir.vta.command_handle(, dtype=handle), dtype=handle), A_2, 0, 64, 1, 64, 0, 0, 0, 0, 0, 3, dtype=int32)
+          @tir.call_extern("VTALoadBuffer2D", @tir.tvm_thread_context(@tir.vta.command_handle(, dtype=handle), dtype=handle), B_2, 0, 64, 1, 64, 0, 0, 0, 0, 64, 3, dtype=int32)
           attr [IterVar(vta, (nullptr), "ThreadIndex", "vta")] "coproc_uop_scope" = "VTAPushALUOp" {
-            @VTAUopLoopBegin(64, 1, 1, 0, dtype=int32, type="extern")
-            @VTAUopPush(1, 0, 0, 64, 0, 2, 0, 0, dtype=int32, type="extern")
-            @VTAUopLoopEnd(, dtype=int32, type="extern")
+            @tir.call_extern("VTAUopLoopBegin", 64, 1, 1, 0, dtype=int32)
+            @tir.vta.uop_push(1, 0, 0, 64, 0, 2, 0, 0, dtype=int32)
+            @tir.call_extern("VTAUopLoopEnd", dtype=int32)
           }
-          @vta.coproc_dep_push(2, 3, dtype=int32, type="intrin")
+          @tir.vta.coproc_dep_push(2, 3, dtype=int32)
         }
         attr [IterVar(vta, (nullptr), "ThreadIndex", "vta")] "coproc_scope" = 3 {
-          @vta.coproc_dep_pop(2, 3, dtype=int32, type="intrin")
-          @VTAStoreBuffer2D(@tvm_thread_context(@VTATLSCommandHandle(, dtype=handle, type="extern"), dtype=handle, type="intrin"), 0, 4, C_2, 0, 64, 1, 64, dtype=int32, type="extern")
+          @tir.vta.coproc_dep_pop(2, 3, dtype=int32)
+          @tir.call_extern("VTAStoreBuffer2D", @tir.tvm_thread_context(@tir.vta.command_handle(, dtype=handle), dtype=handle), 0, 4, C_2, 0, 64, 1, 64, dtype=int32)
         }
-        @vta.coproc_sync(, dtype=int32, type="intrin")
+        @tir.vta.coproc_sync(, dtype=int32)
       }
     }
 
diff --git a/docs/api/doxygen/algorithm_8h.html b/docs/api/doxygen/algorithm_8h.html
index 9f82ef7..c5c2419 100644
--- a/docs/api/doxygen/algorithm_8h.html
+++ b/docs/api/doxygen/algorithm_8h.html
@@ -99,7 +99,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
 </div><div class="textblock"><div class="dynheader">
 Include dependency graph for algorithm.h:</div>
 <div class="dyncontent">
-<div class="center"><iframe scrolling="no" frameborder="0" src="algorithm_8h__incl.svg" width="5171" height="1530"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<div class="center"><iframe scrolling="no" frameborder="0" src="algorithm_8h__incl.svg" width="4992" height="1515"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </div>
 </div>
 </div>
diff --git a/docs/api/doxygen/algorithm_8h__incl.svg b/docs/api/doxygen/algorithm_8h__incl.svg
index 11aae6c..83018f9 100644
--- a/docs/api/doxygen/algorithm_8h__incl.svg
+++ b/docs/api/doxygen/algorithm_8h__incl.svg
@@ -4,1468 +4,1468 @@
 <!-- Generated by graphviz version 2.38.0 (20140413.2041)
  -->
 <!-- Title: include/tvm/relay/attrs/algorithm.h Pages: 1 -->
-<svg width="3878pt" height="1147pt"
- viewBox="0.00 0.00 3878.00 1147.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
-<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 1143)">
+<svg width="3744pt" height="1136pt"
+ viewBox="0.00 0.00 3744.00 1136.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 1132)">
 <title>include/tvm/relay/attrs/algorithm.h</title>
-<polygon fill="white" stroke="none" points="-4,4 -4,-1143 3874,-1143 3874,4 -4,4"/>
+<polygon fill="white" stroke="none" points="-4,4 -4,-1132 3740,-1132 3740,4 -4,4"/>
 <!-- Node1 -->
 <g id="node1" class="node"><title>Node1</title>
-<polygon fill="#bfbfbf" stroke="black" points="2877,-1108.5 2877,-1138.5 3001,-1138.5 3001,-1108.5 2877,-1108.5"/>
-<text text-anchor="start" x="2885" y="-1126.5" font-family="Helvetica,sans-Serif" font-size="10.00">include/tvm/relay/attrs</text>
-<text text-anchor="middle" x="2939" y="-1115.5" font-family="Helvetica,sans-Serif" font-size="10.00">/algorithm.h</text>
+<polygon fill="#bfbfbf" stroke="black" points="269,-1097.5 269,-1127.5 393,-1127.5 393,-1097.5 269,-1097.5"/>
+<text text-anchor="start" x="277" y="-1115.5" font-family="Helvetica,sans-Serif" font-size="10.00">include/tvm/relay/attrs</text>
+<text text-anchor="middle" x="331" y="-1104.5" font-family="Helvetica,sans-Serif" font-size="10.00">/algorithm.h</text>
 </g>
 <!-- Node2 -->
 <g id="node2" class="node"><title>Node2</title>
 <g id="a_node2"><a xlink:href="ir_2attrs_8h.html" target="_top" xlink:title="Helpers for attribute objects. ">
-<polygon fill="white" stroke="black" points="2943.5,-772.5 2943.5,-791.5 3022.5,-791.5 3022.5,-772.5 2943.5,-772.5"/>
-<text text-anchor="middle" x="2983" y="-779.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/ir/attrs.h</text>
+<polygon fill="white" stroke="black" points="996.5,-761.5 996.5,-780.5 1075.5,-780.5 1075.5,-761.5 996.5,-761.5"/>
+<text text-anchor="middle" x="1036" y="-768.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/ir/attrs.h</text>
 </a>
 </g>
 </g>
 <!-- Node1&#45;&gt;Node2 -->
 <g id="edge1" class="edge"><title>Node1&#45;&gt;Node2</title>
-<path fill="none" stroke="midnightblue" d="M3001.36,-1112.3C3091.89,-1095.69 3249,-1059.34 3249,-1007 3249,-1007 3249,-1007 3249,-893 3249,-800.526 3110.02,-784.265 3032.97,-782.355"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="3032.59,-778.848 3022.54,-782.174 3032.47,-785.847 3032.59,-778.848"/>
+<path fill="none" stroke="midnightblue" d="M329.406,-1097.46C326.437,-1064.41 323.559,-980.639 363,-929 439.464,-828.888 843.331,-787.376 986.017,-775.689"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="986.542,-779.158 996.229,-774.867 985.98,-772.181 986.542,-779.158"/>
 </g>
 <!-- Node13 -->
 <g id="node13" class="node"><title>Node13</title>
-<polygon fill="white" stroke="#bfbfbf" points="3164,-6 3164,-25 3208,-25 3208,-6 3164,-6"/>
-<text text-anchor="middle" x="3186" y="-13" font-family="Helvetica,sans-Serif" font-size="10.00">string</text>
+<polygon fill="white" stroke="#bfbfbf" points="586,-6 586,-25 630,-25 630,-6 586,-6"/>
+<text text-anchor="middle" x="608" y="-13" font-family="Helvetica,sans-Serif" font-size="10.00">string</text>
 </g>
 <!-- Node1&#45;&gt;Node13 -->
 <g id="edge220" class="edge"><title>Node1&#45;&gt;Node13</title>
-<path fill="none" stroke="midnightblue" d="M3001.17,-1121.34C3209.51,-1116.63 3870,-1094.8 3870,-1007 3870,-1007 3870,-1007 3870,-132 3870,-95.5528 3852.03,-84.387 3820,-67 3766.84,-38.1426 3340.13,-21.6859 3218.35,-17.5448"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="3218.28,-14.0408 3208.17,-17.2035 3218.05,-21.0368 3218.28,-14.0408"/>
+<path fill="none" stroke="midnightblue" d="M268.741,-1102.77C172.859,-1087.55 0,-1052.3 0,-996 0,-996 0,-996 0,-132 0,-95.8221 17.3813,-84.5812 49,-67 140.29,-16.2392 470.718,-15.0809 575.728,-16.0455"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="575.694,-19.5453 585.73,-16.1505 575.768,-12.5457 575.694,-19.5453"/>
 </g>
 <!-- Node34 -->
 <g id="node34" class="node"><title>Node34</title>
 <g id="a_node34"><a xlink:href="base_8h.html" target="_top" xlink:title="Base classes for the Relay IR. ">
-<polygon fill="white" stroke="black" points="1487.5,-940.5 1487.5,-959.5 1584.5,-959.5 1584.5,-940.5 1487.5,-940.5"/>
-<text text-anchor="middle" x="1536" y="-947.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/relay/base.h</text>
+<polygon fill="white" stroke="black" points="1942.5,-929.5 1942.5,-948.5 2039.5,-948.5 2039.5,-929.5 1942.5,-929.5"/>
+<text text-anchor="middle" x="1991" y="-936.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/relay/base.h</text>
 </a>
 </g>
 </g>
 <!-- Node1&#45;&gt;Node34 -->
-<g id="edge129" class="edge"><title>Node1&#45;&gt;Node34</title>
-<path fill="none" stroke="midnightblue" d="M2876.73,-1122.39C2647.03,-1121.25 1848.42,-1110.42 1615,-1016 1588.88,-1005.44 1564.8,-983.007 1550.26,-967.473"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1552.51,-964.745 1543.21,-959.672 1547.32,-969.438 1552.51,-964.745"/>
+<g id="edge128" class="edge"><title>Node1&#45;&gt;Node34</title>
+<path fill="none" stroke="midnightblue" d="M393.409,-1111.52C648.147,-1110.97 1609.12,-1102.33 1894,-1005 1924.99,-994.412 1955.5,-971.299 1973.79,-955.688"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1976.53,-957.941 1981.74,-948.714 1971.91,-952.68 1976.53,-957.941"/>
 </g>
 <!-- Node38 -->
 <g id="node38" class="node"><title>Node38</title>
 <g id="a_node38"><a xlink:href="relay_2expr_8h.html" target="_top" xlink:title="Relay expression language. ">
-<polygon fill="white" stroke="black" points="2582,-1052.5 2582,-1071.5 2676,-1071.5 2676,-1052.5 2582,-1052.5"/>
-<text text-anchor="middle" x="2629" y="-1059.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/relay/expr.h</text>
+<polygon fill="white" stroke="black" points="540,-1041.5 540,-1060.5 634,-1060.5 634,-1041.5 540,-1041.5"/>
+<text text-anchor="middle" x="587" y="-1048.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/relay/expr.h</text>
 </a>
 </g>
 </g>
 <!-- Node1&#45;&gt;Node38 -->
-<g id="edge157" class="edge"><title>Node1&#45;&gt;Node38</title>
-<path fill="none" stroke="midnightblue" d="M2876.64,-1110.53C2820.17,-1099.69 2737.77,-1083.88 2683.78,-1073.51"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2684.19,-1070.03 2673.71,-1071.58 2682.87,-1076.9 2684.19,-1070.03"/>
+<g id="edge156" class="edge"><title>Node1&#45;&gt;Node38</title>
+<path fill="none" stroke="midnightblue" d="M391.344,-1097.47C436.956,-1086.87 498.652,-1072.53 540.48,-1062.81"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="541.453,-1066.18 550.401,-1060.51 539.868,-1059.36 541.453,-1066.18"/>
 </g>
 <!-- Node3 -->
 <g id="node3" class="node"><title>Node3</title>
-<polygon fill="white" stroke="#bfbfbf" points="3087.5,-716.5 3087.5,-735.5 3182.5,-735.5 3182.5,-716.5 3087.5,-716.5"/>
-<text text-anchor="middle" x="3135" y="-723.5" font-family="Helvetica,sans-Serif" font-size="10.00">dmlc/common.h</text>
+<polygon fill="white" stroke="#bfbfbf" points="874.5,-705.5 874.5,-724.5 969.5,-724.5 969.5,-705.5 874.5,-705.5"/>
+<text text-anchor="middle" x="922" y="-712.5" font-family="Helvetica,sans-Serif" font-size="10.00">dmlc/common.h</text>
 </g>
 <!-- Node2&#45;&gt;Node3 -->
 <g id="edge2" class="edge"><title>Node2&#45;&gt;Node3</title>
-<path fill="none" stroke="midnightblue" d="M3007.08,-772.444C3032.54,-763.399 3072.89,-749.064 3101.46,-738.916"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="3102.78,-742.162 3111.03,-735.516 3100.43,-735.566 3102.78,-742.162"/>
+<path fill="none" stroke="midnightblue" d="M1017.94,-761.444C999.425,-752.675 970.419,-738.935 949.145,-728.858"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="950.515,-725.634 939.979,-724.516 947.518,-731.96 950.515,-725.634"/>
 </g>
 <!-- Node4 -->
 <g id="node4" class="node"><title>Node4</title>
 <g id="a_node4"><a xlink:href="ir_2expr_8h.html" target="_top" xlink:title="Base expr nodes in TVM. ">
-<polygon fill="white" stroke="black" points="2155.5,-716.5 2155.5,-735.5 2232.5,-735.5 2232.5,-716.5 2155.5,-716.5"/>
-<text text-anchor="middle" x="2194" y="-723.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/ir/expr.h</text>
+<polygon fill="white" stroke="black" points="2259.5,-705.5 2259.5,-724.5 2336.5,-724.5 2336.5,-705.5 2259.5,-705.5"/>
+<text text-anchor="middle" x="2298" y="-712.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/ir/expr.h</text>
 </a>
 </g>
 </g>
 <!-- Node2&#45;&gt;Node4 -->
 <g id="edge3" class="edge"><title>Node2&#45;&gt;Node4</title>
-<path fill="none" stroke="midnightblue" d="M2943.33,-778.285C2811.48,-769.261 2388.04,-740.28 2243.16,-730.364"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2243.03,-726.847 2232.81,-729.656 2242.55,-733.831 2243.03,-726.847"/>
+<path fill="none" stroke="midnightblue" d="M1075.5,-768.31C1260.99,-760.373 2045.05,-726.824 2249.23,-718.087"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2249.5,-721.578 2259.34,-717.654 2249.2,-714.585 2249.5,-721.578"/>
 </g>
 <!-- Node2&#45;&gt;Node13 -->
-<g id="edge124" class="edge"><title>Node2&#45;&gt;Node13</title>
-<path fill="none" stroke="midnightblue" d="M3022.62,-781.306C3067.35,-779.774 3140.47,-771.414 3191,-736 3302.38,-657.929 3469.81,-341.652 3504,-210 3519.97,-148.485 3545.27,-115.336 3504,-67 3467.42,-24.1486 3291.27,-17.4699 3218.04,-16.5668"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="3218.04,-13.0667 3208.01,-16.4718 3217.97,-20.0664 3218.04,-13.0667"/>
+<g id="edge123" class="edge"><title>Node2&#45;&gt;Node13</title>
+<path fill="none" stroke="midnightblue" d="M996.281,-768.291C955.258,-764.875 890.803,-754.76 844,-725 807.684,-701.909 818.117,-675.231 784,-649 652.7,-548.049 581.917,-589.623 442,-501 388.524,-467.129 228,-376.3 228,-313 228,-313 228,-313 228,-132 228,-101.205 228.035,-86.3399 252,-67 301.412,-27.1238 497.099,-18.7337 575.382,-16.9694"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="575.694,-20.4639 585.621,-16.7618 575.552,-13.4653 575.694,-20.4639"/>
 </g>
 <!-- Node14 -->
 <g id="node14" class="node"><title>Node14</title>
-<polygon fill="white" stroke="#bfbfbf" points="2430,-6 2430,-25 2498,-25 2498,-6 2430,-6"/>
-<text text-anchor="middle" x="2464" y="-13" font-family="Helvetica,sans-Serif" font-size="10.00">type_traits</text>
+<polygon fill="white" stroke="#bfbfbf" points="2600,-6 2600,-25 2668,-25 2668,-6 2600,-6"/>
+<text text-anchor="middle" x="2634" y="-13" font-family="Helvetica,sans-Serif" font-size="10.00">type_traits</text>
 </g>
 <!-- Node2&#45;&gt;Node14 -->
-<g id="edge125" class="edge"><title>Node2&#45;&gt;Node14</title>
-<path fill="none" stroke="midnightblue" d="M2990.49,-772.34C3005.2,-755.187 3038.65,-715.478 3064,-680 3165.14,-538.473 3279.94,-467.494 3200,-313 3065.34,-52.7543 2646.33,-20.2826 2508.47,-16.7426"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2508.35,-13.2393 2498.28,-16.5213 2508.2,-20.2376 2508.35,-13.2393"/>
+<g id="edge124" class="edge"><title>Node2&#45;&gt;Node14</title>
+<path fill="none" stroke="midnightblue" d="M1053.29,-761.428C1068.8,-753.277 1091.7,-740.049 1109,-725 1182.11,-661.391 1189.11,-633.968 1248,-557 1295.28,-495.202 1283.74,-456.38 1349,-414 1632.12,-230.133 1796.06,-424.725 2094,-266 2204.67,-207.042 2181.93,-123.249 2294,-67 2345.04,-41.3813 2509.83,-25.841 2589.73,-19.6297"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2590.26,-23.0998 2599.97,-18.8495 2589.73,-16.1201 2590.26,-23.0998"/>
 </g>
 <!-- Node15 -->
 <g id="node15" class="node"><title>Node15</title>
-<polygon fill="white" stroke="#bfbfbf" points="288,-6 288,-25 330,-25 330,-6 288,-6"/>
-<text text-anchor="middle" x="309" y="-13" font-family="Helvetica,sans-Serif" font-size="10.00">utility</text>
+<polygon fill="white" stroke="#bfbfbf" points="1752,-6 1752,-25 1794,-25 1794,-6 1752,-6"/>
+<text text-anchor="middle" x="1773" y="-13" font-family="Helvetica,sans-Serif" font-size="10.00">utility</text>
 </g>
 <!-- Node2&#45;&gt;Node15 -->
-<g id="edge127" class="edge"><title>Node2&#45;&gt;Node15</title>
-<path fill="none" stroke="midnightblue" d="M2943.5,-774.239C2896.84,-766.06 2816.97,-751.455 2749,-736 2684.25,-721.277 2525.56,-670.569 2460,-660 2207.21,-619.246 2140.42,-642.067 1885,-624 1361.28,-586.954 1217.21,-643.835 709,-512 645.328,-495.483 632.702,-481.34 572,-456 384.748,-377.832 306.295,-408.783 152,-277 80.3088,-215.769 54.3091,-132.626 122,-67 164.422,-25.8714 236.703,-17.4106 277.871,-16.1493"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="277.94,-19.6487 287.874,-15.9668 277.812,-12.6499 277.94,-19.6487"/>
+<g id="edge126" class="edge"><title>Node2&#45;&gt;Node15</title>
+<path fill="none" stroke="midnightblue" d="M1041.28,-761.231C1051.73,-743.043 1074,-699.601 1074,-660 1074,-660 1074,-660 1074,-546 1074,-417.726 1187.95,-430.735 1256,-322 1324.14,-213.121 1280.43,-130.632 1392,-67 1451.51,-33.0615 1660.68,-21.077 1741.14,-17.6629"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1741.76,-21.1407 1751.61,-17.2367 1741.47,-14.1465 1741.76,-21.1407"/>
 </g>
 <!-- Node16 -->
 <g id="node16" class="node"><title>Node16</title>
 <g id="a_node16"><a xlink:href="packed__func_8h.html" target="_top" xlink:title="Type&#45;erased function used across TVM API. ">
-<polygon fill="white" stroke="black" points="1764.5,-246.5 1764.5,-276.5 1877.5,-276.5 1877.5,-246.5 1764.5,-246.5"/>
-<text text-anchor="start" x="1772.5" y="-264.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/runtime/packed</text>
-<text text-anchor="middle" x="1821" y="-253.5" font-family="Helvetica,sans-Serif" font-size="10.00">_func.h</text>
+<polygon fill="white" stroke="black" points="1972.5,-235.5 1972.5,-265.5 2085.5,-265.5 2085.5,-235.5 1972.5,-235.5"/>
+<text text-anchor="start" x="1980.5" y="-253.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/runtime/packed</text>
+<text text-anchor="middle" x="2029" y="-242.5" font-family="Helvetica,sans-Serif" font-size="10.00">_func.h</text>
 </a>
 </g>
 </g>
 <!-- Node2&#45;&gt;Node16 -->
-<g id="edge122" class="edge"><title>Node2&#45;&gt;Node16</title>
-<path fill="none" stroke="midnightblue" d="M2979.66,-772.471C2963.55,-732.633 2889.74,-563.792 2769,-492 2730.16,-468.903 2096.72,-324.702 1887.49,-277.464"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1888.08,-274.009 1877.55,-275.222 1886.54,-280.837 1888.08,-274.009"/>
+<g id="edge121" class="edge"><title>Node2&#45;&gt;Node16</title>
+<path fill="none" stroke="midnightblue" d="M1049.2,-761.46C1060.84,-753.33 1077.53,-740.116 1088,-725 1105.53,-699.681 1112,-690.795 1112,-660 1112,-660 1112,-660 1112,-546 1112,-367.295 1751.29,-281.7 1962.31,-258.314"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1962.77,-261.785 1972.33,-257.215 1962.01,-254.827 1962.77,-261.785"/>
 </g>
 <!-- Node20 -->
 <g id="node20" class="node"><title>Node20</title>
-<polygon fill="white" stroke="#bfbfbf" points="3328.5,-123.5 3328.5,-142.5 3421.5,-142.5 3421.5,-123.5 3328.5,-123.5"/>
-<text text-anchor="middle" x="3375" y="-130.5" font-family="Helvetica,sans-Serif" font-size="10.00">unordered_map</text>
+<polygon fill="white" stroke="#bfbfbf" points="334.5,-123.5 334.5,-142.5 427.5,-142.5 427.5,-123.5 334.5,-123.5"/>
+<text text-anchor="middle" x="381" y="-130.5" font-family="Helvetica,sans-Serif" font-size="10.00">unordered_map</text>
 </g>
 <!-- Node2&#45;&gt;Node20 -->
-<g id="edge126" class="edge"><title>Node2&#45;&gt;Node20</title>
-<path fill="none" stroke="midnightblue" d="M2995.78,-772.266C3030.13,-748.762 3122.28,-685.562 3128,-680 3171.3,-637.876 3259.23,-511.951 3282,-456 3282.09,-455.77 3349.97,-220.676 3369.69,-152.39"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="3373.1,-153.184 3372.52,-142.606 3366.38,-151.242 3373.1,-153.184"/>
+<g id="edge125" class="edge"><title>Node2&#45;&gt;Node20</title>
+<path fill="none" stroke="midnightblue" d="M996.187,-768.216C958.841,-764.68 903.174,-754.421 865,-725 834.261,-701.309 847.465,-678.286 822,-649 724.241,-536.572 653.818,-557.377 556,-445 471.303,-347.697 408.612,-202.933 388.161,-152.287"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="391.301,-150.71 384.35,-142.715 384.797,-153.3 391.301,-150.71"/>
 </g>
 <!-- Node21 -->
 <g id="node21" class="node"><title>Node21</title>
-<polygon fill="white" stroke="#bfbfbf" points="1187.5,-123.5 1187.5,-142.5 1234.5,-142.5 1234.5,-123.5 1187.5,-123.5"/>
-<text text-anchor="middle" x="1211" y="-130.5" font-family="Helvetica,sans-Serif" font-size="10.00">vector</text>
+<polygon fill="white" stroke="#bfbfbf" points="1388.5,-123.5 1388.5,-142.5 1435.5,-142.5 1435.5,-123.5 1388.5,-123.5"/>
+<text text-anchor="middle" x="1412" y="-130.5" font-family="Helvetica,sans-Serif" font-size="10.00">vector</text>
 </g>
 <!-- Node2&#45;&gt;Node21 -->
-<g id="edge128" class="edge"><title>Node2&#45;&gt;Node21</title>
-<path fill="none" stroke="midnightblue" d="M2951.99,-772.446C2766.66,-721.183 1804.01,-452.863 1522,-333 1425.26,-291.884 1406.91,-269.116 1320,-210 1288.96,-188.884 1253.33,-163.9 1231.49,-148.5"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1233.22,-145.436 1223.03,-142.527 1229.18,-151.154 1233.22,-145.436"/>
+<g id="edge127" class="edge"><title>Node2&#45;&gt;Node21</title>
+<path fill="none" stroke="midnightblue" d="M1029.03,-761.316C997.168,-720.974 865.04,-546.664 822,-378 801.433,-297.403 802.826,-248.145 898,-179 936.793,-150.816 1270.27,-138.298 1378.35,-134.956"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1378.48,-138.454 1388.37,-134.653 1378.27,-131.457 1378.48,-138.454"/>
 </g>
 <!-- Node23 -->
 <g id="node23" class="node"><title>Node23</title>
-<polygon fill="white" stroke="#bfbfbf" points="2068.5,-185 2068.5,-204 2131.5,-204 2131.5,-185 2068.5,-185"/>
-<text text-anchor="middle" x="2100" y="-192" font-family="Helvetica,sans-Serif" font-size="10.00">functional</text>
+<polygon fill="white" stroke="#bfbfbf" points="940.5,-179.5 940.5,-198.5 1003.5,-198.5 1003.5,-179.5 940.5,-179.5"/>
+<text text-anchor="middle" x="972" y="-186.5" font-family="Helvetica,sans-Serif" font-size="10.00">functional</text>
 </g>
 <!-- Node2&#45;&gt;Node23 -->
-<g id="edge123" class="edge"><title>Node2&#45;&gt;Node23</title>
-<path fill="none" stroke="midnightblue" d="M2983,-772.442C2983,-753.936 2983,-708.812 2983,-671 2983,-671 2983,-671 2983,-613 2983,-514.753 2886.41,-534.843 2798,-492 2609.49,-400.65 2547.72,-413.413 2343,-369 2268.37,-352.81 2236.19,-378.681 2175,-333 2134.62,-302.854 2113.4,-244.296 2104.76,-214.261"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2108.04,-213.001 2102.05,-204.264 2101.29,-214.832 2108.04,-213.001"/>
+<g id="edge122" class="edge"><title>Node2&#45;&gt;Node23</title>
+<path fill="none" stroke="midnightblue" d="M1033.07,-761.399C1015.67,-709.972 927.044,-442.54 936,-358 941.849,-302.785 958.225,-239.045 966.727,-208.369"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="970.125,-209.214 969.469,-198.64 963.388,-207.315 970.125,-209.214"/>
 </g>
 <!-- Node28 -->
 <g id="node28" class="node"><title>Node28</title>
 <g id="a_node28"><a xlink:href="structural__equal_8h.html" target="_top" xlink:title="Structural equality comparison. ">
-<polygon fill="white" stroke="black" points="2318.5,-425.5 2318.5,-455.5 2429.5,-455.5 2429.5,-425.5 2318.5,-425.5"/>
-<text text-anchor="start" x="2326.5" y="-443.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/node/structural</text>
-<text text-anchor="middle" x="2374" y="-432.5" font-family="Helvetica,sans-Serif" font-size="10.00">_equal.h</text>
+<polygon fill="white" stroke="black" points="2706.5,-414.5 2706.5,-444.5 2817.5,-444.5 2817.5,-414.5 2706.5,-414.5"/>
+<text text-anchor="start" x="2714.5" y="-432.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/node/structural</text>
+<text text-anchor="middle" x="2762" y="-421.5" font-family="Helvetica,sans-Serif" font-size="10.00">_equal.h</text>
 </a>
 </g>
 </g>
 <!-- Node2&#45;&gt;Node28 -->
-<g id="edge120" class="edge"><title>Node2&#45;&gt;Node28</title>
-<path fill="none" stroke="midnightblue" d="M2971.24,-772.494C2943.23,-752.321 2870.16,-700.259 2807,-660 2724.34,-607.312 2704.27,-592.643 2617,-548 2549.53,-513.485 2468.41,-479.345 2419.06,-459.354"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2420.21,-456.044 2409.62,-455.548 2417.59,-462.535 2420.21,-456.044"/>
+<g id="edge119" class="edge"><title>Node2&#45;&gt;Node28</title>
+<path fill="none" stroke="midnightblue" d="M1075.7,-761.642C1253.83,-724.208 1980.76,-572.269 2211,-537 2341.12,-517.067 2380.32,-543.257 2505,-501 2521.6,-495.372 2522.7,-487.457 2539,-481 2553.66,-475.192 2636.04,-457.137 2696.42,-444.277"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2697.23,-447.683 2706.29,-442.18 2695.78,-440.836 2697.23,-447.683"/>
 </g>
 <!-- Node30 -->
 <g id="node30" class="node"><title>Node30</title>
 <g id="a_node30"><a xlink:href="structural__hash_8h.html" target="_top" xlink:title="tvm/node/structural\l_hash.h">
-<polygon fill="white" stroke="black" points="2112.5,-425.5 2112.5,-455.5 2223.5,-455.5 2223.5,-425.5 2112.5,-425.5"/>
-<text text-anchor="start" x="2120.5" y="-443.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/node/structural</text>
-<text text-anchor="middle" x="2168" y="-432.5" font-family="Helvetica,sans-Serif" font-size="10.00">_hash.h</text>
+<polygon fill="white" stroke="black" points="1898.5,-414.5 1898.5,-444.5 2009.5,-444.5 2009.5,-414.5 1898.5,-414.5"/>
+<text text-anchor="start" x="1906.5" y="-432.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/node/structural</text>
+<text text-anchor="middle" x="1954" y="-421.5" font-family="Helvetica,sans-Serif" font-size="10.00">_hash.h</text>
 </a>
 </g>
 </g>
 <!-- Node2&#45;&gt;Node30 -->
-<g id="edge121" class="edge"><title>Node2&#45;&gt;Node30</title>
-<path fill="none" stroke="midnightblue" d="M2963.43,-772.429C2944.02,-763.767 2913.56,-749.734 2888,-736 2816.86,-697.777 2650.68,-581.069 2577,-548 2500.18,-513.519 2477.4,-513.567 2396,-492 2341.6,-477.586 2279.16,-463.955 2233.39,-454.496"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2234.09,-451.068 2223.59,-452.483 2232.68,-457.925 2234.09,-451.068"/>
+<g id="edge120" class="edge"><title>Node2&#45;&gt;Node30</title>
+<path fill="none" stroke="midnightblue" d="M1073.17,-761.48C1107.86,-753.22 1160.89,-739.795 1206,-725 1480.79,-634.875 1800.8,-497.704 1912.79,-448.693"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1914.29,-451.856 1922.05,-444.635 1911.48,-445.444 1914.29,-451.856"/>
 </g>
 <!-- Node5 -->
 <g id="node5" class="node"><title>Node5</title>
 <g id="a_node5"><a xlink:href="span_8h.html" target="_top" xlink:title="Span information for debugging purposes. ">
-<polygon fill="white" stroke="black" points="1894,-604.5 1894,-623.5 1974,-623.5 1974,-604.5 1894,-604.5"/>
-<text text-anchor="middle" x="1934" y="-611.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/ir/span.h</text>
+<polygon fill="white" stroke="black" points="2275,-593.5 2275,-612.5 2355,-612.5 2355,-593.5 2275,-593.5"/>
+<text text-anchor="middle" x="2315" y="-600.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/ir/span.h</text>
 </a>
 </g>
 </g>
 <!-- Node4&#45;&gt;Node5 -->
 <g id="edge4" class="edge"><title>Node4&#45;&gt;Node5</title>
-<path fill="none" stroke="midnightblue" d="M2173.6,-716.368C2127.84,-697.01 2016.88,-650.065 1963.56,-627.507"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1964.88,-624.264 1954.31,-623.591 1962.15,-630.711 1964.88,-624.264"/>
+<path fill="none" stroke="midnightblue" d="M2299.33,-705.368C2302.08,-687.617 2308.41,-646.666 2312.13,-622.597"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2315.6,-623.009 2313.67,-612.591 2308.69,-621.94 2315.6,-623.009"/>
 </g>
 <!-- Node6 -->
 <g id="node6" class="node"><title>Node6</title>
 <g id="a_node6"><a xlink:href="node_8h.html" target="_top" xlink:title="Definitions and helper macros for IR/AST nodes. ">
-<polygon fill="white" stroke="black" points="1792,-548.5 1792,-567.5 1890,-567.5 1890,-548.5 1792,-548.5"/>
-<text text-anchor="middle" x="1841" y="-555.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/node/node.h</text>
+<polygon fill="white" stroke="black" points="2220,-537.5 2220,-556.5 2318,-556.5 2318,-537.5 2220,-537.5"/>
+<text text-anchor="middle" x="2269" y="-544.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/node/node.h</text>
 </a>
 </g>
 </g>
 <!-- Node4&#45;&gt;Node6 -->
-<g id="edge114" class="edge"><title>Node4&#45;&gt;Node6</title>
-<path fill="none" stroke="midnightblue" d="M2155.36,-720.926C2092.64,-712.584 1967.43,-688.55 1885,-624 1868.96,-611.439 1856.65,-591.382 1849.18,-576.867"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1852.32,-575.324 1844.8,-567.864 1846.03,-578.387 1852.32,-575.324"/>
+<g id="edge113" class="edge"><title>Node4&#45;&gt;Node6</title>
+<path fill="none" stroke="midnightblue" d="M2294.24,-705.302C2287.02,-688.084 2271.45,-648.265 2266,-613 2264.64,-604.216 2265.7,-601.884 2266,-593 2266.29,-584.383 2266.88,-574.854 2267.47,-566.791"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2270.97,-566.859 2268.25,-556.618 2263.99,-566.318 2270.97,-566.859"/>
 </g>
 <!-- Node7 -->
 <g id="node7" class="node"><title>Node7</title>
 <g id="a_node7"><a xlink:href="node_2container_8h.html" target="_top" xlink:title="Array/Map container in the DSL graph. ">
-<polygon fill="white" stroke="red" points="1856.5,-369.5 1856.5,-388.5 1975.5,-388.5 1975.5,-369.5 1856.5,-369.5"/>
-<text text-anchor="middle" x="1916" y="-376.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/node/container.h</text>
+<polygon fill="white" stroke="black" points="1810.5,-358.5 1810.5,-377.5 1929.5,-377.5 1929.5,-358.5 1810.5,-358.5"/>
+<text text-anchor="middle" x="1870" y="-365.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/node/container.h</text>
 </a>
 </g>
 </g>
 <!-- Node4&#45;&gt;Node7 -->
-<g id="edge113" class="edge"><title>Node4&#45;&gt;Node7</title>
-<path fill="none" stroke="midnightblue" d="M2198.86,-716.473C2208.68,-698.386 2230,-654.585 2230,-615 2230,-615 2230,-615 2230,-557 2230,-438.81 2082.78,-520.902 1984,-456 1960.15,-440.332 1939.38,-414.272 1927.27,-397.114"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1929.99,-394.9 1921.46,-388.619 1924.21,-398.85 1929.99,-394.9"/>
+<g id="edge112" class="edge"><title>Node4&#45;&gt;Node7</title>
+<path fill="none" stroke="midnightblue" d="M2273.9,-705.451C2202.83,-678.953 1992.63,-591.087 1889,-445 1876.98,-428.056 1872.54,-404.274 1870.91,-387.847"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1874.4,-387.541 1870.18,-377.824 1867.42,-388.053 1874.4,-387.541"/>
 </g>
 <!-- Node11 -->
 <g id="node11" class="node"><title>Node11</title>
 <g id="a_node11"><a xlink:href="object_8h.html" target="_top" xlink:title="A managed object in the TVM runtime. ">
-<polygon fill="white" stroke="red" points="1500.5,-67.5 1500.5,-86.5 1617.5,-86.5 1617.5,-67.5 1500.5,-67.5"/>
-<text text-anchor="middle" x="1559" y="-74.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/runtime/object.h</text>
+<polygon fill="white" stroke="red" points="2866.5,-67.5 2866.5,-86.5 2983.5,-86.5 2983.5,-67.5 2866.5,-67.5"/>
+<text text-anchor="middle" x="2925" y="-74.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/runtime/object.h</text>
 </a>
 </g>
 </g>
 <!-- Node4&#45;&gt;Node11 -->
-<g id="edge115" class="edge"><title>Node4&#45;&gt;Node11</title>
-<path fill="none" stroke="midnightblue" d="M2155.31,-719.813C2094.81,-711.654 1974.17,-695.182 1872,-680 1634.57,-644.721 1032.91,-587.316 805,-512 613.093,-448.582 538.715,-398.192 465,-210 395.473,-32.498 497.256,-156.679 790,-123 1047.35,-93.3928 1355.52,-82.8268 1490.27,-79.4376"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1490.53,-82.9323 1500.44,-79.1872 1490.36,-75.9344 1490.53,-82.9323"/>
+<g id="edge114" class="edge"><title>Node4&#45;&gt;Node11</title>
+<path fill="none" stroke="midnightblue" d="M2336.66,-713.691C2454.37,-711.798 2817.75,-698.887 3103,-613 3188.22,-587.341 3224.36,-577.994 3269,-501 3353.89,-354.59 3367.26,-211.49 3223,-123 3220.55,-121.495 3080.57,-100.783 2993.35,-87.9896"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2993.65,-84.4966 2983.25,-86.5089 2992.64,-91.4226 2993.65,-84.4966"/>
 </g>
 <!-- Node4&#45;&gt;Node13 -->
-<g id="edge118" class="edge"><title>Node4&#45;&gt;Node13</title>
-<path fill="none" stroke="midnightblue" d="M2232.89,-716.776C2234.28,-716.51 2235.65,-716.25 2237,-716 2335.71,-697.712 2363.79,-708.696 2460,-680 2645.87,-624.559 3186,-389.46 3186,-195.5 3186,-195.5 3186,-195.5 3186,-132 3186,-97.8637 3186,-58.0522 3186,-35.2092"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="3189.5,-35.1934 3186,-25.1934 3182.5,-35.1934 3189.5,-35.1934"/>
+<g id="edge117" class="edge"><title>Node4&#45;&gt;Node13</title>
+<path fill="none" stroke="midnightblue" d="M2259.24,-712.484C2086.45,-704.897 1370.44,-664.627 811,-501 684.307,-463.944 626.636,-443.052 574,-322 528.517,-217.397 490.224,-167.051 545,-67 553.423,-51.6152 568.663,-39.2054 582.043,-30.5378"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="584.181,-33.3339 590.903,-25.1445 580.541,-27.3546 584.181,-33.3339"/>
 </g>
 <!-- Node4&#45;&gt;Node14 -->
-<g id="edge119" class="edge"><title>Node4&#45;&gt;Node14</title>
-<path fill="none" stroke="midnightblue" d="M2208.69,-716.399C2287.16,-670.116 2655.5,-448.486 2713,-333 2756.06,-246.508 3057.52,-484.114 2619,-67 2588.99,-38.4516 2542.63,-26.04 2508.42,-20.6451"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2508.63,-17.1384 2498.23,-19.186 2507.63,-24.0678 2508.63,-17.1384"/>
+<g id="edge118" class="edge"><title>Node4&#45;&gt;Node14</title>
+<path fill="none" stroke="midnightblue" d="M2336.67,-713.523C2435.12,-711.81 2703.03,-704.026 2923,-669 3024.77,-652.795 3052.04,-650.554 3148,-613 3355.54,-531.782 3457.26,-482.537 3510,-266 3514.01,-249.53 3506.95,-180.809 3506,-179 3456.63,-85.0088 3398.44,-90.9102 3295,-67 3177.07,-39.7411 2806.71,-23.1844 2678.33,-18.1452"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2678.22,-14.6382 2668.09,-17.7475 2677.95,-21.6329 2678.22,-14.6382"/>
 </g>
 <!-- Node24 -->
 <g id="node24" class="node"><title>Node24</title>
-<polygon fill="white" stroke="#bfbfbf" points="474,-185 474,-204 516,-204 516,-185 474,-185"/>
-<text text-anchor="middle" x="495" y="-192" font-family="Helvetica,sans-Serif" font-size="10.00">limits</text>
+<polygon fill="white" stroke="#bfbfbf" points="3581,-179.5 3581,-198.5 3623,-198.5 3623,-179.5 3581,-179.5"/>
+<text text-anchor="middle" x="3602" y="-186.5" font-family="Helvetica,sans-Serif" font-size="10.00">limits</text>
 </g>
 <!-- Node4&#45;&gt;Node24 -->
-<g id="edge117" class="edge"><title>Node4&#45;&gt;Node24</title>
-<path fill="none" stroke="midnightblue" d="M2155.31,-720.01C2105.18,-713.119 2015.26,-699.315 1940,-680 1838.17,-653.866 1818.72,-626.379 1716,-604 1562.34,-570.524 1518.54,-597.149 1364,-568 1331.36,-561.844 1324.69,-553.837 1292,-548 1100.59,-513.828 1041.32,-570.854 856,-512 684.804,-457.63 546.017,-270.513 506.391,-212.673"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="509.205,-210.587 500.707,-204.261 503.405,-214.506 509.205,-210.587"/>
+<g id="edge116" class="edge"><title>Node4&#45;&gt;Node24</title>
+<path fill="none" stroke="midnightblue" d="M2336.72,-713.819C2516.37,-712.783 3264.01,-706.25 3361,-669 3418.61,-646.874 3575,-553.714 3575,-492 3575,-492 3575,-492 3575,-311 3575,-273.838 3587.28,-231.764 3595.28,-208.308"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="3598.66,-209.252 3598.69,-198.657 3592.06,-206.919 3598.66,-209.252"/>
 </g>
 <!-- Node26 -->
 <g id="node26" class="node"><title>Node26</title>
-<polygon fill="white" stroke="#bfbfbf" points="3020,-252 3020,-271 3082,-271 3082,-252 3020,-252"/>
-<text text-anchor="middle" x="3051" y="-259" font-family="Helvetica,sans-Serif" font-size="10.00">algorithm</text>
+<polygon fill="white" stroke="#bfbfbf" points="2668,-241 2668,-260 2730,-260 2730,-241 2668,-241"/>
+<text text-anchor="middle" x="2699" y="-248" font-family="Helvetica,sans-Serif" font-size="10.00">algorithm</text>
 </g>
 <!-- Node4&#45;&gt;Node26 -->
-<g id="edge116" class="edge"><title>Node4&#45;&gt;Node26</title>
-<path fill="none" stroke="midnightblue" d="M2220.34,-716.412C2246.02,-707.846 2286.07,-693.982 2320,-680 2356.77,-664.851 2445.27,-619.24 2482,-604 2526.54,-585.518 2850.54,-483.721 2890,-456 2960.9,-406.189 3018.56,-317.457 3040.95,-279.977"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="3044.05,-281.617 3046.09,-271.222 3038.01,-278.072 3044.05,-281.617"/>
+<g id="edge115" class="edge"><title>Node4&#45;&gt;Node26</title>
+<path fill="none" stroke="midnightblue" d="M2336.77,-708.099C2438.85,-690.233 2714.86,-626.354 2826,-445 2867.83,-376.745 2767.65,-297.54 2721.15,-265.762"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2722.88,-262.709 2712.62,-260.056 2718.98,-268.526 2722.88,-262.709"/>
 </g>
 <!-- Node33 -->
 <g id="node33" class="node"><title>Node33</title>
 <g id="a_node33"><a xlink:href="ir_2type_8h.html" target="_top" xlink:title="IR/AST nodes for the unified type system in TVM. ">
-<polygon fill="white" stroke="black" points="2374.5,-660.5 2374.5,-679.5 2451.5,-679.5 2451.5,-660.5 2374.5,-660.5"/>
-<text text-anchor="middle" x="2413" y="-667.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/ir/type.h</text>
+<polygon fill="white" stroke="black" points="1984.5,-649.5 1984.5,-668.5 2061.5,-668.5 2061.5,-649.5 1984.5,-649.5"/>
+<text text-anchor="middle" x="2023" y="-656.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/ir/type.h</text>
 </a>
 </g>
 </g>
 <!-- Node4&#45;&gt;Node33 -->
-<g id="edge106" class="edge"><title>Node4&#45;&gt;Node33</title>
-<path fill="none" stroke="midnightblue" d="M2228.7,-716.444C2266.58,-707.102 2327.35,-692.119 2368.66,-681.933"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2369.59,-685.309 2378.46,-679.516 2367.91,-678.512 2369.59,-685.309"/>
+<g id="edge105" class="edge"><title>Node4&#45;&gt;Node33</title>
+<path fill="none" stroke="midnightblue" d="M2259.47,-706.435C2210.31,-696.781 2125.25,-680.077 2071.66,-669.555"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2072.2,-666.094 2061.71,-667.601 2070.85,-672.963 2072.2,-666.094"/>
 </g>
 <!-- Node5&#45;&gt;Node6 -->
 <g id="edge5" class="edge"><title>Node5&#45;&gt;Node6</title>
-<path fill="none" stroke="midnightblue" d="M1919.06,-604.324C1904.41,-595.822 1881.87,-582.729 1864.76,-572.797"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1866.31,-569.649 1855.9,-567.655 1862.8,-575.703 1866.31,-569.649"/>
+<path fill="none" stroke="midnightblue" d="M2307.4,-593.083C2300.77,-585.298 2290.98,-573.801 2282.94,-564.367"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2285.6,-562.094 2276.45,-556.751 2280.27,-566.633 2285.6,-562.094"/>
 </g>
 <!-- Node5&#45;&gt;Node11 -->
-<g id="edge104" class="edge"><title>Node5&#45;&gt;Node11</title>
-<path fill="none" stroke="midnightblue" d="M1893.98,-607.877C1801.03,-596.103 1563.32,-566.694 1364,-548 1309.76,-542.913 919.306,-543.703 875,-512 840.829,-487.549 868.959,-453.409 838,-425 800.099,-390.22 758.192,-430.648 728,-389 653.963,-286.868 714.973,-180.987 827,-123 884.933,-93.0131 1319.22,-82.1987 1490.17,-79.0777"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1490.37,-82.5747 1500.31,-78.8959 1490.25,-75.5758 1490.37,-82.5747"/>
+<g id="edge103" class="edge"><title>Node5&#45;&gt;Node11</title>
+<path fill="none" stroke="midnightblue" d="M2355.2,-596.753C2437.55,-585.273 2630.66,-554.579 2785,-501 2838.33,-482.487 2849.05,-471.302 2899,-445 2952.7,-416.72 2980.01,-425.333 3018,-378 3073.37,-309.013 3054.84,-261.142 3022,-179 3011.28,-152.175 3008.59,-144.231 2989,-123 2977.66,-110.71 2962.42,-99.8332 2949.66,-91.8527"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2951.27,-88.7358 2940.9,-86.5748 2947.66,-94.7323 2951.27,-88.7358"/>
 </g>
 <!-- Node5&#45;&gt;Node13 -->
-<g id="edge105" class="edge"><title>Node5&#45;&gt;Node13</title>
-<path fill="none" stroke="midnightblue" d="M1974.05,-608.27C2183.98,-582.392 3148,-449.722 3148,-195.5 3148,-195.5 3148,-195.5 3148,-132 3148,-95.7544 3165.09,-56.171 3176.35,-34.0327"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="3179.46,-35.641 3181.03,-25.1633 3173.27,-32.3745 3179.46,-35.641"/>
+<g id="edge104" class="edge"><title>Node5&#45;&gt;Node13</title>
+<path fill="none" stroke="midnightblue" d="M2274.77,-598.138C2129.3,-583.792 1617.34,-529.973 1202,-445 996.35,-402.927 925.673,-430.536 746,-322 682.201,-283.461 663.687,-266.465 632,-199 606.115,-143.887 605.466,-69.7171 606.827,-35.3872"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="610.343,-35.167 607.355,-25.0024 603.352,-34.8121 610.343,-35.167"/>
 </g>
 <!-- Node6&#45;&gt;Node7 -->
 <g id="edge6" class="edge"><title>Node6&#45;&gt;Node7</title>
-<path fill="none" stroke="midnightblue" d="M1844.62,-548.463C1856.17,-521.195 1892.36,-435.783 1908.32,-398.133"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1911.56,-399.441 1912.24,-388.868 1905.12,-396.71 1911.56,-399.441"/>
+<path fill="none" stroke="midnightblue" d="M2237.33,-537.443C2210.63,-529.651 2171.81,-516.941 2140,-501 2100.22,-481.061 2092.89,-471.309 2057,-445 2039.14,-431.909 2037.61,-424.291 2018,-414 1988.11,-398.312 1951.9,-387.182 1922.78,-379.915"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1923.33,-376.45 1912.79,-377.505 1921.69,-383.254 1923.33,-376.45"/>
 </g>
 <!-- Node8 -->
 <g id="node8" class="node"><title>Node8</title>
 <g id="a_node8"><a xlink:href="runtime_2container_8h.html" target="_top" xlink:title="Common POD(plain old data) container types. ">
-<polygon fill="white" stroke="red" points="2184,-313.5 2184,-332.5 2316,-332.5 2316,-313.5 2184,-313.5"/>
-<text text-anchor="middle" x="2250" y="-320.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/runtime/container.h</text>
+<polygon fill="white" stroke="red" points="2057,-302.5 2057,-321.5 2189,-321.5 2189,-302.5 2057,-302.5"/>
+<text text-anchor="middle" x="2123" y="-309.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/runtime/container.h</text>
 </a>
 </g>
 </g>
 <!-- Node6&#45;&gt;Node8 -->
-<g id="edge97" class="edge"><title>Node6&#45;&gt;Node8</title>
-<path fill="none" stroke="midnightblue" d="M1873.04,-548.434C1932.47,-532.644 2057.74,-499.843 2102,-492 2175.94,-478.895 2387.81,-511.856 2438,-456 2487.99,-400.372 2363.86,-355.411 2293.88,-335.339"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2294.55,-331.892 2283.98,-332.56 2292.66,-338.632 2294.55,-331.892"/>
+<g id="edge96" class="edge"><title>Node6&#45;&gt;Node8</title>
+<path fill="none" stroke="midnightblue" d="M2284.51,-537.489C2303.42,-525.897 2331.63,-503.95 2321,-481 2285.46,-404.232 2195.63,-349.74 2150.2,-326.115"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2151.72,-322.959 2141.22,-321.545 2148.55,-329.198 2151.72,-322.959"/>
 </g>
 <!-- Node10 -->
 <g id="node10" class="node"><title>Node10</title>
 <g id="a_node10"><a xlink:href="runtime_2memory_8h.html" target="_top" xlink:title="Runtime memory management. ">
-<polygon fill="white" stroke="red" points="1443.5,-123.5 1443.5,-142.5 1570.5,-142.5 1570.5,-123.5 1443.5,-123.5"/>
-<text text-anchor="middle" x="1507" y="-130.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/runtime/memory.h</text>
+<polygon fill="white" stroke="red" points="2440.5,-123.5 2440.5,-142.5 2567.5,-142.5 2567.5,-123.5 2440.5,-123.5"/>
+<text text-anchor="middle" x="2504" y="-130.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/runtime/memory.h</text>
 </a>
 </g>
 </g>
 <!-- Node6&#45;&gt;Node10 -->
-<g id="edge98" class="edge"><title>Node6&#45;&gt;Node10</title>
-<path fill="none" stroke="midnightblue" d="M1791.65,-556.331C1696.1,-551.911 1489.69,-526.229 1427,-389 1387.67,-302.892 1461.85,-192.064 1493.58,-150.615"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1496.41,-152.679 1499.82,-142.648 1490.9,-148.363 1496.41,-152.679"/>
+<g id="edge97" class="edge"><title>Node6&#45;&gt;Node10</title>
+<path fill="none" stroke="midnightblue" d="M2318.37,-544.376C2441.57,-539.23 2757.59,-518.948 2826,-445 2869.54,-397.935 2875.94,-358.156 2845,-302 2784.62,-192.412 2695.24,-254.282 2583,-199 2557.99,-186.685 2533.96,-165.041 2519.1,-150.134"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2521.32,-147.391 2511.84,-142.648 2516.29,-152.264 2521.32,-147.391"/>
 </g>
 <!-- Node6&#45;&gt;Node11 -->
-<g id="edge99" class="edge"><title>Node6&#45;&gt;Node11</title>
-<path fill="none" stroke="midnightblue" d="M1791.75,-553.098C1712.4,-546.516 1551.43,-531.947 1416,-512 1110.79,-467.046 929.17,-485.836 791,-210 764.766,-157.627 833.887,-139.804 890,-123 1000.81,-89.8162 1342.73,-81.0347 1490.17,-78.7664"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1490.5,-82.2621 1500.45,-78.6141 1490.4,-75.2628 1490.5,-82.2621"/>
+<g id="edge98" class="edge"><title>Node6&#45;&gt;Node11</title>
+<path fill="none" stroke="midnightblue" d="M2318.01,-543.802C2423.06,-538.697 2666.79,-524.641 2746,-501 2797.06,-485.762 2807.27,-473.957 2852,-445 2932,-393.204 3021,-408.307 3021,-313 3021,-313 3021,-313 3021,-249.5 3021,-185.214 2969,-122.663 2941.55,-94.1113"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2943.72,-91.335 2934.2,-86.6811 2938.74,-96.2547 2943.72,-91.335"/>
 </g>
 <!-- Node12 -->
 <g id="node12" class="node"><title>Node12</title>
 <g id="a_node12"><a xlink:href="c__runtime__api_8h.html" target="_top" xlink:title="tvm/runtime/c_runtime\l_api.h">
-<polygon fill="white" stroke="red" points="1221.5,-0.5 1221.5,-30.5 1348.5,-30.5 1348.5,-0.5 1221.5,-0.5"/>
-<text text-anchor="start" x="1229.5" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/runtime/c_runtime</text>
-<text text-anchor="middle" x="1285" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00">_api.h</text>
+<polygon fill="white" stroke="red" points="3070.5,-0.5 3070.5,-30.5 3197.5,-30.5 3197.5,-0.5 3070.5,-0.5"/>
+<text text-anchor="start" x="3078.5" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/runtime/c_runtime</text>
+<text text-anchor="middle" x="3134" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00">_api.h</text>
 </a>
 </g>
 </g>
 <!-- Node6&#45;&gt;Node12 -->
-<g id="edge96" class="edge"><title>Node6&#45;&gt;Node12</title>
-<path fill="none" stroke="midnightblue" d="M1791.72,-555.324C1631.99,-549.254 1130.48,-525.016 986,-456 884.479,-407.506 866.882,-372.249 807,-277 757.121,-197.662 742.693,-109.923 826,-67 891.393,-33.3071 1098.92,-21.9689 1211.18,-18.2495"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1211.53,-21.7404 1221.41,-17.923 1211.3,-14.7439 1211.53,-21.7404"/>
+<g id="edge95" class="edge"><title>Node6&#45;&gt;Node12</title>
+<path fill="none" stroke="midnightblue" d="M2318.15,-544.388C2522.71,-536.592 3303.18,-495.706 3459,-322 3537.64,-234.336 3488.93,-125.973 3387,-67 3332.18,-35.2873 3260.75,-23.2001 3207.76,-18.7336"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="3208,-15.2415 3197.76,-17.9618 3207.46,-22.2207 3208,-15.2415"/>
 </g>
 <!-- Node6&#45;&gt;Node13 -->
-<g id="edge100" class="edge"><title>Node6&#45;&gt;Node13</title>
-<path fill="none" stroke="midnightblue" d="M1890.24,-551.431C2022.07,-536.196 2380.73,-492.528 2494,-456 2661.04,-402.133 2699.63,-373.851 2846,-277 2967.02,-196.925 2976.65,-149.532 3096,-67 3116.46,-52.8554 3141.19,-39.203 3159.54,-29.6674"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="3161.2,-32.7504 3168.51,-25.0754 3158.01,-26.5205 3161.2,-32.7504"/>
+<g id="edge99" class="edge"><title>Node6&#45;&gt;Node13</title>
+<path fill="none" stroke="midnightblue" d="M2219.92,-540.849C2113.78,-529.73 1854.97,-502.73 1638,-481 1473.6,-464.535 1420.77,-507.913 1268,-445 1247.85,-436.7 1249.5,-423.735 1230,-414 1170.2,-384.149 1145.38,-404.446 1084,-378 1041.77,-359.804 1032.66,-351.028 997,-322 880.231,-226.961 899.729,-149.823 774,-67 732.508,-39.6679 675.574,-26.5924 640.32,-20.7485"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="640.535,-17.24 630.116,-19.1593 639.458,-24.1566 640.535,-17.24"/>
 </g>
 <!-- Node6&#45;&gt;Node14 -->
-<g id="edge101" class="edge"><title>Node6&#45;&gt;Node14</title>
-<path fill="none" stroke="midnightblue" d="M1890.19,-548.909C1937.36,-540.808 2010.44,-527.364 2073,-512 2103.18,-504.588 2109.55,-498.217 2140,-492 2209.68,-477.773 2398.63,-495.157 2458,-456 2595.88,-365.059 2658.62,-284.286 2623,-123 2617.08,-96.2049 2617.09,-85.6947 2597,-67 2572.44,-44.146 2536.51,-31.3275 2508.18,-24.3288"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2508.65,-20.8461 2498.12,-22.0059 2507.08,-27.6665 2508.65,-20.8461"/>
+<g id="edge100" class="edge"><title>Node6&#45;&gt;Node14</title>
+<path fill="none" stroke="midnightblue" d="M2318.3,-545.598C2410.79,-543.986 2616.23,-536.342 2785,-501 3102.11,-434.595 3342.02,-457.46 3420,-143 3422.14,-134.372 3425.52,-129.966 3420,-123 3352.22,-37.5074 3288.69,-84.5009 3181,-67 2995.64,-36.8756 2771.8,-23.2156 2678.35,-18.5146"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2678.29,-15.0078 2668.13,-18.0106 2677.95,-21.9993 2678.29,-15.0078"/>
 </g>
 <!-- Node6&#45;&gt;Node15 -->
-<g id="edge102" class="edge"><title>Node6&#45;&gt;Node15</title>
-<path fill="none" stroke="midnightblue" d="M1791.91,-555.743C1607.84,-550.89 964.579,-532.612 875,-512 746.071,-482.333 441.701,-344.639 328,-277 286.659,-252.407 264.637,-252.962 243,-210 214.186,-152.789 227.435,-121.559 261,-67 269.102,-53.8306 281.081,-41.2218 291.044,-31.8973"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="293.413,-34.4742 298.491,-25.1753 288.722,-29.2782 293.413,-34.4742"/>
+<g id="edge101" class="edge"><title>Node6&#45;&gt;Node15</title>
+<path fill="none" stroke="midnightblue" d="M2237.06,-537.373C2216.55,-530.435 2190.55,-518.85 2173,-501 2143.29,-470.785 2162.38,-445.462 2134,-414 2012.29,-279.089 1792,-371.701 1792,-190 1792,-190 1792,-190 1792,-132 1792,-97.4049 1783.6,-57.7448 1777.97,-35.0547"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1781.29,-33.9237 1775.41,-25.1136 1774.51,-35.6707 1781.29,-33.9237"/>
 </g>
 <!-- Node6&#45;&gt;Node21 -->
-<g id="edge103" class="edge"><title>Node6&#45;&gt;Node21</title>
-<path fill="none" stroke="midnightblue" d="M1791.94,-553.21C1729.28,-547.644 1617.85,-535.298 1525,-512 1300.12,-455.571 1167.87,-523.002 1035,-333 1029.91,-325.716 1031.63,-321.227 1035,-313 1037.15,-307.755 1152.05,-192.81 1195.02,-149.935"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1197.55,-152.35 1202.16,-142.81 1192.61,-147.394 1197.55,-152.35"/>
+<g id="edge102" class="edge"><title>Node6&#45;&gt;Node21</title>
+<path fill="none" stroke="midnightblue" d="M2219.74,-542.525C2164.38,-537.556 2072.39,-526.081 1997,-501 1929.9,-478.679 1763.76,-389.35 1566,-266 1521.85,-238.464 1508.92,-233.534 1470,-199 1453.11,-184.013 1436.19,-164.399 1425.03,-150.653"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1427.55,-148.205 1418.58,-142.574 1422.08,-152.573 1427.55,-148.205"/>
 </g>
 <!-- Node27 -->
 <g id="node27" class="node"><title>Node27</title>
 <g id="a_node27"><a xlink:href="reflection_8h.html" target="_top" xlink:title="Reflection and serialization of compiler IR/AST nodes. ">
-<polygon fill="white" stroke="black" points="1578.5,-492.5 1578.5,-511.5 1697.5,-511.5 1697.5,-492.5 1578.5,-492.5"/>
-<text text-anchor="middle" x="1638" y="-499.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/node/reflection.h</text>
+<polygon fill="white" stroke="black" points="2377.5,-481.5 2377.5,-500.5 2496.5,-500.5 2496.5,-481.5 2377.5,-481.5"/>
+<text text-anchor="middle" x="2437" y="-488.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/node/reflection.h</text>
 </a>
 </g>
 </g>
 <!-- Node6&#45;&gt;Node27 -->
-<g id="edge65" class="edge"><title>Node6&#45;&gt;Node27</title>
-<path fill="none" stroke="midnightblue" d="M1808.83,-548.444C1773.94,-539.162 1718.11,-524.309 1679.83,-514.127"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1680.58,-510.705 1670.02,-511.516 1678.78,-517.47 1680.58,-510.705"/>
+<g id="edge64" class="edge"><title>Node6&#45;&gt;Node27</title>
+<path fill="none" stroke="midnightblue" d="M2295.62,-537.444C2324.01,-528.32 2369.14,-513.813 2400.75,-503.651"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2402.06,-506.909 2410.5,-500.516 2399.91,-500.244 2402.06,-506.909"/>
 </g>
 <!-- Node6&#45;&gt;Node28 -->
-<g id="edge94" class="edge"><title>Node6&#45;&gt;Node28</title>
-<path fill="none" stroke="midnightblue" d="M1865.96,-548.488C1892.16,-539.523 1934.54,-524.94 1971,-512 1995.51,-503.301 2000.73,-498.152 2026,-492 2147.05,-462.534 2181.14,-476.65 2304,-456 2305.45,-455.756 2306.92,-455.503 2308.4,-455.243"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2309.08,-458.677 2318.29,-453.436 2307.82,-451.791 2309.08,-458.677"/>
+<g id="edge93" class="edge"><title>Node6&#45;&gt;Node28</title>
+<path fill="none" stroke="midnightblue" d="M2318.19,-539.925C2372.63,-532.71 2462.76,-519.309 2539,-501 2600.24,-486.293 2669.14,-463.525 2713.94,-447.864"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2715.12,-451.157 2723.39,-444.537 2712.8,-444.554 2715.12,-451.157"/>
 </g>
 <!-- Node6&#45;&gt;Node30 -->
-<g id="edge95" class="edge"><title>Node6&#45;&gt;Node30</title>
-<path fill="none" stroke="midnightblue" d="M1856.87,-548.455C1882.29,-535.032 1933.95,-508.804 1980,-492 2019.76,-477.491 2065.6,-465.093 2102.07,-456.212"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2103.19,-459.543 2112.09,-453.802 2101.55,-452.737 2103.19,-459.543"/>
+<g id="edge94" class="edge"><title>Node6&#45;&gt;Node30</title>
+<path fill="none" stroke="midnightblue" d="M2219.74,-544.93C2169.09,-542.024 2089.2,-532.308 2028,-501 2005.27,-489.369 1984.71,-468.535 1971.06,-452.521"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1973.64,-450.15 1964.58,-444.66 1968.24,-454.603 1973.64,-450.15"/>
 </g>
 <!-- Node31 -->
 <g id="node31" class="node"><title>Node31</title>
 <g id="a_node31"><a xlink:href="repr__printer_8h.html" target="_top" xlink:title="Printer class to print repr string of each AST/IR nodes. ">
-<polygon fill="white" stroke="black" points="884,-492.5 884,-511.5 1014,-511.5 1014,-492.5 884,-492.5"/>
-<text text-anchor="middle" x="949" y="-499.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/node/repr_printer.h</text>
+<polygon fill="white" stroke="black" points="2182,-481.5 2182,-500.5 2312,-500.5 2312,-481.5 2182,-481.5"/>
+<text text-anchor="middle" x="2247" y="-488.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/node/repr_printer.h</text>
 </a>
 </g>
 </g>
 <!-- Node6&#45;&gt;Node31 -->
-<g id="edge91" class="edge"><title>Node6&#45;&gt;Node31</title>
-<path fill="none" stroke="midnightblue" d="M1791.77,-554.02C1643.71,-545.057 1201.79,-518.303 1024.16,-507.55"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1024.31,-504.053 1014.11,-506.942 1023.88,-511.04 1024.31,-504.053"/>
+<g id="edge90" class="edge"><title>Node6&#45;&gt;Node31</title>
+<path fill="none" stroke="midnightblue" d="M2265.37,-537.083C2262.39,-529.77 2258.07,-519.181 2254.38,-510.103"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2257.58,-508.691 2250.57,-500.751 2251.1,-511.333 2257.58,-508.691"/>
 </g>
 <!-- Node7&#45;&gt;Node8 -->
 <g id="edge7" class="edge"><title>Node7&#45;&gt;Node8</title>
-<path fill="none" stroke="midnightblue" d="M1968.92,-369.444C2028.17,-359.865 2124.1,-344.356 2187.12,-334.167"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2188.01,-337.568 2197.32,-332.516 2186.89,-330.657 2188.01,-337.568"/>
+<path fill="none" stroke="midnightblue" d="M1910.09,-358.444C1954.32,-349.004 2025.54,-333.802 2073.28,-323.611"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2074.05,-327.027 2083.1,-321.516 2072.59,-320.181 2074.05,-327.027"/>
 </g>
 <!-- Node7&#45;&gt;Node10 -->
-<g id="edge57" class="edge"><title>Node7&#45;&gt;Node10</title>
-<path fill="none" stroke="midnightblue" d="M1856.16,-376.266C1775.59,-372.816 1637.71,-362.643 1598,-333 1537.47,-287.814 1516.07,-192.767 1509.53,-152.575"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1512.98,-151.949 1508.03,-142.578 1506.05,-152.986 1512.98,-151.949"/>
+<g id="edge58" class="edge"><title>Node7&#45;&gt;Node10</title>
+<path fill="none" stroke="midnightblue" d="M1929.59,-364.466C2026.24,-359.671 2212.46,-347.393 2273,-322 2322.38,-301.288 2443.05,-191.04 2487.35,-149.685"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2489.79,-152.194 2494.69,-142.802 2485,-147.085 2489.79,-152.194"/>
 </g>
 <!-- Node7&#45;&gt;Node11 -->
-<g id="edge58" class="edge"><title>Node7&#45;&gt;Node11</title>
-<path fill="none" stroke="midnightblue" d="M1896.5,-369.39C1849.36,-347.874 1727.2,-287.852 1647,-210 1610.9,-174.958 1581.51,-122.719 1567.64,-95.6641"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1570.71,-93.9851 1563.1,-86.6167 1564.46,-97.1244 1570.71,-93.9851"/>
+<g id="edge59" class="edge"><title>Node7&#45;&gt;Node11</title>
+<path fill="none" stroke="midnightblue" d="M1929.67,-363.41C2013.67,-357.769 2171.34,-345.022 2304,-322 2324.01,-318.528 2639.11,-239.089 2659,-235 2754.33,-215.403 2791.55,-249.084 2875,-199 2901.91,-182.852 2906.33,-172.129 2918,-143 2923.88,-128.332 2925.43,-110.394 2925.63,-97.0811"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2929.12,-96.7349 2925.52,-86.7705 2922.12,-96.8051 2929.12,-96.7349"/>
 </g>
 <!-- Node7&#45;&gt;Node13 -->
-<g id="edge60" class="edge"><title>Node7&#45;&gt;Node13</title>
-<path fill="none" stroke="midnightblue" d="M1975.67,-375.029C2077.91,-369.405 2282.11,-355.69 2350,-333 2458.73,-296.663 2683.72,-104.647 2792,-67 2920.61,-22.2853 3085.2,-16.5042 3153.77,-16.1992"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="3153.93,-19.6991 3163.93,-16.1921 3153.93,-12.6991 3153.93,-19.6991"/>
-</g>
-<!-- Node7&#45;&gt;Node14 -->
-<g id="edge61" class="edge"><title>Node7&#45;&gt;Node14</title>
-<path fill="none" stroke="midnightblue" d="M1975.59,-378.304C2075.18,-377.705 2269.96,-371.467 2325,-333 2429.02,-260.301 2456.03,-91.7561 2462.29,-35.307"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2465.8,-35.3393 2463.33,-25.0377 2458.84,-34.6344 2465.8,-35.3393"/>
+<g id="edge62" class="edge"><title>Node7&#45;&gt;Node13</title>
+<path fill="none" stroke="midnightblue" d="M1847.27,-358.481C1805.85,-342.599 1715.86,-306.394 1645,-266 1600.11,-240.414 1592.11,-228.942 1550,-199 1502.76,-165.406 1498.13,-143.747 1444,-123 1307.89,-70.8304 1260.46,-117.872 1118,-87 1090.12,-80.9575 1084.88,-73.0707 1057,-67 903.222,-33.5132 714.697,-21.497 640.343,-17.8657"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="640.373,-14.3633 630.219,-17.3892 640.044,-21.3555 640.373,-14.3633"/>
 </g>
 <!-- Node7&#45;&gt;Node15 -->
 <g id="edge63" class="edge"><title>Node7&#45;&gt;Node15</title>
-<path fill="none" stroke="midnightblue" d="M1856.18,-376.734C1572.17,-370.1 369.161,-334.744 265,-210 224.201,-161.139 247.323,-125.246 273,-67 278.453,-54.6293 287.218,-42.245 294.733,-32.8364"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="297.449,-35.0445 301.153,-25.1182 292.067,-30.568 297.449,-35.0445"/>
+<path fill="none" stroke="midnightblue" d="M1857.81,-358.423C1836.84,-342.847 1794.6,-307.64 1778,-266 1745.81,-185.269 1761,-77.854 1769.08,-35.1496"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1772.56,-35.5972 1771.08,-25.106 1765.69,-34.2292 1772.56,-35.5972"/>
 </g>
 <!-- Node7&#45;&gt;Node16 -->
-<g id="edge59" class="edge"><title>Node7&#45;&gt;Node16</title>
-<path fill="none" stroke="midnightblue" d="M1908.86,-369.319C1894.14,-351.424 1860.04,-309.966 1838.91,-284.281"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1841.61,-282.045 1832.55,-276.545 1836.2,-286.492 1841.61,-282.045"/>
-</g>
-<!-- Node7&#45;&gt;Node20 -->
-<g id="edge62" class="edge"><title>Node7&#45;&gt;Node20</title>
-<path fill="none" stroke="midnightblue" d="M1975.74,-374.463C2066.77,-368.608 2245.52,-355.283 2396,-333 2759.64,-279.155 3192.2,-178.065 3330.31,-144.867"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="3331.22,-148.249 3340.12,-142.504 3329.58,-141.444 3331.22,-148.249"/>
+<g id="edge60" class="edge"><title>Node7&#45;&gt;Node16</title>
+<path fill="none" stroke="midnightblue" d="M1881.95,-358.319C1907.12,-340.037 1966.14,-297.16 2001.28,-271.635"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2003.63,-274.254 2009.66,-265.545 1999.52,-268.591 2003.63,-274.254"/>
 </g>
-<!-- Node7&#45;&gt;Node21 -->
-<g id="edge64" class="edge"><title>Node7&#45;&gt;Node21</title>
-<path fill="none" stroke="midnightblue" d="M1856.32,-377.595C1786.5,-375.449 1668.31,-366.567 1573,-333 1448.84,-289.272 1437.41,-238.518 1320,-179 1295.3,-166.478 1266.26,-154.624 1244.38,-146.229"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1245.45,-142.894 1234.86,-142.625 1242.97,-149.44 1245.45,-142.894"/>
+<!-- Node7&#45;&gt;Node26 -->
+<g id="edge61" class="edge"><title>Node7&#45;&gt;Node26</title>
+<path fill="none" stroke="midnightblue" d="M1929.75,-364.458C2026.49,-359.749 2223.22,-347.711 2388,-322 2485.99,-306.71 2599.12,-278.298 2658.23,-262.615"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2659.17,-265.986 2667.93,-260.026 2657.37,-259.223 2659.17,-265.986"/>
 </g>
 <!-- Node9 -->
 <g id="node9" class="node"><title>Node9</title>
-<polygon fill="white" stroke="#bfbfbf" points="1823,-6 1823,-25 1909,-25 1909,-6 1823,-6"/>
-<text text-anchor="middle" x="1866" y="-13" font-family="Helvetica,sans-Serif" font-size="10.00">dmlc/logging.h</text>
+<polygon fill="white" stroke="#bfbfbf" points="3452,-6 3452,-25 3538,-25 3538,-6 3452,-6"/>
+<text text-anchor="middle" x="3495" y="-13" font-family="Helvetica,sans-Serif" font-size="10.00">dmlc/logging.h</text>
 </g>
 <!-- Node8&#45;&gt;Node9 -->
 <g id="edge8" class="edge"><title>Node8&#45;&gt;Node9</title>
-<path fill="none" stroke="midnightblue" d="M2251.67,-313.103C2254.11,-298.907 2258.16,-270.258 2256,-246 2250.7,-186.352 2250.02,-157.398 2201,-123 2113.51,-61.6055 1988.94,-34.3691 1918.97,-23.2883"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1919.47,-19.8239 1909.05,-21.7685 1918.41,-26.7431 1919.47,-19.8239"/>
+<path fill="none" stroke="midnightblue" d="M2183.1,-302.48C2363.2,-276.936 2887.19,-202.533 2905,-199 3003.97,-179.375 3027.7,-169.711 3125,-143 3209.89,-119.693 3231.71,-115.524 3315,-87 3368.84,-68.562 3430.32,-43.6206 3465.71,-28.8793"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="3467.09,-32.0938 3474.97,-25.0059 3464.39,-25.636 3467.09,-32.0938"/>
 </g>
 <!-- Node8&#45;&gt;Node10 -->
 <g id="edge9" class="edge"><title>Node8&#45;&gt;Node10</title>
-<path fill="none" stroke="midnightblue" d="M2183.84,-319.507C2067.96,-314.556 1833.15,-301.762 1755,-277 1662.06,-247.551 1565.94,-179.269 1526.08,-148.958"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1528.09,-146.088 1518.03,-142.766 1523.82,-151.637 1528.09,-146.088"/>
+<path fill="none" stroke="midnightblue" d="M2146.16,-302.388C2169.16,-293.695 2205.32,-279.639 2236,-266 2326.67,-225.691 2431.5,-171.907 2478.69,-147.302"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2480.35,-150.38 2487.59,-142.646 2477.11,-144.176 2480.35,-150.38"/>
 </g>
 <!-- Node8&#45;&gt;Node11 -->
 <g id="edge18" class="edge"><title>Node8&#45;&gt;Node11</title>
-<path fill="none" stroke="midnightblue" d="M2250.09,-313.269C2249.89,-280.344 2244.13,-163.511 2174,-123 2127.64,-96.2219 1778.25,-83.8168 1627.69,-79.6725"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1627.73,-76.1722 1617.63,-79.3998 1627.54,-83.1697 1627.73,-76.1722"/>
+<path fill="none" stroke="midnightblue" d="M2152.04,-302.383C2206.96,-286.439 2330.05,-252.378 2436,-235 2526.91,-220.089 2770.39,-247.032 2849,-199 2887.68,-175.365 2909.87,-123.816 2919.39,-96.2485"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2922.75,-97.252 2922.53,-86.6594 2916.1,-95.0771 2922.75,-97.252"/>
 </g>
 <!-- Node8&#45;&gt;Node13 -->
 <g id="edge53" class="edge"><title>Node8&#45;&gt;Node13</title>
-<path fill="none" stroke="midnightblue" d="M2267.56,-313.347C2319.63,-287.657 2477.71,-209.48 2608,-143 2673.16,-109.752 2684.07,-88.4899 2754,-67 2898.17,-22.692 3080.37,-16.7828 3153.47,-16.3253"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="3153.86,-19.8242 3163.85,-16.2935 3153.84,-12.8243 3153.86,-19.8242"/>
+<path fill="none" stroke="midnightblue" d="M2056.86,-302.735C2007.04,-295.58 1937.4,-283.585 1878,-266 1719.24,-219.002 1693.63,-166.948 1534,-123 1366.02,-76.7533 1319.01,-87.5922 1146,-67 954.572,-44.2156 723.811,-25.4705 640.208,-18.9608"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="640.318,-15.459 630.078,-18.176 639.777,-22.4381 640.318,-15.459"/>
 </g>
 <!-- Node8&#45;&gt;Node14 -->
-<g id="edge54" class="edge"><title>Node8&#45;&gt;Node14</title>
-<path fill="none" stroke="midnightblue" d="M2259.34,-313.303C2285.19,-289.163 2356.31,-222.29 2364,-210 2399.56,-153.166 2376.28,-123.731 2412,-67 2420.55,-53.4222 2433.48,-40.8426 2444.31,-31.6349"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2446.54,-34.3286 2452.07,-25.2903 2442.12,-28.9084 2446.54,-34.3286"/>
+<g id="edge55" class="edge"><title>Node8&#45;&gt;Node14</title>
+<path fill="none" stroke="midnightblue" d="M2133.08,-302.344C2153.73,-284.327 2201.81,-240.972 2236,-199 2280.23,-144.715 2263.31,-105.191 2322,-67 2365.43,-38.737 2514.6,-24.69 2589.85,-19.2662"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2590.22,-22.7484 2599.95,-18.5576 2589.73,-15.7656 2590.22,-22.7484"/>
 </g>
 <!-- Node8&#45;&gt;Node15 -->
-<g id="edge55" class="edge"><title>Node8&#45;&gt;Node15</title>
-<path fill="none" stroke="midnightblue" d="M2183.71,-320.784C1924.45,-315.482 979.3,-290.898 692,-210 543.918,-168.303 385.523,-67.967 329.809,-30.727"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="331.727,-27.7994 321.478,-25.1152 327.816,-33.605 331.727,-27.7994"/>
+<g id="edge56" class="edge"><title>Node8&#45;&gt;Node15</title>
+<path fill="none" stroke="midnightblue" d="M2125.87,-302.497C2132.69,-280.799 2147.92,-222.054 2129,-179 2114.19,-145.307 2096.83,-145.881 2068,-123 2033.43,-95.5623 2026.37,-84.8334 1986,-67 1924.63,-39.8887 1846.61,-26.154 1804.09,-20.2397"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1804.45,-16.757 1794.07,-18.9012 1803.52,-23.6953 1804.45,-16.757"/>
 </g>
 <!-- Node8&#45;&gt;Node16 -->
 <g id="edge19" class="edge"><title>Node8&#45;&gt;Node16</title>
-<path fill="none" stroke="midnightblue" d="M2188.53,-313.475C2108.69,-302.401 1969.82,-283.14 1887.55,-271.731"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1887.95,-268.252 1877.56,-270.345 1886.99,-275.186 1887.95,-268.252"/>
+<path fill="none" stroke="midnightblue" d="M2109.53,-302.475C2096.56,-294.266 2076.49,-281.558 2059.62,-270.885"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2061.48,-267.914 2051.15,-265.523 2057.73,-273.829 2061.48,-267.914"/>
 </g>
 <!-- Node19 -->
 <g id="node19" class="node"><title>Node19</title>
-<polygon fill="white" stroke="#bfbfbf" points="1992.5,-123.5 1992.5,-142.5 2049.5,-142.5 2049.5,-123.5 1992.5,-123.5"/>
-<text text-anchor="middle" x="2021" y="-130.5" font-family="Helvetica,sans-Serif" font-size="10.00">memory</text>
+<polygon fill="white" stroke="#bfbfbf" points="1899.5,-123.5 1899.5,-142.5 1956.5,-142.5 1956.5,-123.5 1899.5,-123.5"/>
+<text text-anchor="middle" x="1928" y="-130.5" font-family="Helvetica,sans-Serif" font-size="10.00">memory</text>
 </g>
 <!-- Node8&#45;&gt;Node19 -->
 <g id="edge52" class="edge"><title>Node8&#45;&gt;Node19</title>
-<path fill="none" stroke="midnightblue" d="M2243.78,-313.356C2222.56,-283.953 2153.58,-189.078 2140,-179 2115.99,-161.187 2084.1,-149.63 2059.38,-142.694"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2060.17,-139.284 2049.61,-140.086 2058.37,-146.047 2060.17,-139.284"/>
+<path fill="none" stroke="midnightblue" d="M2081.57,-302.438C2048.67,-294.911 2001.89,-282.569 1963,-266 1908.93,-242.965 1878.19,-250.009 1849,-199 1835.8,-175.934 1865.02,-157.627 1891.58,-146.394"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1893.16,-149.531 1901.16,-142.581 1890.57,-143.027 1893.16,-149.531"/>
+</g>
+<!-- Node8&#45;&gt;Node20 -->
+<g id="edge54" class="edge"><title>Node8&#45;&gt;Node20</title>
+<path fill="none" stroke="midnightblue" d="M2056.66,-305.678C1964.11,-298.046 1791.07,-283.023 1644,-266 1362.34,-233.399 1294.01,-208.434 1012,-179 800.889,-156.966 548.9,-142.553 438.074,-136.813"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="437.963,-133.303 427.796,-136.285 437.603,-140.293 437.963,-133.303"/>
 </g>
 <!-- Node8&#45;&gt;Node21 -->
-<g id="edge56" class="edge"><title>Node8&#45;&gt;Node21</title>
-<path fill="none" stroke="midnightblue" d="M2183.98,-320.823C2077.13,-318.02 1859.98,-308.587 1679,-277 1567.79,-257.591 1534.36,-261.69 1434,-210 1414.62,-200.02 1415.52,-188.705 1396,-179 1346.63,-154.449 1283.05,-142.786 1244.74,-137.64"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1245,-134.145 1234.64,-136.356 1244.11,-141.089 1245,-134.145"/>
+<g id="edge57" class="edge"><title>Node8&#45;&gt;Node21</title>
+<path fill="none" stroke="midnightblue" d="M2056.98,-308.56C1979.87,-304.495 1848.99,-293.803 1740,-266 1622.98,-236.146 1492.08,-174.209 1437.77,-147.14"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1439.05,-143.865 1428.54,-142.507 1435.9,-150.121 1439.05,-143.865"/>
 </g>
 <!-- Node8&#45;&gt;Node26 -->
 <g id="edge51" class="edge"><title>Node8&#45;&gt;Node26</title>
-<path fill="none" stroke="midnightblue" d="M2316.23,-317.08C2476.07,-305.207 2879.39,-275.248 3009.7,-265.568"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="3009.97,-269.058 3019.68,-264.827 3009.45,-262.077 3009.97,-269.058"/>
+<path fill="none" stroke="midnightblue" d="M2189.07,-304.175C2308.96,-291.79 2559.04,-265.957 2657.7,-255.766"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2658.28,-259.225 2667.87,-254.716 2657.56,-252.262 2658.28,-259.225"/>
 </g>
 <!-- Node10&#45;&gt;Node11 -->
 <g id="edge10" class="edge"><title>Node10&#45;&gt;Node11</title>
-<path fill="none" stroke="midnightblue" d="M1515.59,-123.083C1523.16,-115.22 1534.38,-103.569 1543.51,-94.0813"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1546.16,-96.3821 1550.57,-86.7511 1541.12,-91.5266 1546.16,-96.3821"/>
+<path fill="none" stroke="midnightblue" d="M2567.94,-123.799C2645.7,-113.825 2776.43,-97.0569 2856.28,-86.8141"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2856.77,-90.2805 2866.24,-85.5367 2855.88,-83.3374 2856.77,-90.2805"/>
 </g>
 <!-- Node10&#45;&gt;Node14 -->
 <g id="edge16" class="edge"><title>Node10&#45;&gt;Node14</title>
-<path fill="none" stroke="midnightblue" d="M1559.4,-123.483C1642.19,-110.214 1808.42,-84.3322 1950,-67 2123.58,-45.7511 2331,-27.5689 2419.96,-20.1133"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2420.28,-23.5984 2429.96,-19.2786 2419.7,-16.6227 2420.28,-23.5984"/>
+<path fill="none" stroke="midnightblue" d="M2513.77,-123.319C2535.6,-103.921 2588.61,-56.8311 2616.37,-32.1625"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2618.96,-34.5404 2624.12,-25.282 2614.32,-29.3074 2618.96,-34.5404"/>
 </g>
 <!-- Node10&#45;&gt;Node15 -->
 <g id="edge17" class="edge"><title>Node10&#45;&gt;Node15</title>
-<path fill="none" stroke="midnightblue" d="M1446.78,-123.458C1350.17,-109.979 1154.49,-83.6061 988,-67 738.83,-42.1469 437.097,-23.846 340.405,-18.2713"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="340.338,-14.7618 330.154,-17.6838 339.937,-21.7504 340.338,-14.7618"/>
+<path fill="none" stroke="midnightblue" d="M2463.09,-123.488C2398.78,-110.282 2269.99,-84.5525 2160,-67 2028.17,-45.9606 1870.02,-27.3928 1804.16,-19.9551"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1804.36,-16.4564 1794.04,-18.8179 1803.58,-23.4126 1804.36,-16.4564"/>
 </g>
 <!-- Node11&#45;&gt;Node9 -->
 <g id="edge11" class="edge"><title>Node11&#45;&gt;Node9</title>
-<path fill="none" stroke="midnightblue" d="M1602.99,-67.4751C1658.31,-56.7523 1753.24,-38.3537 1812.65,-26.8394"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1813.44,-30.2518 1822.59,-24.913 1812.11,-23.3797 1813.44,-30.2518"/>
+<path fill="none" stroke="midnightblue" d="M2983.59,-69.8843C3094.66,-58.2898 3334.32,-33.2729 3441.41,-22.0943"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="3442.07,-25.544 3451.65,-21.0246 3441.35,-18.5819 3442.07,-25.544"/>
 </g>
 <!-- Node11&#45;&gt;Node12 -->
 <g id="edge12" class="edge"><title>Node11&#45;&gt;Node12</title>
-<path fill="none" stroke="midnightblue" d="M1519.74,-67.4751C1477.76,-58.3584 1410.22,-43.6929 1358.35,-32.4276"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1359.04,-28.9961 1348.52,-30.2942 1357.55,-35.8366 1359.04,-28.9961"/>
+<path fill="none" stroke="midnightblue" d="M2954.94,-67.4751C2986.16,-58.5882 3035.9,-44.4288 3075.04,-33.2859"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="3076.08,-36.6274 3084.74,-30.523 3074.17,-29.8949 3076.08,-36.6274"/>
 </g>
 <!-- Node11&#45;&gt;Node13 -->
 <g id="edge13" class="edge"><title>Node11&#45;&gt;Node13</title>
-<path fill="none" stroke="midnightblue" d="M1617.87,-73.8472C1882.05,-64.186 2954.02,-24.9837 3153.84,-17.676"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="3154.11,-21.1686 3163.98,-17.3054 3153.86,-14.1733 3154.11,-21.1686"/>
+<path fill="none" stroke="midnightblue" d="M2866.28,-74.492C2533.63,-65.9497 891.841,-23.789 640.347,-17.3307"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="640.148,-13.8245 630.062,-17.0665 639.969,-20.8222 640.148,-13.8245"/>
 </g>
 <!-- Node11&#45;&gt;Node14 -->
 <g id="edge14" class="edge"><title>Node11&#45;&gt;Node14</title>
-<path fill="none" stroke="midnightblue" d="M1617.68,-72.1419C1786.37,-61.0515 2270.06,-29.2507 2419.48,-19.4268"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2419.91,-22.9066 2429.65,-18.7581 2419.45,-15.9217 2419.91,-22.9066"/>
+<path fill="none" stroke="midnightblue" d="M2883.31,-67.4751C2828.86,-56.3429 2733.95,-36.9374 2678.27,-25.5513"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2678.84,-22.0966 2668.34,-23.5224 2677.44,-28.9547 2678.84,-22.0966"/>
 </g>
 <!-- Node11&#45;&gt;Node15 -->
 <g id="edge15" class="edge"><title>Node11&#45;&gt;Node15</title>
-<path fill="none" stroke="midnightblue" d="M1500.39,-73.21C1280.06,-62.7223 505.597,-25.858 340.219,-17.986"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="340.375,-14.4896 330.22,-17.51 340.042,-21.4816 340.375,-14.4896"/>
+<path fill="none" stroke="midnightblue" d="M2866.29,-72.9676C2658.53,-62.2369 1962.2,-26.2719 1804.8,-18.1426"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1804.58,-14.6267 1794.42,-17.6061 1804.22,-21.6173 1804.58,-14.6267"/>
 </g>
 <!-- Node16&#45;&gt;Node9 -->
 <g id="edge20" class="edge"><title>Node16&#45;&gt;Node9</title>
-<path fill="none" stroke="midnightblue" d="M1830.28,-246.094C1836.11,-236.312 1843.21,-222.864 1847,-210 1865.27,-148.04 1866.84,-70.586 1866.47,-35.4258"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1869.97,-35.1441 1866.29,-25.2067 1862.97,-35.2663 1869.97,-35.1441"/>
+<path fill="none" stroke="midnightblue" d="M2085.76,-247.586C2246.33,-241.919 2701.71,-224.044 2849,-199 2971.8,-178.12 3339.96,-64.8561 3458.09,-28.0558"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="3459.26,-31.3558 3467.77,-25.0371 3457.18,-24.6733 3459.26,-31.3558"/>
 </g>
 <!-- Node16&#45;&gt;Node11 -->
 <g id="edge42" class="edge"><title>Node16&#45;&gt;Node11</title>
-<path fill="none" stroke="midnightblue" d="M1764.45,-251.573C1734.32,-244.462 1698.19,-231.909 1672,-210 1638.13,-181.662 1653.6,-155.769 1624,-123 1612.75,-110.543 1597.41,-99.7172 1584.46,-91.8073"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1585.94,-88.6189 1575.54,-86.5815 1582.4,-94.6579 1585.94,-88.6189"/>
+<path fill="none" stroke="midnightblue" d="M2085.51,-241.92C2135.85,-234.215 2211.08,-220.346 2274,-199 2283.18,-195.887 2421.66,-125.593 2431,-123 2509.13,-101.306 2739.63,-87.1781 2856.05,-81.2241"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2856.46,-84.7078 2866.27,-80.7073 2856.11,-77.7168 2856.46,-84.7078"/>
 </g>
 <!-- Node16&#45;&gt;Node12 -->
 <g id="edge21" class="edge"><title>Node16&#45;&gt;Node12</title>
-<path fill="none" stroke="midnightblue" d="M1770.03,-246.469C1748.11,-238.502 1723.36,-226.705 1705,-210 1651.3,-161.13 1685.66,-108.39 1626,-67 1583.96,-37.835 1445.97,-25.1011 1358.94,-19.8599"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1359.04,-16.3598 1348.85,-19.272 1358.63,-23.3479 1359.04,-16.3598"/>
+<path fill="none" stroke="midnightblue" d="M2061.99,-235.396C2124.45,-209.254 2264.77,-153.195 2388,-123 2630.11,-63.6763 2925.07,-33.6634 3060.1,-22.1931"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="3060.65,-25.6591 3070.32,-21.3342 3060.06,-18.6837 3060.65,-25.6591"/>
 </g>
 <!-- Node16&#45;&gt;Node13 -->
 <g id="edge46" class="edge"><title>Node16&#45;&gt;Node13</title>
-<path fill="none" stroke="midnightblue" d="M1877.74,-257.441C1942.66,-252.599 2051.42,-240.358 2140,-210 2215.83,-184.011 2222.85,-150.895 2298,-123 2411.26,-80.9596 2444.44,-84.3675 2564,-67 2788.31,-34.4155 3061.42,-21.3615 3153.75,-17.6785"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="3153.99,-21.1717 3163.85,-17.2845 3153.72,-14.177 3153.99,-21.1717"/>
+<path fill="none" stroke="midnightblue" d="M1976.86,-235.425C1940.85,-225.567 1891.89,-211.9 1849,-199 1665.27,-143.744 1626.56,-102.436 1438,-67 1280.85,-37.4669 774.182,-21.2316 640.558,-17.3942"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="640.284,-13.8851 630.189,-17.0999 640.085,-20.8823 640.284,-13.8851"/>
 </g>
 <!-- Node16&#45;&gt;Node14 -->
 <g id="edge48" class="edge"><title>Node16&#45;&gt;Node14</title>
-<path fill="none" stroke="midnightblue" d="M1877.59,-247.198C1900.49,-239.475 1925.76,-227.667 1944,-210 1974.44,-180.518 1950.15,-149.764 1983,-123 2050.38,-68.105 2313.92,-33.2492 2419.7,-21.2123"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2420.19,-24.6801 2429.73,-20.0858 2419.41,-17.7239 2420.19,-24.6801"/>
+<path fill="none" stroke="midnightblue" d="M2054.42,-235.437C2069.93,-226.195 2089.66,-213.213 2105,-199 2159.58,-148.422 2143.17,-103.527 2208,-67 2272.57,-30.6176 2494.25,-20.2823 2589.7,-17.4842"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2589.99,-20.9776 2599.89,-17.2003 2589.8,-13.9803 2589.99,-20.9776"/>
 </g>
 <!-- Node16&#45;&gt;Node15 -->
 <g id="edge49" class="edge"><title>Node16&#45;&gt;Node15</title>
-<path fill="none" stroke="midnightblue" d="M1764.19,-258.962C1562.17,-253.319 885.562,-232.894 791,-210 662.444,-178.876 638.797,-145.16 520,-87 502.859,-78.608 499.694,-74.1543 482,-67 433.894,-47.5486 375.512,-32.1057 340.358,-23.641"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="340.768,-20.1413 330.23,-21.2413 339.154,-26.9527 340.768,-20.1413"/>
+<path fill="none" stroke="midnightblue" d="M2055.49,-235.299C2076.87,-221.846 2101.5,-200.547 2089,-179 2027.42,-72.8951 1870.83,-33.5792 1804,-21.3146"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1804.52,-17.8537 1794.07,-19.5724 1803.31,-24.7483 1804.52,-17.8537"/>
 </g>
 <!-- Node17 -->
 <g id="node17" class="node"><title>Node17</title>
 <g id="a_node17"><a xlink:href="data__type_8h.html" target="_top" xlink:title="tvm/runtime/data_type.h">
-<polygon fill="white" stroke="black" points="2463,-123.5 2463,-142.5 2599,-142.5 2599,-123.5 2463,-123.5"/>
-<text text-anchor="middle" x="2531" y="-130.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/runtime/data_type.h</text>
+<polygon fill="white" stroke="black" points="3275,-123.5 3275,-142.5 3411,-142.5 3411,-123.5 3275,-123.5"/>
+<text text-anchor="middle" x="3343" y="-130.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/runtime/data_type.h</text>
 </a>
 </g>
 </g>
 <!-- Node16&#45;&gt;Node17 -->
 <g id="edge22" class="edge"><title>Node16&#45;&gt;Node17</title>
-<path fill="none" stroke="midnightblue" d="M1877.84,-248.595C1914.59,-239.986 1963.02,-226.905 2004,-210 2029.94,-199.299 2032.23,-187.403 2059,-179 2130.28,-156.625 2337.79,-143.356 2452.49,-137.529"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2452.81,-141.018 2462.62,-137.022 2452.46,-134.026 2452.81,-141.018"/>
+<path fill="none" stroke="midnightblue" d="M2085.53,-248.661C2230.89,-246.021 2629.36,-235.729 2959,-199 3079.18,-185.61 3218.85,-159.206 3292.13,-144.506"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="3292.91,-147.92 3302.02,-142.512 3291.52,-141.058 3292.91,-147.92"/>
 </g>
 <!-- Node18 -->
 <g id="node18" class="node"><title>Node18</title>
 <g id="a_node18"><a xlink:href="runtime_2module_8h.html" target="_top" xlink:title="Runtime container of the functions generated by TVM, This is used to support dynamically link...">
-<polygon fill="white" stroke="red" points="1714,-185 1714,-204 1838,-204 1838,-185 1714,-185"/>
-<text text-anchor="middle" x="1776" y="-192" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/runtime/module.h</text>
+<polygon fill="white" stroke="red" points="1956,-179.5 1956,-198.5 2080,-198.5 2080,-179.5 1956,-179.5"/>
+<text text-anchor="middle" x="2018" y="-186.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/runtime/module.h</text>
 </a>
 </g>
 </g>
 <!-- Node16&#45;&gt;Node18 -->
 <g id="edge27" class="edge"><title>Node16&#45;&gt;Node18</title>
-<path fill="none" stroke="midnightblue" d="M1805.56,-246.396C1797.54,-236.662 1788.4,-223.646 1782.18,-213.214"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1785.11,-211.29 1777.25,-204.195 1778.97,-214.648 1785.11,-211.29"/>
+<path fill="none" stroke="midnightblue" d="M2020.55,-235.399C2018.07,-227.466 2016.05,-217.458 2015.16,-208.858"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2018.64,-208.48 2014.67,-198.658 2011.65,-208.813 2018.64,-208.48"/>
 </g>
 <!-- Node16&#45;&gt;Node19 -->
 <g id="edge45" class="edge"><title>Node16&#45;&gt;Node19</title>
-<path fill="none" stroke="midnightblue" d="M1877.81,-252.689C1910.03,-245.87 1949.47,-233.237 1978,-210 1996.59,-194.856 2008.62,-169.413 2015.11,-152.178"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2018.43,-153.269 2018.43,-142.674 2011.83,-150.956 2018.43,-153.269"/>
+<path fill="none" stroke="midnightblue" d="M1994.46,-235.393C1977.95,-227.046 1959.17,-214.953 1947,-199 1936.76,-185.575 1932.03,-166.757 1929.85,-152.822"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1933.32,-152.37 1928.59,-142.889 1926.38,-153.249 1933.32,-152.37"/>
 </g>
 <!-- Node16&#45;&gt;Node21 -->
 <g id="edge50" class="edge"><title>Node16&#45;&gt;Node21</title>
-<path fill="none" stroke="midnightblue" d="M1764.31,-248.404C1688.92,-232.357 1551.51,-203.23 1434,-179 1367,-165.184 1288.57,-149.463 1244.86,-140.739"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1245.27,-137.253 1234.78,-138.729 1243.9,-144.118 1245.27,-137.253"/>
+<path fill="none" stroke="midnightblue" d="M1972.38,-241.935C1907.03,-232.93 1796.4,-216.848 1702,-199 1608.6,-181.342 1499.45,-155.439 1445.59,-142.3"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1446.12,-138.827 1435.57,-139.849 1444.45,-145.626 1446.12,-138.827"/>
 </g>
 <!-- Node22 -->
 <g id="node22" class="node"><title>Node22</title>
 <g id="a_node22"><a xlink:href="ndarray_8h.html" target="_top" xlink:title="A device&#45;independent managed NDArray abstraction. ">
-<polygon fill="white" stroke="red" points="1002.5,-185 1002.5,-204 1125.5,-204 1125.5,-185 1002.5,-185"/>
-<text text-anchor="middle" x="1064" y="-192" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/runtime/ndarray.h</text>
+<polygon fill="white" stroke="red" points="2717.5,-179.5 2717.5,-198.5 2840.5,-198.5 2840.5,-179.5 2717.5,-179.5"/>
+<text text-anchor="middle" x="2779" y="-186.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/runtime/ndarray.h</text>
 </a>
 </g>
 </g>
 <!-- Node16&#45;&gt;Node22 -->
 <g id="edge36" class="edge"><title>Node16&#45;&gt;Node22</title>
-<path fill="none" stroke="midnightblue" d="M1764.41,-255.641C1628.77,-243.994 1285.27,-214.5 1135.58,-201.646"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1135.83,-198.155 1125.57,-200.787 1135.23,-205.129 1135.83,-198.155"/>
+<path fill="none" stroke="midnightblue" d="M2085.87,-246.27C2200.97,-239.515 2470.01,-222.625 2707.25,-199.022"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2707.85,-202.479 2717.45,-198.001 2707.15,-195.514 2707.85,-202.479"/>
 </g>
 <!-- Node16&#45;&gt;Node23 -->
 <g id="edge43" class="edge"><title>Node16&#45;&gt;Node23</title>
-<path fill="none" stroke="midnightblue" d="M1877.85,-249.699C1925.24,-240.331 1994.39,-225.819 2054,-210 2057.42,-209.092 2060.97,-208.088 2064.5,-207.045"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2065.79,-210.313 2074.33,-204.049 2063.75,-203.617 2065.79,-210.313"/>
+<path fill="none" stroke="midnightblue" d="M1972.17,-246.301C1783.35,-235.672 1179.54,-201.683 1014.21,-192.376"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1014.14,-188.867 1003.96,-191.799 1013.75,-195.856 1014.14,-188.867"/>
 </g>
 <!-- Node16&#45;&gt;Node24 -->
 <g id="edge44" class="edge"><title>Node16&#45;&gt;Node24</title>
-<path fill="none" stroke="midnightblue" d="M1764.26,-258.269C1601.61,-251.793 1119.79,-232.049 720,-210 650.821,-206.185 569.744,-200.714 526.292,-197.7"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="526.305,-194.193 516.086,-196.99 525.819,-201.176 526.305,-194.193"/>
+<path fill="none" stroke="midnightblue" d="M2085.91,-247.347C2341.33,-237.686 3377.78,-198.481 3570.93,-191.175"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="3571.11,-194.671 3580.97,-190.796 3570.84,-187.676 3571.11,-194.671"/>
 </g>
 <!-- Node25 -->
 <g id="node25" class="node"><title>Node25</title>
-<polygon fill="white" stroke="#bfbfbf" points="1894.5,-185 1894.5,-204 1935.5,-204 1935.5,-185 1894.5,-185"/>
-<text text-anchor="middle" x="1915" y="-192" font-family="Helvetica,sans-Serif" font-size="10.00">tuple</text>
+<polygon fill="white" stroke="#bfbfbf" points="1858.5,-179.5 1858.5,-198.5 1899.5,-198.5 1899.5,-179.5 1858.5,-179.5"/>
+<text text-anchor="middle" x="1879" y="-186.5" font-family="Helvetica,sans-Serif" font-size="10.00">tuple</text>
 </g>
 <!-- Node16&#45;&gt;Node25 -->
 <g id="edge47" class="edge"><title>Node16&#45;&gt;Node25</title>
-<path fill="none" stroke="midnightblue" d="M1841.4,-246.396C1857.01,-235.597 1878.48,-220.756 1894.19,-209.887"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1896.46,-212.573 1902.7,-204.007 1892.48,-206.816 1896.46,-212.573"/>
+<path fill="none" stroke="midnightblue" d="M1993.45,-235.399C1967.72,-225.19 1933.31,-211.545 1908.97,-201.887"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1910.11,-198.576 1899.53,-198.142 1907.53,-205.082 1910.11,-198.576"/>
 </g>
 <!-- Node17&#45;&gt;Node9 -->
 <g id="edge23" class="edge"><title>Node17&#45;&gt;Node9</title>
-<path fill="none" stroke="midnightblue" d="M2503.96,-123.412C2459.4,-109.566 2367.55,-82.3181 2288,-67 2156.77,-41.7297 2000,-26.9389 1919.51,-20.4483"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1919.39,-16.9274 1909.14,-19.624 1918.83,-23.9054 1919.39,-16.9274"/>
+<path fill="none" stroke="midnightblue" d="M3411.25,-126.027C3463.75,-119.852 3530.08,-108.063 3547,-87 3562.25,-68.0154 3538.08,-45.198 3517.84,-30.77"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="3519.77,-27.8509 3509.53,-25.1358 3515.84,-33.6447 3519.77,-27.8509"/>
 </g>
 <!-- Node17&#45;&gt;Node12 -->
 <g id="edge24" class="edge"><title>Node17&#45;&gt;Node12</title>
-<path fill="none" stroke="midnightblue" d="M2462.68,-125.667C2246.99,-105.674 1581.67,-44.0007 1358.7,-23.3316"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1359.01,-19.8459 1348.73,-22.4079 1358.37,-26.8161 1359.01,-19.8459"/>
+<path fill="none" stroke="midnightblue" d="M3328.47,-123.409C3307.06,-110.798 3265.69,-86.6426 3230,-67 3210.47,-56.252 3188.48,-44.6468 3170.47,-35.275"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="3171.95,-32.0951 3161.46,-30.594 3168.72,-38.3079 3171.95,-32.0951"/>
 </g>
 <!-- Node17&#45;&gt;Node13 -->
 <g id="edge25" class="edge"><title>Node17&#45;&gt;Node13</title>
-<path fill="none" stroke="midnightblue" d="M2546.28,-123.278C2572.36,-108.852 2627.54,-80.2991 2678,-67 2853.11,-20.8461 3072.31,-16.197 3153.67,-16.2006"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="3153.77,-19.7008 3163.77,-16.2259 3153.78,-12.7008 3153.77,-19.7008"/>
+<path fill="none" stroke="midnightblue" d="M3301.73,-123.488C3222.54,-107.297 3051.8,-73.1297 2992,-67 2747.65,-41.9536 907.328,-19.9432 640.274,-16.8668"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="640.123,-13.3649 630.084,-16.7498 640.043,-20.3645 640.123,-13.3649"/>
 </g>
 <!-- Node17&#45;&gt;Node14 -->
 <g id="edge26" class="edge"><title>Node17&#45;&gt;Node14</title>
-<path fill="none" stroke="midnightblue" d="M2525.96,-123.319C2515,-104.422 2488.8,-59.2483 2474.22,-34.1206"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2477.14,-32.1758 2469.09,-25.282 2471.08,-35.6882 2477.14,-32.1758"/>
+<path fill="none" stroke="midnightblue" d="M3319.83,-123.388C3280.83,-109.255 3199.31,-81.2866 3128,-67 2964.12,-34.1669 2765.14,-22.1009 2678.21,-18.178"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2678.32,-14.6795 2668.17,-17.7392 2678.01,-21.6728 2678.32,-14.6795"/>
 </g>
 <!-- Node18&#45;&gt;Node10 -->
 <g id="edge29" class="edge"><title>Node18&#45;&gt;Node10</title>
-<path fill="none" stroke="midnightblue" d="M1737.46,-184.975C1689.52,-174.371 1607.64,-156.26 1555.49,-144.724"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1555.99,-141.251 1545.47,-142.509 1554.48,-148.086 1555.99,-141.251"/>
+<path fill="none" stroke="midnightblue" d="M2080.05,-181.106C2169.55,-171.161 2334.61,-152.821 2430.23,-142.197"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2430.84,-145.65 2440.4,-141.067 2430.07,-138.693 2430.84,-145.65"/>
 </g>
 <!-- Node18&#45;&gt;Node11 -->
 <g id="edge30" class="edge"><title>Node18&#45;&gt;Node11</title>
-<path fill="none" stroke="midnightblue" d="M1763.36,-184.852C1742.68,-170.905 1700.13,-142.99 1662,-123 1638.58,-110.719 1611.08,-98.8286 1590.4,-90.3574"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1591.71,-87.1116 1581.12,-86.6019 1589.08,-93.5997 1591.71,-87.1116"/>
+<path fill="none" stroke="midnightblue" d="M2060.85,-179.457C2131.04,-165.737 2275.01,-138.741 2398,-123 2561.79,-102.037 2755.51,-88.3203 2856.29,-82.0136"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2856.58,-85.5027 2866.34,-81.39 2856.14,-78.5161 2856.58,-85.5027"/>
 </g>
 <!-- Node18&#45;&gt;Node12 -->
 <g id="edge28" class="edge"><title>Node18&#45;&gt;Node12</title>
-<path fill="none" stroke="midnightblue" d="M1776.36,-184.712C1776.8,-161.345 1774.46,-97.3976 1738,-67 1709.54,-43.2723 1480.26,-27.2332 1359.23,-20.3329"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1359.11,-16.8208 1348.93,-19.7529 1358.72,-23.8098 1359.11,-16.8208"/>
+<path fill="none" stroke="midnightblue" d="M2049.62,-179.464C2097.13,-166.716 2189.81,-142.154 2269,-123 2379.25,-96.3339 2405.87,-84.12 2518,-67 2712.35,-37.3271 2944.04,-24.2605 3060.07,-19.2292"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="3060.41,-22.7179 3070.26,-18.7954 3060.12,-15.7242 3060.41,-22.7179"/>
 </g>
 <!-- Node18&#45;&gt;Node13 -->
 <g id="edge32" class="edge"><title>Node18&#45;&gt;Node13</title>
-<path fill="none" stroke="midnightblue" d="M1796.45,-184.932C1833.13,-169.926 1912.85,-138.967 1983,-123 2438.79,-19.2564 3013.08,-15.436 3153.8,-16.1792"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="3153.88,-19.6797 3163.91,-16.2449 3153.93,-12.6799 3153.88,-19.6797"/>
+<path fill="none" stroke="midnightblue" d="M2013.42,-179.307C2005.52,-165.185 1987.94,-137.394 1965,-123 1874.05,-65.9362 1836.33,-81.9248 1730,-67 1511.28,-36.2981 801.008,-20.3874 640.457,-17.1325"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="640.341,-13.6295 630.273,-16.9281 640.201,-20.6281 640.341,-13.6295"/>
 </g>
 <!-- Node18&#45;&gt;Node16 -->
 <g id="edge35" class="edge"><title>Node18&#45;&gt;Node16</title>
-<path fill="none" stroke="midnightblue" d="M1786.79,-204.195C1794.5,-212.714 1804.31,-226.067 1811.65,-237.632"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1808.76,-239.627 1816.91,-246.396 1814.76,-236.022 1808.76,-239.627"/>
+<path fill="none" stroke="midnightblue" d="M2024.53,-198.658C2027.33,-205.655 2029.79,-215.857 2031.17,-225.289"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2027.71,-225.821 2032.23,-235.399 2034.67,-225.088 2027.71,-225.821"/>
 </g>
 <!-- Node18&#45;&gt;Node19 -->
 <g id="edge31" class="edge"><title>Node18&#45;&gt;Node19</title>
-<path fill="none" stroke="midnightblue" d="M1811.1,-184.975C1856.59,-173.928 1935.62,-154.735 1982.64,-143.315"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1983.56,-146.694 1992.45,-140.933 1981.91,-139.892 1983.56,-146.694"/>
+<path fill="none" stroke="midnightblue" d="M2003.54,-179.324C1989.5,-170.9 1967.95,-157.971 1951.46,-148.074"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1952.8,-144.798 1942.42,-142.655 1949.2,-150.801 1952.8,-144.798"/>
 </g>
 <!-- Node18&#45;&gt;Node20 -->
 <g id="edge33" class="edge"><title>Node18&#45;&gt;Node20</title>
-<path fill="none" stroke="midnightblue" d="M1831.61,-184.988C1848.7,-182.674 1867.59,-180.417 1885,-179 2171.26,-155.699 3081.15,-138.976 3318.13,-134.94"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="3318.31,-138.437 3328.25,-134.768 3318.19,-131.438 3318.31,-138.437"/>
+<path fill="none" stroke="midnightblue" d="M1955.79,-182.341C1940.2,-181.079 1923.5,-179.862 1908,-179 1334.4,-147.104 638.248,-136.967 437.593,-134.603"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="437.62,-131.103 427.58,-134.487 437.538,-138.103 437.62,-131.103"/>
 </g>
 <!-- Node18&#45;&gt;Node21 -->
 <g id="edge34" class="edge"><title>Node18&#45;&gt;Node21</title>
-<path fill="none" stroke="midnightblue" d="M1713.92,-186.962C1594.66,-174.403 1337.16,-147.286 1244.82,-137.561"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1244.95,-134.056 1234.64,-136.49 1244.22,-141.018 1244.95,-134.056"/>
+<path fill="none" stroke="midnightblue" d="M1956,-182.475C1829.22,-171.178 1544.07,-145.768 1445.94,-137.024"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1445.98,-133.514 1435.71,-136.112 1445.36,-140.486 1445.98,-133.514"/>
 </g>
 <!-- Node22&#45;&gt;Node11 -->
 <g id="edge39" class="edge"><title>Node22&#45;&gt;Node11</title>
-<path fill="none" stroke="midnightblue" d="M1074.5,-184.763C1093.68,-169.345 1136.25,-137.566 1178,-123 1282.72,-86.4606 1412.42,-78.4169 1490.28,-77.2579"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1490.49,-80.7558 1500.45,-77.1414 1490.41,-73.7563 1490.49,-80.7558"/>
+<path fill="none" stroke="midnightblue" d="M2790.46,-179.368C2815.3,-160.653 2874.36,-116.151 2905.31,-92.8324"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2907.72,-95.4044 2913.6,-86.5914 2903.5,-89.8135 2907.72,-95.4044"/>
 </g>
 <!-- Node22&#45;&gt;Node12 -->
 <g id="edge37" class="edge"><title>Node22&#45;&gt;Node12</title>
-<path fill="none" stroke="midnightblue" d="M1071.89,-184.791C1084.57,-170.998 1110.55,-143.582 1135,-123 1174.14,-90.0557 1223.03,-56.6409 1254.14,-36.2435"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1256.34,-38.988 1262.81,-30.5965 1252.52,-33.1229 1256.34,-38.988"/>
+<path fill="none" stroke="midnightblue" d="M2781.87,-179.23C2789.77,-156.527 2814.2,-95.6137 2857,-67 2889.54,-45.2496 2989.74,-30.951 3060.24,-23.2918"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="3060.84,-26.7478 3070.41,-22.2098 3060.1,-19.7871 3060.84,-26.7478"/>
 </g>
 <!-- Node22&#45;&gt;Node15 -->
 <g id="edge40" class="edge"><title>Node22&#45;&gt;Node15</title>
-<path fill="none" stroke="midnightblue" d="M1032.55,-184.933C953.896,-163.643 742.194,-107.157 564,-67 483.775,-48.9208 388.562,-30.9931 340.343,-22.1611"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="340.702,-18.6688 330.236,-20.3171 339.446,-25.5552 340.702,-18.6688"/>
+<path fill="none" stroke="midnightblue" d="M2757.47,-179.406C2724.77,-166.479 2660.51,-141.566 2605,-123 2521.91,-95.2096 2502.14,-82.9619 2416,-67 2183.8,-23.974 1897.79,-17.5661 1804.22,-16.6437"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1804.05,-13.1422 1794.02,-16.558 1803.99,-20.142 1804.05,-13.1422"/>
 </g>
 <!-- Node22&#45;&gt;Node17 -->
 <g id="edge38" class="edge"><title>Node22&#45;&gt;Node17</title>
-<path fill="none" stroke="midnightblue" d="M1125.63,-191C1359.61,-181.51 2192.77,-147.718 2452.85,-137.169"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2453.12,-140.662 2462.97,-136.759 2452.84,-133.667 2453.12,-140.662"/>
+<path fill="none" stroke="midnightblue" d="M2840.7,-182.092C2943.91,-172.21 3151.39,-152.346 3264.72,-141.495"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="3265.1,-144.975 3274.72,-140.537 3264.43,-138.007 3265.1,-144.975"/>
 </g>
 <!-- Node22&#45;&gt;Node21 -->
 <g id="edge41" class="edge"><title>Node22&#45;&gt;Node21</title>
-<path fill="none" stroke="midnightblue" d="M1085.06,-184.975C1110.06,-174.857 1151.94,-157.905 1180.49,-146.351"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1182.02,-149.505 1189.98,-142.509 1179.39,-143.017 1182.02,-149.505"/>
+<path fill="none" stroke="midnightblue" d="M2717.46,-179.85C2714.26,-179.536 2711.1,-179.249 2708,-179 2452.07,-158.379 1623.38,-138.781 1445.73,-134.753"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1445.74,-131.252 1435.67,-134.525 1445.58,-138.25 1445.74,-131.252"/>
 </g>
 <!-- Node27&#45;&gt;Node10 -->
-<g id="edge84" class="edge"><title>Node27&#45;&gt;Node10</title>
-<path fill="none" stroke="midnightblue" d="M1632.45,-492.435C1610.84,-458.478 1532.49,-329.35 1511,-210 1507.53,-190.726 1506.74,-168.294 1506.68,-152.822"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1510.19,-152.539 1506.76,-142.514 1503.19,-152.49 1510.19,-152.539"/>
+<g id="edge83" class="edge"><title>Node27&#45;&gt;Node10</title>
+<path fill="none" stroke="midnightblue" d="M2458.26,-481.43C2494.41,-465.284 2564,-426.71 2564,-369 2564,-369 2564,-369 2564,-311 2564,-249.809 2531.44,-182.717 2514.29,-151.626"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2517.3,-149.83 2509.32,-142.85 2511.2,-153.277 2517.3,-149.83"/>
 </g>
 <!-- Node27&#45;&gt;Node11 -->
-<g id="edge86" class="edge"><title>Node27&#45;&gt;Node11</title>
-<path fill="none" stroke="midnightblue" d="M1578.42,-497.329C1513.78,-492.252 1408.03,-480.744 1320,-456 1286.8,-446.667 1058.41,-358.325 1035,-333 995.785,-290.584 1002.46,-266.986 993,-210 990.743,-196.408 985.248,-190.39 993,-179 1031.17,-122.915 1069.11,-139.171 1135,-123 1257.79,-92.8606 1405.48,-82.9072 1490.18,-79.6204"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1490.33,-83.1174 1500.19,-79.2517 1490.07,-76.1222 1490.33,-83.1174"/>
+<g id="edge85" class="edge"><title>Node27&#45;&gt;Node11</title>
+<path fill="none" stroke="midnightblue" d="M2482.37,-481.478C2528.71,-472.318 2597.15,-457.402 2621,-445 2768.27,-368.43 2796.07,-326.792 2902,-199 2920.44,-176.758 2927.4,-170.871 2935,-143 2939.22,-127.51 2935.79,-109.424 2931.84,-96.2684"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2935.07,-94.8958 2928.55,-86.5424 2928.44,-97.1366 2935.07,-94.8958"/>
 </g>
 <!-- Node27&#45;&gt;Node12 -->
-<g id="edge82" class="edge"><title>Node27&#45;&gt;Node12</title>
-<path fill="none" stroke="midnightblue" d="M1578.26,-496.954C1481.8,-489.987 1296.18,-474.659 1233,-456 1116.06,-421.467 806.276,-236.546 791,-210 699.127,-50.3445 1052.26,-22.0466 1211.2,-17.3069"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1211.48,-20.8006 1221.38,-17.0276 1211.29,-13.8032 1211.48,-20.8006"/>
+<g id="edge81" class="edge"><title>Node27&#45;&gt;Node12</title>
+<path fill="none" stroke="midnightblue" d="M2496.54,-490.909C2580.1,-490.581 2736.04,-484.073 2862,-445 3131.33,-361.455 3275.66,-385.249 3420,-143 3424.55,-135.364 3425.07,-130.3 3420,-123 3372.49,-54.6045 3275.5,-30.0485 3207.71,-21.282"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="3207.85,-17.7746 3197.51,-20.053 3207.02,-24.7243 3207.85,-17.7746"/>
 </g>
 <!-- Node27&#45;&gt;Node13 -->
-<g id="edge88" class="edge"><title>Node27&#45;&gt;Node13</title>
-<path fill="none" stroke="midnightblue" d="M1697.62,-500.613C1858.36,-499.1 2297.99,-491.719 2438,-456 2616.56,-410.448 2660.04,-381.25 2812,-277 2848.04,-252.272 2855.47,-243.119 2884,-210 2934.29,-151.624 2918.69,-110.906 2982,-67 3034.65,-30.4863 3111.07,-20.3043 3153.87,-17.5101"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="3154.2,-20.9972 3163.99,-16.9474 3153.81,-14.008 3154.2,-20.9972"/>
+<g id="edge87" class="edge"><title>Node27&#45;&gt;Node13</title>
+<path fill="none" stroke="midnightblue" d="M2377.42,-484.913C2359.31,-483.492 2339.35,-482.051 2321,-481 2105.21,-468.638 1560.78,-488.211 1349,-445 1311.23,-437.294 1304.02,-427.738 1268,-414 1160.49,-372.991 1119.79,-388.664 1026,-322 905.406,-236.281 937.541,-145.295 812,-67 757.917,-33.2707 682.649,-22.0588 640.295,-18.3393"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="640.514,-14.8458 630.269,-17.5459 639.962,-21.824 640.514,-14.8458"/>
 </g>
 <!-- Node27&#45;&gt;Node14 -->
-<g id="edge89" class="edge"><title>Node27&#45;&gt;Node14</title>
-<path fill="none" stroke="midnightblue" d="M1697.63,-498.778C1844.55,-493.014 2215.82,-476.589 2268,-456 2278.1,-452.013 2565.07,-218.366 2572,-210 2593.56,-183.967 2599.75,-175.782 2608,-143 2622.44,-85.6073 2547.32,-46.9046 2500.1,-28.6232"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2501.09,-25.2544 2490.49,-25.0327 2498.64,-31.8116 2501.09,-25.2544"/>
+<g id="edge88" class="edge"><title>Node27&#45;&gt;Node14</title>
+<path fill="none" stroke="midnightblue" d="M2440.59,-481.377C2463.85,-425.472 2593.36,-114.184 2626.35,-34.8846"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2629.73,-35.8627 2630.34,-25.2855 2623.27,-33.1738 2629.73,-35.8627"/>
 </g>
 <!-- Node27&#45;&gt;Node16 -->
-<g id="edge87" class="edge"><title>Node27&#45;&gt;Node16</title>
-<path fill="none" stroke="midnightblue" d="M1644.73,-492.229C1670.7,-458.386 1764.86,-335.663 1803.88,-284.81"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1806.77,-286.801 1810.08,-276.737 1801.21,-282.54 1806.77,-286.801"/>
+<g id="edge86" class="edge"><title>Node27&#45;&gt;Node16</title>
+<path fill="none" stroke="midnightblue" d="M2438.21,-481.052C2439.77,-465.755 2440.75,-434.268 2426,-414 2348.08,-306.904 2186.84,-270.184 2095.64,-257.744"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2096.06,-254.269 2085.69,-256.445 2095.15,-261.21 2096.06,-254.269"/>
 </g>
 <!-- Node27&#45;&gt;Node17 -->
-<g id="edge83" class="edge"><title>Node27&#45;&gt;Node17</title>
-<path fill="none" stroke="midnightblue" d="M1697.57,-498.503C1838.29,-492.316 2183.22,-475.369 2232,-456 2252.19,-447.982 2367.49,-347.12 2384,-333 2412.22,-308.868 2420.14,-303.643 2446,-277 2473.23,-248.948 2483.27,-243.753 2503,-210 2513.72,-191.654 2521.54,-168.483 2526.15,-152.577"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2529.54,-153.445 2528.82,-142.875 2522.79,-151.586 2529.54,-153.445"/>
+<g id="edge82" class="edge"><title>Node27&#45;&gt;Node17</title>
+<path fill="none" stroke="midnightblue" d="M2496.61,-486.709C2590.26,-480.934 2767.24,-467.416 2826,-445 2848.35,-436.473 2849.8,-426.817 2870,-414 3033.19,-310.443 3084.1,-301.435 3248,-199 3274.48,-182.45 3304.03,-161.888 3323.03,-148.377"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="3325.11,-151.192 3331.21,-142.532 3321.04,-145.496 3325.11,-151.192"/>
 </g>
 <!-- Node27&#45;&gt;Node21 -->
-<g id="edge90" class="edge"><title>Node27&#45;&gt;Node21</title>
-<path fill="none" stroke="midnightblue" d="M1617.38,-492.454C1578.26,-475.735 1491.4,-436.181 1427,-389 1398.35,-368.012 1393.79,-359.365 1370,-333 1311.8,-268.508 1249.29,-185.813 1223.36,-150.828"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1226.07,-148.604 1217.32,-142.639 1220.44,-152.762 1226.07,-148.604"/>
+<g id="edge89" class="edge"><title>Node27&#45;&gt;Node21</title>
+<path fill="none" stroke="midnightblue" d="M2403.99,-481.497C2273.22,-447.683 1791.49,-321.891 1644,-266 1579.87,-241.698 1564.19,-233.645 1505,-199 1478.41,-183.436 1449.6,-162.627 1431.2,-148.792"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1433.15,-145.873 1423.06,-142.614 1428.91,-151.448 1433.15,-145.873"/>
 </g>
 <!-- Node27&#45;&gt;Node22 -->
-<g id="edge85" class="edge"><title>Node27&#45;&gt;Node22</title>
-<path fill="none" stroke="midnightblue" d="M1578.35,-500.016C1522.19,-497.046 1436.93,-487.169 1370,-456 1302.16,-424.409 1133.14,-262.837 1080.15,-211.311"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1082.28,-208.498 1072.68,-204.023 1077.39,-213.51 1082.28,-208.498"/>
+<g id="edge84" class="edge"><title>Node27&#45;&gt;Node22</title>
+<path fill="none" stroke="midnightblue" d="M2491.93,-481.485C2552.05,-468.68 2640,-438.134 2640,-369 2640,-369 2640,-369 2640,-311 2640,-276.183 2635.82,-260.98 2659,-235 2673.71,-218.518 2695.05,-208.051 2715.68,-201.414"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2716.8,-204.733 2725.39,-198.54 2714.81,-198.021 2716.8,-204.733"/>
 </g>
 <!-- Node27&#45;&gt;Node28 -->
-<g id="edge66" class="edge"><title>Node27&#45;&gt;Node28</title>
-<path fill="none" stroke="midnightblue" d="M1697.53,-499.706C1814.32,-496.566 2081.53,-486.411 2304,-456 2305.46,-455.8 2306.94,-455.588 2308.42,-455.365"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2309.03,-458.812 2318.33,-453.747 2307.9,-451.903 2309.03,-458.812"/>
+<g id="edge65" class="edge"><title>Node27&#45;&gt;Node28</title>
+<path fill="none" stroke="midnightblue" d="M2487.88,-481.477C2539.41,-472.738 2621.36,-458.546 2692,-445 2693.45,-444.722 2694.91,-444.439 2696.39,-444.152"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2697.11,-447.576 2706.25,-442.204 2695.76,-440.709 2697.11,-447.576"/>
 </g>
 <!-- Node27&#45;&gt;Node30 -->
-<g id="edge76" class="edge"><title>Node27&#45;&gt;Node30</title>
-<path fill="none" stroke="midnightblue" d="M1697.51,-494.319C1797.48,-483.096 1998.38,-460.543 2102.33,-448.872"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2102.84,-452.337 2112.39,-447.743 2102.06,-445.381 2102.84,-452.337"/>
+<g id="edge75" class="edge"><title>Node27&#45;&gt;Node30</title>
+<path fill="none" stroke="midnightblue" d="M2377.38,-483.351C2302.52,-474.906 2170.15,-459.663 2057,-445 2044.97,-443.441 2032.16,-441.701 2019.8,-439.983"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2019.95,-436.47 2009.56,-438.55 2018.98,-443.403 2019.95,-436.47"/>
 </g>
 <!-- Node28&#45;&gt;Node7 -->
-<g id="edge67" class="edge"><title>Node28&#45;&gt;Node7</title>
-<path fill="none" stroke="midnightblue" d="M2318.3,-432.264C2234.66,-421.398 2076.74,-400.882 1985.62,-389.044"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1986.03,-385.568 1975.66,-387.75 1985.12,-392.51 1986.03,-385.568"/>
+<g id="edge66" class="edge"><title>Node28&#45;&gt;Node7</title>
+<path fill="none" stroke="midnightblue" d="M2706.2,-424.778C2550.43,-414.388 2111.11,-385.083 1939.71,-373.65"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1939.76,-370.146 1929.55,-372.972 1939.29,-377.13 1939.76,-370.146"/>
 </g>
 <!-- Node28&#45;&gt;Node13 -->
-<g id="edge75" class="edge"><title>Node28&#45;&gt;Node13</title>
-<path fill="none" stroke="midnightblue" d="M2420.86,-425.465C2478.89,-407.346 2580.09,-373.309 2662,-333 2754.28,-287.59 2785.97,-283.408 2858,-210 2909.94,-157.063 2883.53,-109.943 2944,-67 2977.44,-43.2539 3095.81,-26.8188 3153.68,-20.0123"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="3154.21,-23.4743 3163.74,-18.8539 3153.41,-16.5202 3154.21,-23.4743"/>
+<g id="edge74" class="edge"><title>Node28&#45;&gt;Node13</title>
+<path fill="none" stroke="midnightblue" d="M2706.25,-421.452C2585.73,-405.94 2292.1,-366.647 2048,-322 1932.58,-300.889 1901.32,-303.105 1790,-266 1717.65,-241.882 1551.53,-143.261 1478,-123 1334.91,-83.5708 1289.15,-126.205 1146,-87 1125.28,-81.3259 1122.74,-72.6105 1102,-67 932.708,-21.1932 720.524,-16.2981 640.521,-16.2192"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="640.122,-12.7197 630.127,-16.2362 640.133,-19.7197 640.122,-12.7197"/>
 </g>
 <!-- Node28&#45;&gt;Node17 -->
-<g id="edge74" class="edge"><title>Node28&#45;&gt;Node17</title>
-<path fill="none" stroke="midnightblue" d="M2398.85,-425.375C2438.98,-401.012 2516.88,-346.905 2550,-277 2568.66,-237.606 2564.2,-221.381 2554,-179 2551.69,-169.418 2546.98,-159.581 2542.42,-151.558"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2545.29,-149.537 2537.1,-142.809 2539.31,-153.173 2545.29,-149.537"/>
+<g id="edge73" class="edge"><title>Node28&#45;&gt;Node17</title>
+<path fill="none" stroke="midnightblue" d="M2789.63,-414.494C2888.18,-364.541 3221.61,-195.528 3317.2,-147.08"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="3318.85,-150.168 3326.18,-142.525 3315.68,-143.924 3318.85,-150.168"/>
 </g>
 <!-- Node29 -->
 <g id="node29" class="node"><title>Node29</title>
 <g id="a_node29"><a xlink:href="functor_8h.html" target="_top" xlink:title="Defines the Functor data structures. ">
-<polygon fill="white" stroke="black" points="800.5,-185 800.5,-204 907.5,-204 907.5,-185 800.5,-185"/>
-<text text-anchor="middle" x="854" y="-192" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/node/functor.h</text>
+<polygon fill="white" stroke="black" points="2592.5,-179.5 2592.5,-198.5 2699.5,-198.5 2699.5,-179.5 2592.5,-179.5"/>
+<text text-anchor="middle" x="2646" y="-186.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/node/functor.h</text>
 </a>
 </g>
 </g>
 <!-- Node28&#45;&gt;Node29 -->
-<g id="edge68" class="edge"><title>Node28&#45;&gt;Node29</title>
-<path fill="none" stroke="midnightblue" d="M2318.22,-428.936C2242.59,-414.725 2103.27,-388.933 1984,-369 1578.73,-301.269 1089.95,-229.621 917.758,-204.685"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="918.003,-201.184 907.605,-203.216 917.001,-208.112 918.003,-201.184"/>
+<g id="edge67" class="edge"><title>Node28&#45;&gt;Node29</title>
+<path fill="none" stroke="midnightblue" d="M2764.54,-414.497C2769.92,-380.267 2778.96,-291.062 2739,-235 2727.84,-219.348 2709.73,-208.915 2692.44,-202.074"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2693.57,-198.758 2682.97,-198.633 2691.18,-205.338 2693.57,-198.758"/>
 </g>
 <!-- Node29&#45;&gt;Node9 -->
-<g id="edge69" class="edge"><title>Node29&#45;&gt;Node9</title>
-<path fill="none" stroke="midnightblue" d="M849.295,-184.56C842.328,-169.946 831.564,-140.792 846,-123 922.122,-29.184 992.321,-83.522 1112,-67 1373.09,-30.9557 1688.73,-20.3834 1812.52,-17.502"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1812.93,-20.9938 1822.85,-17.2693 1812.78,-13.9956 1812.93,-20.9938"/>
+<g id="edge68" class="edge"><title>Node29&#45;&gt;Node9</title>
+<path fill="none" stroke="midnightblue" d="M2657.04,-179.189C2687.02,-155.833 2773.36,-92.3922 2857,-67 2912.23,-50.2324 3298.17,-27.4122 3441.54,-19.4133"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="3442.05,-22.8905 3451.84,-18.841 3441.66,-15.9013 3442.05,-22.8905"/>
 </g>
 <!-- Node29&#45;&gt;Node11 -->
-<g id="edge70" class="edge"><title>Node29&#45;&gt;Node11</title>
-<path fill="none" stroke="midnightblue" d="M859.234,-184.831C869.204,-169.225 892.572,-136.746 922,-123 972.446,-99.4364 1335.96,-85.1445 1490.15,-80.0827"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1490.56,-83.5714 1500.44,-79.7484 1490.33,-76.5751 1490.56,-83.5714"/>
+<g id="edge69" class="edge"><title>Node29&#45;&gt;Node11</title>
+<path fill="none" stroke="midnightblue" d="M2667.89,-179.368C2717.09,-159.97 2836.56,-112.87 2893.63,-90.3673"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2895.19,-93.5154 2903.21,-86.5914 2892.62,-87.0033 2895.19,-93.5154"/>
 </g>
 <!-- Node29&#45;&gt;Node14 -->
-<g id="edge71" class="edge"><title>Node29&#45;&gt;Node14</title>
-<path fill="none" stroke="midnightblue" d="M855.635,-184.78C859.026,-169.39 868.387,-137.653 890,-123 945.512,-85.3644 1424.11,-71.8502 1491,-67 1849.03,-41.0393 2282.12,-23.4161 2419.78,-18.1478"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2420.07,-21.6394 2429.93,-17.7613 2419.8,-14.6444 2420.07,-21.6394"/>
+<g id="edge70" class="edge"><title>Node29&#45;&gt;Node14</title>
+<path fill="none" stroke="midnightblue" d="M2647.03,-179.439C2649.21,-159.573 2653.69,-108.844 2648,-67 2646.52,-56.1271 2643.33,-44.2901 2640.36,-34.8109"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2643.68,-33.6855 2637.22,-25.2874 2637.03,-35.8806 2643.68,-33.6855"/>
 </g>
 <!-- Node29&#45;&gt;Node15 -->
-<g id="edge72" class="edge"><title>Node29&#45;&gt;Node15</title>
-<path fill="none" stroke="midnightblue" d="M833.979,-184.889C782.584,-163.012 641.491,-104.38 520,-67 457.076,-47.6397 381.717,-31.1753 340.235,-22.6773"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="340.699,-19.2003 330.203,-20.6425 339.308,-26.0606 340.699,-19.2003"/>
+<g id="edge71" class="edge"><title>Node29&#45;&gt;Node15</title>
+<path fill="none" stroke="midnightblue" d="M2639.38,-179.116C2628.11,-164.721 2603.71,-136.546 2576,-123 2435.75,-54.4468 1934.84,-24.6994 1804.29,-18.0084"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1804.34,-14.5065 1794.18,-17.4984 1803.99,-21.4976 1804.34,-14.5065"/>
 </g>
 <!-- Node29&#45;&gt;Node21 -->
-<g id="edge73" class="edge"><title>Node29&#45;&gt;Node21</title>
-<path fill="none" stroke="midnightblue" d="M905.149,-184.975C979.145,-172.642 1114.07,-150.156 1176.99,-139.668"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1177.88,-143.069 1187.17,-137.972 1176.73,-136.164 1177.88,-143.069"/>
+<g id="edge72" class="edge"><title>Node29&#45;&gt;Node21</title>
+<path fill="none" stroke="midnightblue" d="M2592.18,-185.645C2381.69,-176.434 1617.31,-142.984 1446.16,-135.495"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1446.12,-131.99 1435.98,-135.049 1445.81,-138.983 1446.12,-131.99"/>
 </g>
 <!-- Node30&#45;&gt;Node7 -->
-<g id="edge77" class="edge"><title>Node30&#45;&gt;Node7</title>
-<path fill="none" stroke="midnightblue" d="M2112.39,-426.369C2067.16,-415.692 2004.13,-400.81 1961.79,-390.812"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1962.56,-387.398 1952.03,-388.506 1960.95,-394.21 1962.56,-387.398"/>
+<g id="edge76" class="edge"><title>Node30&#45;&gt;Node7</title>
+<path fill="none" stroke="midnightblue" d="M1934.09,-414.399C1921.01,-405.129 1903.92,-393.025 1890.68,-383.647"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1892.41,-380.582 1882.22,-377.658 1888.36,-386.294 1892.41,-380.582"/>
 </g>
 <!-- Node30&#45;&gt;Node13 -->
-<g id="edge81" class="edge"><title>Node30&#45;&gt;Node13</title>
-<path fill="none" stroke="midnightblue" d="M2203.79,-425.408C2228.04,-415.653 2260.7,-402.112 2289,-389 2574.31,-256.789 2621.48,-168.811 2919,-67 3000.88,-38.9792 3102.52,-25.1852 3153.55,-19.6173"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="3154.07,-23.0821 3163.65,-18.5519 3153.34,-16.1207 3154.07,-23.0821"/>
+<g id="edge80" class="edge"><title>Node30&#45;&gt;Node13</title>
+<path fill="none" stroke="midnightblue" d="M1898.34,-423.158C1711.5,-405.037 1116.52,-345.718 1083,-322 1025.08,-281.013 1058.43,-232.656 1012,-179 954.72,-112.812 930.44,-101.514 850,-67 778.863,-36.4773 687.843,-23.8966 640.266,-19.1269"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="640.53,-15.6363 630.244,-18.1743 639.868,-22.6049 640.53,-15.6363"/>
 </g>
 <!-- Node30&#45;&gt;Node17 -->
-<g id="edge79" class="edge"><title>Node30&#45;&gt;Node17</title>
-<path fill="none" stroke="midnightblue" d="M2192.63,-425.327C2217.6,-410.998 2257.38,-388.279 2292,-369 2321.19,-352.742 2331.96,-353.939 2358,-333 2428.74,-276.118 2493.68,-187.969 2519.37,-151.112"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2522.5,-152.738 2525.29,-142.518 2516.74,-148.766 2522.5,-152.738"/>
+<g id="edge78" class="edge"><title>Node30&#45;&gt;Node17</title>
+<path fill="none" stroke="midnightblue" d="M2009.72,-417.185C2138.32,-391.025 2465.73,-324.144 2739,-266 2945.81,-221.998 3191.97,-167.557 3294.47,-144.795"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="3295.52,-148.149 3304.52,-142.564 3294,-141.316 3295.52,-148.149"/>
 </g>
 <!-- Node30&#45;&gt;Node23 -->
-<g id="edge80" class="edge"><title>Node30&#45;&gt;Node23</title>
-<path fill="none" stroke="midnightblue" d="M2147.97,-425.36C2114.43,-400.188 2048.56,-343.92 2028,-277 2023.95,-263.83 2021.64,-258.224 2028,-246 2036.69,-229.286 2053.73,-216.952 2069.05,-208.659"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2070.92,-211.636 2078.28,-204.014 2067.78,-205.384 2070.92,-211.636"/>
+<g id="edge79" class="edge"><title>Node30&#45;&gt;Node23</title>
+<path fill="none" stroke="midnightblue" d="M1898.26,-424.892C1712.26,-412.449 1120.61,-369.478 1050,-322 1013.35,-297.359 1024.93,-272.739 1002,-235 996.331,-225.672 989.625,-215.537 983.992,-207.25"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="986.748,-205.082 978.201,-198.822 980.979,-209.046 986.748,-205.082"/>
 </g>
 <!-- Node30&#45;&gt;Node29 -->
-<g id="edge78" class="edge"><title>Node30&#45;&gt;Node29</title>
-<path fill="none" stroke="midnightblue" d="M2112.37,-433.667C1986.35,-420.024 1669.67,-383.334 1408,-333 1305.53,-313.29 1281.28,-302.098 1180,-277 1079.16,-252.01 960.856,-222.349 898.187,-206.608"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="898.802,-203.154 888.251,-204.112 897.097,-209.943 898.802,-203.154"/>
+<g id="edge77" class="edge"><title>Node30&#45;&gt;Node29</title>
+<path fill="none" stroke="midnightblue" d="M2004.21,-414.469C2072.52,-395.101 2198.31,-358.421 2304,-322 2420.61,-281.816 2556.65,-226.83 2615.88,-202.478"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2617.46,-205.613 2625.37,-198.568 2614.8,-199.141 2617.46,-205.613"/>
 </g>
 <!-- Node31&#45;&gt;Node29 -->
-<g id="edge92" class="edge"><title>Node31&#45;&gt;Node29</title>
-<path fill="none" stroke="midnightblue" d="M946.259,-492.186C933.255,-450.368 877.66,-271.585 859.653,-213.678"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="862.973,-212.567 856.661,-204.058 856.288,-214.646 862.973,-212.567"/>
+<g id="edge91" class="edge"><title>Node31&#45;&gt;Node29</title>
+<path fill="none" stroke="midnightblue" d="M2258.51,-481.345C2314.14,-439.518 2555.35,-258.156 2626.3,-204.811"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2628.55,-207.497 2634.44,-198.69 2624.35,-201.902 2628.55,-207.497"/>
 </g>
 <!-- Node32 -->
 <g id="node32" class="node"><title>Node32</title>
-<polygon fill="white" stroke="#bfbfbf" points="770.5,-431 770.5,-450 829.5,-450 829.5,-431 770.5,-431"/>
-<text text-anchor="middle" x="800" y="-438" font-family="Helvetica,sans-Serif" font-size="10.00">iostream</text>
+<polygon fill="white" stroke="#bfbfbf" points="2066.5,-420 2066.5,-439 2125.5,-439 2125.5,-420 2066.5,-420"/>
+<text text-anchor="middle" x="2096" y="-427" font-family="Helvetica,sans-Serif" font-size="10.00">iostream</text>
 </g>
 <!-- Node31&#45;&gt;Node32 -->
-<g id="edge93" class="edge"><title>Node31&#45;&gt;Node32</title>
-<path fill="none" stroke="midnightblue" d="M927.652,-492.475C902.314,-482.357 859.862,-465.405 830.929,-453.851"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="831.893,-450.467 821.308,-450.009 829.297,-456.968 831.893,-450.467"/>
+<g id="edge92" class="edge"><title>Node31&#45;&gt;Node32</title>
+<path fill="none" stroke="midnightblue" d="M2225.37,-481.475C2199.58,-471.313 2156.29,-454.256 2126.96,-442.7"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2128.18,-439.419 2117.59,-439.009 2125.62,-445.932 2128.18,-439.419"/>
 </g>
 <!-- Node33&#45;&gt;Node5 -->
-<g id="edge107" class="edge"><title>Node33&#45;&gt;Node5</title>
-<path fill="none" stroke="midnightblue" d="M2374.21,-664.627C2287.93,-654.9 2080.21,-631.483 1984.2,-620.66"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1984.5,-617.171 1974.17,-619.528 1983.71,-624.126 1984.5,-617.171"/>
+<g id="edge106" class="edge"><title>Node33&#45;&gt;Node5</title>
+<path fill="none" stroke="midnightblue" d="M2061.78,-650.829C2114.02,-641.167 2207.1,-623.955 2264.68,-613.305"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2265.46,-616.721 2274.66,-611.461 2264.19,-609.838 2265.46,-616.721"/>
 </g>
 <!-- Node33&#45;&gt;Node6 -->
-<g id="edge109" class="edge"><title>Node33&#45;&gt;Node6</title>
-<path fill="none" stroke="midnightblue" d="M2374.13,-661.526C2274.55,-642.375 2010.03,-591.505 1895.32,-569.447"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1895.84,-565.982 1885.36,-567.53 1894.52,-572.856 1895.84,-565.982"/>
+<g id="edge108" class="edge"><title>Node33&#45;&gt;Node6</title>
+<path fill="none" stroke="midnightblue" d="M2042.3,-649.368C2085.42,-630.091 2189.71,-583.454 2240.4,-560.789"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2242.09,-563.869 2249.79,-556.591 2239.23,-557.478 2242.09,-563.869"/>
 </g>
 <!-- Node33&#45;&gt;Node7 -->
-<g id="edge108" class="edge"><title>Node33&#45;&gt;Node7</title>
-<path fill="none" stroke="midnightblue" d="M2404.29,-660.453C2395.17,-651.459 2380.46,-636.852 2368,-624 2311.98,-566.201 2314.93,-530.054 2244,-492 2159.78,-446.818 2120.96,-490.925 2032,-456 2029.41,-454.985 1970.21,-415.898 1937.41,-394.187"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1939.2,-391.173 1928.93,-388.57 1935.33,-397.009 1939.2,-391.173"/>
+<g id="edge107" class="edge"><title>Node33&#45;&gt;Node7</title>
+<path fill="none" stroke="midnightblue" d="M2012.33,-649.257C1980.83,-622.647 1888.36,-538.719 1856,-445 1849.38,-425.835 1855.46,-402.812 1861.54,-387.165"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1864.9,-388.22 1865.61,-377.649 1858.46,-385.473 1864.9,-388.22"/>
 </g>
 <!-- Node33&#45;&gt;Node11 -->
-<g id="edge111" class="edge"><title>Node33&#45;&gt;Node11</title>
-<path fill="none" stroke="midnightblue" d="M2417.56,-660.414C2434.78,-626.688 2492.21,-501.401 2438,-425 2366.34,-324.009 2257.48,-425.37 2175,-333 2128.25,-280.644 2179.49,-237.027 2140,-179 2115.17,-142.515 2099.54,-137.907 2058,-123 1980.71,-95.2639 1746.01,-83.9627 1628.12,-79.9408"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1627.88,-76.4311 1617.77,-79.5963 1627.65,-83.4272 1627.88,-76.4311"/>
+<g id="edge110" class="edge"><title>Node33&#45;&gt;Node11</title>
+<path fill="none" stroke="midnightblue" d="M2061.83,-657.299C2195.28,-653.272 2643.84,-626.219 2951,-445 2993.34,-420.022 3008.19,-416.304 3039,-378 3079.34,-327.839 3075.39,-297.964 3062,-235 3050.66,-181.679 3050.28,-161.81 3012,-123 2997.82,-108.629 2978.09,-97.9104 2961,-90.5465"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2961.89,-87.1282 2951.3,-86.5961 2959.24,-93.6107 2961.89,-87.1282"/>
 </g>
 <!-- Node33&#45;&gt;Node13 -->
-<g id="edge112" class="edge"><title>Node33&#45;&gt;Node13</title>
-<path fill="none" stroke="midnightblue" d="M2431.65,-660.304C2459.13,-647.562 2512.22,-623.246 2558,-604 2718.29,-536.607 3262,-436.379 3262,-262.5 3262,-262.5 3262,-262.5 3262,-132 3262,-90.3123 3227.02,-52.0299 3204.52,-31.7185"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="3206.73,-28.9985 3196.89,-25.0755 3202.14,-34.2796 3206.73,-28.9985"/>
+<g id="edge111" class="edge"><title>Node33&#45;&gt;Node13</title>
+<path fill="none" stroke="midnightblue" d="M1984.44,-654.829C1829.84,-640.96 1242.37,-577.145 809,-378 729.464,-341.451 697.221,-337.692 647,-266 599.997,-198.902 605.92,-168.872 603,-87 602.683,-78.1168 602.562,-75.8781 603,-67 603.525,-56.3746 604.667,-44.5374 605.727,-34.9956"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="609.204,-35.3985 606.893,-25.0588 602.252,-34.5831 609.204,-35.3985"/>
 </g>
 <!-- Node33&#45;&gt;Node17 -->
-<g id="edge110" class="edge"><title>Node33&#45;&gt;Node17</title>
-<path fill="none" stroke="midnightblue" d="M2421.75,-660.391C2434.69,-647.757 2459.77,-623.574 2482,-604 2584.14,-514.08 2665.28,-520.579 2700,-389 2727.61,-284.359 2604.2,-184.742 2552.68,-148.447"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2554.45,-145.412 2544.23,-142.607 2550.47,-151.171 2554.45,-145.412"/>
+<g id="edge109" class="edge"><title>Node33&#45;&gt;Node17</title>
+<path fill="none" stroke="midnightblue" d="M2061.72,-657.425C2238.01,-654.658 2960.31,-641.651 3057,-613 3150.98,-585.151 3183.47,-575.649 3247,-501 3337.84,-394.265 3343.99,-211.369 3343.48,-152.697"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="3346.98,-152.619 3343.31,-142.679 3339.98,-152.736 3346.98,-152.619"/>
 </g>
 <!-- Node34&#45;&gt;Node5 -->
-<g id="edge130" class="edge"><title>Node34&#45;&gt;Node5</title>
-<path fill="none" stroke="midnightblue" d="M1520.92,-940.346C1502.56,-928.597 1475.25,-906.475 1486,-884 1554.49,-740.867 1622.42,-731.65 1764,-660 1801.93,-640.802 1848.9,-629.175 1883.85,-622.563"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1884.61,-625.981 1893.83,-620.749 1883.36,-619.094 1884.61,-625.981"/>
+<g id="edge129" class="edge"><title>Node34&#45;&gt;Node5</title>
+<path fill="none" stroke="midnightblue" d="M2039.67,-937.165C2303.07,-932.333 3546,-905.428 3546,-828 3546,-828 3546,-828 3546,-770 3546,-648.808 2596.81,-612.457 2365.41,-605.396"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2365.36,-601.893 2355.26,-605.091 2365.15,-608.89 2365.36,-601.893"/>
 </g>
 <!-- Node34&#45;&gt;Node6 -->
-<g id="edge131" class="edge"><title>Node34&#45;&gt;Node6</title>
-<path fill="none" stroke="midnightblue" d="M1487.1,-941.962C1431.39,-930.861 1348,-903.475 1348,-839 1348,-839 1348,-839 1348,-669 1348,-581.056 1650.62,-563.394 1781.67,-559.869"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1782.02,-563.361 1791.93,-559.612 1781.84,-556.363 1782.02,-563.361"/>
+<g id="edge130" class="edge"><title>Node34&#45;&gt;Node6</title>
+<path fill="none" stroke="midnightblue" d="M2039.66,-937.293C2245.48,-934.152 3045.43,-920.397 3296,-893 3431.7,-878.162 3510.71,-941.959 3598,-837 3747.74,-656.94 3551.79,-664.305 3307,-593 3213.17,-565.669 2534.33,-552.363 2328.2,-548.923"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2328.22,-545.423 2318.17,-548.757 2328.11,-552.422 2328.22,-545.423"/>
 </g>
 <!-- Node34&#45;&gt;Node13 -->
-<g id="edge155" class="edge"><title>Node34&#45;&gt;Node13</title>
-<path fill="none" stroke="midnightblue" d="M1584.68,-941.009C1587.83,-940.627 1590.96,-940.285 1594,-940 2076.59,-894.755 2199.78,-925.842 2684,-904 2738.42,-901.545 3614.25,-872.324 3663,-848 3696.86,-831.103 3718,-820.843 3718,-783 3718,-783 3718,-783 3718,-132 3718,-98.3101 3707.43,-85.073 3679,-67 3602.1,-18.1192 3314.96,-15.5543 3218.12,-16.1253"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="3218.03,-12.6258 3208.06,-16.2004 3218.08,-19.6256 3218.03,-12.6258"/>
+<g id="edge154" class="edge"><title>Node34&#45;&gt;Node13</title>
+<path fill="none" stroke="midnightblue" d="M1942.42,-937.933C1682.29,-936.951 466,-924.219 466,-772 466,-772 466,-772 466,-714 466,-610.441 466.908,-584.519 464,-481 461.901,-406.278 456,-387.751 456,-313 456,-313 456,-313 456,-132 456,-72.1953 531.518,-39.3149 576.095,-25.1157"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="577.349,-28.3924 585.898,-22.1337 575.312,-21.6954 577.349,-28.3924"/>
 </g>
 <!-- Node34&#45;&gt;Node21 -->
-<g id="edge156" class="edge"><title>Node34&#45;&gt;Node21</title>
-<path fill="none" stroke="midnightblue" d="M1495.51,-940.426C1362.78,-909.913 939.567,-793.022 769,-512 664.257,-339.428 988.252,-181.651 993,-179 1052.2,-145.955 1132.64,-137.068 1177.41,-134.747"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1177.62,-138.241 1187.46,-134.306 1177.31,-131.248 1177.62,-138.241"/>
+<g id="edge155" class="edge"><title>Node34&#45;&gt;Node21</title>
+<path fill="none" stroke="midnightblue" d="M1942.42,-937.675C1736.02,-936.087 930.965,-927.747 680,-893 654.16,-889.422 468.096,-857.528 452,-837 400.443,-771.247 426.509,-728.572 452,-649 488.402,-535.366 608,-549.822 608,-430.5 608,-430.5 608,-430.5 608,-367 608,-223.679 735.498,-228.497 870,-179 964.034,-144.395 1274.69,-136.239 1378.14,-134.462"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1378.39,-137.958 1388.33,-134.296 1378.28,-130.959 1378.39,-137.958"/>
 </g>
 <!-- Node35 -->
 <g id="node35" class="node"><title>Node35</title>
 <g id="a_node35"><a xlink:href="tir_2expr_8h.html" target="_top" xlink:title="TIR expressions. ">
-<polygon fill="white" stroke="black" points="1609,-884.5 1609,-903.5 1689,-903.5 1689,-884.5 1609,-884.5"/>
-<text text-anchor="middle" x="1649" y="-891.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/tir/expr.h</text>
+<polygon fill="white" stroke="black" points="1986,-873.5 1986,-892.5 2066,-892.5 2066,-873.5 1986,-873.5"/>
+<text text-anchor="middle" x="2026" y="-880.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/tir/expr.h</text>
 </a>
 </g>
 </g>
 <!-- Node34&#45;&gt;Node35 -->
-<g id="edge132" class="edge"><title>Node34&#45;&gt;Node35</title>
-<path fill="none" stroke="midnightblue" d="M1553.91,-940.444C1572.25,-931.675 1601.01,-917.935 1622.09,-907.858"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1623.67,-910.986 1631.18,-903.516 1620.65,-904.67 1623.67,-910.986"/>
+<g id="edge131" class="edge"><title>Node34&#45;&gt;Node35</title>
+<path fill="none" stroke="midnightblue" d="M1996.78,-929.083C2001.67,-921.534 2008.83,-910.495 2014.83,-901.23"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2017.83,-903.046 2020.33,-892.751 2011.95,-899.239 2017.83,-903.046"/>
 </g>
 <!-- Node35&#45;&gt;Node4 -->
-<g id="edge133" class="edge"><title>Node35&#45;&gt;Node4</title>
-<path fill="none" stroke="midnightblue" d="M1671.33,-884.393C1726.5,-863.325 1873.94,-808.243 2000,-772 2049.06,-757.895 2106.06,-745.091 2145.53,-736.78"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2146.34,-740.185 2155.42,-734.713 2144.91,-733.333 2146.34,-740.185"/>
+<g id="edge132" class="edge"><title>Node35&#45;&gt;Node4</title>
+<path fill="none" stroke="midnightblue" d="M2066.3,-874.414C2118.53,-862.701 2209.69,-835.613 2267,-781 2280.27,-768.353 2288.51,-748.871 2293.13,-734.531"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2296.53,-735.347 2295.97,-724.767 2289.81,-733.386 2296.53,-735.347"/>
 </g>
 <!-- Node35&#45;&gt;Node6 -->
-<g id="edge136" class="edge"><title>Node35&#45;&gt;Node6</title>
-<path fill="none" stroke="midnightblue" d="M1650.96,-884.496C1655.03,-866.095 1664,-821.162 1664,-783 1664,-783 1664,-783 1664,-669 1664,-611.898 1731.28,-583.394 1782.88,-569.931"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1783.77,-573.315 1792.63,-567.511 1782.09,-566.52 1783.77,-573.315"/>
+<g id="edge135" class="edge"><title>Node35&#45;&gt;Node6</title>
+<path fill="none" stroke="midnightblue" d="M2019.9,-873.492C1997.5,-841.134 1923.71,-723.789 1975,-649 2026.9,-573.325 2139.54,-553.618 2209.35,-548.917"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2209.97,-552.387 2219.75,-548.306 2209.56,-545.399 2209.97,-552.387"/>
 </g>
 <!-- Node35&#45;&gt;Node7 -->
-<g id="edge134" class="edge"><title>Node35&#45;&gt;Node7</title>
-<path fill="none" stroke="midnightblue" d="M1640.26,-884.493C1623.39,-867.117 1588,-825.582 1588,-783 1588,-783 1588,-783 1588,-725 1588,-635.045 1825.17,-448.609 1896.16,-394.823"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1898.45,-397.482 1904.33,-388.668 1894.23,-391.893 1898.45,-397.482"/>
+<g id="edge133" class="edge"><title>Node35&#45;&gt;Node7</title>
+<path fill="none" stroke="midnightblue" d="M1985.65,-877.224C1942.38,-871.229 1872.17,-858.965 1815,-837 1768.53,-819.145 1742.89,-824.11 1718,-781 1650.14,-663.465 1714.82,-602.645 1775,-481 1789.81,-451.07 1787.43,-439.488 1809,-414 1819.37,-401.753 1833.69,-390.964 1845.86,-383.023"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1848.02,-385.8 1854.64,-377.528 1844.31,-379.867 1848.02,-385.8"/>
 </g>
 <!-- Node35&#45;&gt;Node12 -->
-<g id="edge137" class="edge"><title>Node35&#45;&gt;Node12</title>
-<path fill="none" stroke="midnightblue" d="M1609.94,-884.469C1426.48,-843.817 651.444,-665.389 462,-512 341.088,-414.1 325.757,-360.143 285,-210 235.303,-26.9203 971.366,-14.8033 1210.97,-15.6776"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1211.3,-19.1791 1221.32,-15.7247 1211.34,-12.1792 1211.3,-19.1791"/>
+<g id="edge136" class="edge"><title>Node35&#45;&gt;Node12</title>
+<path fill="none" stroke="midnightblue" d="M2066.22,-882.266C2264.84,-883.338 3142.04,-885.213 3410,-837 3568.81,-808.425 3727,-821.363 3727,-660 3727,-660 3727,-660 3727,-132 3727,-79.6894 3366.89,-38.7677 3207.92,-23.2515"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="3208.03,-19.7465 3197.74,-22.2664 3207.36,-26.7139 3208.03,-19.7465"/>
 </g>
 <!-- Node35&#45;&gt;Node13 -->
-<g id="edge152" class="edge"><title>Node35&#45;&gt;Node13</title>
-<path fill="none" stroke="midnightblue" d="M1689.19,-892.392C1947.29,-888.433 3362.51,-865.935 3403,-848 3616.55,-753.42 3642,-613.553 3642,-380 3642,-380 3642,-380 3642,-132 3642,-98.3101 3631.26,-85.3344 3603,-67 3539.5,-25.8072 3304.78,-18.2142 3218.28,-16.8155"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="3218.3,-13.3156 3208.25,-16.6708 3218.2,-20.3148 3218.3,-13.3156"/>
+<g id="edge151" class="edge"><title>Node35&#45;&gt;Node13</title>
+<path fill="none" stroke="midnightblue" d="M1985.93,-881.715C1926.07,-880.064 1810.63,-872.105 1720,-837 1539.07,-766.919 1521.63,-697.268 1358,-593 1250.4,-524.431 1220.46,-512.212 1112,-445 1064.95,-415.84 1052.83,-409.043 1007,-378 936.331,-330.137 916.945,-320.186 851,-266 761.112,-192.14 737.295,-173.953 660,-87 644.749,-69.843 629.405,-48.4097 619.38,-33.7071"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="622.098,-31.4765 613.614,-25.1298 616.288,-35.3817 622.098,-31.4765"/>
 </g>
 <!-- Node35&#45;&gt;Node15 -->
-<g id="edge154" class="edge"><title>Node35&#45;&gt;Node15</title>
-<path fill="none" stroke="midnightblue" d="M1608.7,-893.581C1427.53,-894.088 678.297,-873.43 242,-512 97.7769,-392.525 45.1513,-307.011 80,-123 84.9758,-96.7264 81.8588,-84.5896 102,-67 151.935,-23.3907 233.408,-16.0909 277.786,-15.6348"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="277.964,-19.1349 287.968,-15.6451 277.971,-12.1349 277.964,-19.1349"/>
+<g id="edge153" class="edge"><title>Node35&#45;&gt;Node15</title>
+<path fill="none" stroke="midnightblue" d="M1985.84,-878.795C1919.72,-872.962 1791.34,-859.131 1752,-837 1701.84,-808.779 1695.45,-781.93 1687,-725 1646.43,-451.671 1741.81,-116.891 1767.06,-35.1471"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1770.49,-35.9138 1770.14,-25.3248 1763.81,-33.8213 1770.49,-35.9138"/>
 </g>
 <!-- Node35&#45;&gt;Node17 -->
-<g id="edge138" class="edge"><title>Node35&#45;&gt;Node17</title>
-<path fill="none" stroke="midnightblue" d="M1689.1,-891.474C1882.94,-883.345 2721.44,-839.325 2917,-680 3078.11,-548.738 3257.26,-370.671 3091,-246 3015.82,-189.624 2744.93,-155.328 2609.38,-141.324"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2609.38,-137.805 2599.07,-140.271 2608.67,-144.769 2609.38,-137.805"/>
+<g id="edge137" class="edge"><title>Node35&#45;&gt;Node17</title>
+<path fill="none" stroke="midnightblue" d="M2066.2,-882.069C2259.86,-882.182 3097.86,-880.127 3355,-837 3437.84,-823.106 3689,-744 3689,-660 3689,-660 3689,-660 3689,-249.5 3689,-214.518 3682.33,-199.52 3654,-179 3617.88,-152.838 3501.03,-141.738 3421.31,-137.132"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="3421.47,-133.636 3411.3,-136.576 3421.09,-140.625 3421.47,-133.636"/>
 </g>
 <!-- Node35&#45;&gt;Node20 -->
-<g id="edge153" class="edge"><title>Node35&#45;&gt;Node20</title>
-<path fill="none" stroke="midnightblue" d="M1689.33,-892.723C1946.79,-890.865 3350.5,-879.306 3381,-848 3480.13,-746.266 3396.97,-254.503 3378.45,-152.534"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="3381.86,-151.729 3376.61,-142.526 3374.98,-152.994 3381.86,-151.729"/>
+<g id="edge152" class="edge"><title>Node35&#45;&gt;Node20</title>
+<path fill="none" stroke="midnightblue" d="M1985.73,-881.446C1860.8,-879.384 1481.07,-870.458 1363,-837 1344.64,-831.797 1343.36,-822.202 1325,-817 1211.21,-784.76 900.79,-831.831 794,-781 782.483,-775.518 489.232,-454.743 481,-445 433.324,-388.575 380,-386.87 380,-313 380,-313 380,-313 380,-249.5 380,-215.362 380.442,-175.551 380.739,-152.709"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="384.238,-152.739 380.873,-142.693 377.239,-152.645 384.238,-152.739"/>
 </g>
 <!-- Node35&#45;&gt;Node24 -->
-<g id="edge151" class="edge"><title>Node35&#45;&gt;Node24</title>
-<path fill="none" stroke="midnightblue" d="M1618.99,-884.44C1592.04,-876.342 1551.64,-863.189 1518,-848 1409.88,-799.184 1393.63,-765.894 1286,-716 1109.86,-634.346 1052.73,-646.151 875,-568 826.226,-546.552 815.06,-538.784 769,-512 681.07,-460.867 642.14,-466.277 576,-389 529.86,-335.091 507.033,-251.151 498.76,-214.123"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="502.157,-213.273 496.65,-204.223 495.311,-214.732 502.157,-213.273"/>
+<g id="edge150" class="edge"><title>Node35&#45;&gt;Node24</title>
+<path fill="none" stroke="midnightblue" d="M2066.03,-882.027C2253.39,-881.931 3044.82,-879.001 3288,-837 3463.14,-806.751 3613,-781.73 3613,-604 3613,-604 3613,-604 3613,-311 3613,-274.745 3608.04,-232.631 3604.78,-208.867"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="3608.22,-208.189 3603.35,-198.781 3601.29,-209.174 3608.22,-208.189"/>
 </g>
 <!-- Node35&#45;&gt;Node26 -->
-<g id="edge149" class="edge"><title>Node35&#45;&gt;Node26</title>
-<path fill="none" stroke="midnightblue" d="M1689.01,-892.051C1932.67,-886.082 3208.25,-852.025 3263,-792 3412.07,-628.569 3139.55,-347.169 3067.7,-278.13"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="3069.98,-275.47 3060.32,-271.114 3065.16,-280.542 3069.98,-275.47"/>
+<g id="edge148" class="edge"><title>Node35&#45;&gt;Node26</title>
+<path fill="none" stroke="midnightblue" d="M2066.3,-880.991C2295.36,-874.998 3423,-841.945 3423,-772 3423,-772 3423,-772 3423,-658 3423,-576.873 2863.15,-324.089 2728.07,-264.279"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2729.3,-260.997 2718.74,-260.157 2726.47,-267.4 2729.3,-260.997"/>
 </g>
 <!-- Node35&#45;&gt;Node29 -->
-<g id="edge135" class="edge"><title>Node35&#45;&gt;Node29</title>
-<path fill="none" stroke="midnightblue" d="M1630.27,-884.427C1611.35,-875.65 1581.36,-861.429 1556,-848 1364.19,-746.445 1334.48,-684.686 1133,-604 1059.94,-574.741 852.196,-561.487 791,-512 697.929,-436.737 804.076,-266.668 841.752,-212.447"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="844.638,-214.427 847.551,-204.241 838.921,-210.387 844.638,-214.427"/>
+<g id="edge134" class="edge"><title>Node35&#45;&gt;Node29</title>
+<path fill="none" stroke="midnightblue" d="M2066.11,-881.211C2240.37,-877.658 2932.78,-862.205 3149,-837 3299.5,-819.456 3430.17,-870.917 3471,-725 3498.24,-627.652 3499.88,-543.335 3247,-358 3158.53,-293.163 2834.05,-225.795 2702.23,-200.458"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2702.81,-197.005 2692.33,-198.564 2701.49,-203.881 2702.81,-197.005"/>
 </g>
 <!-- Node35&#45;&gt;Node32 -->
-<g id="edge150" class="edge"><title>Node35&#45;&gt;Node32</title>
-<path fill="none" stroke="midnightblue" d="M1627.53,-884.378C1611.08,-876.853 1588.76,-864.542 1574,-848 1531.1,-799.914 1562.03,-758.97 1514,-716 1330.16,-551.515 1224.35,-607.688 985,-548 905.417,-528.154 857.809,-573.902 804,-512 791.833,-498.003 792.649,-475.929 795.353,-460.213"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="798.847,-460.607 797.496,-450.098 791.999,-459.155 798.847,-460.607"/>
+<g id="edge149" class="edge"><title>Node35&#45;&gt;Node32</title>
+<path fill="none" stroke="midnightblue" d="M1997.52,-873.427C1924.06,-851.184 1733.31,-792.551 1725,-781 1719.81,-773.784 1721.43,-769.143 1725,-761 1797.01,-596.519 1997.2,-481.012 2069.51,-443.582"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2071.14,-446.68 2078.46,-439.015 2067.96,-440.446 2071.14,-446.68"/>
 </g>
 <!-- Node36 -->
 <g id="node36" class="node"><title>Node36</title>
 <g id="a_node36"><a xlink:href="buffer_8h.html" target="_top" xlink:title="Symbolic n&#45;dimensional array, to represent a memory buffer. ">
-<polygon fill="white" stroke="black" points="1948.5,-828.5 1948.5,-847.5 2035.5,-847.5 2035.5,-828.5 1948.5,-828.5"/>
-<text text-anchor="middle" x="1992" y="-835.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/tir/buffer.h</text>
+<polygon fill="white" stroke="black" points="2018.5,-817.5 2018.5,-836.5 2105.5,-836.5 2105.5,-817.5 2018.5,-817.5"/>
+<text text-anchor="middle" x="2062" y="-824.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/tir/buffer.h</text>
 </a>
 </g>
 </g>
 <!-- Node35&#45;&gt;Node36 -->
-<g id="edge139" class="edge"><title>Node35&#45;&gt;Node36</title>
-<path fill="none" stroke="midnightblue" d="M1689.02,-886.7C1750.71,-876.988 1869.12,-858.346 1938.23,-847.465"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1939.13,-850.866 1948.47,-845.853 1938.05,-843.951 1939.13,-850.866"/>
+<g id="edge138" class="edge"><title>Node35&#45;&gt;Node36</title>
+<path fill="none" stroke="midnightblue" d="M2031.94,-873.083C2036.98,-865.534 2044.34,-854.495 2050.51,-845.23"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2053.53,-847.013 2056.17,-836.751 2047.71,-843.13 2053.53,-847.013"/>
 </g>
 <!-- Node37 -->
 <g id="node37" class="node"><title>Node37</title>
 <g id="a_node37"><a xlink:href="var_8h.html" target="_top" xlink:title="Variables in the TIR. ">
-<polygon fill="white" stroke="black" points="2027,-772.5 2027,-791.5 2101,-791.5 2101,-772.5 2027,-772.5"/>
-<text text-anchor="middle" x="2064" y="-779.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/tir/var.h</text>
+<polygon fill="white" stroke="black" points="3272,-761.5 3272,-780.5 3346,-780.5 3346,-761.5 3272,-761.5"/>
+<text text-anchor="middle" x="3309" y="-768.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/tir/var.h</text>
 </a>
 </g>
 </g>
 <!-- Node35&#45;&gt;Node37 -->
-<g id="edge148" class="edge"><title>Node35&#45;&gt;Node37</title>
-<path fill="none" stroke="midnightblue" d="M1689.14,-893.804C1778.62,-894.767 1990.76,-892.196 2044,-848 2057.46,-836.822 2062.04,-816.799 2063.51,-801.919"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2067.01,-801.968 2064.12,-791.776 2060.03,-801.547 2067.01,-801.968"/>
+<g id="edge147" class="edge"><title>Node35&#45;&gt;Node37</title>
+<path fill="none" stroke="midnightblue" d="M2066.16,-878.557C2255.47,-862.326 3058.02,-793.518 3261.8,-776.047"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="3262.21,-779.524 3271.88,-775.183 3261.62,-772.55 3262.21,-779.524"/>
 </g>
 <!-- Node36&#45;&gt;Node4 -->
-<g id="edge140" class="edge"><title>Node36&#45;&gt;Node4</title>
-<path fill="none" stroke="midnightblue" d="M1992.96,-828.423C1994.98,-814.468 2001.01,-786.912 2018,-772 2053.14,-741.159 2106.61,-730.947 2145.39,-727.814"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2145.67,-731.303 2155.41,-727.134 2145.19,-724.319 2145.67,-731.303"/>
+<g id="edge139" class="edge"><title>Node36&#45;&gt;Node4</title>
+<path fill="none" stroke="midnightblue" d="M2105.67,-822.377C2143.76,-817.574 2199.3,-806.483 2241,-781 2260.54,-769.058 2277.04,-748.261 2287.18,-733.407"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2290.35,-734.943 2292.89,-724.657 2284.49,-731.116 2290.35,-734.943"/>
 </g>
 <!-- Node36&#45;&gt;Node7 -->
-<g id="edge141" class="edge"><title>Node36&#45;&gt;Node7</title>
-<path fill="none" stroke="midnightblue" d="M1992.44,-828.458C1993.85,-797.819 1997.4,-690.657 1983,-604 1969.98,-525.644 1937.72,-436.196 1923.24,-398.396"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1926.38,-396.829 1919.5,-388.772 1919.86,-399.361 1926.38,-396.829"/>
+<g id="edge140" class="edge"><title>Node36&#45;&gt;Node7</title>
+<path fill="none" stroke="midnightblue" d="M2018.4,-826.221C1942.53,-825.575 1791.11,-819.374 1758,-781 1652.54,-658.777 1807.58,-446.326 1856.21,-385.628"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1858.98,-387.77 1862.57,-377.802 1853.55,-383.355 1858.98,-387.77"/>
 </g>
 <!-- Node36&#45;&gt;Node13 -->
-<g id="edge147" class="edge"><title>Node36&#45;&gt;Node13</title>
-<path fill="none" stroke="midnightblue" d="M2035.66,-836.547C2207.17,-834.547 2834.94,-825.075 3031,-792 3116.55,-777.568 3144.79,-782.558 3218,-736 3338.54,-659.339 3566,-338.354 3566,-195.5 3566,-195.5 3566,-195.5 3566,-132 3566,-97.8442 3553.61,-85.6601 3525,-67 3474.77,-34.2339 3293.44,-21.7573 3218.63,-17.9337"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="3218.55,-14.4258 3208.39,-17.431 3218.21,-21.4174 3218.55,-14.4258"/>
+<g id="edge146" class="edge"><title>Node36&#45;&gt;Node13</title>
+<path fill="none" stroke="midnightblue" d="M2018.3,-825.797C1842.92,-824.733 1189.89,-818.296 987,-781 909.591,-766.771 888.982,-761.014 819,-725 675.149,-650.972 266,-413.282 266,-251.5 266,-251.5 266,-251.5 266,-132 266,-67.0178 490.43,-31.5227 575.678,-20.3841"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="576.348,-23.827 585.824,-19.0881 575.461,-16.8834 576.348,-23.827"/>
 </g>
 <!-- Node36&#45;&gt;Node37 -->
-<g id="edge142" class="edge"><title>Node36&#45;&gt;Node37</title>
-<path fill="none" stroke="midnightblue" d="M2003.57,-828.324C2014.42,-820.185 2030.88,-807.839 2043.88,-798.087"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2046.32,-800.634 2052.22,-791.834 2042.12,-795.034 2046.32,-800.634"/>
+<g id="edge141" class="edge"><title>Node36&#45;&gt;Node37</title>
+<path fill="none" stroke="midnightblue" d="M2105.65,-824.11C2297.28,-815.811 3063.31,-782.639 3261.82,-774.043"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="3262.09,-777.535 3271.93,-773.605 3261.78,-770.541 3262.09,-777.535"/>
 </g>
 <!-- Node37&#45;&gt;Node4 -->
-<g id="edge143" class="edge"><title>Node37&#45;&gt;Node4</title>
-<path fill="none" stroke="midnightblue" d="M2084.6,-772.444C2105.99,-763.557 2139.68,-749.563 2164.02,-739.451"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2165.61,-742.585 2173.5,-735.516 2162.92,-736.12 2165.61,-742.585"/>
+<g id="edge142" class="edge"><title>Node37&#45;&gt;Node4</title>
+<path fill="none" stroke="midnightblue" d="M3271.76,-762.861C3267.14,-762.143 3262.47,-761.497 3258,-761 2910.18,-722.318 2487.86,-716.814 2346.62,-716.088"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2346.57,-712.588 2336.55,-716.042 2346.53,-719.588 2346.57,-712.588"/>
 </g>
 <!-- Node37&#45;&gt;Node6 -->
-<g id="edge144" class="edge"><title>Node37&#45;&gt;Node6</title>
-<path fill="none" stroke="midnightblue" d="M2047.28,-772.425C2009.04,-751.977 1914.5,-696.59 1863,-624 1853.2,-610.184 1847.5,-591.663 1844.37,-577.926"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1847.72,-576.831 1842.31,-567.721 1840.85,-578.216 1847.72,-576.831"/>
+<g id="edge143" class="edge"><title>Node37&#45;&gt;Node6</title>
+<path fill="none" stroke="midnightblue" d="M3311.14,-761.294C3317.7,-732.247 3334.23,-637.963 3285,-593 3249.54,-560.616 2540.45,-550.773 2328.4,-548.548"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2328.12,-545.045 2318.09,-548.442 2328.05,-552.045 2328.12,-545.045"/>
 </g>
 <!-- Node37&#45;&gt;Node13 -->
-<g id="edge146" class="edge"><title>Node37&#45;&gt;Node13</title>
-<path fill="none" stroke="midnightblue" d="M2101.33,-774.001C2307.73,-734.269 3300,-529.377 3300,-262.5 3300,-262.5 3300,-262.5 3300,-132 3300,-83.1978 3247.81,-47.5591 3214.05,-29.6129"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="3215.58,-26.4611 3205.08,-25.024 3212.39,-32.6934 3215.58,-26.4611"/>
+<g id="edge145" class="edge"><title>Node37&#45;&gt;Node13</title>
+<path fill="none" stroke="midnightblue" d="M3271.77,-762.206C3145.09,-735.862 2718.94,-648.681 2364,-593 2042.68,-542.595 1960.45,-543.559 1638,-501 1547.27,-489.025 1318.93,-466.624 1230,-445 1143.4,-423.942 1118.88,-421.329 1041,-378 946.624,-325.494 693.444,-95.1116 625.166,-32.3419"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="627.174,-29.4334 617.447,-25.2348 622.433,-34.5831 627.174,-29.4334"/>
 </g>
 <!-- Node37&#45;&gt;Node17 -->
-<g id="edge145" class="edge"><title>Node37&#45;&gt;Node17</title>
-<path fill="none" stroke="midnightblue" d="M2101.18,-773.564C2137.39,-765.853 2193.73,-752.602 2241,-736 2261.27,-728.881 2264.57,-722.65 2285,-716 2360.51,-691.417 2385.71,-708.048 2460,-680 2647.73,-609.122 2710.34,-598.117 2852,-456 2881.79,-426.116 2909.31,-406.518 2890,-369 2823.4,-239.579 2649.33,-170.819 2570.64,-145.516"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2571.67,-142.172 2561.08,-142.507 2569.57,-148.849 2571.67,-142.172"/>
+<g id="edge144" class="edge"><title>Node37&#45;&gt;Node17</title>
+<path fill="none" stroke="midnightblue" d="M3339.65,-761.409C3423.6,-735.812 3651,-651.728 3651,-492 3651,-492 3651,-492 3651,-249.5 3651,-217.049 3656.16,-200.665 3632,-179 3601.72,-151.843 3496.14,-141.053 3421.38,-136.78"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="3421.22,-133.266 3411.05,-136.219 3420.84,-140.256 3421.22,-133.266"/>
 </g>
 <!-- Node38&#45;&gt;Node2 -->
-<g id="edge158" class="edge"><title>Node38&#45;&gt;Node2</title>
-<path fill="none" stroke="midnightblue" d="M2676.02,-1059.56C2792.92,-1055.59 3089.82,-1042.95 3125,-1016 3149.44,-997.27 3149,-981.795 3149,-951 3149,-951 3149,-951 3149,-893 3149,-836.559 3081.49,-807.37 3032.58,-793.628"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="3033.33,-790.205 3022.76,-791.004 3031.52,-796.968 3033.33,-790.205"/>
+<g id="edge157" class="edge"><title>Node38&#45;&gt;Node2</title>
+<path fill="none" stroke="midnightblue" d="M539.83,-1042.11C463.649,-1027.35 326.786,-991.294 376,-929 395.36,-904.494 835.796,-812.803 986.194,-782.094"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="986.994,-785.503 996.093,-780.076 985.595,-778.644 986.994,-785.503"/>
 </g>
 <!-- Node38&#45;&gt;Node4 -->
-<g id="edge159" class="edge"><title>Node38&#45;&gt;Node4</title>
-<path fill="none" stroke="midnightblue" d="M2581.73,-1059.72C2463.98,-1055.26 2167,-1035.61 2167,-951 2167,-951 2167,-951 2167,-837 2167,-803.861 2178.7,-766.682 2186.7,-745.144"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2190.02,-746.252 2190.36,-735.662 2183.49,-743.73 2190.02,-746.252"/>
+<g id="edge158" class="edge"><title>Node38&#45;&gt;Node4</title>
+<path fill="none" stroke="midnightblue" d="M634.268,-1049.63C1027.11,-1046.4 3736,-1021.18 3736,-940 3736,-940 3736,-940 3736,-882 3736,-810.5 2599.3,-734.921 2347.15,-719.035"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2347.03,-715.521 2336.84,-718.387 2346.6,-722.507 2347.03,-715.521"/>
 </g>
 <!-- Node38&#45;&gt;Node13 -->
 <g id="edge206" class="edge"><title>Node38&#45;&gt;Node13</title>
-<path fill="none" stroke="midnightblue" d="M2676.16,-1060.39C2898.58,-1056.29 3832,-1024.69 3832,-783 3832,-783 3832,-783 3832,-132 3832,-98.3101 3821.6,-84.8011 3793,-67 3744.03,-36.5223 3337.6,-21.313 3218.61,-17.485"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="3218.43,-13.9777 3208.33,-17.1596 3218.21,-20.9742 3218.43,-13.9777"/>
+<path fill="none" stroke="midnightblue" d="M539.863,-1049.57C447.051,-1047.95 244.548,-1040.35 186,-1005 92.6043,-948.612 38,-849.705 38,-660 38,-660 38,-660 38,-132 38,-97.8442 49.9103,-84.8998 79,-67 162.772,-15.4525 473.998,-14.7404 575.652,-15.9413"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="575.863,-19.4443 585.908,-16.0778 575.956,-12.4449 575.863,-19.4443"/>
 </g>
 <!-- Node38&#45;&gt;Node23 -->
 <g id="edge205" class="edge"><title>Node38&#45;&gt;Node23</title>
-<path fill="none" stroke="midnightblue" d="M2633.17,-1052.42C2644.2,-1029.21 2673.98,-962.937 2684,-904 2710.65,-747.308 2736.17,-681.559 2650,-548 2519.08,-345.078 2345.22,-476.505 2151,-333 2126.61,-314.976 2123.17,-305.197 2112,-277 2103.95,-256.69 2101.24,-231.601 2100.35,-214.667"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2103.84,-214.276 2099.99,-204.405 2096.85,-214.522 2103.84,-214.276"/>
+<path fill="none" stroke="midnightblue" d="M539.619,-1048.02C426.877,-1042.02 152,-1019.26 152,-940 152,-940 152,-940 152,-546 152,-524.049 792.773,-262.718 941.451,-202.376"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="942.955,-205.543 950.906,-198.541 940.324,-199.057 942.955,-205.543"/>
 </g>
 <!-- Node38&#45;&gt;Node34 -->
 <g id="edge207" class="edge"><title>Node38&#45;&gt;Node34</title>
-<path fill="none" stroke="midnightblue" d="M2581.72,-1061.21C2414.09,-1061.57 1848.87,-1059.47 1674,-1016 1630.74,-1005.25 1584.7,-980.532 1558.25,-964.865"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1559.8,-961.713 1549.43,-959.545 1556.18,-967.707 1559.8,-961.713"/>
+<path fill="none" stroke="midnightblue" d="M634.299,-1049.44C849.648,-1046.72 1732.09,-1033.9 1852,-1005 1895.59,-994.494 1941.96,-969.701 1968.6,-953.952"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1970.72,-956.76 1977.48,-948.602 1967.11,-950.764 1970.72,-956.76"/>
 </g>
 <!-- Node39 -->
 <g id="node39" class="node"><title>Node39</title>
 <g id="a_node39"><a xlink:href="ir_2module_8h.html" target="_top" xlink:title="IRModule that holds the functions and type definitions. ">
-<polygon fill="white" stroke="black" points="2583,-884.5 2583,-903.5 2675,-903.5 2675,-884.5 2583,-884.5"/>
-<text text-anchor="middle" x="2629" y="-891.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/ir/module.h</text>
+<polygon fill="white" stroke="black" points="1374,-873.5 1374,-892.5 1466,-892.5 1466,-873.5 1374,-873.5"/>
+<text text-anchor="middle" x="1420" y="-880.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/ir/module.h</text>
 </a>
 </g>
 </g>
 <!-- Node38&#45;&gt;Node39 -->
-<g id="edge160" class="edge"><title>Node38&#45;&gt;Node39</title>
-<path fill="none" stroke="midnightblue" d="M2629,-1052.08C2629,-1025.95 2629,-949.211 2629,-913.577"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2632.5,-913.568 2629,-903.568 2625.5,-913.568 2632.5,-913.568"/>
+<g id="edge159" class="edge"><title>Node38&#45;&gt;Node39</title>
+<path fill="none" stroke="midnightblue" d="M634.255,-1049.33C799.196,-1046.73 1348.71,-1035.9 1522,-1005 1603.65,-990.442 1650.94,-1015.87 1700,-949 1705.26,-941.833 1705.77,-935.766 1700,-929 1671.83,-895.938 1549.47,-887.074 1476.15,-884.758"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1476.24,-881.259 1466.14,-884.472 1476.04,-888.256 1476.24,-881.259"/>
 </g>
 <!-- Node43 -->
 <g id="node43" class="node"><title>Node43</title>
 <g id="a_node43"><a xlink:href="ir_2op_8h.html" target="_top" xlink:title="Primitive operators(builtin intrinsics) and registry for them. ">
-<polygon fill="white" stroke="black" points="1683,-996.5 1683,-1015.5 1751,-1015.5 1751,-996.5 1683,-996.5"/>
-<text text-anchor="middle" x="1717" y="-1003.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/ir/op.h</text>
+<polygon fill="white" stroke="black" points="553,-985.5 553,-1004.5 621,-1004.5 621,-985.5 553,-985.5"/>
+<text text-anchor="middle" x="587" y="-992.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/ir/op.h</text>
 </a>
 </g>
 </g>
 <!-- Node38&#45;&gt;Node43 -->
-<g id="edge181" class="edge"><title>Node38&#45;&gt;Node43</title>
-<path fill="none" stroke="midnightblue" d="M2581.89,-1058.21C2423.93,-1048.86 1914.45,-1018.69 1761.09,-1009.61"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1761.28,-1006.12 1751.09,-1009.02 1760.86,-1013.1 1761.28,-1006.12"/>
+<g id="edge180" class="edge"><title>Node38&#45;&gt;Node43</title>
+<path fill="none" stroke="midnightblue" d="M587,-1041.08C587,-1034.01 587,-1023.86 587,-1014.99"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="590.5,-1014.75 587,-1004.75 583.5,-1014.75 590.5,-1014.75"/>
 </g>
 <!-- Node49 -->
 <g id="node49" class="node"><title>Node49</title>
 <g id="a_node49"><a xlink:href="relay_2type_8h.html" target="_top" xlink:title="Relay typed AST nodes. ">
-<polygon fill="white" stroke="black" points="2319.5,-996.5 2319.5,-1015.5 2372.5,-1015.5 2372.5,-996.5 2319.5,-996.5"/>
-<text text-anchor="middle" x="2346" y="-1003.5" font-family="Helvetica,sans-Serif" font-size="10.00">./type.h</text>
+<polygon fill="white" stroke="black" points="1460.5,-985.5 1460.5,-1004.5 1513.5,-1004.5 1513.5,-985.5 1460.5,-985.5"/>
+<text text-anchor="middle" x="1487" y="-992.5" font-family="Helvetica,sans-Serif" font-size="10.00">./type.h</text>
 </a>
 </g>
 </g>
 <!-- Node38&#45;&gt;Node49 -->
 <g id="edge208" class="edge"><title>Node38&#45;&gt;Node49</title>
-<path fill="none" stroke="midnightblue" d="M2584.16,-1052.44C2528.46,-1041.82 2434.48,-1023.88 2382.8,-1014.02"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2383.23,-1010.54 2372.75,-1012.1 2381.92,-1017.42 2383.23,-1010.54"/>
+<path fill="none" stroke="midnightblue" d="M634.121,-1047.17C793.081,-1037.64 1307.58,-1006.76 1450.1,-998.214"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1450.32,-1001.71 1460.09,-997.614 1449.9,-994.72 1450.32,-1001.71"/>
 </g>
 <!-- Node39&#45;&gt;Node4 -->
-<g id="edge168" class="edge"><title>Node39&#45;&gt;Node4</title>
-<path fill="none" stroke="midnightblue" d="M2582.95,-893.824C2501.33,-892.867 2329.93,-879.674 2224,-792 2209.81,-780.254 2201.99,-760.32 2197.93,-745.617"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2201.26,-744.512 2195.51,-735.616 2194.46,-746.159 2201.26,-744.512"/>
+<g id="edge167" class="edge"><title>Node39&#45;&gt;Node4</title>
+<path fill="none" stroke="midnightblue" d="M1466.11,-875.747C1618.56,-855.034 2098.57,-789.494 2131,-781 2181.73,-767.713 2238.06,-743.743 2270.62,-728.919"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2272.31,-731.994 2279.93,-724.633 2269.38,-725.636 2272.31,-731.994"/>
 </g>
 <!-- Node39&#45;&gt;Node7 -->
-<g id="edge176" class="edge"><title>Node39&#45;&gt;Node7</title>
-<path fill="none" stroke="midnightblue" d="M2628.63,-884.295C2627.86,-871.542 2625.44,-847.211 2618,-828 2607.31,-800.409 2599.59,-795.794 2582,-772 2504.06,-666.556 2488.77,-634.237 2390,-548 2356.48,-518.733 2348.26,-508.676 2307,-492 2221.64,-457.499 2187.61,-492.298 2103,-456 2081.68,-446.853 2081.7,-435.474 2061,-425 2030.81,-409.726 1994.44,-398.455 1965.68,-391.013"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1966.39,-387.585 1955.84,-388.54 1964.69,-394.374 1966.39,-387.585"/>
+<g id="edge175" class="edge"><title>Node39&#45;&gt;Node7</title>
+<path fill="none" stroke="midnightblue" d="M1432.8,-873.201C1444.7,-864.695 1462.55,-851.06 1476,-837 1637.11,-668.563 1610.01,-566.826 1786,-414 1801.56,-400.489 1821.86,-389.473 1838.61,-381.722"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1840.29,-384.807 1848,-377.542 1837.44,-378.412 1840.29,-384.807"/>
 </g>
 <!-- Node39&#45;&gt;Node13 -->
-<g id="edge177" class="edge"><title>Node39&#45;&gt;Node13</title>
-<path fill="none" stroke="midnightblue" d="M2675.31,-891.471C2869.82,-884.98 3611.66,-859.515 3633,-848 3664.37,-831.072 3680,-818.65 3680,-783 3680,-783 3680,-783 3680,-132 3680,-101.205 3680.29,-85.9313 3656,-67 3621.53,-40.1365 3319.08,-23.0169 3218.42,-18.0226"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="3218.41,-14.5177 3208.25,-17.5251 3218.06,-21.5093 3218.41,-14.5177"/>
+<g id="edge176" class="edge"><title>Node39&#45;&gt;Node13</title>
+<path fill="none" stroke="midnightblue" d="M1373.83,-881.063C1236.63,-877.967 833.011,-866.532 705,-837 375.075,-760.887 152,-651.591 152,-313 152,-313 152,-313 152,-132 152,-95.5528 170.36,-85.0917 202,-67 265.788,-30.5259 491.667,-20.0225 575.964,-17.3353"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="576.09,-20.8331 585.98,-17.0319 575.878,-13.8363 576.09,-20.8331"/>
 </g>
 <!-- Node39&#45;&gt;Node20 -->
-<g id="edge178" class="edge"><title>Node39&#45;&gt;Node20</title>
-<path fill="none" stroke="midnightblue" d="M2675.3,-891.717C2866.31,-886.315 3583.84,-864.926 3600,-848 3609.4,-838.154 3701.6,-961.421 3557,-492 3513.9,-352.09 3419.07,-200.856 3386.36,-150.992"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="3389.27,-149.055 3380.84,-142.644 3383.43,-152.916 3389.27,-149.055"/>
+<g id="edge177" class="edge"><title>Node39&#45;&gt;Node20</title>
+<path fill="none" stroke="midnightblue" d="M1373.69,-881.671C1244.55,-880.301 882.515,-873.242 770,-837 589.912,-778.991 528.1,-759.193 408,-613 317.403,-502.72 306.697,-443.123 328,-302 336.497,-245.709 360.658,-182.458 373.212,-152.128"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="376.604,-153.092 377.261,-142.518 370.153,-150.374 376.604,-153.092"/>
 </g>
 <!-- Node39&#45;&gt;Node21 -->
-<g id="edge180" class="edge"><title>Node39&#45;&gt;Node21</title>
-<path fill="none" stroke="midnightblue" d="M2582.53,-893.548C2447.43,-894.581 2056.89,-893.448 1939,-848 1581.77,-710.284 1532.03,-576.872 1294,-277 1270.9,-247.898 1262.64,-242.145 1244,-210 1233.1,-191.193 1223.57,-168.105 1217.59,-152.345"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1220.79,-150.915 1214.04,-142.748 1214.22,-153.341 1220.79,-150.915"/>
+<g id="edge179" class="edge"><title>Node39&#45;&gt;Node21</title>
+<path fill="none" stroke="midnightblue" d="M1373.75,-882.805C1231.32,-884.199 803.718,-879.934 718,-781 695.881,-755.471 711.542,-738.155 718,-705 742.111,-581.218 829.823,-254.277 931,-179 966.745,-152.405 1274.52,-138.957 1378.12,-135.154"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1378.47,-138.644 1388.33,-134.786 1378.22,-131.648 1378.47,-138.644"/>
 </g>
 <!-- Node39&#45;&gt;Node33 -->
-<g id="edge175" class="edge"><title>Node39&#45;&gt;Node33</title>
-<path fill="none" stroke="midnightblue" d="M2625.49,-884.359C2616.7,-863.446 2592.15,-809.1 2560,-772 2525.64,-732.343 2472.35,-700.93 2440.25,-684.225"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2441.52,-680.94 2431.02,-679.52 2438.34,-687.177 2441.52,-680.94"/>
+<g id="edge174" class="edge"><title>Node39&#45;&gt;Node33</title>
+<path fill="none" stroke="midnightblue" d="M1461.13,-873.421C1502.77,-864.639 1568.99,-850.415 1626,-837 1638.38,-834.087 1836.42,-786.256 1848,-781 1910.76,-752.519 1975,-701.279 2005.22,-675.584"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2007.84,-677.943 2013.14,-668.768 2003.28,-672.637 2007.84,-677.943"/>
 </g>
 <!-- Node40 -->
 <g id="node40" class="node"><title>Node40</title>
 <g id="a_node40"><a xlink:href="ir_2adt_8h.html" target="_top" xlink:title="Algebraic data type definitions. ">
-<polygon fill="white" stroke="black" points="2479,-772.5 2479,-791.5 2551,-791.5 2551,-772.5 2479,-772.5"/>
-<text text-anchor="middle" x="2515" y="-779.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/ir/adt.h</text>
+<polygon fill="white" stroke="black" points="1767,-761.5 1767,-780.5 1839,-780.5 1839,-761.5 1767,-761.5"/>
+<text text-anchor="middle" x="1803" y="-768.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/ir/adt.h</text>
 </a>
 </g>
 </g>
 <!-- Node39&#45;&gt;Node40 -->
-<g id="edge161" class="edge"><title>Node39&#45;&gt;Node40</title>
-<path fill="none" stroke="midnightblue" d="M2614.34,-884.475C2600.72,-876.162 2580.3,-862.658 2565,-848 2549.8,-833.436 2535.47,-814.009 2526.09,-800.197"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2528.87,-798.061 2520.43,-791.66 2523.04,-801.929 2528.87,-798.061"/>
+<g id="edge160" class="edge"><title>Node39&#45;&gt;Node40</title>
+<path fill="none" stroke="midnightblue" d="M1449.73,-873.46C1518.11,-853.822 1686.53,-805.45 1763.49,-783.348"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1764.64,-786.659 1773.28,-780.535 1762.71,-779.931 1764.64,-786.659"/>
 </g>
 <!-- Node41 -->
 <g id="node41" class="node"><title>Node41</title>
 <g id="a_node41"><a xlink:href="ir_2function_8h.html" target="_top" xlink:title="Function nodes. ">
-<polygon fill="white" stroke="black" points="3277.5,-828.5 3277.5,-847.5 3372.5,-847.5 3372.5,-828.5 3277.5,-828.5"/>
-<text text-anchor="middle" x="3325" y="-835.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/ir/function.h</text>
+<polygon fill="white" stroke="black" points="1372.5,-817.5 1372.5,-836.5 1467.5,-836.5 1467.5,-817.5 1372.5,-817.5"/>
+<text text-anchor="middle" x="1420" y="-824.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/ir/function.h</text>
 </a>
 </g>
 </g>
 <!-- Node39&#45;&gt;Node41 -->
-<g id="edge169" class="edge"><title>Node39&#45;&gt;Node41</title>
-<path fill="none" stroke="midnightblue" d="M2675.21,-889.415C2798,-879.888 3132.29,-853.951 3267.3,-843.477"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="3267.74,-846.953 3277.44,-842.69 3267.2,-839.974 3267.74,-846.953"/>
+<g id="edge168" class="edge"><title>Node39&#45;&gt;Node41</title>
+<path fill="none" stroke="midnightblue" d="M1420,-873.083C1420,-866.006 1420,-855.861 1420,-846.986"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1423.5,-846.751 1420,-836.751 1416.5,-846.751 1423.5,-846.751"/>
 </g>
 <!-- Node42 -->
 <g id="node42" class="node"><title>Node42</title>
-<polygon fill="white" stroke="#bfbfbf" points="3505,-828.5 3505,-847.5 3591,-847.5 3591,-828.5 3505,-828.5"/>
-<text text-anchor="middle" x="3548" y="-835.5" font-family="Helvetica,sans-Serif" font-size="10.00">unordered_set</text>
+<polygon fill="white" stroke="#bfbfbf" points="1230,-817.5 1230,-836.5 1316,-836.5 1316,-817.5 1230,-817.5"/>
+<text text-anchor="middle" x="1273" y="-824.5" font-family="Helvetica,sans-Serif" font-size="10.00">unordered_set</text>
 </g>
 <!-- Node39&#45;&gt;Node42 -->
-<g id="edge179" class="edge"><title>Node39&#45;&gt;Node42</title>
-<path fill="none" stroke="midnightblue" d="M2675.39,-892.281C2804.79,-889.908 3180.62,-880.515 3491,-848 3492.26,-847.868 3493.54,-847.727 3494.82,-847.578"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="3495.33,-851.042 3504.8,-846.306 3494.44,-844.099 3495.33,-851.042"/>
+<g id="edge178" class="edge"><title>Node39&#45;&gt;Node42</title>
+<path fill="none" stroke="midnightblue" d="M1396.71,-873.444C1372.19,-864.438 1333.4,-850.189 1305.8,-840.049"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1306.78,-836.679 1296.18,-836.516 1304.36,-843.25 1306.78,-836.679"/>
 </g>
 <!-- Node40&#45;&gt;Node4 -->
-<g id="edge162" class="edge"><title>Node40&#45;&gt;Node4</title>
-<path fill="none" stroke="midnightblue" d="M2478.8,-774.91C2420.9,-765.171 2307.4,-746.077 2242.53,-735.164"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2243,-731.693 2232.56,-733.486 2241.84,-738.597 2243,-731.693"/>
+<g id="edge161" class="edge"><title>Node40&#45;&gt;Node4</title>
+<path fill="none" stroke="midnightblue" d="M1839.39,-766.273C1855.24,-764.651 1874.04,-762.73 1891,-761 2048.33,-744.955 2088.29,-746.25 2245,-725 2246.3,-724.824 2247.62,-724.64 2248.95,-724.449"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2249.9,-727.845 2259.26,-722.886 2248.85,-720.924 2249.9,-727.845"/>
 </g>
 <!-- Node40&#45;&gt;Node6 -->
-<g id="edge165" class="edge"><title>Node40&#45;&gt;Node6</title>
-<path fill="none" stroke="midnightblue" d="M2489.84,-772.488C2413.64,-746.734 2178.57,-667.519 1983,-604 1947.36,-592.423 1906.56,-579.534 1877.87,-570.527"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1878.85,-567.164 1868.26,-567.511 1876.75,-573.844 1878.85,-567.164"/>
+<g id="edge164" class="edge"><title>Node40&#45;&gt;Node6</title>
+<path fill="none" stroke="midnightblue" d="M1810.75,-761.182C1818.56,-752.215 1830.87,-737.849 1841,-725 1886.07,-667.822 1875.41,-630.187 1938,-593 1982.71,-566.439 2127.2,-555.009 2209.75,-550.544"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2210.18,-554.026 2219.98,-550.009 2209.81,-547.036 2210.18,-554.026"/>
 </g>
 <!-- Node40&#45;&gt;Node7 -->
-<g id="edge164" class="edge"><title>Node40&#45;&gt;Node7</title>
-<path fill="none" stroke="midnightblue" d="M2512.55,-772.384C2506.32,-751.526 2488.35,-697.293 2460,-660 2392.59,-571.315 2374.08,-540.917 2274,-492 2191.28,-451.57 2155.54,-490.053 2070,-456 2046.75,-446.744 2045.07,-436.79 2023,-425 1999.11,-412.238 1970.81,-400.472 1949.31,-392.174"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1950.55,-388.899 1939.96,-388.614 1948.06,-395.441 1950.55,-388.899"/>
+<g id="edge163" class="edge"><title>Node40&#45;&gt;Node7</title>
+<path fill="none" stroke="midnightblue" d="M1803.24,-761.275C1804.51,-720.932 1812.03,-548.708 1850,-414 1852.61,-404.743 1856.79,-394.931 1860.65,-386.841"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1863.89,-388.193 1865.24,-377.684 1857.63,-385.058 1863.89,-388.193"/>
 </g>
 <!-- Node40&#45;&gt;Node11 -->
-<g id="edge166" class="edge"><title>Node40&#45;&gt;Node11</title>
-<path fill="none" stroke="midnightblue" d="M2528.01,-772.341C2577.85,-738.046 2750.98,-607.953 2693,-492 2570.76,-247.523 2457.12,-218.454 2201,-123 2148.39,-103.392 1782.6,-86.8328 1627.87,-80.6231"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1627.67,-77.1127 1617.54,-80.2116 1627.39,-84.1072 1627.67,-77.1127"/>
+<g id="edge165" class="edge"><title>Node40&#45;&gt;Node11</title>
+<path fill="none" stroke="midnightblue" d="M1839.22,-762.044C1840.83,-761.691 1842.43,-761.341 1844,-761 2147.9,-694.767 2225.38,-684.877 2528,-613 2685.45,-575.603 3233,-592.331 3233,-430.5 3233,-430.5 3233,-430.5 3233,-311 3233,-180.558 3055.12,-113.812 2969.72,-89.2834"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2970.61,-85.8956 2960.03,-86.5671 2968.72,-92.6359 2970.61,-85.8956"/>
 </g>
 <!-- Node40&#45;&gt;Node13 -->
-<g id="edge167" class="edge"><title>Node40&#45;&gt;Node13</title>
-<path fill="none" stroke="midnightblue" d="M2551.4,-774.493C2667.88,-751.882 3035.94,-666.327 3244,-456 3363.22,-335.481 3520.12,-266.582 3430,-123 3383.7,-49.2309 3273.36,-26.3585 3218.34,-19.4239"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="3218.45,-15.9128 3208.11,-18.2391 3217.64,-22.8664 3218.45,-15.9128"/>
+<g id="edge166" class="edge"><title>Node40&#45;&gt;Node13</title>
+<path fill="none" stroke="midnightblue" d="M1766.95,-770.7C1594.84,-772.062 848.316,-755.304 442,-378 350.627,-293.151 258.533,-228.502 325,-123 379.227,-36.9265 513.524,-20.1384 575.592,-17.0621"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="575.76,-20.5582 585.61,-16.6551 575.476,-13.564 575.76,-20.5582"/>
 </g>
 <!-- Node40&#45;&gt;Node33 -->
-<g id="edge163" class="edge"><title>Node40&#45;&gt;Node33</title>
-<path fill="none" stroke="midnightblue" d="M2509.05,-772.143C2500.16,-759.2 2482.59,-734.636 2465,-716 2454.88,-705.283 2442.43,-694.439 2432.25,-686.082"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2434.32,-683.251 2424.33,-679.704 2429.93,-688.703 2434.32,-683.251"/>
+<g id="edge162" class="edge"><title>Node40&#45;&gt;Node33</title>
+<path fill="none" stroke="midnightblue" d="M1820.26,-761.368C1858.66,-742.171 1951.32,-695.842 1996.85,-673.074"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1998.44,-676.194 2005.82,-668.591 1995.31,-669.933 1998.44,-676.194"/>
 </g>
 <!-- Node41&#45;&gt;Node2 -->
-<g id="edge170" class="edge"><title>Node41&#45;&gt;Node2</title>
-<path fill="none" stroke="midnightblue" d="M3277.45,-829.492C3212.99,-819.313 3098.41,-801.223 3032.81,-790.864"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="3033.14,-787.373 3022.71,-789.27 3032.04,-794.287 3033.14,-787.373"/>
+<g id="edge169" class="edge"><title>Node41&#45;&gt;Node2</title>
+<path fill="none" stroke="midnightblue" d="M1372.28,-819.046C1367.45,-818.352 1362.63,-817.66 1358,-817 1261.66,-803.276 1149.04,-787.628 1085.76,-778.871"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1086.05,-775.378 1075.67,-777.475 1085.09,-782.312 1086.05,-775.378"/>
 </g>
 <!-- Node41&#45;&gt;Node4 -->
-<g id="edge171" class="edge"><title>Node41&#45;&gt;Node4</title>
-<path fill="none" stroke="midnightblue" d="M3277.44,-837.465C3147.42,-838.097 2775.33,-835.632 2470,-792 2383.93,-779.701 2285.25,-753.434 2232.33,-738.326"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2233.26,-734.952 2222.68,-735.551 2231.33,-741.679 2233.26,-734.952"/>
+<g id="edge170" class="edge"><title>Node41&#45;&gt;Node4</title>
+<path fill="none" stroke="midnightblue" d="M1467.56,-821.554C1546.63,-814.038 1710.07,-797.974 1848,-781 1936.43,-770.118 2156.96,-738.682 2245,-725 2246.3,-724.798 2247.61,-724.592 2248.94,-724.381"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2249.92,-727.768 2259.23,-722.712 2248.8,-720.858 2249.92,-727.768"/>
 </g>
 <!-- Node41&#45;&gt;Node8 -->
-<g id="edge172" class="edge"><title>Node41&#45;&gt;Node8</title>
-<path fill="none" stroke="midnightblue" d="M3319.44,-828.38C3287.76,-780.014 3124.59,-540.422 2928,-425 2852.08,-380.426 2824.46,-385.565 2738,-369 2594.09,-341.429 2422.94,-330.588 2326.34,-326.445"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2326.22,-322.937 2316.08,-326.019 2325.93,-329.931 2326.22,-322.937"/>
+<g id="edge171" class="edge"><title>Node41&#45;&gt;Node8</title>
+<path fill="none" stroke="midnightblue" d="M1408.46,-817.433C1387.2,-800.571 1344,-760.799 1344,-716 1344,-716 1344,-716 1344,-602 1344,-528.721 1398.74,-526.729 1456,-481 1565.08,-393.889 1608.64,-387.887 1745,-358 1800.75,-345.781 1953.32,-329.568 2046.7,-320.311"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2047.05,-323.793 2056.66,-319.327 2046.37,-316.827 2047.05,-323.793"/>
 </g>
 <!-- Node41&#45;&gt;Node13 -->
-<g id="edge173" class="edge"><title>Node41&#45;&gt;Node13</title>
-<path fill="none" stroke="midnightblue" d="M3351.3,-828.486C3388.68,-815 3456.65,-785.098 3493,-736 3495.92,-732.06 3604,-317.308 3604,-262.5 3604,-262.5 3604,-262.5 3604,-132 3604,-101.205 3604.09,-86.1785 3580,-67 3524.3,-22.6667 3302.2,-16.9953 3218.31,-16.4607"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="3218.11,-12.96 3208.1,-16.4172 3218.08,-19.9599 3218.11,-12.96"/>
+<g id="edge172" class="edge"><title>Node41&#45;&gt;Node13</title>
+<path fill="none" stroke="midnightblue" d="M1372.18,-821.218C1357.09,-819.777 1340.36,-818.248 1325,-817 1089.24,-797.844 1022.67,-841.519 794,-781 627.97,-737.059 581.987,-717.902 446,-613 428.652,-599.617 190,-334.91 190,-313 190,-313 190,-313 190,-132 190,-97.8442 202.255,-85.4491 231,-67 287.909,-30.4741 495.053,-20.0695 575.626,-17.3688"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="575.805,-20.865 585.689,-17.05 575.583,-13.8685 575.805,-20.865"/>
 </g>
 <!-- Node41&#45;&gt;Node14 -->
-<g id="edge174" class="edge"><title>Node41&#45;&gt;Node14</title>
-<path fill="none" stroke="midnightblue" d="M3330.56,-828.374C3348.64,-800.038 3406.73,-706.88 3443,-624 3532.29,-419.94 3591.96,-275.91 3430,-123 3362.07,-58.8709 2688.73,-25.9941 2508.29,-18.2905"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2508.24,-14.7854 2498.1,-17.86 2507.95,-21.7792 2508.24,-14.7854"/>
+<g id="edge173" class="edge"><title>Node41&#45;&gt;Node14</title>
+<path fill="none" stroke="midnightblue" d="M1420,-817.442C1420,-798.936 1420,-753.812 1420,-716 1420,-716 1420,-716 1420,-602 1420,-501.459 1705.88,-390.582 1801,-358 1968.61,-300.586 2034.72,-390.771 2198,-322 2323.51,-269.137 2318.22,-199.333 2431,-123 2489.46,-83.4316 2564.36,-47.5673 2604.86,-29.2698"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2606.45,-32.3922 2614.15,-25.1098 2603.59,-26.0038 2606.45,-32.3922"/>
 </g>
 <!-- Node43&#45;&gt;Node2 -->
-<g id="edge183" class="edge"><title>Node43&#45;&gt;Node2</title>
-<path fill="none" stroke="midnightblue" d="M1751.21,-1005.13C1856.99,-1005.06 2187.91,-1001.31 2458,-960 2560.29,-944.354 2585.57,-935.934 2684,-904 2786.08,-870.881 2902.89,-819.536 2955.17,-795.815"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2956.8,-798.917 2964.45,-791.585 2953.9,-792.547 2956.8,-798.917"/>
+<g id="edge182" class="edge"><title>Node43&#45;&gt;Node2</title>
+<path fill="none" stroke="midnightblue" d="M604.326,-985.433C673.003,-951.478 926.38,-826.2 1009.44,-785.132"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1011.07,-788.231 1018.48,-780.661 1007.97,-781.956 1011.07,-788.231"/>
 </g>
 <!-- Node43&#45;&gt;Node4 -->
-<g id="edge184" class="edge"><title>Node43&#45;&gt;Node4</title>
-<path fill="none" stroke="midnightblue" d="M1751.07,-1003.22C1849.78,-997.129 2129,-973.013 2129,-895 2129,-895 2129,-895 2129,-837 2129,-806.205 2137,-798.313 2153,-772 2159.42,-761.434 2168.54,-751.127 2176.55,-743.048"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2179.29,-745.259 2184.04,-735.786 2174.42,-740.232 2179.29,-745.259"/>
+<g id="edge183" class="edge"><title>Node43&#45;&gt;Node4</title>
+<path fill="none" stroke="midnightblue" d="M621.21,-993.83C777.585,-992.854 1425.24,-986.573 1624,-949 1801.74,-915.4 1834.73,-865.477 2009,-817 2082.15,-796.652 2104.7,-807.101 2176,-781 2212.81,-767.526 2252.31,-744.789 2276.08,-730.091"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2278.06,-732.981 2284.67,-724.704 2274.34,-727.05 2278.06,-732.981"/>
 </g>
 <!-- Node43&#45;&gt;Node13 -->
 <g id="edge202" class="edge"><title>Node43&#45;&gt;Node13</title>
-<path fill="none" stroke="midnightblue" d="M1751.33,-1003.58C1863.03,-998.866 2226.18,-982.704 2526,-960 2656,-950.155 3576.74,-904.978 3694,-848 3729.91,-830.552 3756,-822.923 3756,-783 3756,-783 3756,-783 3756,-132 3756,-95.5528 3737.88,-84.6639 3706,-67 3621.6,-20.2396 3318.35,-16.3221 3218.29,-16.3331"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="3218.17,-12.8332 3208.18,-16.3481 3218.18,-19.8332 3218.17,-12.8332"/>
+<path fill="none" stroke="midnightblue" d="M552.808,-991.871C456.511,-984.946 190,-959.007 190,-884 190,-884 190,-884 190,-826 190,-669.147 76,-648.853 76,-492 76,-492 76,-492 76,-132 76,-101.205 75.636,-85.8345 100,-67 137.667,-37.8818 469.984,-22.0669 575.792,-17.7369"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="576.02,-21.2306 585.871,-17.3313 575.739,-14.2363 576.02,-21.2306"/>
 </g>
 <!-- Node43&#45;&gt;Node15 -->
 <g id="edge203" class="edge"><title>Node43&#45;&gt;Node15</title>
-<path fill="none" stroke="midnightblue" d="M1682.88,-1004.89C1492.11,-1004.12 555.836,-998.436 270,-960 147.673,-943.551 0,-1018.43 0,-895 0,-895 0,-895 0,-132 0,-85.1454 40.6503,-87.0465 83,-67 148.096,-36.1865 232.753,-23.7691 277.72,-19.0903"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="278.128,-22.5672 287.739,-18.1088 277.445,-15.6006 278.128,-22.5672"/>
+<path fill="none" stroke="midnightblue" d="M593.307,-985.254C616.017,-953.923 696.176,-844.716 770,-761 913.425,-598.356 953.429,-560.836 1113,-414 1159.9,-370.844 1184.14,-372.516 1223,-322 1301.32,-220.194 1247.45,-130.673 1359,-67 1424.43,-29.6518 1657.05,-19.6537 1741.81,-17.2197"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1741.95,-20.7174 1751.85,-16.9474 1741.76,-13.7199 1741.95,-20.7174"/>
 </g>
 <!-- Node43&#45;&gt;Node21 -->
 <g id="edge204" class="edge"><title>Node43&#45;&gt;Node21</title>
-<path fill="none" stroke="midnightblue" d="M1682.84,-1003.81C1483.17,-996.635 476,-957.354 476,-895 476,-895 476,-895 476,-501 476,-300.798 610.865,-266.362 791,-179 859.013,-146.015 1089.85,-136.973 1177.16,-134.702"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1177.41,-138.197 1187.32,-134.452 1177.24,-131.199 1177.41,-138.197"/>
+<path fill="none" stroke="midnightblue" d="M552.831,-993.009C505.316,-990.434 417.625,-981.287 351,-949 311.305,-929.764 276,-928.11 276,-884 276,-884 276,-884 276,-826 276,-605.303 303.013,-529.434 442,-358 507.957,-276.645 548.287,-279.703 643,-235 711.892,-202.484 730.39,-194.389 805,-179 1019.78,-134.699 1285.24,-132.652 1378.06,-133.464"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1378.23,-136.966 1388.26,-133.572 1378.3,-129.967 1378.23,-136.966"/>
 </g>
 <!-- Node43&#45;&gt;Node33 -->
-<g id="edge185" class="edge"><title>Node43&#45;&gt;Node33</title>
-<path fill="none" stroke="midnightblue" d="M1717,-996.442C1717,-977.936 1717,-932.812 1717,-895 1717,-895 1717,-895 1717,-837 1717,-703.88 2204.84,-677.306 2363.67,-672.182"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2364.2,-675.667 2374.09,-671.861 2363.99,-668.671 2364.2,-675.667"/>
+<g id="edge184" class="edge"><title>Node43&#45;&gt;Node33</title>
+<path fill="none" stroke="midnightblue" d="M621.16,-993.702C750.916,-992.306 1213.04,-984.871 1355,-949 1546.62,-900.58 1577.44,-841.385 1758,-761 1816.22,-735.081 1830.44,-727.673 1890,-705 1922.12,-692.773 1959.2,-680.365 1986.04,-671.683"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1987.34,-674.941 1995.79,-668.547 1985.2,-668.277 1987.34,-674.941"/>
 </g>
 <!-- Node44 -->
 <g id="node44" class="node"><title>Node44</title>
-<polygon fill="white" stroke="#bfbfbf" points="1603,-940.5 1603,-959.5 1689,-959.5 1689,-940.5 1603,-940.5"/>
-<text text-anchor="middle" x="1646" y="-947.5" font-family="Helvetica,sans-Serif" font-size="10.00">dmlc/registry.h</text>
+<polygon fill="white" stroke="#bfbfbf" points="385,-929.5 385,-948.5 471,-948.5 471,-929.5 385,-929.5"/>
+<text text-anchor="middle" x="428" y="-936.5" font-family="Helvetica,sans-Serif" font-size="10.00">dmlc/registry.h</text>
 </g>
 <!-- Node43&#45;&gt;Node44 -->
-<g id="edge182" class="edge"><title>Node43&#45;&gt;Node44</title>
-<path fill="none" stroke="midnightblue" d="M1705.59,-996.324C1694.89,-988.185 1678.66,-975.839 1665.84,-966.087"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1667.69,-963.102 1657.61,-959.834 1663.46,-968.673 1667.69,-963.102"/>
+<g id="edge181" class="edge"><title>Node43&#45;&gt;Node44</title>
+<path fill="none" stroke="midnightblue" d="M561.806,-985.444C535.057,-976.359 492.597,-961.939 462.695,-951.783"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="463.671,-948.418 453.076,-948.516 461.419,-955.046 463.671,-948.418"/>
 </g>
 <!-- Node45 -->
 <g id="node45" class="node"><title>Node45</title>
 <g id="a_node45"><a xlink:href="type__relation_8h.html" target="_top" xlink:title="Type relation and function for type inference(checking). ">
-<polygon fill="white" stroke="black" points="2330.5,-940.5 2330.5,-959.5 2449.5,-959.5 2449.5,-940.5 2330.5,-940.5"/>
-<text text-anchor="middle" x="2390" y="-947.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/ir/type_relation.h</text>
+<polygon fill="white" stroke="black" points="1227.5,-929.5 1227.5,-948.5 1346.5,-948.5 1346.5,-929.5 1227.5,-929.5"/>
+<text text-anchor="middle" x="1287" y="-936.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/ir/type_relation.h</text>
 </a>
 </g>
 </g>
 <!-- Node43&#45;&gt;Node45 -->
-<g id="edge186" class="edge"><title>Node43&#45;&gt;Node45</title>
-<path fill="none" stroke="midnightblue" d="M1751.3,-1002.25C1856.68,-993.793 2177.2,-968.075 2320.03,-956.614"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2320.73,-960.07 2330.42,-955.781 2320.17,-953.092 2320.73,-960.07"/>
+<g id="edge185" class="edge"><title>Node43&#45;&gt;Node45</title>
+<path fill="none" stroke="midnightblue" d="M621.236,-991.359C730,-982.969 1069.16,-956.804 1216.97,-945.402"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1217.58,-948.866 1227.28,-944.607 1217.04,-941.887 1217.58,-948.866"/>
 </g>
 <!-- Node47 -->
 <g id="node47" class="node"><title>Node47</title>
 <g id="a_node47"><a xlink:href="attr__registry__map_8h.html" target="_top" xlink:title="Attribute map used in registry. ">
-<polygon fill="white" stroke="black" points="294,-179.5 294,-209.5 418,-209.5 418,-179.5 294,-179.5"/>
-<text text-anchor="start" x="302" y="-197.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/node/attr_registry</text>
-<text text-anchor="middle" x="356" y="-186.5" font-family="Helvetica,sans-Serif" font-size="10.00">_map.h</text>
+<polygon fill="white" stroke="black" points="1358,-414.5 1358,-444.5 1482,-444.5 1482,-414.5 1358,-414.5"/>
+<text text-anchor="start" x="1366" y="-432.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/node/attr_registry</text>
+<text text-anchor="middle" x="1420" y="-421.5" font-family="Helvetica,sans-Serif" font-size="10.00">_map.h</text>
 </a>
 </g>
 </g>
 <!-- Node43&#45;&gt;Node47 -->
-<g id="edge194" class="edge"><title>Node43&#45;&gt;Node47</title>
-<path fill="none" stroke="midnightblue" d="M1682.89,-1005.02C1507.54,-1004.92 706.394,-1002.14 462,-960 367.391,-943.687 256,-991.005 256,-895 256,-895 256,-895 256,-322 256,-276.646 296.049,-238.034 325.458,-215.794"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="327.796,-218.421 333.807,-209.696 323.667,-212.768 327.796,-218.421"/>
+<g id="edge193" class="edge"><title>Node43&#45;&gt;Node47</title>
+<path fill="none" stroke="midnightblue" d="M621.195,-991.908C764.083,-981.164 1306,-919.611 1306,-604 1306,-604 1306,-604 1306,-546 1306,-502.22 1348,-469.033 1381.02,-449.71"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1383.05,-452.583 1390.05,-444.63 1379.62,-446.482 1383.05,-452.583"/>
 </g>
 <!-- Node48 -->
 <g id="node48" class="node"><title>Node48</title>
 <g id="a_node48"><a xlink:href="registry_8h.html" target="_top" xlink:title="This file defines the TVM global function registry. ">
-<polygon fill="white" stroke="black" points="1044,-313.5 1044,-332.5 1166,-332.5 1166,-313.5 1044,-313.5"/>
-<text text-anchor="middle" x="1105" y="-320.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/runtime/registry.h</text>
+<polygon fill="white" stroke="black" points="1092,-302.5 1092,-321.5 1214,-321.5 1214,-302.5 1092,-302.5"/>
+<text text-anchor="middle" x="1153" y="-309.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/runtime/registry.h</text>
 </a>
 </g>
 </g>
 <!-- Node43&#45;&gt;Node48 -->
 <g id="edge197" class="edge"><title>Node43&#45;&gt;Node48</title>
-<path fill="none" stroke="midnightblue" d="M1682.63,-1005.04C1546,-1004.93 1046,-1001.65 990,-960 965.289,-941.623 966,-925.795 966,-895 966,-895 966,-895 966,-837 966,-636.399 1066.24,-406.549 1096.47,-341.792"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1099.73,-343.082 1100.84,-332.545 1093.4,-340.092 1099.73,-343.082"/>
+<path fill="none" stroke="midnightblue" d="M552.779,-991.786C498.824,-987.502 397.854,-976.006 376,-949 352.701,-920.208 329.742,-957.524 414,-817 560.596,-572.509 665.65,-555.718 913,-414 959.231,-387.512 970.029,-378.994 1019,-358 1049.9,-344.753 1086.1,-332.797 1113.08,-324.544"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1114.38,-327.808 1122.94,-321.567 1112.36,-321.107 1114.38,-327.808"/>
 </g>
 <!-- Node45&#45;&gt;Node2 -->
-<g id="edge187" class="edge"><title>Node45&#45;&gt;Node2</title>
-<path fill="none" stroke="midnightblue" d="M2411.86,-940.495C2446.1,-927.354 2514.6,-901.778 2574,-884 2702.25,-845.612 2856.18,-810.487 2933.52,-793.583"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2934.43,-796.965 2943.46,-791.417 2932.94,-790.125 2934.43,-796.965"/>
+<g id="edge186" class="edge"><title>Node45&#45;&gt;Node2</title>
+<path fill="none" stroke="midnightblue" d="M1234.6,-929.442C1205.06,-922.854 1168.48,-911.619 1140,-893 1097.67,-865.329 1062.82,-815.581 1046.31,-789.322"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1049.29,-787.488 1041.08,-780.794 1043.32,-791.148 1049.29,-787.488"/>
 </g>
 <!-- Node45&#45;&gt;Node33 -->
-<g id="edge193" class="edge"><title>Node45&#45;&gt;Node33</title>
-<path fill="none" stroke="midnightblue" d="M2393.1,-940.289C2399.38,-921.856 2413,-877.435 2413,-839 2413,-839 2413,-839 2413,-781 2413,-748.948 2413,-711.643 2413,-689.749"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2416.5,-689.558 2413,-679.558 2409.5,-689.558 2416.5,-689.558"/>
+<g id="edge192" class="edge"><title>Node45&#45;&gt;Node33</title>
+<path fill="none" stroke="midnightblue" d="M1341.74,-929.439C1448.77,-911.435 1691.86,-864.289 1881,-781 1924.12,-762.014 1935.78,-756.288 1971,-725 1986.98,-710.807 2001.85,-691.323 2011.56,-677.396"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2014.69,-679.015 2017.41,-668.775 2008.9,-675.085 2014.69,-679.015"/>
 </g>
 <!-- Node45&#45;&gt;Node39 -->
-<g id="edge192" class="edge"><title>Node45&#45;&gt;Node39</title>
-<path fill="none" stroke="midnightblue" d="M2427.87,-940.444C2469.48,-931.043 2536.37,-915.929 2581.47,-905.739"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2582.32,-909.134 2591.31,-903.516 2580.78,-902.306 2582.32,-909.134"/>
+<g id="edge191" class="edge"><title>Node45&#45;&gt;Node39</title>
+<path fill="none" stroke="midnightblue" d="M1308.07,-929.444C1330.06,-920.517 1364.73,-906.439 1389.66,-896.317"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1391.08,-899.521 1399.02,-892.516 1388.44,-893.035 1391.08,-899.521"/>
 </g>
 <!-- Node46 -->
 <g id="node46" class="node"><title>Node46</title>
 <g id="a_node46"><a xlink:href="env__func_8h.html" target="_top" xlink:title="Serializable global function used in IR. ">
-<polygon fill="white" stroke="black" points="1184.5,-548.5 1184.5,-567.5 1283.5,-567.5 1283.5,-548.5 1184.5,-548.5"/>
-<text text-anchor="middle" x="1234" y="-555.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/ir/env_func.h</text>
+<polygon fill="white" stroke="black" points="1140.5,-537.5 1140.5,-556.5 1239.5,-556.5 1239.5,-537.5 1140.5,-537.5"/>
+<text text-anchor="middle" x="1190" y="-544.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/ir/env_func.h</text>
 </a>
 </g>
 </g>
 <!-- Node45&#45;&gt;Node46 -->
-<g id="edge188" class="edge"><title>Node45&#45;&gt;Node46</title>
-<path fill="none" stroke="midnightblue" d="M2330.23,-948.661C2170.73,-947.27 1737.77,-940.172 1600,-904 1521.1,-883.286 1462,-864.571 1462,-783 1462,-783 1462,-783 1462,-669 1462,-628.346 1342.93,-589.003 1276.88,-570.308"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1277.73,-566.91 1267.15,-567.595 1275.85,-573.653 1277.73,-566.91"/>
+<g id="edge187" class="edge"><title>Node45&#45;&gt;Node46</title>
+<path fill="none" stroke="midnightblue" d="M1260.09,-929.41C1221.22,-915.126 1154,-882.535 1154,-828 1154,-828 1154,-828 1154,-658 1154,-623.982 1169.74,-586.85 1180.41,-565.6"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1183.62,-567.012 1185.14,-556.527 1177.41,-563.772 1183.62,-567.012"/>
 </g>
 <!-- Node46&#45;&gt;Node13 -->
-<g id="edge190" class="edge"><title>Node46&#45;&gt;Node13</title>
-<path fill="none" stroke="midnightblue" d="M1283.75,-557.569C1455.45,-558.579 2038.22,-554.083 2506,-456 2594.87,-437.366 2616.87,-427.713 2699,-389 2742.3,-368.591 2750.17,-357.988 2791,-333 2831.5,-308.21 2843.3,-304.529 2882,-277 2999.29,-193.552 3125.77,-74.6253 3169.81,-32.2438"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="3172.29,-34.7125 3177.05,-25.2468 3167.42,-29.6785 3172.29,-34.7125"/>
+<g id="edge189" class="edge"><title>Node46&#45;&gt;Node13</title>
+<path fill="none" stroke="midnightblue" d="M1140.42,-544.649C1034.02,-539.157 781.345,-511.883 632,-378 567.645,-320.307 573.235,-283.693 556,-199 544.301,-141.512 533.153,-121.035 556,-67 562.092,-52.591 574.53,-40.2679 585.713,-31.4025"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="587.958,-34.0949 593.895,-25.3198 583.781,-28.4772 587.958,-34.0949"/>
 </g>
 <!-- Node46&#45;&gt;Node15 -->
-<g id="edge191" class="edge"><title>Node46&#45;&gt;Node15</title>
-<path fill="none" stroke="midnightblue" d="M1184.48,-557.798C1097.96,-557.975 914.322,-552.917 766,-512 666.439,-484.535 443.402,-347.346 346,-313 281.342,-290.2 254.577,-312.625 196,-277 113.271,-226.686 80.0338,-144.56 138,-67 154.77,-44.5615 233.3,-28.6089 277.88,-21.1868"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="278.584,-24.6184 287.898,-19.5673 277.467,-17.7081 278.584,-24.6184"/>
+<g id="edge190" class="edge"><title>Node46&#45;&gt;Node15</title>
+<path fill="none" stroke="midnightblue" d="M1194.47,-537.174C1209.52,-507.345 1259.26,-407.385 1294,-322 1329.34,-235.158 1310.29,-199.844 1364,-123 1386.43,-90.9108 1396.22,-82.892 1432,-67 1538.29,-19.7918 1679.71,-15.1555 1741.67,-15.6612"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1741.9,-19.1646 1751.95,-15.7998 1742,-12.1652 1741.9,-19.1646"/>
 </g>
 <!-- Node46&#45;&gt;Node27 -->
-<g id="edge189" class="edge"><title>Node46&#45;&gt;Node27</title>
-<path fill="none" stroke="midnightblue" d="M1283.54,-550.379C1354.87,-540.845 1486.83,-523.206 1568.06,-512.348"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1568.75,-515.787 1578.2,-510.993 1567.82,-508.849 1568.75,-515.787"/>
+<g id="edge188" class="edge"><title>Node46&#45;&gt;Node27</title>
+<path fill="none" stroke="midnightblue" d="M1239.64,-544.693C1397.67,-540.387 1903.15,-525.513 2321,-501 2335.91,-500.125 2351.88,-499.005 2367.06,-497.854"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2367.72,-501.314 2377.42,-497.055 2367.18,-494.335 2367.72,-501.314"/>
+</g>
+<!-- Node47&#45;&gt;Node7 -->
+<g id="edge194" class="edge"><title>Node47&#45;&gt;Node7</title>
+<path fill="none" stroke="midnightblue" d="M1482.1,-420.289C1565.47,-409.265 1713.18,-389.736 1800.28,-378.219"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1800.88,-381.67 1810.33,-376.89 1799.96,-374.731 1800.88,-381.67"/>
 </g>
 <!-- Node47&#45;&gt;Node15 -->
 <g id="edge195" class="edge"><title>Node47&#45;&gt;Node15</title>
-<path fill="none" stroke="midnightblue" d="M352.233,-179.315C343.821,-147.633 323.501,-71.1092 314.024,-35.4186"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="317.306,-34.1409 311.356,-25.3741 310.54,-35.9374 317.306,-34.1409"/>
+<path fill="none" stroke="midnightblue" d="M1411.62,-414.203C1385.16,-367.03 1309.22,-214.143 1379,-123 1423.87,-64.3916 1656.66,-30.641 1741.78,-20.1054"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1742.36,-23.5607 1751.87,-18.8809 1741.52,-16.6118 1742.36,-23.5607"/>
 </g>
 <!-- Node47&#45;&gt;Node21 -->
 <g id="edge196" class="edge"><title>Node47&#45;&gt;Node21</title>
-<path fill="none" stroke="midnightblue" d="M418.164,-184.444C433.453,-182.434 449.798,-180.468 465,-179 739.381,-152.509 1072.14,-138.995 1177.4,-135.164"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1177.57,-138.66 1187.43,-134.803 1177.31,-131.665 1177.57,-138.66"/>
+<path fill="none" stroke="midnightblue" d="M1404.35,-414.247C1395.34,-404.934 1384.85,-391.943 1380,-378 1351.84,-297.082 1387.43,-193.206 1404.18,-151.998"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1407.43,-153.294 1408.07,-142.718 1400.98,-150.588 1407.43,-153.294"/>
 </g>
 <!-- Node48&#45;&gt;Node13 -->
 <g id="edge199" class="edge"><title>Node48&#45;&gt;Node13</title>
-<path fill="none" stroke="midnightblue" d="M1166.09,-321.84C1298.12,-320.759 1619.67,-314.163 1886,-277 2021.31,-258.12 2054.3,-246.31 2186,-210 2381.79,-156.024 2421.11,-108.072 2620,-67 2820.1,-25.6786 3066.89,-18.164 3153.85,-16.8008"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="3153.93,-20.3001 3163.88,-16.6603 3153.84,-13.3008 3153.93,-20.3001"/>
+<path fill="none" stroke="midnightblue" d="M1147.37,-302.179C1141.72,-293.209 1132.9,-278.841 1126,-266 1110.38,-236.93 1108.13,-228.824 1094,-199 1066.25,-140.423 1084.56,-104.93 1032,-67 969.461,-21.8683 728.484,-16.74 640.444,-16.4017"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="640.257,-12.9014 630.25,-16.3818 640.244,-19.9014 640.257,-12.9014"/>
 </g>
 <!-- Node48&#45;&gt;Node15 -->
 <g id="edge200" class="edge"><title>Node48&#45;&gt;Node15</title>
-<path fill="none" stroke="midnightblue" d="M1043.97,-313.931C854.387,-288.833 288.146,-213.605 285,-210 241.055,-159.644 279.17,-71.8652 298.908,-34.3398"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="302.197,-35.6176 303.904,-25.1612 296.049,-32.2708 302.197,-35.6176"/>
+<path fill="none" stroke="midnightblue" d="M1150.96,-302.283C1143,-265.811 1118.06,-124.674 1194,-67 1237.84,-33.7089 1627.62,-20.4801 1741.74,-17.3001"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1742,-20.7943 1751.9,-17.0234 1741.81,-13.7969 1742,-20.7943"/>
 </g>
 <!-- Node48&#45;&gt;Node16 -->
 <g id="edge198" class="edge"><title>Node48&#45;&gt;Node16</title>
-<path fill="none" stroke="midnightblue" d="M1166.05,-316.927C1299.34,-305.851 1615.53,-279.575 1754.05,-268.064"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1754.43,-271.544 1764.11,-267.228 1753.85,-264.568 1754.43,-271.544"/>
+<path fill="none" stroke="midnightblue" d="M1214.25,-306.84C1373.12,-296.049 1797.49,-267.224 1962.28,-256.032"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1962.54,-259.522 1972.28,-255.352 1962.07,-252.538 1962.54,-259.522"/>
 </g>
 <!-- Node48&#45;&gt;Node21 -->
 <g id="edge201" class="edge"><title>Node48&#45;&gt;Node21</title>
-<path fill="none" stroke="midnightblue" d="M1108.64,-313.314C1114.8,-298.929 1127.98,-269.422 1142,-246 1152.13,-229.064 1157.35,-226.614 1168,-210 1180.49,-190.529 1193.36,-167.27 1201.73,-151.656"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1204.93,-153.089 1206.53,-142.615 1198.75,-149.808 1204.93,-153.089"/>
+<path fill="none" stroke="midnightblue" d="M1162.77,-302.224C1187.92,-279.885 1257.47,-219.758 1322,-179 1341.44,-166.72 1364.68,-155.172 1382.68,-146.849"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1384.42,-149.9 1392.08,-142.573 1381.53,-143.528 1384.42,-149.9"/>
 </g>
 <!-- Node49&#45;&gt;Node2 -->
 <g id="edge209" class="edge"><title>Node49&#45;&gt;Node2</title>
-<path fill="none" stroke="midnightblue" d="M2372.74,-1004.65C2480.87,-1003.01 2882.16,-994.618 2926,-960 2975.16,-921.189 2982.54,-839.55 2983.24,-802.285"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2986.74,-801.868 2983.27,-791.856 2979.74,-801.844 2986.74,-801.868"/>
+<path fill="none" stroke="midnightblue" d="M1460.28,-993.683C1394.13,-991.571 1220.32,-977.764 1112,-893 1085.72,-872.438 1079.57,-865.363 1062,-837 1052.91,-822.326 1045.9,-803.926 1041.47,-790.452"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1044.79,-789.367 1038.47,-780.868 1038.11,-791.457 1044.79,-789.367"/>
 </g>
 <!-- Node49&#45;&gt;Node13 -->
 <g id="edge218" class="edge"><title>Node49&#45;&gt;Node13</title>
-<path fill="none" stroke="midnightblue" d="M2372.55,-1003.61C2457.04,-999.042 2727.26,-983.486 2950,-960 3041.45,-950.357 3698.29,-905.566 3770,-848 3794.01,-828.722 3794,-813.795 3794,-783 3794,-783 3794,-783 3794,-132 3794,-101.205 3794.48,-85.6779 3770,-67 3725.96,-33.4063 3334.48,-20.4281 3218.34,-17.2969"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="3218.38,-13.7967 3208.29,-17.0321 3218.19,-20.7943 3218.38,-13.7967"/>
+<path fill="none" stroke="midnightblue" d="M1460.46,-991.461C1391.55,-984.834 1201.27,-966.33 1043,-949 769.864,-919.094 660.919,-993.392 435,-837 262.796,-717.792 114,-701.439 114,-492 114,-492 114,-492 114,-132 114,-94.4425 134.967,-84.8715 168,-67 238.591,-28.809 486.656,-19.3231 575.797,-17.1227"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="575.941,-20.6204 585.857,-16.8891 575.779,-13.6223 575.941,-20.6204"/>
 </g>
 <!-- Node49&#45;&gt;Node33 -->
 <g id="edge214" class="edge"><title>Node49&#45;&gt;Node33</title>
-<path fill="none" stroke="midnightblue" d="M2372.52,-1002.63C2399.19,-998.675 2438.99,-988.125 2458,-960 2517.07,-872.611 2450.65,-736.761 2423.5,-688.632"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2426.39,-686.649 2418.36,-679.743 2420.33,-690.152 2426.39,-686.649"/>
+<path fill="none" stroke="midnightblue" d="M1513.55,-991.398C1558.98,-986.442 1654.15,-973.857 1731,-949 1861.14,-906.906 1881.24,-865.838 2009,-817 2061.81,-796.814 2097.15,-826.28 2131,-781 2162.2,-739.259 2091.84,-694.627 2050.46,-673.102"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2051.88,-669.899 2041.37,-668.505 2048.72,-676.145 2051.88,-669.899"/>
 </g>
 <!-- Node49&#45;&gt;Node34 -->
 <g id="edge219" class="edge"><title>Node49&#45;&gt;Node34</title>
-<path fill="none" stroke="midnightblue" d="M2319.28,-1004.31C2223.04,-1001.64 1884.9,-990.602 1594.98,-959.993"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1595.01,-956.476 1584.69,-958.896 1594.26,-963.436 1595.01,-956.476"/>
+<path fill="none" stroke="midnightblue" d="M1513.64,-991.658C1582.35,-985.536 1771.21,-968.191 1928,-949 1929.3,-948.841 1930.61,-948.678 1931.93,-948.511"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1932.72,-951.937 1942.19,-947.171 1931.82,-944.996 1932.72,-951.937"/>
 </g>
 <!-- Node49&#45;&gt;Node35 -->
 <g id="edge217" class="edge"><title>Node49&#45;&gt;Node35</title>
-<path fill="none" stroke="midnightblue" d="M2319.1,-1000.76C2214.99,-984.324 1837.42,-924.736 1699.71,-903.003"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1699.86,-899.484 1689.44,-901.382 1698.77,-906.399 1699.86,-899.484"/>
+<path fill="none" stroke="midnightblue" d="M1513.52,-994.068C1578.85,-993.591 1752.26,-988.389 1890,-949 1892.86,-948.183 1960.1,-915.813 1998.95,-897.069"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2000.67,-900.124 2008.15,-892.625 1997.62,-893.82 2000.67,-900.124"/>
 </g>
 <!-- Node49&#45;&gt;Node45 -->
 <g id="edge215" class="edge"><title>Node49&#45;&gt;Node45</title>
-<path fill="none" stroke="midnightblue" d="M2353.27,-996.083C2359.54,-988.377 2368.79,-977.033 2376.43,-967.653"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2379.27,-969.714 2382.87,-959.751 2373.84,-965.293 2379.27,-969.714"/>
+<path fill="none" stroke="midnightblue" d="M1460.44,-986.829C1426.72,-977.724 1368.16,-961.913 1328.51,-951.209"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1329.19,-947.765 1318.62,-948.537 1327.36,-954.523 1329.19,-947.765"/>
 </g>
 <!-- Node49&#45;&gt;Node46 -->
 <g id="edge210" class="edge"><title>Node49&#45;&gt;Node46</title>
-<path fill="none" stroke="midnightblue" d="M2319.33,-1002.39C2253.49,-995.88 2078,-978.151 1932,-960 1856.66,-950.634 1315.6,-897.447 1258,-848 1234.63,-827.942 1234,-813.795 1234,-783 1234,-783 1234,-783 1234,-669 1234,-636.948 1234,-599.643 1234,-577.749"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1237.5,-577.558 1234,-567.558 1230.5,-577.558 1237.5,-577.558"/>
+<path fill="none" stroke="midnightblue" d="M1470.7,-985.482C1415.27,-956.378 1236.68,-861.344 1221,-837 1164.71,-749.615 1178.94,-615.747 1186.64,-566.776"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1190.13,-567.143 1188.33,-556.702 1183.23,-565.986 1190.13,-567.143"/>
 </g>
 <!-- Node49&#45;&gt;Node48 -->
 <g id="edge216" class="edge"><title>Node49&#45;&gt;Node48</title>
-<path fill="none" stroke="midnightblue" d="M2319.43,-1004.99C2192.05,-1004.73 1643.27,-1001.18 1478,-960 1279.11,-910.439 1080,-931.972 1080,-727 1080,-727 1080,-727 1080,-501 1080,-485.365 1095.49,-384.661 1102.09,-342.51"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1105.56,-342.925 1103.66,-332.504 1098.65,-341.84 1105.56,-342.925"/>
+<path fill="none" stroke="midnightblue" d="M1460.27,-991.402C1331.23,-978.245 775.088,-914.3 680,-781 531.932,-573.429 579.704,-667.885 1050,-358 1070.86,-344.252 1096.63,-332.999 1117.17,-325.184"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1118.62,-328.378 1126.78,-321.628 1116.19,-321.813 1118.62,-328.378"/>
 </g>
 <!-- Node50 -->
 <g id="node50" class="node"><title>Node50</title>
 <g id="a_node50"><a xlink:href="tensor__type_8h.html" target="_top" xlink:title="Polymorphic tensor types. ">
-<polygon fill="white" stroke="black" points="2233.5,-772.5 2233.5,-791.5 2346.5,-791.5 2346.5,-772.5 2233.5,-772.5"/>
-<text text-anchor="middle" x="2290" y="-779.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/ir/tensor_type.h</text>
+<polygon fill="white" stroke="black" points="2009.5,-761.5 2009.5,-780.5 2122.5,-780.5 2122.5,-761.5 2009.5,-761.5"/>
+<text text-anchor="middle" x="2066" y="-768.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/ir/tensor_type.h</text>
 </a>
 </g>
 </g>
 <!-- Node49&#45;&gt;Node50 -->
 <g id="edge211" class="edge"><title>Node49&#45;&gt;Node50</title>
-<path fill="none" stroke="midnightblue" d="M2340.1,-996.448C2334.34,-987.688 2325.77,-973.48 2321,-960 2301.35,-904.493 2293.82,-834.619 2291.23,-801.737"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2294.71,-801.453 2290.5,-791.733 2287.73,-801.96 2294.71,-801.453"/>
+<path fill="none" stroke="midnightblue" d="M1513.52,-992.414C1555.12,-989.064 1637.43,-978.914 1700,-949 1783.79,-908.941 1778.39,-857.432 1862,-817 1905.13,-796.142 1958.08,-784.828 1999.22,-778.762"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1999.72,-782.225 2009.14,-777.366 1998.75,-775.293 1999.72,-782.225"/>
 </g>
 <!-- Node50&#45;&gt;Node4 -->
 <g id="edge212" class="edge"><title>Node50&#45;&gt;Node4</title>
-<path fill="none" stroke="midnightblue" d="M2274.58,-772.324C2259.46,-763.822 2236.18,-750.729 2218.53,-740.797"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2219.82,-737.507 2209.39,-735.655 2216.39,-743.608 2219.82,-737.507"/>
+<path fill="none" stroke="midnightblue" d="M2102.76,-761.444C2143.06,-752.063 2207.81,-736.993 2251.58,-726.804"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2252.46,-730.192 2261.41,-724.516 2250.88,-723.375 2252.46,-730.192"/>
 </g>
 <!-- Node50&#45;&gt;Node33 -->
 <g id="edge213" class="edge"><title>Node50&#45;&gt;Node33</title>
-<path fill="none" stroke="midnightblue" d="M2299.65,-772.368C2320.4,-753.814 2369.49,-709.913 2395.74,-686.438"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2398.27,-688.867 2403.39,-679.591 2393.61,-683.649 2398.27,-688.867"/>
+<path fill="none" stroke="midnightblue" d="M2062.87,-761.12C2058.4,-748.559 2049.87,-724.912 2042,-705 2038.45,-696.017 2034.29,-686.119 2030.78,-677.899"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2033.93,-676.375 2026.76,-668.579 2027.51,-679.147 2033.93,-676.375"/>
 </g>
 </g>
 </svg>
diff --git a/docs/api/doxygen/analyzer_8h.html b/docs/api/doxygen/analyzer_8h.html
index cdbd6bd..6314c53 100644
--- a/docs/api/doxygen/analyzer_8h.html
+++ b/docs/api/doxygen/analyzer_8h.html
@@ -105,7 +105,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
 </div><div class="textblock"><div class="dynheader">
 Include dependency graph for analyzer.h:</div>
 <div class="dyncontent">
-<div class="center"><iframe scrolling="no" frameborder="0" src="analyzer_8h__incl.svg" width="4110" height="1426"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<div class="center"><iframe scrolling="no" frameborder="0" src="analyzer_8h__incl.svg" width="3892" height="1426"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </div>
 </div>
 </div><div class="textblock"><div class="dynheader">
diff --git a/docs/api/doxygen/analyzer_8h__incl.svg b/docs/api/doxygen/analyzer_8h__incl.svg
index 111ae0b..dd224fe 100644
--- a/docs/api/doxygen/analyzer_8h__incl.svg
+++ b/docs/api/doxygen/analyzer_8h__incl.svg
@@ -4,1141 +4,1131 @@
 <!-- Generated by graphviz version 2.38.0 (20140413.2041)
  -->
 <!-- Title: include/tvm/arith/analyzer.h Pages: 1 -->
-<svg width="3082pt" height="1069pt"
- viewBox="0.00 0.00 3082.00 1069.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<svg width="2919pt" height="1069pt"
+ viewBox="0.00 0.00 2918.92 1069.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 1065)">
 <title>include/tvm/arith/analyzer.h</title>
-<polygon fill="white" stroke="none" points="-4,4 -4,-1065 3078,-1065 3078,4 -4,4"/>
+<polygon fill="white" stroke="none" points="-4,4 -4,-1065 2914.92,-1065 2914.92,4 -4,4"/>
 <!-- Node1 -->
 <g id="node1" class="node"><title>Node1</title>
-<polygon fill="#bfbfbf" stroke="black" points="2520.5,-1041.5 2520.5,-1060.5 2669.5,-1060.5 2669.5,-1041.5 2520.5,-1041.5"/>
-<text text-anchor="middle" x="2595" y="-1048.5" font-family="Helvetica,sans-Serif" font-size="10.00">include/tvm/arith/analyzer.h</text>
+<polygon fill="#bfbfbf" stroke="black" points="2646.42,-1041.5 2646.42,-1060.5 2795.42,-1060.5 2795.42,-1041.5 2646.42,-1041.5"/>
+<text text-anchor="middle" x="2720.92" y="-1048.5" font-family="Helvetica,sans-Serif" font-size="10.00">include/tvm/arith/analyzer.h</text>
 </g>
 <!-- Node2 -->
 <g id="node2" class="node"><title>Node2</title>
 <g id="a_node2"><a xlink:href="int__set_8h.html" target="_top" xlink:title="Integer set. ">
-<polygon fill="white" stroke="black" points="2325,-985.5 2325,-1004.5 2429,-1004.5 2429,-985.5 2325,-985.5"/>
-<text text-anchor="middle" x="2377" y="-992.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/arith/int_set.h</text>
+<polygon fill="white" stroke="black" points="2474.92,-985.5 2474.92,-1004.5 2578.92,-1004.5 2578.92,-985.5 2474.92,-985.5"/>
+<text text-anchor="middle" x="2526.92" y="-992.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/arith/int_set.h</text>
 </a>
 </g>
 </g>
 <!-- Node1&#45;&gt;Node2 -->
 <g id="edge1" class="edge"><title>Node1&#45;&gt;Node2</title>
-<path fill="none" stroke="midnightblue" d="M2560.46,-1041.44C2522.75,-1032.1 2462.26,-1017.12 2421.14,-1006.93"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2421.93,-1003.52 2411.38,-1004.52 2420.25,-1010.32 2421.93,-1003.52"/>
+<path fill="none" stroke="midnightblue" d="M2690.18,-1041.44C2656.98,-1032.2 2603.93,-1017.44 2567.37,-1007.26"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2568.09,-1003.83 2557.52,-1004.52 2566.22,-1010.57 2568.09,-1003.83"/>
 </g>
 <!-- Node3 -->
 <g id="node3" class="node"><title>Node3</title>
 <g id="a_node3"><a xlink:href="ir_2expr_8h.html" target="_top" xlink:title="Base expr nodes in TVM. ">
-<polygon fill="white" stroke="black" points="1871.5,-761.5 1871.5,-780.5 1948.5,-780.5 1948.5,-761.5 1871.5,-761.5"/>
-<text text-anchor="middle" x="1910" y="-768.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/ir/expr.h</text>
+<polygon fill="white" stroke="black" points="1730.42,-761.5 1730.42,-780.5 1807.42,-780.5 1807.42,-761.5 1730.42,-761.5"/>
+<text text-anchor="middle" x="1768.92" y="-768.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/ir/expr.h</text>
 </a>
 </g>
 </g>
 <!-- Node1&#45;&gt;Node3 -->
-<g id="edge158" class="edge"><title>Node1&#45;&gt;Node3</title>
-<path fill="none" stroke="midnightblue" d="M2595,-1041.44C2595,-1022.94 2595,-977.812 2595,-940 2595,-940 2595,-940 2595,-882 2595,-817.536 2115.97,-783.963 1959.09,-774.697"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1958.98,-771.185 1948.79,-774.097 1958.57,-778.173 1958.98,-771.185"/>
+<g id="edge156" class="edge"><title>Node1&#45;&gt;Node3</title>
+<path fill="none" stroke="midnightblue" d="M2700.28,-1041.42C2667.43,-1026.23 2606.92,-991.293 2606.92,-940 2606.92,-940 2606.92,-940 2606.92,-882 2606.92,-801.883 1997.01,-778.405 1817.85,-773.237"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1817.76,-769.733 1807.66,-772.95 1817.56,-776.73 1817.76,-769.733"/>
 </g>
 <!-- Node24 -->
 <g id="node24" class="node"><title>Node24</title>
-<polygon fill="white" stroke="#bfbfbf" points="1786.5,-179.5 1786.5,-198.5 1843.5,-198.5 1843.5,-179.5 1786.5,-179.5"/>
-<text text-anchor="middle" x="1815" y="-186.5" font-family="Helvetica,sans-Serif" font-size="10.00">memory</text>
+<polygon fill="white" stroke="#bfbfbf" points="1837.42,-179.5 1837.42,-198.5 1894.42,-198.5 1894.42,-179.5 1837.42,-179.5"/>
+<text text-anchor="middle" x="1865.92" y="-186.5" font-family="Helvetica,sans-Serif" font-size="10.00">memory</text>
 </g>
 <!-- Node1&#45;&gt;Node24 -->
-<g id="edge163" class="edge"><title>Node1&#45;&gt;Node24</title>
-<path fill="none" stroke="midnightblue" d="M2645.67,-1041.46C2728.55,-1026.04 2884,-990.397 2884,-940 2884,-940 2884,-940 2884,-367 2884,-332.183 2890.58,-314.624 2865,-291 2789.28,-221.062 2034.63,-196.021 1853.79,-191"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1853.77,-187.498 1843.68,-190.724 1853.58,-194.496 1853.77,-187.498"/>
+<g id="edge161" class="edge"><title>Node1&#45;&gt;Node24</title>
+<path fill="none" stroke="midnightblue" d="M2726.21,-1041.23C2736.65,-1023.04 2758.92,-979.601 2758.92,-940 2758.92,-940 2758.92,-940 2758.92,-826 2758.92,-744.858 2682.92,-741.142 2682.92,-660 2682.92,-660 2682.92,-660 2682.92,-602 2682.92,-521.541 2644.92,-505.459 2644.92,-425 2644.92,-425 2644.92,-425 2644.92,-305.5 2644.92,-270.518 2639.09,-254.323 2609.92,-235 2550.93,-195.906 2049.4,-190.766 1904.8,-190.098"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1904.64,-186.597 1894.63,-190.056 1904.61,-193.597 1904.64,-186.597"/>
 </g>
 <!-- Node25 -->
 <g id="node25" class="node"><title>Node25</title>
-<polygon fill="white" stroke="#bfbfbf" points="2875.5,-179.5 2875.5,-198.5 2968.5,-198.5 2968.5,-179.5 2875.5,-179.5"/>
-<text text-anchor="middle" x="2922" y="-186.5" font-family="Helvetica,sans-Serif" font-size="10.00">unordered_map</text>
+<polygon fill="white" stroke="#bfbfbf" points="2750.42,-179.5 2750.42,-198.5 2843.42,-198.5 2843.42,-179.5 2750.42,-179.5"/>
+<text text-anchor="middle" x="2796.92" y="-186.5" font-family="Helvetica,sans-Serif" font-size="10.00">unordered_map</text>
 </g>
 <!-- Node1&#45;&gt;Node25 -->
-<g id="edge164" class="edge"><title>Node1&#45;&gt;Node25</title>
-<path fill="none" stroke="midnightblue" d="M2669.86,-1044.87C2786.4,-1034.81 2998,-1007.27 2998,-940 2998,-940 2998,-940 2998,-305.5 2998,-263.812 2963.02,-225.53 2940.52,-205.219"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2942.73,-202.498 2932.89,-198.575 2938.14,-207.78 2942.73,-202.498"/>
+<g id="edge162" class="edge"><title>Node1&#45;&gt;Node25</title>
+<path fill="none" stroke="midnightblue" d="M2795.56,-1044.81C2827.08,-1039.11 2862.14,-1027.67 2886.92,-1005 2909.65,-984.215 2910.92,-970.795 2910.92,-940 2910.92,-940 2910.92,-940 2910.92,-367 2910.92,-297.546 2848.11,-233.679 2815.74,-205.371"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2817.74,-202.476 2807.85,-198.656 2813.2,-207.805 2817.74,-202.476"/>
 </g>
 <!-- Node26 -->
 <g id="node26" class="node"><title>Node26</title>
-<polygon fill="white" stroke="#bfbfbf" points="1248.5,-179.5 1248.5,-198.5 1295.5,-198.5 1295.5,-179.5 1248.5,-179.5"/>
-<text text-anchor="middle" x="1272" y="-186.5" font-family="Helvetica,sans-Serif" font-size="10.00">vector</text>
+<polygon fill="white" stroke="#bfbfbf" points="468.424,-179.5 468.424,-198.5 515.424,-198.5 515.424,-179.5 468.424,-179.5"/>
+<text text-anchor="middle" x="491.924" y="-186.5" font-family="Helvetica,sans-Serif" font-size="10.00">vector</text>
 </g>
 <!-- Node1&#45;&gt;Node26 -->
-<g id="edge165" class="edge"><title>Node1&#45;&gt;Node26</title>
-<path fill="none" stroke="midnightblue" d="M2520.41,-1048.69C2106.85,-1041.19 114,-1001.83 114,-940 114,-940 114,-940 114,-882 114,-761.716 913.265,-344.493 1021,-291 1074.8,-264.288 1088.28,-257.428 1144,-235 1175.64,-222.266 1212.43,-209.542 1238.43,-200.89"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1239.78,-204.131 1248.17,-197.669 1237.58,-197.484 1239.78,-204.131"/>
+<g id="edge163" class="edge"><title>Node1&#45;&gt;Node26</title>
+<path fill="none" stroke="midnightblue" d="M2646.1,-1048.93C2212.43,-1042.52 40.9239,-1007.07 40.9239,-940 40.9239,-940 40.9239,-940 40.9239,-826 40.9239,-752.242 197.187,-559.996 298.924,-414 354.988,-333.545 358.181,-303.936 427.924,-235 439.973,-223.091 455.473,-212.104 468.201,-203.968"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="470.24,-206.822 476.891,-198.575 466.548,-200.875 470.24,-206.822"/>
 </g>
 <!-- Node31 -->
 <g id="node31" class="node"><title>Node31</title>
-<polygon fill="white" stroke="#bfbfbf" points="104,-235.5 104,-254.5 146,-254.5 146,-235.5 104,-235.5"/>
-<text text-anchor="middle" x="125" y="-242.5" font-family="Helvetica,sans-Serif" font-size="10.00">limits</text>
+<polygon fill="white" stroke="#bfbfbf" points="2558.92,-235.5 2558.92,-254.5 2600.92,-254.5 2600.92,-235.5 2558.92,-235.5"/>
+<text text-anchor="middle" x="2579.92" y="-242.5" font-family="Helvetica,sans-Serif" font-size="10.00">limits</text>
 </g>
 <!-- Node1&#45;&gt;Node31 -->
-<g id="edge162" class="edge"><title>Node1&#45;&gt;Node31</title>
-<path fill="none" stroke="midnightblue" d="M2520.4,-1050.27C2117.3,-1051.53 213.466,-1054.79 100,-1005 63.4418,-988.957 38,-979.923 38,-940 38,-940 38,-940 38,-658 38,-579.929 76,-564.571 76,-486.5 76,-486.5 76,-486.5 76,-423 76,-362.842 102.615,-295.216 116.615,-263.806"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="119.846,-265.155 120.822,-254.605 113.481,-262.244 119.846,-265.155"/>
+<g id="edge160" class="edge"><title>Node1&#45;&gt;Node31</title>
+<path fill="none" stroke="midnightblue" d="M2720.92,-1041.44C2720.92,-1022.94 2720.92,-977.812 2720.92,-940 2720.92,-940 2720.92,-940 2720.92,-882 2720.92,-800.858 2644.92,-797.142 2644.92,-716 2644.92,-716 2644.92,-716 2644.92,-602 2644.92,-521.541 2606.92,-505.459 2606.92,-425 2606.92,-425 2606.92,-425 2606.92,-367 2606.92,-329.838 2594.64,-287.764 2586.65,-264.308"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2589.87,-262.919 2583.23,-254.657 2583.27,-265.252 2589.87,-262.919"/>
 </g>
 <!-- Node46 -->
 <g id="node46" class="node"><title>Node46</title>
 <g id="a_node46"><a xlink:href="with_8h.html" target="_top" xlink:title="RAII wrapper function to enter and exit a context object similar to python&#39;s with syntax...">
-<polygon fill="white" stroke="black" points="2730,-123.5 2730,-142.5 2836,-142.5 2836,-123.5 2730,-123.5"/>
-<text text-anchor="middle" x="2783" y="-130.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/support/with.h</text>
+<polygon fill="white" stroke="black" points="1598.92,-123.5 1598.92,-142.5 1704.92,-142.5 1704.92,-123.5 1598.92,-123.5"/>
+<text text-anchor="middle" x="1651.92" y="-130.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/support/with.h</text>
 </a>
 </g>
 </g>
 <!-- Node1&#45;&gt;Node46 -->
-<g id="edge159" class="edge"><title>Node1&#45;&gt;Node46</title>
-<path fill="none" stroke="midnightblue" d="M2669.81,-1046.66C2795.63,-1038.99 3036,-1014.57 3036,-940 3036,-940 3036,-940 3036,-244 3036,-161.432 2921.27,-140.185 2846.33,-135.114"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2846.23,-131.602 2836.04,-134.501 2845.81,-138.589 2846.23,-131.602"/>
+<g id="edge157" class="edge"><title>Node1&#45;&gt;Node46</title>
+<path fill="none" stroke="midnightblue" d="M2755.8,-1041.4C2800.67,-1028 2872.92,-997.493 2872.92,-940 2872.92,-940 2872.92,-940 2872.92,-658 2872.92,-519.924 2758.92,-507.076 2758.92,-369 2758.92,-369 2758.92,-369 2758.92,-305.5 2758.92,-242.49 2695.86,-254.454 2635.92,-235 2463.2,-178.935 1904.32,-146.574 1715.17,-137.014"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1715.27,-133.515 1705.11,-136.51 1714.92,-140.506 1715.27,-133.515"/>
 </g>
 <!-- Node2&#45;&gt;Node3 -->
 <g id="edge2" class="edge"><title>Node2&#45;&gt;Node3</title>
-<path fill="none" stroke="midnightblue" d="M2324.88,-986.293C2286.31,-979.463 2233.05,-967.63 2189,-949 2083.23,-904.263 1972.25,-821.244 1928.91,-787.182"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1930.95,-784.333 1920.94,-780.868 1926.6,-789.821 1930.95,-784.333"/>
+<path fill="none" stroke="midnightblue" d="M2497.67,-985.433C2380.07,-950.991 1941.65,-822.588 1807.91,-783.418"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1808.67,-779.993 1798.09,-780.541 1806.7,-786.711 1808.67,-779.993"/>
 </g>
 <!-- Node2&#45;&gt;Node25 -->
-<g id="edge157" class="edge"><title>Node2&#45;&gt;Node25</title>
-<path fill="none" stroke="midnightblue" d="M2429.18,-990.287C2516.25,-982.4 2694.96,-958.925 2830,-893 2936.56,-840.978 2960,-778.583 2960,-660 2960,-660 2960,-660 2960,-305.5 2960,-269.254 2942.91,-229.671 2931.65,-207.533"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2934.73,-205.875 2926.97,-198.663 2928.54,-209.141 2934.73,-205.875"/>
+<g id="edge155" class="edge"><title>Node2&#45;&gt;Node25</title>
+<path fill="none" stroke="midnightblue" d="M2579.17,-989.432C2667.05,-978.574 2834.92,-942.806 2834.92,-828 2834.92,-828 2834.92,-828 2834.92,-658 2834.92,-579.929 2872.92,-564.571 2872.92,-486.5 2872.92,-486.5 2872.92,-486.5 2872.92,-423 2872.92,-338.634 2827.33,-245.179 2806.77,-207.326"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2809.82,-205.605 2801.91,-198.556 2803.7,-208.999 2809.82,-205.605"/>
 </g>
 <!-- Node43 -->
 <g id="node43" class="node"><title>Node43</title>
 <g id="a_node43"><a xlink:href="tir_2expr_8h.html" target="_top" xlink:title="TIR expressions. ">
-<polygon fill="white" stroke="black" points="2198,-929.5 2198,-948.5 2278,-948.5 2278,-929.5 2198,-929.5"/>
-<text text-anchor="middle" x="2238" y="-936.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/tir/expr.h</text>
+<polygon fill="white" stroke="black" points="1411.92,-929.5 1411.92,-948.5 1491.92,-948.5 1491.92,-929.5 1411.92,-929.5"/>
+<text text-anchor="middle" x="1451.92" y="-936.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/tir/expr.h</text>
 </a>
 </g>
 </g>
 <!-- Node2&#45;&gt;Node43 -->
-<g id="edge134" class="edge"><title>Node2&#45;&gt;Node43</title>
-<path fill="none" stroke="midnightblue" d="M2354.98,-985.444C2331.9,-976.478 2295.44,-962.314 2269.36,-952.183"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2270.51,-948.875 2259.92,-948.516 2267.98,-955.4 2270.51,-948.875"/>
+<g id="edge132" class="edge"><title>Node2&#45;&gt;Node43</title>
+<path fill="none" stroke="midnightblue" d="M2474.71,-991.377C2291.87,-982.193 1681.87,-951.551 1502.15,-942.523"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1502.1,-939.016 1491.93,-942.01 1501.75,-946.007 1502.1,-939.016"/>
 </g>
 <!-- Node4 -->
 <g id="node4" class="node"><title>Node4</title>
 <g id="a_node4"><a xlink:href="span_8h.html" target="_top" xlink:title="Span information for debugging purposes. ">
-<polygon fill="white" stroke="black" points="1650,-649.5 1650,-668.5 1730,-668.5 1730,-649.5 1650,-649.5"/>
-<text text-anchor="middle" x="1690" y="-656.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/ir/span.h</text>
+<polygon fill="white" stroke="black" points="2183.92,-649.5 2183.92,-668.5 2263.92,-668.5 2263.92,-649.5 2183.92,-649.5"/>
+<text text-anchor="middle" x="2223.92" y="-656.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/ir/span.h</text>
 </a>
 </g>
 </g>
 <!-- Node3&#45;&gt;Node4 -->
 <g id="edge3" class="edge"><title>Node3&#45;&gt;Node4</title>
-<path fill="none" stroke="midnightblue" d="M1892.74,-761.368C1854.34,-742.171 1761.68,-695.842 1716.15,-673.074"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1717.69,-669.933 1707.18,-668.591 1714.56,-676.194 1717.69,-669.933"/>
+<path fill="none" stroke="midnightblue" d="M1804.25,-761.46C1885.97,-741.702 2088.02,-692.857 2178.65,-670.945"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2179.72,-674.286 2188.62,-668.535 2178.08,-667.483 2179.72,-674.286"/>
 </g>
 <!-- Node5 -->
 <g id="node5" class="node"><title>Node5</title>
 <g id="a_node5"><a xlink:href="node_8h.html" target="_top" xlink:title="Definitions and helper macros for IR/AST nodes. ">
-<polygon fill="white" stroke="black" points="1732,-593.5 1732,-612.5 1830,-612.5 1830,-593.5 1732,-593.5"/>
-<text text-anchor="middle" x="1781" y="-600.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/node/node.h</text>
+<polygon fill="white" stroke="black" points="1108.92,-593.5 1108.92,-612.5 1206.92,-612.5 1206.92,-593.5 1108.92,-593.5"/>
+<text text-anchor="middle" x="1157.92" y="-600.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/node/node.h</text>
 </a>
 </g>
 </g>
 <!-- Node3&#45;&gt;Node5 -->
-<g id="edge128" class="edge"><title>Node3&#45;&gt;Node5</title>
-<path fill="none" stroke="midnightblue" d="M1903.06,-761.075C1882.41,-734.493 1821.05,-655.538 1794.03,-620.763"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1796.56,-618.317 1787.66,-612.568 1791.03,-622.612 1796.56,-618.317"/>
+<g id="edge126" class="edge"><title>Node3&#45;&gt;Node5</title>
+<path fill="none" stroke="midnightblue" d="M1731.53,-761.482C1667.89,-746.428 1535.02,-712.551 1427.92,-669 1410.24,-661.811 1407.93,-655.329 1389.92,-649 1332.95,-628.972 1264.79,-617.098 1217.15,-610.623"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1217.37,-607.122 1207,-609.283 1216.46,-614.062 1217.37,-607.122"/>
 </g>
 <!-- Node6 -->
 <g id="node6" class="node"><title>Node6</title>
 <g id="a_node6"><a xlink:href="node_2container_8h.html" target="_top" xlink:title="Array/Map container in the DSL graph. ">
-<polygon fill="white" stroke="black" points="1832.5,-414.5 1832.5,-433.5 1951.5,-433.5 1951.5,-414.5 1832.5,-414.5"/>
-<text text-anchor="middle" x="1892" y="-421.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/node/container.h</text>
+<polygon fill="white" stroke="black" points="1454.42,-414.5 1454.42,-433.5 1573.42,-433.5 1573.42,-414.5 1454.42,-414.5"/>
+<text text-anchor="middle" x="1513.92" y="-421.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/node/container.h</text>
 </a>
 </g>
 </g>
 <!-- Node3&#45;&gt;Node6 -->
-<g id="edge127" class="edge"><title>Node3&#45;&gt;Node6</title>
-<path fill="none" stroke="midnightblue" d="M1948.82,-766.639C2043.02,-757.417 2276,-727.759 2276,-660 2276,-660 2276,-660 2276,-546 2276,-480.845 2072.35,-446.416 1961.66,-432.516"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1961.93,-429.022 1951.58,-431.274 1961.08,-435.97 1961.93,-429.022"/>
+<g id="edge125" class="edge"><title>Node3&#45;&gt;Node6</title>
+<path fill="none" stroke="midnightblue" d="M1744.14,-761.409C1707.18,-746.835 1641.92,-713.526 1641.92,-660 1641.92,-660 1641.92,-660 1641.92,-602 1641.92,-582.657 1560.62,-481.717 1527.61,-441.531"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1530.23,-439.213 1521.17,-433.722 1524.83,-443.665 1530.23,-439.213"/>
 </g>
 <!-- Node10 -->
 <g id="node10" class="node"><title>Node10</title>
 <g id="a_node10"><a xlink:href="object_8h.html" target="_top" xlink:title="A managed object in the TVM runtime. ">
-<polygon fill="white" stroke="black" points="1751.5,-123.5 1751.5,-142.5 1868.5,-142.5 1868.5,-123.5 1751.5,-123.5"/>
-<text text-anchor="middle" x="1810" y="-130.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/runtime/object.h</text>
+<polygon fill="white" stroke="black" points="1425.42,-123.5 1425.42,-142.5 1542.42,-142.5 1542.42,-123.5 1425.42,-123.5"/>
+<text text-anchor="middle" x="1483.92" y="-130.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/runtime/object.h</text>
 </a>
 </g>
 </g>
 <!-- Node3&#45;&gt;Node10 -->
-<g id="edge129" class="edge"><title>Node3&#45;&gt;Node10</title>
-<path fill="none" stroke="midnightblue" d="M1906.59,-761.408C1888.45,-715.588 1804,-501.509 1804,-486.5 1804,-486.5 1804,-486.5 1804,-367 1804,-313.187 1831.94,-305.692 1850,-255 1858.73,-230.498 1863.18,-224.729 1867,-199 1868.31,-190.207 1870.88,-186.999 1867,-179 1860.6,-165.8 1848.25,-155.303 1836.67,-147.779"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1838.45,-144.768 1828.08,-142.61 1834.84,-150.765 1838.45,-144.768"/>
+<g id="edge127" class="edge"><title>Node3&#45;&gt;Node10</title>
+<path fill="none" stroke="midnightblue" d="M1807.53,-768.207C1914.76,-762.356 2212.36,-739.911 2272.92,-669 2314.65,-620.144 2343.91,-404.751 2278.92,-291 2223.31,-193.664 2161.79,-205.732 2052.92,-179 1852.48,-129.781 1795.64,-159.731 1589.92,-143 1577.92,-142.023 1565.14,-140.96 1552.78,-139.918"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1552.77,-136.405 1542.51,-139.049 1552.18,-143.38 1552.77,-136.405"/>
 </g>
 <!-- Node15 -->
 <g id="node15" class="node"><title>Node15</title>
-<polygon fill="white" stroke="#bfbfbf" points="361,-62 361,-81 405,-81 405,-62 361,-62"/>
-<text text-anchor="middle" x="383" y="-69" font-family="Helvetica,sans-Serif" font-size="10.00">string</text>
+<polygon fill="white" stroke="#bfbfbf" points="2223.92,-62 2223.92,-81 2267.92,-81 2267.92,-62 2223.92,-62"/>
+<text text-anchor="middle" x="2245.92" y="-69" font-family="Helvetica,sans-Serif" font-size="10.00">string</text>
 </g>
 <!-- Node3&#45;&gt;Node15 -->
-<g id="edge132" class="edge"><title>Node3&#45;&gt;Node15</title>
-<path fill="none" stroke="midnightblue" d="M1871.36,-769.561C1679.65,-767.007 829.992,-751.165 581,-669 440.997,-622.8 304,-633.929 304,-486.5 304,-486.5 304,-486.5 304,-423 304,-292.827 356.498,-141.967 375.902,-90.6515"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="379.287,-91.601 379.605,-81.0109 372.752,-89.0909 379.287,-91.601"/>
+<g id="edge130" class="edge"><title>Node3&#45;&gt;Node15</title>
+<path fill="none" stroke="midnightblue" d="M1807.59,-768.247C1957.62,-760.939 2492.92,-729.965 2492.92,-660 2492.92,-660 2492.92,-660 2492.92,-188 2492.92,-94.3111 2345.3,-76.4515 2278.3,-73.1702"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2278.41,-69.6719 2268.28,-72.7594 2278.13,-76.666 2278.41,-69.6719"/>
 </g>
 <!-- Node16 -->
 <g id="node16" class="node"><title>Node16</title>
-<polygon fill="white" stroke="#bfbfbf" points="1933,-62 1933,-81 2001,-81 2001,-62 1933,-62"/>
-<text text-anchor="middle" x="1967" y="-69" font-family="Helvetica,sans-Serif" font-size="10.00">type_traits</text>
+<polygon fill="white" stroke="#bfbfbf" points="348.924,-62 348.924,-81 416.924,-81 416.924,-62 348.924,-62"/>
+<text text-anchor="middle" x="382.924" y="-69" font-family="Helvetica,sans-Serif" font-size="10.00">type_traits</text>
 </g>
 <!-- Node3&#45;&gt;Node16 -->
-<g id="edge133" class="edge"><title>Node3&#45;&gt;Node16</title>
-<path fill="none" stroke="midnightblue" d="M1948.59,-766.951C2100.9,-753.878 2652,-696.328 2652,-548 2652,-548 2652,-548 2652,-367 2652,-266.906 2607.56,-239.743 2528,-179 2446.35,-116.66 2129.24,-85.4575 2011.41,-75.8338"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2011.55,-72.3335 2001.3,-75.0211 2010.98,-79.311 2011.55,-72.3335"/>
+<g id="edge131" class="edge"><title>Node3&#45;&gt;Node16</title>
+<path fill="none" stroke="midnightblue" d="M1730.22,-766.904C1536.35,-750.939 670.672,-674.115 430.924,-557 297.189,-491.672 324.404,-383.128 338.924,-235 343.857,-184.676 342.664,-170.88 358.924,-123 362.77,-111.676 368.472,-99.5549 373.354,-90.027"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="376.452,-91.6551 378.032,-81.1787 370.264,-88.3835 376.452,-91.6551"/>
 </g>
 <!-- Node3&#45;&gt;Node31 -->
-<g id="edge131" class="edge"><title>Node3&#45;&gt;Node31</title>
-<path fill="none" stroke="midnightblue" d="M1871.49,-768.83C1653.31,-762.006 577.43,-725.678 442,-669 338.018,-625.483 317.286,-592.889 252,-501 193.585,-418.782 219.898,-376.247 166,-291 159.237,-280.304 149.832,-269.769 141.735,-261.585"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="144.104,-259.007 134.502,-254.529 139.216,-264.018 144.104,-259.007"/>
+<g id="edge129" class="edge"><title>Node3&#45;&gt;Node31</title>
+<path fill="none" stroke="midnightblue" d="M1807.67,-770.078C1951.67,-770.041 2450.58,-766.866 2506.92,-725 2531.64,-706.634 2530.92,-690.795 2530.92,-660 2530.92,-660 2530.92,-660 2530.92,-367 2530.92,-327.536 2553.59,-285.778 2568.09,-263.104"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2571.11,-264.886 2573.72,-254.617 2565.28,-261.019 2571.11,-264.886"/>
 </g>
 <!-- Node33 -->
 <g id="node33" class="node"><title>Node33</title>
-<polygon fill="white" stroke="#bfbfbf" points="2794,-297 2794,-316 2856,-316 2856,-297 2794,-297"/>
-<text text-anchor="middle" x="2825" y="-304" font-family="Helvetica,sans-Serif" font-size="10.00">algorithm</text>
+<polygon fill="white" stroke="#bfbfbf" points="1593.92,-297 1593.92,-316 1655.92,-316 1655.92,-297 1593.92,-297"/>
+<text text-anchor="middle" x="1624.92" y="-304" font-family="Helvetica,sans-Serif" font-size="10.00">algorithm</text>
 </g>
 <!-- Node3&#45;&gt;Node33 -->
-<g id="edge130" class="edge"><title>Node3&#45;&gt;Node33</title>
-<path fill="none" stroke="midnightblue" d="M1948.85,-769.11C2082.1,-765.816 2515.49,-753.032 2571,-725 2736.72,-641.309 2804.71,-395.658 2820.97,-326.067"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2824.42,-326.665 2823.21,-316.139 2817.59,-325.123 2824.42,-326.665"/>
+<g id="edge128" class="edge"><title>Node3&#45;&gt;Node33</title>
+<path fill="none" stroke="midnightblue" d="M1761.35,-761.258C1754.17,-752.577 1743.53,-738.63 1736.92,-725 1723.8,-697.915 1717.92,-690.098 1717.92,-660 1717.92,-660 1717.92,-660 1717.92,-423 1717.92,-392.205 1712.47,-382.583 1693.92,-358 1682.48,-342.835 1665.24,-330.213 1650.85,-321.38"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1652.3,-318.17 1641.9,-316.134 1648.76,-324.21 1652.3,-318.17"/>
 </g>
 <!-- Node42 -->
 <g id="node42" class="node"><title>Node42</title>
 <g id="a_node42"><a xlink:href="ir_2type_8h.html" target="_top" xlink:title="IR/AST nodes for the unified type system in TVM. ">
-<polygon fill="white" stroke="black" points="1651.5,-705.5 1651.5,-724.5 1728.5,-724.5 1728.5,-705.5 1651.5,-705.5"/>
-<text text-anchor="middle" x="1690" y="-712.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/ir/type.h</text>
+<polygon fill="white" stroke="black" points="1746.42,-705.5 1746.42,-724.5 1823.42,-724.5 1823.42,-705.5 1746.42,-705.5"/>
+<text text-anchor="middle" x="1784.92" y="-712.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/ir/type.h</text>
 </a>
 </g>
 </g>
 <!-- Node3&#45;&gt;Node42 -->
-<g id="edge120" class="edge"><title>Node3&#45;&gt;Node42</title>
-<path fill="none" stroke="midnightblue" d="M1875.14,-761.444C1837.08,-752.102 1776.04,-737.119 1734.54,-726.933"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1735.24,-723.501 1724.7,-724.516 1733.57,-730.299 1735.24,-723.501"/>
+<g id="edge118" class="edge"><title>Node3&#45;&gt;Node42</title>
+<path fill="none" stroke="midnightblue" d="M1771.57,-761.083C1773.71,-753.849 1776.8,-743.409 1779.47,-734.397"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1782.85,-735.333 1782.33,-724.751 1776.13,-733.345 1782.85,-735.333"/>
 </g>
 <!-- Node4&#45;&gt;Node5 -->
 <g id="edge4" class="edge"><title>Node4&#45;&gt;Node5</title>
-<path fill="none" stroke="midnightblue" d="M1704.62,-649.324C1718.95,-640.822 1741.01,-627.729 1757.75,-617.797"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1759.6,-620.768 1766.42,-612.655 1756.03,-614.748 1759.6,-620.768"/>
+<path fill="none" stroke="midnightblue" d="M2183.76,-655.965C2021.16,-647.729 1411.23,-616.831 1217.43,-607.014"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1217.32,-603.504 1207.16,-606.494 1216.97,-610.495 1217.32,-603.504"/>
 </g>
 <!-- Node4&#45;&gt;Node10 -->
-<g id="edge118" class="edge"><title>Node4&#45;&gt;Node10</title>
-<path fill="none" stroke="midnightblue" d="M1692.49,-649.196C1700.95,-618.541 1728,-513.529 1728,-425 1728,-425 1728,-425 1728,-367 1728,-328.62 1749.54,-324.083 1769,-291 1778.63,-274.634 1786.68,-273.225 1792,-255 1801.66,-221.952 1766.29,-211.721 1777,-179 1780.49,-168.34 1787.57,-158.117 1794.29,-150.117"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1796.9,-152.439 1800.98,-142.66 1791.7,-147.763 1796.9,-152.439"/>
+<g id="edge116" class="edge"><title>Node4&#45;&gt;Node10</title>
+<path fill="none" stroke="midnightblue" d="M2229.62,-649.288C2240.89,-631.205 2264.92,-587.946 2264.92,-548 2264.92,-548 2264.92,-548 2264.92,-305.5 2264.92,-236.627 2188.5,-265.851 2126.92,-235 2076.45,-209.714 2067.32,-194.097 2012.92,-179 2012.58,-178.905 1695.54,-151.972 1552.5,-139.824"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1552.77,-136.334 1542.51,-138.975 1552.18,-143.309 1552.77,-136.334"/>
 </g>
 <!-- Node4&#45;&gt;Node15 -->
-<g id="edge119" class="edge"><title>Node4&#45;&gt;Node15</title>
-<path fill="none" stroke="midnightblue" d="M1649.96,-655.962C1486.5,-647.339 869.613,-611.76 683,-557 554.975,-519.432 508.921,-493.427 442,-378 385.947,-281.318 381.959,-141.381 382.492,-91.2442"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="385.992,-91.2882 382.676,-81.2255 378.993,-91.1594 385.992,-91.2882"/>
+<g id="edge117" class="edge"><title>Node4&#45;&gt;Node15</title>
+<path fill="none" stroke="midnightblue" d="M2234.28,-649.239C2243.79,-640.76 2257.75,-627.143 2266.92,-613 2362.8,-465.159 2416.92,-422.209 2416.92,-246 2416.92,-246 2416.92,-246 2416.92,-188 2416.92,-121.61 2327.31,-90.8136 2277.83,-78.8691"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2278.46,-75.4219 2267.93,-76.6004 2276.9,-82.2451 2278.46,-75.4219"/>
 </g>
 <!-- Node5&#45;&gt;Node6 -->
 <g id="edge5" class="edge"><title>Node5&#45;&gt;Node6</title>
-<path fill="none" stroke="midnightblue" d="M1786.35,-593.463C1803.53,-566.078 1857.48,-480.052 1880.93,-442.653"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1884.09,-444.2 1886.44,-433.868 1878.16,-440.481 1884.09,-444.2"/>
+<path fill="none" stroke="midnightblue" d="M1207.15,-597.878C1240.03,-592.851 1282.48,-581.705 1311.92,-557 1344.97,-529.279 1321.76,-497.581 1354.92,-470 1379.89,-449.233 1413.87,-438.033 1444.01,-432"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1444.86,-435.401 1454.06,-430.146 1443.59,-428.517 1444.86,-435.401"/>
 </g>
 <!-- Node7 -->
 <g id="node7" class="node"><title>Node7</title>
 <g id="a_node7"><a xlink:href="runtime_2container_8h.html" target="_top" xlink:title="Common POD(plain old data) container types. ">
-<polygon fill="white" stroke="black" points="1998,-358.5 1998,-377.5 2130,-377.5 2130,-358.5 1998,-358.5"/>
-<text text-anchor="middle" x="2064" y="-365.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/runtime/container.h</text>
+<polygon fill="white" stroke="black" points="1745.92,-358.5 1745.92,-377.5 1877.92,-377.5 1877.92,-358.5 1745.92,-358.5"/>
+<text text-anchor="middle" x="1811.92" y="-365.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/runtime/container.h</text>
 </a>
 </g>
 </g>
 <!-- Node5&#45;&gt;Node7 -->
-<g id="edge111" class="edge"><title>Node5&#45;&gt;Node7</title>
-<path fill="none" stroke="midnightblue" d="M1791.41,-593.431C1833.53,-558.751 1991.69,-428.538 2045.75,-384.027"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2048,-386.705 2053.5,-377.647 2043.55,-381.301 2048,-386.705"/>
+<g id="edge109" class="edge"><title>Node5&#45;&gt;Node7</title>
+<path fill="none" stroke="midnightblue" d="M1197.12,-593.453C1233.05,-585.27 1287.56,-571.982 1333.92,-557 1507.76,-500.832 1709.48,-414.165 1783.14,-381.783"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1784.92,-384.82 1792.66,-377.583 1782.1,-378.415 1784.92,-384.82"/>
 </g>
 <!-- Node9 -->
 <g id="node9" class="node"><title>Node9</title>
 <g id="a_node9"><a xlink:href="runtime_2memory_8h.html" target="_top" xlink:title="Runtime memory management. ">
-<polygon fill="white" stroke="black" points="2128.5,-179.5 2128.5,-198.5 2255.5,-198.5 2255.5,-179.5 2128.5,-179.5"/>
-<text text-anchor="middle" x="2192" y="-186.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/runtime/memory.h</text>
+<polygon fill="white" stroke="black" points="1322.42,-179.5 1322.42,-198.5 1449.42,-198.5 1449.42,-179.5 1322.42,-179.5"/>
+<text text-anchor="middle" x="1385.92" y="-186.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/runtime/memory.h</text>
 </a>
 </g>
 </g>
 <!-- Node5&#45;&gt;Node9 -->
-<g id="edge112" class="edge"><title>Node5&#45;&gt;Node9</title>
-<path fill="none" stroke="midnightblue" d="M1830.25,-593.984C1974.51,-569.128 2386,-487.816 2386,-369 2386,-369 2386,-369 2386,-305.5 2386,-243.932 2312.97,-214.435 2256.52,-200.831"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2257.08,-197.369 2246.56,-198.549 2255.52,-204.193 2257.08,-197.369"/>
+<g id="edge110" class="edge"><title>Node5&#45;&gt;Node9</title>
+<path fill="none" stroke="midnightblue" d="M1150.77,-593.364C1144.15,-584.763 1134.77,-570.873 1130.92,-557 1128.55,-548.434 1130.68,-545.885 1130.92,-537 1131.37,-520.981 1132.48,-517.019 1132.92,-501 1133.31,-487.228 1133.15,-483.776 1132.92,-470 1131.22,-365.523 1058.63,-314.939 1125.92,-235 1149.32,-207.207 1243.53,-196.58 1311.82,-192.516"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1312.33,-195.993 1322.12,-191.94 1311.94,-189.004 1312.33,-195.993"/>
 </g>
 <!-- Node5&#45;&gt;Node10 -->
-<g id="edge113" class="edge"><title>Node5&#45;&gt;Node10</title>
-<path fill="none" stroke="midnightblue" d="M1779.1,-593.356C1775.06,-574.211 1766,-526.739 1766,-486.5 1766,-486.5 1766,-486.5 1766,-367 1766,-331.578 1770.83,-320.785 1790,-291 1802.95,-270.889 1816.37,-274.656 1830,-255 1845.23,-233.023 1847.02,-225.274 1852,-199 1853.65,-190.266 1855.26,-187.27 1852,-179 1847.49,-167.548 1838.53,-157.291 1830.05,-149.472"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1832.09,-146.611 1822.21,-142.764 1827.53,-151.927 1832.09,-146.611"/>
+<g id="edge111" class="edge"><title>Node5&#45;&gt;Node10</title>
+<path fill="none" stroke="midnightblue" d="M1206.93,-594.682C1231.7,-588.561 1260.36,-577.376 1278.92,-557 1300.78,-533.014 1297.92,-518.951 1297.92,-486.5 1297.92,-486.5 1297.92,-486.5 1297.92,-367 1297.92,-263.889 1402.22,-285.767 1457.92,-199 1467.25,-184.474 1474.23,-166.058 1478.6,-152.54"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1481.96,-153.503 1481.54,-142.917 1475.27,-151.46 1481.96,-153.503"/>
 </g>
 <!-- Node11 -->
 <g id="node11" class="node"><title>Node11</title>
 <g id="a_node11"><a xlink:href="c__runtime__api_8h.html" target="_top" xlink:title="tvm/runtime/c_runtime\l_api.h">
-<polygon fill="white" stroke="black" points="834.5,-56.5 834.5,-86.5 961.5,-86.5 961.5,-56.5 834.5,-56.5"/>
-<text text-anchor="start" x="842.5" y="-74.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/runtime/c_runtime</text>
-<text text-anchor="middle" x="898" y="-63.5" font-family="Helvetica,sans-Serif" font-size="10.00">_api.h</text>
+<polygon fill="white" stroke="black" points="167.424,-56.5 167.424,-86.5 294.424,-86.5 294.424,-56.5 167.424,-56.5"/>
+<text text-anchor="start" x="175.424" y="-74.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/runtime/c_runtime</text>
+<text text-anchor="middle" x="230.924" y="-63.5" font-family="Helvetica,sans-Serif" font-size="10.00">_api.h</text>
 </a>
 </g>
 </g>
 <!-- Node5&#45;&gt;Node11 -->
-<g id="edge110" class="edge"><title>Node5&#45;&gt;Node11</title>
-<path fill="none" stroke="midnightblue" d="M1731.82,-599.365C1698.1,-597.562 1652.34,-595.124 1612,-593 1304.45,-576.811 1224.74,-601.471 920,-557 815.467,-541.746 768.396,-570.686 689,-501 579.884,-405.229 565.519,-307.018 634,-179 649.193,-150.599 652.632,-139.983 680,-123 723.334,-96.1097 779.55,-83.5184 824.041,-77.6322"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="824.717,-81.0753 834.213,-76.3752 823.859,-74.1282 824.717,-81.0753"/>
+<g id="edge108" class="edge"><title>Node5&#45;&gt;Node11</title>
+<path fill="none" stroke="midnightblue" d="M1108.77,-601.866C966.804,-601.019 558.244,-595.235 430.924,-557 216.218,-492.522 115.79,-460.812 26.9239,-255 13.5342,-223.989 9.41785,-207.887 26.9239,-179 55.4375,-131.949 112.303,-104.701 158.768,-89.5654"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="160.012,-92.8433 168.511,-86.5167 157.922,-86.1627 160.012,-92.8433"/>
 </g>
 <!-- Node5&#45;&gt;Node15 -->
-<g id="edge114" class="edge"><title>Node5&#45;&gt;Node15</title>
-<path fill="none" stroke="midnightblue" d="M1731.83,-599.227C1698.11,-597.363 1652.35,-594.908 1612,-593 1515.25,-588.424 833.343,-586.236 741,-557 560.333,-499.801 521.773,-425.954 440,-255 416.779,-206.455 407.256,-195.154 394,-143 389.618,-125.758 386.69,-105.675 384.965,-91.3643"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="388.428,-90.8364 383.834,-81.2894 381.471,-91.6174 388.428,-90.8364"/>
+<g id="edge112" class="edge"><title>Node5&#45;&gt;Node15</title>
+<path fill="none" stroke="midnightblue" d="M1207.3,-598.531C1435.73,-581.162 2378.92,-494.406 2378.92,-246 2378.92,-246 2378.92,-246 2378.92,-188 2378.92,-134.575 2316.99,-100.279 2277.65,-83.8839"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2278.77,-80.5629 2268.19,-80.1005 2276.17,-87.0627 2278.77,-80.5629"/>
 </g>
 <!-- Node5&#45;&gt;Node16 -->
-<g id="edge115" class="edge"><title>Node5&#45;&gt;Node16</title>
-<path fill="none" stroke="midnightblue" d="M1830.24,-595.946C1886.78,-588.578 1982.16,-574.874 2063,-557 2304.35,-503.639 2499.37,-500.269 2530,-255 2544.8,-136.514 2410.1,-158.214 2296,-123 2197.14,-92.4896 2075.65,-79.9551 2011.53,-75.1759"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2011.59,-71.6715 2001.37,-74.448 2011.09,-78.6536 2011.59,-71.6715"/>
+<g id="edge113" class="edge"><title>Node5&#45;&gt;Node16</title>
+<path fill="none" stroke="midnightblue" d="M1108.54,-600.709C978.926,-596.972 631.342,-584.604 519.924,-557 460.046,-542.165 429.624,-550.584 392.924,-501 297.09,-371.522 356.182,-154.617 376.713,-90.7285"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="380.039,-91.8164 379.851,-81.2233 373.392,-89.6216 380.039,-91.8164"/>
 </g>
 <!-- Node17 -->
 <g id="node17" class="node"><title>Node17</title>
-<polygon fill="white" stroke="#bfbfbf" points="2548,-62 2548,-81 2590,-81 2590,-62 2548,-62"/>
-<text text-anchor="middle" x="2569" y="-69" font-family="Helvetica,sans-Serif" font-size="10.00">utility</text>
+<polygon fill="white" stroke="#bfbfbf" points="1196.92,-62 1196.92,-81 1238.92,-81 1238.92,-62 1196.92,-62"/>
+<text text-anchor="middle" x="1217.92" y="-69" font-family="Helvetica,sans-Serif" font-size="10.00">utility</text>
 </g>
 <!-- Node5&#45;&gt;Node17 -->
-<g id="edge116" class="edge"><title>Node5&#45;&gt;Node17</title>
-<path fill="none" stroke="midnightblue" d="M1830.32,-601.148C1934.06,-597.747 2180.71,-581.134 2368,-501 2529.71,-431.811 2604.66,-414.869 2678,-255 2708.04,-189.511 2628.87,-117.766 2589.59,-87.3966"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2591.51,-84.4664 2581.42,-81.2366 2587.3,-90.054 2591.51,-84.4664"/>
+<g id="edge114" class="edge"><title>Node5&#45;&gt;Node17</title>
+<path fill="none" stroke="midnightblue" d="M1108.52,-600.841C990.182,-597.614 693.299,-586.721 598.924,-557 491.221,-523.082 376.835,-526.269 388.924,-414 404.394,-270.331 397.391,-170.319 533.924,-123 657.073,-80.3198 1069.79,-73.7058 1186.77,-72.6853"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1186.88,-76.1847 1196.85,-72.6051 1186.82,-69.1849 1186.88,-76.1847"/>
 </g>
 <!-- Node5&#45;&gt;Node26 -->
-<g id="edge117" class="edge"><title>Node5&#45;&gt;Node26</title>
-<path fill="none" stroke="midnightblue" d="M1731.98,-599.075C1590.24,-590.556 1186.24,-565.808 1127,-557 1017.37,-540.701 953.011,-587.723 884,-501 778.568,-368.507 304.388,-759.449 1165,-235 1188.43,-220.722 1217.2,-208.903 1239.01,-200.966"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1240.24,-204.243 1248.49,-197.598 1237.9,-197.646 1240.24,-204.243"/>
+<g id="edge115" class="edge"><title>Node5&#45;&gt;Node26</title>
+<path fill="none" stroke="midnightblue" d="M1108.82,-600.241C975.853,-594.563 612.683,-572.7 520.924,-501 460.591,-453.856 434.304,-257.829 433.924,-255 432.74,-246.19 430.014,-242.983 433.924,-235 440.446,-221.684 453.02,-211.177 464.797,-203.676"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="466.695,-206.621 473.534,-198.529 463.142,-200.589 466.695,-206.621"/>
 </g>
 <!-- Node36 -->
 <g id="node36" class="node"><title>Node36</title>
 <g id="a_node36"><a xlink:href="reflection_8h.html" target="_top" xlink:title="Reflection and serialization of compiler IR/AST nodes. ">
-<polygon fill="white" stroke="black" points="1230.5,-537.5 1230.5,-556.5 1349.5,-556.5 1349.5,-537.5 1230.5,-537.5"/>
-<text text-anchor="middle" x="1290" y="-544.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/node/reflection.h</text>
+<polygon fill="white" stroke="black" points="731.424,-537.5 731.424,-556.5 850.424,-556.5 850.424,-537.5 731.424,-537.5"/>
+<text text-anchor="middle" x="790.924" y="-544.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/node/reflection.h</text>
 </a>
 </g>
 </g>
 <!-- Node5&#45;&gt;Node36 -->
-<g id="edge79" class="edge"><title>Node5&#45;&gt;Node36</title>
-<path fill="none" stroke="midnightblue" d="M1731.67,-596.575C1643.86,-586.917 1460.22,-566.721 1359.6,-555.654"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1359.86,-552.162 1349.53,-554.547 1359.09,-559.12 1359.86,-552.162"/>
+<g id="edge77" class="edge"><title>Node5&#45;&gt;Node36</title>
+<path fill="none" stroke="midnightblue" d="M1108.81,-594.773C1044.45,-585.304 931.271,-568.651 858.765,-557.982"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="859.216,-554.511 848.813,-556.518 858.197,-561.436 859.216,-554.511"/>
 </g>
 <!-- Node37 -->
 <g id="node37" class="node"><title>Node37</title>
 <g id="a_node37"><a xlink:href="structural__equal_8h.html" target="_top" xlink:title="Structural equality comparison. ">
-<polygon fill="white" stroke="black" points="1588.5,-470.5 1588.5,-500.5 1699.5,-500.5 1699.5,-470.5 1588.5,-470.5"/>
-<text text-anchor="start" x="1596.5" y="-488.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/node/structural</text>
-<text text-anchor="middle" x="1644" y="-477.5" font-family="Helvetica,sans-Serif" font-size="10.00">_equal.h</text>
+<polygon fill="white" stroke="black" points="1013.42,-470.5 1013.42,-500.5 1124.42,-500.5 1124.42,-470.5 1013.42,-470.5"/>
+<text text-anchor="start" x="1021.42" y="-488.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/node/structural</text>
+<text text-anchor="middle" x="1068.92" y="-477.5" font-family="Helvetica,sans-Serif" font-size="10.00">_equal.h</text>
 </a>
 </g>
 </g>
 <!-- Node5&#45;&gt;Node37 -->
-<g id="edge108" class="edge"><title>Node5&#45;&gt;Node37</title>
-<path fill="none" stroke="midnightblue" d="M1770.7,-593.319C1749.2,-575.192 1699.02,-532.885 1668.66,-507.288"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1670.56,-504.315 1660.66,-500.545 1666.05,-509.667 1670.56,-504.315"/>
+<g id="edge106" class="edge"><title>Node5&#45;&gt;Node37</title>
+<path fill="none" stroke="midnightblue" d="M1142,-593.346C1128.67,-585.41 1109.96,-572.547 1097.92,-557 1087.23,-543.189 1079.86,-524.882 1075.22,-510.385"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1078.52,-509.211 1072.32,-500.621 1071.81,-511.204 1078.52,-509.211"/>
 </g>
 <!-- Node39 -->
 <g id="node39" class="node"><title>Node39</title>
 <g id="a_node39"><a xlink:href="structural__hash_8h.html" target="_top" xlink:title="tvm/node/structural\l_hash.h">
-<polygon fill="white" stroke="black" points="893.5,-470.5 893.5,-500.5 1004.5,-500.5 1004.5,-470.5 893.5,-470.5"/>
-<text text-anchor="start" x="901.5" y="-488.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/node/structural</text>
-<text text-anchor="middle" x="949" y="-477.5" font-family="Helvetica,sans-Serif" font-size="10.00">_hash.h</text>
+<polygon fill="white" stroke="black" points="884.424,-470.5 884.424,-500.5 995.424,-500.5 995.424,-470.5 884.424,-470.5"/>
+<text text-anchor="start" x="892.424" y="-488.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/node/structural</text>
+<text text-anchor="middle" x="939.924" y="-477.5" font-family="Helvetica,sans-Serif" font-size="10.00">_hash.h</text>
 </a>
 </g>
 </g>
 <!-- Node5&#45;&gt;Node39 -->
-<g id="edge109" class="edge"><title>Node5&#45;&gt;Node39</title>
-<path fill="none" stroke="midnightblue" d="M1731.92,-599.153C1614.28,-592.152 1318.74,-573.466 1221,-557 1144.24,-544.069 1057.49,-519.867 1003.03,-503.469"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1003.83,-500.055 993.249,-500.503 1001.8,-506.754 1003.83,-500.055"/>
+<g id="edge107" class="edge"><title>Node5&#45;&gt;Node39</title>
+<path fill="none" stroke="midnightblue" d="M1141.54,-593.319C1106.45,-574.727 1023.35,-530.699 975.507,-505.353"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="976.909,-502.134 966.434,-500.545 973.631,-508.32 976.909,-502.134"/>
 </g>
 <!-- Node40 -->
 <g id="node40" class="node"><title>Node40</title>
 <g id="a_node40"><a xlink:href="repr__printer_8h.html" target="_top" xlink:title="Printer class to print repr string of each AST/IR nodes. ">
-<polygon fill="white" stroke="black" points="2456,-537.5 2456,-556.5 2586,-556.5 2586,-537.5 2456,-537.5"/>
-<text text-anchor="middle" x="2521" y="-544.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/node/repr_printer.h</text>
+<polygon fill="white" stroke="black" points="1139.92,-537.5 1139.92,-556.5 1269.92,-556.5 1269.92,-537.5 1139.92,-537.5"/>
+<text text-anchor="middle" x="1204.92" y="-544.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/node/repr_printer.h</text>
 </a>
 </g>
 </g>
 <!-- Node5&#45;&gt;Node40 -->
-<g id="edge105" class="edge"><title>Node5&#45;&gt;Node40</title>
-<path fill="none" stroke="midnightblue" d="M1830.4,-599.407C1940.39,-593.464 2213.76,-577.759 2442,-557 2443.28,-556.884 2444.56,-556.764 2445.86,-556.642"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2446.32,-560.114 2455.93,-555.651 2445.63,-553.148 2446.32,-560.114"/>
+<g id="edge103" class="edge"><title>Node5&#45;&gt;Node40</title>
+<path fill="none" stroke="midnightblue" d="M1165.69,-593.083C1172.46,-585.298 1182.47,-573.801 1190.68,-564.367"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1193.38,-566.592 1197.31,-556.751 1188.1,-561.996 1193.38,-566.592"/>
 </g>
 <!-- Node6&#45;&gt;Node7 -->
 <g id="edge6" class="edge"><title>Node6&#45;&gt;Node7</title>
-<path fill="none" stroke="midnightblue" d="M1919.25,-414.444C1948.44,-405.28 1994.92,-390.688 2027.31,-380.519"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2028.38,-383.851 2036.87,-377.516 2026.28,-377.173 2028.38,-383.851"/>
+<path fill="none" stroke="midnightblue" d="M1561.14,-414.444C1613.67,-404.925 1698.53,-389.547 1754.77,-379.356"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1755.71,-382.743 1764.93,-377.516 1754.46,-375.856 1755.71,-382.743"/>
 </g>
 <!-- Node6&#45;&gt;Node9 -->
-<g id="edge70" class="edge"><title>Node6&#45;&gt;Node9</title>
-<path fill="none" stroke="midnightblue" d="M1951.67,-421.62C2003.67,-418.266 2079.83,-408.041 2139,-378 2171.24,-361.634 2180.39,-353.575 2198,-322 2212.82,-295.432 2208.87,-285.26 2212,-255 2212.91,-246.158 2213.85,-243.695 2212,-235 2210,-225.591 2205.9,-215.762 2201.93,-207.698"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2204.89,-205.81 2197.13,-198.593 2198.69,-209.072 2204.89,-205.81"/>
+<g id="edge71" class="edge"><title>Node6&#45;&gt;Node9</title>
+<path fill="none" stroke="midnightblue" d="M1500.03,-414.395C1476.55,-399.029 1429.36,-364.464 1406.92,-322 1387.57,-285.36 1385,-235.608 1385.21,-208.865"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1388.72,-208.698 1385.45,-198.621 1381.72,-208.539 1388.72,-208.698"/>
 </g>
 <!-- Node6&#45;&gt;Node10 -->
-<g id="edge71" class="edge"><title>Node6&#45;&gt;Node10</title>
-<path fill="none" stroke="midnightblue" d="M1885.11,-414.319C1878.73,-405.684 1869.69,-391.77 1866,-378 1855.99,-340.652 1850.27,-326.322 1866,-291 1875.73,-269.151 1888.65,-271.462 1906,-255 1931.53,-230.774 1948.53,-231.519 1962,-199 1965.4,-190.788 1967.32,-186.122 1962,-179 1942.77,-153.246 1909.44,-141.625 1878.89,-136.602"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1879.05,-133.09 1868.66,-135.14 1878.06,-140.02 1879.05,-133.09"/>
+<g id="edge72" class="edge"><title>Node6&#45;&gt;Node10</title>
+<path fill="none" stroke="midnightblue" d="M1516.5,-414.478C1523.75,-391.708 1546.38,-328.624 1584.92,-291 1610.24,-266.287 1637.41,-285.148 1655.92,-255 1660.58,-247.426 1657.84,-243.68 1655.92,-235 1650.01,-208.205 1650.6,-197.043 1629.92,-179 1608.48,-160.287 1579.37,-149.356 1552.78,-142.97"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1553.25,-139.49 1542.73,-140.738 1551.74,-146.323 1553.25,-139.49"/>
 </g>
 <!-- Node6&#45;&gt;Node15 -->
-<g id="edge74" class="edge"><title>Node6&#45;&gt;Node15</title>
-<path fill="none" stroke="midnightblue" d="M1832.31,-419.723C1661.46,-409.169 1157.07,-369.792 756,-255 686.785,-235.19 669.157,-229.623 604,-199 543.082,-170.37 531.463,-156.359 473,-123 450.737,-110.297 425.25,-96.0207 407.095,-85.8967"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="408.764,-82.8203 398.325,-81.0113 405.358,-88.9355 408.764,-82.8203"/>
-</g>
-<!-- Node6&#45;&gt;Node16 -->
-<g id="edge75" class="edge"><title>Node6&#45;&gt;Node16</title>
-<path fill="none" stroke="midnightblue" d="M1890.47,-414.2C1887.06,-392.302 1880.56,-334.03 1899,-291 1907.67,-270.772 1918.54,-271.591 1933,-255 1953.98,-230.93 1967.4,-229.118 1978,-199 1990.94,-162.229 1980.76,-115.9 1973.22,-90.7614"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1976.51,-89.5536 1970.13,-81.0932 1969.84,-91.684 1976.51,-89.5536"/>
+<g id="edge75" class="edge"><title>Node6&#45;&gt;Node15</title>
+<path fill="none" stroke="midnightblue" d="M1573.6,-418.301C1688.94,-409.047 1934.46,-388.457 1971.92,-378 2027.45,-362.502 2045.96,-360.444 2088.92,-322 2166.58,-252.507 2156.86,-210.845 2212.92,-123 2220.1,-111.76 2228.3,-99.2099 2234.73,-89.4327"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2237.68,-91.3166 2240.26,-81.0415 2231.84,-87.4631 2237.68,-91.3166"/>
 </g>
 <!-- Node6&#45;&gt;Node17 -->
-<g id="edge77" class="edge"><title>Node6&#45;&gt;Node17</title>
-<path fill="none" stroke="midnightblue" d="M1951.73,-415.535C2097.55,-396.141 2466.05,-339.706 2552,-255 2595.15,-212.476 2595.79,-182.785 2586,-123 2584.2,-111.989 2580.32,-100.144 2576.72,-90.6953"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2579.89,-89.1861 2572.91,-81.2171 2573.39,-91.8007 2579.89,-89.1861"/>
+<g id="edge76" class="edge"><title>Node6&#45;&gt;Node17</title>
+<path fill="none" stroke="midnightblue" d="M1491.76,-414.392C1473.62,-406.65 1447.95,-394.036 1428.92,-378 1328.13,-293.024 1314.41,-256.841 1247.92,-143 1238.09,-126.158 1229.68,-105.481 1224.29,-90.8877"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1227.44,-89.3155 1220.78,-81.0777 1220.85,-91.6723 1227.44,-89.3155"/>
 </g>
 <!-- Node20 -->
 <g id="node20" class="node"><title>Node20</title>
 <g id="a_node20"><a xlink:href="packed__func_8h.html" target="_top" xlink:title="Type&#45;erased function used across TVM API. ">
-<polygon fill="white" stroke="black" points="1381.5,-291.5 1381.5,-321.5 1494.5,-321.5 1494.5,-291.5 1381.5,-291.5"/>
-<text text-anchor="start" x="1389.5" y="-309.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/runtime/packed</text>
-<text text-anchor="middle" x="1438" y="-298.5" font-family="Helvetica,sans-Serif" font-size="10.00">_func.h</text>
+<polygon fill="white" stroke="black" points="1416.42,-291.5 1416.42,-321.5 1529.42,-321.5 1529.42,-291.5 1416.42,-291.5"/>
+<text text-anchor="start" x="1424.42" y="-309.5" font-family="Helvetica,sans-Serif" font-size="10.00">tvm/runtime/packed</text>
+<text text-anchor="middle" x="1472.92" y="-298.5" font-family="Helvetica,sans-Serif" font-size="10.00">_func.h</text>
 </a>
 </g>
 </g>
 <!-- Node6&#45;&gt;Node20 -->
-<g id="edge72" class="edge"><title>Node6&#45;&gt;Node20</title>
-<path fill="none" stroke="midnightblue" d="M1858.25,-414.413C1783.15,-395.307 1601.15,-349.007 1503.12,-324.067"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1503.88,-320.649 1493.33,-321.575 1502.16,-327.433 1503.88,-320.649"/>
-</g>
-<!-- Node6&#45;&gt;Node25 -->
-<g id="edge76" class="edge"><title>Node6&#45;&gt;Node25</title>
-<path fill="none" stroke="midnightblue" d="M1951.74,-422.568C2098.09,-420.964 2471.23,-413.418 2590,-378 2719.97,-339.241 2854.42,-242.215 2903.04,-204.924"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2905.34,-207.574 2911.1,-198.684 2901.05,-202.039 2905.34,-207.574"/>
-</g>
-<!-- Node6&#45;&gt;Node26 -->
-<g id="edge78" class="edge"><title>Node6&#45;&gt;Node26</title>
-<path fill="none" stroke="midnightblue" d="M1832.23,-416.928C1712.46,-403.878 1448.05,-370.37 1372,-322 1327.06,-293.415 1294.8,-237.079 1280.39,-208.124"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1283.4,-206.29 1275.91,-198.797 1277.09,-209.325 1283.4,-206.29"/>
-</g>
-<!-- Node35 -->
-<g id="node35" class="node"><title>Node35</title>
-<polygon fill="white" stroke="#bfbfbf" points="1908.5,-297 1908.5,-316 1985.5,-316 1985.5,-297 1908.5,-297"/>
-<text text-anchor="middle" x="1947" y="-304" font-family="Helvetica,sans-Serif" font-size="10.00">initializer_list</text>
+<g id="edge73" class="edge"><title>Node6&#45;&gt;Node20</title>
+<path fill="none" stroke="midnightblue" d="M1510.84,-414.319C1504.63,-396.812 1490.41,-356.749 1481.26,-330.97"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1484.55,-329.798 1477.91,-321.545 1477.96,-332.14 1484.55,-329.798"/>
 </g>
-<!-- Node6&#45;&gt;Node35 -->
-<g id="edge73" class="edge"><title>Node6&#45;&gt;Node35</title>
-<path fill="none" stroke="midnightblue" d="M1896.13,-414.319C1905.09,-395.506 1926.45,-350.648 1938.45,-325.455"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1941.68,-326.815 1942.82,-316.282 1935.36,-323.806 1941.68,-326.815"/>
+<!-- Node6&#45;&gt;Node33 -->
+<g id="edge74" class="edge"><title>Node6&#45;&gt;Node33</title>
+<path fill="none" stroke="midnightblue" d="M1530.2,-414.404C1544.46,-406.329 1565.15,-393.229 1579.92,-378 1595.2,-362.25 1608.13,-340.527 1616.15,-325.371"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1619.46,-326.581 1620.89,-316.083 1613.23,-323.399 1619.46,-326.581"/>
 </g>
 <!-- Node8 -->
 <g id="node8" class="node"><title>Node8</title>
-<polygon fill="white" stroke="#bfbfbf" points="2197,-62 2197,-81 2283,-81 2283,-62 2197,-62"/>
-<text text-anchor="middle" x="2240" y="-69" font-family="Helvetica,sans-Serif" font-size="10.00">dmlc/logging.h</text>
+<polygon fill="white" stroke="#bfbfbf" points="1502.92,-62 1502.92,-81 1588.92,-81 1588.92,-62 1502.92,-62"/>
+<text text-anchor="middle" x="1545.92" y="-69" font-family="Helvetica,sans-Serif" font-size="10.00">dmlc/logging.h</text>
 </g>
 <!-- Node7&#45;&gt;Node8 -->
 <g id="edge7" class="edge"><title>Node7&#45;&gt;Node8</title>
-<path fill="none" stroke="midnightblue" d="M2108.22,-358.469C2150.39,-349.724 2210.31,-335.528 2230,-322 2260.41,-301.108 2266.92,-289.858 2279,-255 2299.38,-196.184 2267.2,-122.859 2249.88,-89.9305"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2252.91,-88.1773 2245.05,-81.0688 2246.76,-91.525 2252.91,-88.1773"/>
+<path fill="none" stroke="midnightblue" d="M1878.01,-363.689C1907.41,-358.512 1939.49,-346.967 1957.92,-322 1996.07,-270.319 1983,-223.764 1936.92,-179 1888.66,-132.111 1694.23,-95.8337 1599.01,-80.4881"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1599.43,-77.0116 1589,-78.8945 1598.33,-83.9244 1599.43,-77.0116"/>
 </g>
 <!-- Node7&#45;&gt;Node9 -->
 <g id="edge8" class="edge"><title>Node7&#45;&gt;Node9</title>
-<path fill="none" stroke="midnightblue" d="M2115.15,-358.481C2137.23,-352.138 2161.33,-341.116 2176,-322 2201.45,-288.846 2198.9,-236.562 2195.23,-208.791"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2198.68,-208.185 2193.72,-198.822 2191.76,-209.234 2198.68,-208.185"/>
+<path fill="none" stroke="midnightblue" d="M1782.11,-358.399C1760.13,-351.088 1730.31,-339.044 1707.92,-322 1668.44,-291.933 1680.62,-260.302 1637.92,-235 1608.39,-217.499 1522.73,-204.733 1459.44,-197.36"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1459.82,-193.881 1449.49,-196.223 1459.03,-200.836 1459.82,-193.881"/>
 </g>
 <!-- Node7&#45;&gt;Node10 -->
 <g id="edge22" class="edge"><title>Node7&#45;&gt;Node10</title>
-<path fill="none" stroke="midnightblue" d="M2064.07,-358.489C2063.8,-328.299 2058.48,-225.662 2000,-179 1965.95,-151.83 1918.2,-140.598 1879.09,-136.15"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1879.13,-132.636 1868.83,-135.114 1878.43,-139.601 1879.13,-132.636"/>
+<path fill="none" stroke="midnightblue" d="M1802.59,-358.085C1788.34,-343.37 1764.29,-313.401 1778.92,-291 1800.69,-257.676 1839.16,-288.324 1860.92,-255 1865.78,-247.558 1866.01,-242.291 1860.92,-235 1825.83,-184.673 1651.74,-155.007 1552.86,-141.955"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1553.02,-138.447 1542.65,-140.632 1552.12,-145.389 1553.02,-138.447"/>
 </g>
 <!-- Node7&#45;&gt;Node15 -->
 <g id="edge66" class="edge"><title>Node7&#45;&gt;Node15</title>
-<path fill="none" stroke="midnightblue" d="M1997.8,-365.54C1815.07,-360.504 1288.83,-339.194 861,-255 765.38,-236.182 739.472,-235.222 649,-199 583.997,-172.975 574.362,-152.798 511,-123 478.796,-107.855 440.728,-93.2359 414.526,-83.6589"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="415.696,-80.3602 405.102,-80.2447 413.312,-86.9416 415.696,-80.3602"/>
+<path fill="none" stroke="midnightblue" d="M1878.21,-359.533C1944.58,-347.939 2036.92,-318.739 2036.92,-246 2036.92,-246 2036.92,-246 2036.92,-188 2036.92,-157.205 2038.22,-143.805 2060.92,-123 2103.4,-84.0786 2172.98,-74.7009 2213.61,-72.7105"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2213.91,-76.2021 2223.77,-72.3381 2213.65,-69.2068 2213.91,-76.2021"/>
 </g>
 <!-- Node7&#45;&gt;Node16 -->
-<g id="edge67" class="edge"><title>Node7&#45;&gt;Node16</title>
-<path fill="none" stroke="midnightblue" d="M2118.22,-358.478C2144.65,-352.072 2175.5,-340.997 2198,-322 2251.49,-276.844 2285.36,-245.659 2264,-179 2254.67,-149.873 2249.71,-139.564 2224,-123 2189.42,-100.723 2075.35,-84.7101 2011.42,-77.2342"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2011.56,-73.728 2001.23,-76.0654 2010.77,-80.6824 2011.56,-73.728"/>
+<g id="edge68" class="edge"><title>Node7&#45;&gt;Node16</title>
+<path fill="none" stroke="midnightblue" d="M1745.9,-367.402C1582.96,-367.677 1148.89,-364.036 789.924,-322 632.619,-303.579 567.519,-348.828 439.924,-255 387.577,-216.507 381.957,-130.176 382.211,-91.6715"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="385.717,-91.4365 382.433,-81.3633 378.718,-91.2856 385.717,-91.4365"/>
 </g>
 <!-- Node7&#45;&gt;Node17 -->
-<g id="edge68" class="edge"><title>Node7&#45;&gt;Node17</title>
-<path fill="none" stroke="midnightblue" d="M2130.45,-359.782C2245.58,-345.905 2475.12,-311.75 2530,-255 2573.69,-209.814 2573.56,-128.304 2570.93,-91.4754"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2574.39,-90.8524 2570.05,-81.1906 2567.42,-91.4547 2574.39,-90.8524"/>
+<g id="edge69" class="edge"><title>Node7&#45;&gt;Node17</title>
+<path fill="none" stroke="midnightblue" d="M1803.17,-358.421C1789.77,-344.19 1766.9,-314.9 1778.92,-291 1812.31,-224.663 1952.35,-234.431 1902.92,-179 1856.33,-126.741 1656.93,-163.301 1589.92,-143 1571.28,-137.352 1569.46,-128.983 1550.92,-123 1443.79,-88.4256 1309.47,-77.293 1249.44,-73.8978"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1249.29,-70.385 1239.12,-73.35 1248.92,-77.3752 1249.29,-70.385"/>
 </g>
 <!-- Node7&#45;&gt;Node20 -->
 <g id="edge23" class="edge"><title>Node7&#45;&gt;Node20</title>
-<path fill="none" stroke="midnightblue" d="M1997.9,-360.717C1877.88,-349.309 1625.43,-325.315 1504.86,-313.855"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1504.95,-310.347 1494.66,-312.885 1504.28,-317.316 1504.95,-310.347"/>
+<path fill="none" stroke="midnightblue" d="M1763.35,-358.475C1704.58,-348.16 1605.33,-330.74 1539.47,-319.18"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1540.06,-315.73 1529.6,-317.448 1538.85,-322.624 1540.06,-315.73"/>
 </g>
 <!-- Node7&#45;&gt;Node24 -->
 <g id="edge65" class="edge"><title>Node7&#45;&gt;Node24</title>
-<path fill="none" stroke="midnightblue" d="M2061.51,-358.378C2054.05,-334.208 2029.37,-265.577 1982,-235 1942.74,-209.657 1889.49,-198.496 1853.77,-193.641"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1854.1,-190.154 1843.74,-192.379 1853.22,-197.099 1854.1,-190.154"/>
+<path fill="none" stroke="midnightblue" d="M1878.23,-359.782C1901.98,-353.63 1926.43,-342.402 1940.92,-322 1948.49,-311.352 1944.31,-247.354 1936.92,-235 1928.33,-220.613 1913.15,-210.078 1899.04,-202.834"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1900.53,-199.667 1889.99,-198.542 1897.53,-205.991 1900.53,-199.667"/>
+</g>
+<!-- Node7&#45;&gt;Node25 -->
+<g id="edge67" class="edge"><title>Node7&#45;&gt;Node25</title>
+<path fill="none" stroke="midnightblue" d="M1859.43,-358.463C2023.13,-329.047 2563.38,-231.966 2740.29,-200.177"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="2740.96,-203.613 2750.18,-198.4 2739.72,-196.724 2740.96,-203.613"/>
 </g>
 <!-- Node7&#45;&gt;Node26 -->
-<g id="edge69" class="edge"><title>Node7&#45;&gt;Node26</title>
-<path fill="none" stroke="midnightblue" d="M2057.27,-358.334C2043,-340.199 2009.73,-299.244 1994,-291 1871.45,-226.775 1431.64,-198.585 1305.83,-191.723"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1305.82,-188.217 1295.65,-191.177 1305.45,-195.207 1305.82,-188.217"/>
+<g id="edge70" class="edge"><title>Node7&#45;&gt;Node26</title>
+<path fill="none" stroke="midnightblue" d="M1745.81,-364.429C1526.45,-355.34 824.344,-321.629 608.924,-255 589.677,-249.047 542.826,-221.337 514.595,-204.075"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="516.153,-200.925 505.802,-198.668 512.487,-206.888 516.153,-200.925"/>
 </g>
 <!-- Node7&#45;&gt;Node33 -->
 <g id="edge62" class="edge"><title>Node7&#45;&gt;Node33</title>
-<path fill="none" stroke="midnightblue" d="M2130.2,-361.824C2283.23,-349.859 2657.92,-320.564 2783.33,-310.758"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2783.95,-314.22 2793.65,-309.951 2783.41,-307.241 2783.95,-314.22"/>
+<path fill="none" stroke="midnightblue" d="M1784.95,-358.496C1756.38,-349.476 1709.95,-334.792 1669.92,-322 1667.14,-321.11 1664.26,-320.187 1661.36,-319.257"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1662.16,-315.837 1651.57,-316.109 1660.02,-322.502 1662.16,-315.837"/>
 </g>
 <!-- Node34 -->
 <g id="node34" class="node"><title>Node34</title>
-<polygon fill="white" stroke="#bfbfbf" points="2118.5,-297 2118.5,-316 2167.5,-316 2167.5,-297 2118.5,-297"/>
-<text text-anchor="middle" x="2143" y="-304" font-family="Helvetica,sans-Serif" font-size="10.00">cstring</text>
+<polygon fill="white" stroke="#bfbfbf" points="1788.42,-297 1788.42,-316 1837.42,-316 1837.42,-297 1788.42,-297"/>
+<text text-anchor="middle" x="1812.92" y="-304" font-family="Helvetica,sans-Serif" font-size="10.00">cstring</text>
 </g>
 <!-- Node7&#45;&gt;Node34 -->
 <g id="edge63" class="edge"><title>Node7&#45;&gt;Node34</title>
-<path fill="none" stroke="midnightblue" d="M2075.32,-358.475C2087.87,-349.02 2108.35,-333.596 2123.5,-322.187"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2125.82,-324.821 2131.7,-316.009 2121.61,-319.23 2125.82,-324.821"/>
+<path fill="none" stroke="midnightblue" d="M1812.07,-358.475C1812.21,-350.08 1812.43,-336.981 1812.61,-326.163"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1816.11,-326.067 1812.78,-316.009 1809.11,-325.949 1816.11,-326.067"/>
+</g>
+<!-- Node35 -->
+<g id="node35" class="node"><title>Node35</title>
+<polygon fill="white" stroke="#bfbfbf" points="1855.42,-297 1855.42,-316 1932.42,-316 1932.42,-297 1855.42,-297"/>
+<text text-anchor="middle" x="1893.92" y="-304" font-family="Helvetica,sans-Serif" font-size="10.00">initializer_list</text>
 </g>
 <!-- Node7&#45;&gt;Node35 -->
 <g id="edge64" class="edge"><title>Node7&#45;&gt;Node35</title>
-<path fill="none" stroke="midnightblue" d="M2047.24,-358.475C2027.78,-348.578 1995.45,-332.142 1972.79,-320.615"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1974.23,-317.422 1963.73,-316.009 1971.06,-323.662 1974.23,-317.422"/>
+<path fill="none" stroke="midnightblue" d="M1823.67,-358.475C1836.7,-349.02 1857.96,-333.596 1873.68,-322.187"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1876.16,-324.715 1882.2,-316.009 1872.05,-319.049 1876.16,-324.715"/>
 </g>
 <!-- Node9&#45;&gt;Node10 -->
 <g id="edge9" class="edge"><title>Node9&#45;&gt;Node10</title>
-<path fill="none" stroke="midnightblue" d="M2131.47,-179.444C2062.79,-169.735 1951.01,-153.933 1879,-143.754"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1879.07,-140.229 1868.68,-142.295 1878.09,-147.16 1879.07,-140.229"/>
+<path fill="none" stroke="midnightblue" d="M1401.67,-179.324C1417.24,-170.743 1441.3,-157.486 1459.38,-147.522"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1461.15,-150.546 1468.22,-142.655 1457.77,-144.415 1461.15,-150.546"/>
 </g>
 <!-- Node9&#45;&gt;Node16 -->
 <g id="edge20" class="edge"><title>Node9&#45;&gt;Node16</title>
-<path fill="none" stroke="midnightblue" d="M2175.09,-179.319C2136.06,-159.286 2039.51,-109.722 1992.94,-85.8164"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="1994.3,-82.5776 1983.8,-81.1245 1991.1,-88.805 1994.3,-82.5776"/>
+<path fill="none" stroke="midnightblue" d="M1362.13,-179.477C1320.81,-165.08 1232.8,-136.223 1155.92,-123 881.882,-75.8653 546.269,-71.9263 427.302,-72.1548"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="427.119,-68.6552 417.129,-72.1858 427.14,-75.6552 427.119,-68.6552"/>
 </g>
 <!-- Node9&#45;&gt;Node17 -->
 <g id="edge21" class="edge"><title>Node9&#45;&gt;Node17</title>
-<path fill="none" stroke="midnightblue" d="M2215.8,-179.43C2251.94,-166.533 2322.91,-141.659 2384,-123 2437.61,-106.623 2500.73,-89.9428 2537.66,-80.4428"/>
-<polygon fill="midnightblue" stroke="midnightblue" points="2538.98,-83.7186 2547.8,-77.8466 2537.24,-76.9375 2538.98,-83.7186"/>
+<path fill="none" stroke="midnightblue" d="M1376.76,-179.407C1362.62,-166.372 1334.35,-141.195 1307.92,-123 1287.44,-108.896 1262.71,-95.2398 1244.36,-85.6927"/>
+<polygon fill="midnightblue" stroke="midnightblue" points="1245.9,-82.5474 1235.4,-81.094 1242.7,-88.7748 1245.9,-82.5474"/>
... 378878 lines suppressed ...