Posted to commits@tvm.apache.org by tq...@apache.org on 2023/01/08 20:29:36 UTC

[tvm-site] branch asf-site updated: deploying docs (apache/tvm@a99f0c15458653896c0bbe00ebf91d144c37aff2)

This is an automated email from the ASF dual-hosted git repository.

tqchen pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/tvm-site.git


The following commit(s) were added to refs/heads/asf-site by this push:
     new 66fd76f821 deploying docs (apache/tvm@a99f0c15458653896c0bbe00ebf91d144c37aff2)
66fd76f821 is described below

commit 66fd76f821f1bb2669c28bc7d4cbcb27b6b79c6c
Author: tvm-bot <95...@users.noreply.github.com>
AuthorDate: Sun Jan 8 20:29:26 2023 +0000

    deploying docs (apache/tvm@a99f0c15458653896c0bbe00ebf91d144c37aff2)
---
 docs/_images/sphx_glr_micro_train_001.png          |   Bin 330120 -> 324216 bytes
 docs/_images/sphx_glr_micro_train_thumb.png        |   Bin 23754 -> 23634 bytes
 .../how_to/compile_models/from_darknet.rst.txt     |     2 +-
 .../how_to/compile_models/from_keras.rst.txt       |     2 +-
 .../how_to/compile_models/from_mxnet.rst.txt       |     2 +-
 .../how_to/compile_models/from_oneflow.rst.txt     |     2 +-
 .../how_to/compile_models/from_pytorch.rst.txt     |     2 +-
 .../how_to/compile_models/from_tensorflow.rst.txt  |     2 +-
 .../compile_models/sg_execution_times.rst.txt      |    22 +-
 .../deploy_models/deploy_model_on_adreno.rst.txt   |     2 +-
 .../deploy_models/deploy_model_on_android.rst.txt  |     2 +-
 .../deploy_object_detection_pytorch.rst.txt        |     4 +-
 .../deploy_models/deploy_prequantized.rst.txt      |     6 +-
 .../deploy_prequantized_tflite.rst.txt             |     4 +-
 .../how_to/deploy_models/deploy_quantized.rst.txt  |     2 +-
 .../deploy_models/deploy_ssd_gluoncv.rst.txt       |     4 +-
 .../deploy_models/sg_execution_times.rst.txt       |    20 +-
 .../extend_tvm/bring_your_own_datatypes.rst.txt    |     2 +-
 .../how_to/extend_tvm/sg_execution_times.rst.txt   |    10 +-
 .../how_to/extend_tvm/use_pass_instrument.rst.txt  |    16 +-
 .../optimize_operators/opt_conv_cuda.rst.txt       |     2 +-
 .../optimize_operators/opt_conv_tensorcore.rst.txt |     2 +-
 .../how_to/optimize_operators/opt_gemm.rst.txt     |    16 +-
 .../optimize_operators/sg_execution_times.rst.txt  |     8 +-
 .../sg_execution_times.rst.txt                     |    14 +-
 .../tune_conv2d_layer_cuda.rst.txt                 |   813 +-
 .../tune_network_cuda.rst.txt                      |     4 +-
 .../tune_network_x86.rst.txt                       |     4 +-
 .../tune_sparse_x86.rst.txt                        |   111 +-
 .../tune_with_autotvm/sg_execution_times.rst.txt   |    10 +-
 .../tune_with_autotvm/tune_conv2d_cuda.rst.txt     |   432 +-
 .../work_with_microtvm/micro_autotune.rst.txt      |    16 +-
 .../work_with_microtvm/micro_pytorch.rst.txt       |     4 +-
 .../how_to/work_with_microtvm/micro_train.rst.txt  |    18 +-
 .../work_with_microtvm/sg_execution_times.rst.txt  |    14 +-
 .../work_with_relay/sg_execution_times.rst.txt     |     8 +-
 .../how_to/work_with_schedules/intrin_math.rst.txt |     2 +-
 .../work_with_schedules/sg_execution_times.rst.txt |    18 +-
 .../how_to/work_with_schedules/tensorize.rst.txt   |     2 +-
 .../tutorials/autotvm/sg_execution_times.rst.txt   |     4 +-
 .../frontend/deploy_classification.rst.txt         |     2 +-
 .../tutorials/frontend/deploy_detection.rst.txt    |     2 +-
 .../tutorials/frontend/sg_execution_times.rst.txt  |     6 +-
 .../tutorials/optimize/sg_execution_times.rst.txt  |     6 +-
 .../topic/vta/tutorials/sg_execution_times.rst.txt |     6 +-
 .../tutorial/auto_scheduler_matmul_x86.rst.txt     |     4 +-
 docs/_sources/tutorial/autotvm_matmul_x86.rst.txt  |    20 +-
 docs/_sources/tutorial/autotvm_relay_x86.rst.txt   |    64 +-
 .../tutorial/cross_compilation_and_rpc.rst.txt     |     2 +-
 docs/_sources/tutorial/intro_topi.rst.txt          |     2 +-
 docs/_sources/tutorial/sg_execution_times.rst.txt  |    24 +-
 .../tutorial/tensor_expr_get_started.rst.txt       |    44 +-
 docs/commit_hash                                   |     2 +-
 docs/how_to/compile_models/from_darknet.html       |     2 +-
 docs/how_to/compile_models/from_keras.html         |     2 +-
 docs/how_to/compile_models/from_mxnet.html         |     2 +-
 docs/how_to/compile_models/from_oneflow.html       |    10 +-
 docs/how_to/compile_models/from_pytorch.html       |     8 +-
 docs/how_to/compile_models/from_tensorflow.html    |     2 +-
 docs/how_to/compile_models/sg_execution_times.html |    22 +-
 .../deploy_models/deploy_model_on_adreno.html      |     2 +-
 .../deploy_models/deploy_model_on_android.html     |     2 +-
 .../deploy_object_detection_pytorch.html           |    36 +-
 docs/how_to/deploy_models/deploy_prequantized.html |     9 +-
 .../deploy_models/deploy_prequantized_tflite.html  |     4 +-
 docs/how_to/deploy_models/deploy_quantized.html    |     2 +-
 docs/how_to/deploy_models/deploy_ssd_gluoncv.html  |    36 +-
 docs/how_to/deploy_models/sg_execution_times.html  |    20 +-
 .../extend_tvm/bring_your_own_datatypes.html       |     2 +-
 docs/how_to/extend_tvm/sg_execution_times.html     |    10 +-
 docs/how_to/extend_tvm/use_pass_instrument.html    |    16 +-
 docs/how_to/optimize_operators/opt_conv_cuda.html  |     2 +-
 .../optimize_operators/opt_conv_tensorcore.html    |     2 +-
 docs/how_to/optimize_operators/opt_gemm.html       |    16 +-
 .../optimize_operators/sg_execution_times.html     |     8 +-
 .../sg_execution_times.html                        |    14 +-
 .../tune_conv2d_layer_cuda.html                    |   813 +-
 .../tune_with_autoscheduler/tune_network_cuda.html |     4 +-
 .../tune_with_autoscheduler/tune_network_x86.html  |     4 +-
 .../tune_with_autoscheduler/tune_sparse_x86.html   |   111 +-
 .../tune_with_autotvm/sg_execution_times.html      |    12 +-
 .../how_to/tune_with_autotvm/tune_conv2d_cuda.html |   432 +-
 docs/how_to/work_with_microtvm/micro_autotune.html |    16 +-
 docs/how_to/work_with_microtvm/micro_pytorch.html  |     4 +-
 docs/how_to/work_with_microtvm/micro_train.html    |    16 +-
 .../work_with_microtvm/sg_execution_times.html     |    14 +-
 .../how_to/work_with_relay/sg_execution_times.html |     8 +-
 docs/how_to/work_with_schedules/intrin_math.html   |     2 +-
 .../work_with_schedules/sg_execution_times.html    |    18 +-
 docs/how_to/work_with_schedules/tensorize.html     |     2 +-
 docs/install/nnpack.html                           |    12 +-
 docs/reference/api/doxygen/annotated.html          |   390 +-
 docs/reference/api/doxygen/array_8h.html           |     2 +-
 docs/reference/api/doxygen/array_8h__dep__incl.svg |  1498 ++-
 docs/reference/api/doxygen/c__runtime__api_8h.html |     2 +-
 .../api/doxygen/c__runtime__api_8h__dep__incl.svg  |  1531 ++-
 docs/reference/api/doxygen/classes.html            |   496 +-
 .../classtvm_1_1ContextManager-members.html        |    82 -
 .../api/doxygen/classtvm_1_1ContextManager.html    |   184 -
 .../classtvm_1_1ContextManager__coll__graph.svg    |    23 -
 .../doxygen/classtvm_1_1TracedArray-members.html   |    90 -
 .../api/doxygen/classtvm_1_1TracedArray.html       |   413 -
 .../classtvm_1_1TracedArrayIterator-members.html   |    97 -
 .../doxygen/classtvm_1_1TracedArrayIterator.html   |   561 -
 ...lasstvm_1_1TracedArrayIterator__coll__graph.svg |    33 -
 .../classtvm_1_1TracedArray__coll__graph.svg       |    30 -
 .../classtvm_1_1TracedBasicValue-members.html      |    84 -
 .../api/doxygen/classtvm_1_1TracedBasicValue.html  |   245 -
 .../classtvm_1_1TracedBasicValue__coll__graph.svg  |    26 -
 .../api/doxygen/classtvm_1_1TracedMap-members.html |    89 -
 .../api/doxygen/classtvm_1_1TracedMap.html         |   379 -
 .../classtvm_1_1TracedMapIterator-members.html     |    94 -
 .../api/doxygen/classtvm_1_1TracedMapIterator.html |   453 -
 .../classtvm_1_1TracedMapIterator__coll__graph.svg |    30 -
 .../doxygen/classtvm_1_1TracedMap__coll__graph.svg |    29 -
 .../doxygen/classtvm_1_1TracedObject-members.html  |    90 -
 .../api/doxygen/classtvm_1_1TracedObject.html      |   440 -
 .../classtvm_1_1TracedObject__coll__graph.svg      |    32 -
 .../classtvm_1_1TracedOptional-members.html        |    88 -
 .../api/doxygen/classtvm_1_1TracedOptional.html    |   361 -
 .../classtvm_1_1TracedOptional__coll__graph.svg    |    29 -
 .../api/doxygen/classtvm_1_1runtime_1_1Object.html |     2 +-
 .../doxygen/classtvm_1_1runtime_1_1ObjectRef.html  |     2 +-
 ...asstvm_1_1runtime_1_1ObjectRef__coll__graph.svg |    12 +-
 .../classtvm_1_1runtime_1_1Object__coll__graph.svg |     8 +-
 ...ript_1_1ir__builder_1_1ir_1_1IRModuleFrame.html |     4 +-
 ..._1_1ir__builder_1_1ir_1_1IRModuleFrameNode.html |     4 +-
 ..._1ir__builder_1_1tir_1_1AllocateConstFrame.html |     4 +-
 ...__builder_1_1tir_1_1AllocateConstFrameNode.html |     4 +-
 ...ipt_1_1ir__builder_1_1tir_1_1AllocateFrame.html |     4 +-
 ...1_1ir__builder_1_1tir_1_1AllocateFrameNode.html |     4 +-
 ...cript_1_1ir__builder_1_1tir_1_1AssertFrame.html |     4 +-
 ...t_1_1ir__builder_1_1tir_1_1AssertFrameNode.html |     4 +-
 ...1script_1_1ir__builder_1_1tir_1_1AttrFrame.html |     4 +-
 ...ipt_1_1ir__builder_1_1tir_1_1AttrFrameNode.html |     4 +-
 ...script_1_1ir__builder_1_1tir_1_1BlockFrame.html |     4 +-
 ...pt_1_1ir__builder_1_1tir_1_1BlockFrameNode.html |     4 +-
 ...pt_1_1ir__builder_1_1tir_1_1BlockInitFrame.html |     4 +-
 ..._1ir__builder_1_1tir_1_1BlockInitFrameNode.html |     4 +-
 ...t_1_1ir__builder_1_1tir_1_1DeclBufferFrame.html |     4 +-
 ...1ir__builder_1_1tir_1_1DeclBufferFrameNode.html |     4 +-
 ...1script_1_1ir__builder_1_1tir_1_1ElseFrame.html |     4 +-
 ...ipt_1_1ir__builder_1_1tir_1_1ElseFrameNode.html |     4 +-
 ..._1script_1_1ir__builder_1_1tir_1_1ForFrame.html |     4 +-
 ...ript_1_1ir__builder_1_1tir_1_1ForFrameNode.html |     4 +-
 ...1_1script_1_1ir__builder_1_1tir_1_1IfFrame.html |     4 +-
 ...cript_1_1ir__builder_1_1tir_1_1IfFrameNode.html |     4 +-
 ...1_1ir__builder_1_1tir_1_1LaunchThreadFrame.html |     4 +-
 ...r__builder_1_1tir_1_1LaunchThreadFrameNode.html |     4 +-
 ..._1script_1_1ir__builder_1_1tir_1_1LetFrame.html |     4 +-
 ...ript_1_1ir__builder_1_1tir_1_1LetFrameNode.html |     4 +-
 ...ipt_1_1ir__builder_1_1tir_1_1PrimFuncFrame.html |     4 +-
 ...1_1ir__builder_1_1tir_1_1PrimFuncFrameNode.html |     4 +-
 ...ript_1_1ir__builder_1_1tir_1_1RealizeFrame.html |     4 +-
 ..._1_1ir__builder_1_1tir_1_1RealizeFrameNode.html |     4 +-
 ..._1script_1_1ir__builder_1_1tir_1_1TIRFrame.html |     4 +-
 ...ript_1_1ir__builder_1_1tir_1_1TIRFrameNode.html |     4 +-
 ...1script_1_1ir__builder_1_1tir_1_1ThenFrame.html |     4 +-
 ...ipt_1_1ir__builder_1_1tir_1_1ThenFrameNode.html |     4 +-
 ...script_1_1ir__builder_1_1tir_1_1WhileFrame.html |     4 +-
 ...pt_1_1ir__builder_1_1tir_1_1WhileFrameNode.html |     4 +-
 ...pt_1_1printer_1_1AttrAccessDocNode-members.html |    67 +-
 ..._1_1script_1_1printer_1_1AttrAccessDocNode.html |     7 +-
 ..._1printer_1_1AttrAccessDocNode__coll__graph.svg |   307 +-
 ...rinter_1_1AttrAccessDocNode__inherit__graph.svg |   121 +-
 ..._1script_1_1printer_1_1CallDocNode-members.html |    69 +-
 ...asstvm_1_1script_1_1printer_1_1CallDocNode.html |     7 +-
 ...ript_1_1printer_1_1CallDocNode__coll__graph.svg |   333 +-
 ...t_1_1printer_1_1CallDocNode__inherit__graph.svg |   121 +-
 ..._1script_1_1printer_1_1DictDocNode-members.html |    67 +-
 ...asstvm_1_1script_1_1printer_1_1DictDocNode.html |     7 +-
 ...ript_1_1printer_1_1DictDocNode__coll__graph.svg |   253 +-
 ...t_1_1printer_1_1DictDocNode__inherit__graph.svg |   121 +-
 .../classtvm_1_1script_1_1printer_1_1Doc.html      |     2 +-
 .../classtvm_1_1script_1_1printer_1_1DocNode.html  |     2 +-
 ...cript_1_1printer_1_1DocNode__inherit__graph.svg |   267 +-
 ...1_1script_1_1printer_1_1Doc__inherit__graph.svg |   625 +-
 .../classtvm_1_1script_1_1printer_1_1ExprDoc.html  |     2 +-
 ..._1script_1_1printer_1_1ExprDocNode-members.html |    61 +-
 ...asstvm_1_1script_1_1printer_1_1ExprDocNode.html |    36 +-
 ...ript_1_1printer_1_1ExprDocNode__coll__graph.svg |   217 +-
 ...t_1_1printer_1_1ExprDocNode__inherit__graph.svg |   157 +-
 ...cript_1_1printer_1_1ExprDoc__inherit__graph.svg |   337 +-
 ...stvm_1_1script_1_1printer_1_1Frame-members.html |     2 +
 .../classtvm_1_1script_1_1printer_1_1Frame.html    |    68 +-
 ..._1_1script_1_1printer_1_1FrameNode-members.html |    42 +-
 ...classtvm_1_1script_1_1printer_1_1FrameNode.html |   139 +-
 ...script_1_1printer_1_1FrameNode__coll__graph.svg |   415 +-
 ...ipt_1_1printer_1_1FrameNode__inherit__graph.svg |   158 +-
 ..._1_1script_1_1printer_1_1Frame__coll__graph.svg |   122 +-
 ...1script_1_1printer_1_1Frame__inherit__graph.svg |   126 +-
 ..._1script_1_1printer_1_1IRDocsifier-members.html |     2 +-
 ...asstvm_1_1script_1_1printer_1_1IRDocsifier.html |    14 +-
 ..._1_1printer_1_1IRDocsifierFunctor-members.html} |    15 +-
 ..._1script_1_1printer_1_1IRDocsifierFunctor.html} |   133 +-
 ...printer_1_1IRDocsifierFunctor__coll__graph.svg} |    29 +-
 ...ript_1_1printer_1_1IRDocsifierNode-members.html |    56 +-
 ...vm_1_1script_1_1printer_1_1IRDocsifierNode.html |   430 +-
 ..._1_1printer_1_1IRDocsifierNode__coll__graph.svg |   378 +-
 ...1printer_1_1IRDocsifierNode__inherit__graph.svg |   125 +-
 ..._1_1script_1_1printer_1_1IdDocNode-members.html |    65 +-
 ...classtvm_1_1script_1_1printer_1_1IdDocNode.html |     7 +-
 ...script_1_1printer_1_1IdDocNode__coll__graph.svg |   255 +-
 ...ipt_1_1printer_1_1IdDocNode__inherit__graph.svg |   121 +-
 ...1script_1_1printer_1_1IndexDocNode-members.html |    67 +-
 ...sstvm_1_1script_1_1printer_1_1IndexDocNode.html |     7 +-
 ...ipt_1_1printer_1_1IndexDocNode__coll__graph.svg |   303 +-
 ..._1_1printer_1_1IndexDocNode__inherit__graph.svg |   121 +-
 ...script_1_1printer_1_1LambdaDocNode-members.html |    65 +-
 ...stvm_1_1script_1_1printer_1_1LambdaDocNode.html |     7 +-
 ...pt_1_1printer_1_1LambdaDocNode__coll__graph.svg |   303 +-
 ...1_1printer_1_1LambdaDocNode__inherit__graph.svg |   121 +-
 ..._1script_1_1printer_1_1ListDocNode-members.html |    65 +-
 ...asstvm_1_1script_1_1printer_1_1ListDocNode.html |     7 +-
 ...ript_1_1printer_1_1ListDocNode__coll__graph.svg |   251 +-
 ...t_1_1printer_1_1ListDocNode__inherit__graph.svg |   121 +-
 ...1_1script_1_1printer_1_1LiteralDoc-members.html |    37 +-
 ...lasstvm_1_1script_1_1printer_1_1LiteralDoc.html |   232 +-
 ...cript_1_1printer_1_1LiteralDocNode-members.html |    65 +-
 ...tvm_1_1script_1_1printer_1_1LiteralDocNode.html |     7 +-
 ...t_1_1printer_1_1LiteralDocNode__coll__graph.svg |   227 +-
 ..._1printer_1_1LiteralDocNode__inherit__graph.svg |   121 +-
 ...cript_1_1printer_1_1LiteralDoc__coll__graph.svg |   167 +-
 ...pt_1_1printer_1_1LiteralDoc__inherit__graph.svg |   131 +-
 ...script_1_1printer_1_1MetadataFrame-members.html |   104 -
 ...stvm_1_1script_1_1printer_1_1MetadataFrame.html |   233 -
 ...pt_1_1printer_1_1MetadataFrameNode-members.html |   119 -
 ..._1_1script_1_1printer_1_1MetadataFrameNode.html |   317 -
 ..._1printer_1_1MetadataFrameNode__coll__graph.svg |   202 -
 ...rinter_1_1MetadataFrameNode__inherit__graph.svg |   101 -
 ...pt_1_1printer_1_1MetadataFrame__coll__graph.svg |   116 -
 ...1_1printer_1_1MetadataFrame__inherit__graph.svg |    86 -
 ...ipt_1_1printer_1_1OperationDocNode-members.html |    69 +-
 ...m_1_1script_1_1printer_1_1OperationDocNode.html |     7 +-
 ...1_1printer_1_1OperationDocNode__coll__graph.svg |   251 +-
 ...printer_1_1OperationDocNode__inherit__graph.svg |   121 +-
 ...pt_1_1printer_1_1RootNodeContainer-members.html |   101 -
 ..._1_1script_1_1printer_1_1RootNodeContainer.html |   238 -
 ..._1printer_1_1RootNodeContainerNode-members.html |   114 -
 ...script_1_1printer_1_1RootNodeContainerNode.html |   301 -
 ...inter_1_1RootNodeContainerNode__coll__graph.svg |   147 -
 ...er_1_1RootNodeContainerNode__inherit__graph.svg |    76 -
 ..._1printer_1_1RootNodeContainer__coll__graph.svg |    91 -
 ...rinter_1_1RootNodeContainer__inherit__graph.svg |    61 -
 ...1script_1_1printer_1_1TupleDocNode-members.html |    65 +-
 ...sstvm_1_1script_1_1printer_1_1TupleDocNode.html |     7 +-
 ...ipt_1_1printer_1_1TupleDocNode__coll__graph.svg |   251 +-
 ..._1_1printer_1_1TupleDocNode__inherit__graph.svg |   121 +-
 ..._1script_1_1printer_1_1VarDefFrame-members.html |   104 -
 ...asstvm_1_1script_1_1printer_1_1VarDefFrame.html |   233 -
 ...ript_1_1printer_1_1VarDefFrameNode-members.html |   119 -
 ...vm_1_1script_1_1printer_1_1VarDefFrameNode.html |   317 -
 ..._1_1printer_1_1VarDefFrameNode__coll__graph.svg |   203 -
 ...1printer_1_1VarDefFrameNode__inherit__graph.svg |   101 -
 ...ript_1_1printer_1_1VarDefFrame__coll__graph.svg |   116 -
 ...t_1_1printer_1_1VarDefFrame__inherit__graph.svg |    86 -
 ...m_1_1script_1_1printer_1_1VarTable-members.html |   101 -
 .../classtvm_1_1script_1_1printer_1_1VarTable.html |   228 -
 ...1script_1_1printer_1_1VarTableNode-members.html |   120 -
 ...sstvm_1_1script_1_1printer_1_1VarTableNode.html |   574 -
 ...ipt_1_1printer_1_1VarTableNode__coll__graph.svg |    87 -
 ..._1_1printer_1_1VarTableNode__inherit__graph.svg |    81 -
 ...1script_1_1printer_1_1VarTable__coll__graph.svg |    92 -
 ...ript_1_1printer_1_1VarTable__inherit__graph.svg |    62 -
 .../doxygen/classtvm_1_1tir_1_1PrimFuncNode.html   |     2 +-
 .../api/doxygen/constant__utils_8h_source.html     |     2 +-
 docs/reference/api/doxygen/data__type_8h.html      |     2 +-
 .../api/doxygen/data__type_8h__dep__incl.svg       |  1551 +--
 .../api/doxygen/detail_2broadcast_8h_source.html   |     2 +-
 .../api/doxygen/detail_2extern_8h_source.html      |     2 +-
 docs/reference/api/doxygen/dilate_8h_source.html   |     2 +-
 docs/reference/api/doxygen/dir_000023_000007.html  |     2 +-
 docs/reference/api/doxygen/dir_000023_000008.html  |     2 +-
 docs/reference/api/doxygen/dir_000023_000011.html  |     2 +-
 docs/reference/api/doxygen/dir_000023_000013.html  |    73 -
 docs/reference/api/doxygen/dir_000023_000020.html  |     2 +-
 docs/reference/api/doxygen/dir_000024_000007.html  |     2 +-
 docs/reference/api/doxygen/dir_000024_000011.html  |     2 +-
 docs/reference/api/doxygen/dir_000024_000020.html  |     2 +-
 docs/reference/api/doxygen/dir_000029_000007.html  |     2 +-
 docs/reference/api/doxygen/dir_000029_000020.html  |     2 +-
 docs/reference/api/doxygen/dir_000030_000011.html  |     2 +-
 docs/reference/api/doxygen/dir_000030_000029.html  |     2 +-
 docs/reference/api/doxygen/dir_000031_000007.html  |     2 +-
 docs/reference/api/doxygen/dir_000031_000008.html  |     2 +-
 docs/reference/api/doxygen/dir_000031_000013.html  |    73 -
 docs/reference/api/doxygen/dir_000031_000020.html  |     2 +-
 .../dir_67fdee7a5e0396034822418fa5baa4b4.html      |     2 +-
 .../dir_84875704194fd544d29fe0c7fedd8939.html      |     7 +-
 .../dir_84875704194fd544d29fe0c7fedd8939_dep.svg   |   219 +-
 .../dir_a59a89c7dd2e4e6561fe59bf359ce2f3.html      |    12 +-
 .../dir_a59a89c7dd2e4e6561fe59bf359ce2f3_dep.svg   |   137 +-
 .../dir_b4c7d8e826c599ba55146c099a14beb5.html      |     2 +-
 .../dir_b4c7d8e826c599ba55146c099a14beb5_dep.svg   |   524 +-
 .../dir_e4a1a856a30057b9b1543256279fc7a1.html      |     2 +-
 docs/reference/api/doxygen/doc_8h.html             |     5 +-
 docs/reference/api/doxygen/doc_8h__dep__incl.svg   |   108 +-
 docs/reference/api/doxygen/doc_8h__incl.svg        |  1038 +-
 docs/reference/api/doxygen/doc_8h_source.html      |   249 +-
 docs/reference/api/doxygen/doc__printer_8h.html    |   106 -
 .../api/doxygen/doc__printer_8h__incl.svg          |  1411 ---
 .../api/doxygen/doc__printer_8h_source.html        |    81 -
 docs/reference/api/doxygen/elemwise_8h_source.html |     2 +-
 docs/reference/api/doxygen/files.html              |    14 +-
 docs/reference/api/doxygen/functions__.html        |     4 -
 docs/reference/api/doxygen/functions_a.html        |    20 +-
 docs/reference/api/doxygen/functions_b.html        |    12 +-
 docs/reference/api/doxygen/functions_c.html        |    14 +-
 docs/reference/api/doxygen/functions_d.html        |    21 +-
 docs/reference/api/doxygen/functions_e.html        |    18 +-
 docs/reference/api/doxygen/functions_f.html        |    18 +-
 docs/reference/api/doxygen/functions_func_a.html   |    18 +-
 docs/reference/api/doxygen/functions_func_b.html   |     9 +-
 docs/reference/api/doxygen/functions_func_c.html   |    11 +-
 docs/reference/api/doxygen/functions_func_d.html   |    17 +-
 docs/reference/api/doxygen/functions_func_e.html   |    18 +-
 docs/reference/api/doxygen/functions_func_f.html   |     3 +
 docs/reference/api/doxygen/functions_func_g.html   |    32 +-
 docs/reference/api/doxygen/functions_func_i.html   |    19 +-
 docs/reference/api/doxygen/functions_func_m.html   |     5 +-
 docs/reference/api/doxygen/functions_func_o.html   |    48 +-
 docs/reference/api/doxygen/functions_func_r.html   |    14 +-
 docs/reference/api/doxygen/functions_func_s.html   |    20 +-
 docs/reference/api/doxygen/functions_func_t.html   |    44 +-
 docs/reference/api/doxygen/functions_func_v.html   |    47 +-
 docs/reference/api/doxygen/functions_func_w.html   |     6 -
 docs/reference/api/doxygen/functions_g.html        |    30 +-
 docs/reference/api/doxygen/functions_i.html        |    26 +-
 docs/reference/api/doxygen/functions_k.html        |     4 +-
 docs/reference/api/doxygen/functions_m.html        |    12 +-
 docs/reference/api/doxygen/functions_n.html        |     3 +-
 docs/reference/api/doxygen/functions_o.html        |    44 +-
 docs/reference/api/doxygen/functions_p.html        |     4 +-
 docs/reference/api/doxygen/functions_r.html        |    17 +-
 docs/reference/api/doxygen/functions_rela.html     |     3 -
 docs/reference/api/doxygen/functions_s.html        |    16 +-
 docs/reference/api/doxygen/functions_t.html        |    53 +-
 docs/reference/api/doxygen/functions_type.html     |   386 +
 docs/reference/api/doxygen/functions_type_c.html   |   104 -
 docs/reference/api/doxygen/functions_type_d.html   |    85 -
 docs/reference/api/doxygen/functions_type_f.html   |   232 -
 docs/reference/api/doxygen/functions_type_i.html   |    95 -
 docs/reference/api/doxygen/functions_type_k.html   |    80 -
 docs/reference/api/doxygen/functions_type_l.html   |    76 -
 docs/reference/api/doxygen/functions_type_m.html   |    83 -
 docs/reference/api/doxygen/functions_type_o.html   |    79 -
 docs/reference/api/doxygen/functions_type_p.html   |    89 -
 docs/reference/api/doxygen/functions_type_r.html   |   100 -
 docs/reference/api/doxygen/functions_type_s.html   |    80 -
 docs/reference/api/doxygen/functions_type_t.html   |   112 -
 docs/reference/api/doxygen/functions_type_v.html   |    86 -
 docs/reference/api/doxygen/functions_type_w.html   |    85 -
 docs/reference/api/doxygen/functions_u.html        |     2 +-
 docs/reference/api/doxygen/functions_v.html        |    50 +-
 docs/reference/api/doxygen/functions_vars.html     |     4 -
 docs/reference/api/doxygen/functions_vars_b.html   |     3 +
 docs/reference/api/doxygen/functions_vars_c.html   |     7 +
 docs/reference/api/doxygen/functions_vars_d.html   |     6 +
 docs/reference/api/doxygen/functions_vars_f.html   |     3 +
 docs/reference/api/doxygen/functions_vars_i.html   |     3 +
 docs/reference/api/doxygen/functions_vars_m.html   |     2 +-
 docs/reference/api/doxygen/functions_vars_n.html   |     1 +
 docs/reference/api/doxygen/functions_vars_o.html   |     3 +
 docs/reference/api/doxygen/functions_vars_r.html   |     3 -
 docs/reference/api/doxygen/functions_vars_s.html   |     2 +-
 docs/reference/api/doxygen/functions_vars_v.html   |     1 -
 docs/reference/api/doxygen/functions_w.html        |    20 +-
 .../api/doxygen/functor_8h__dep__incl.svg          |   426 +-
 docs/reference/api/doxygen/globals_defs.html       |     3 +
 docs/reference/api/doxygen/globals_t.html          |     3 +
 docs/reference/api/doxygen/hierarchy.html          |  2700 +++--
 docs/reference/api/doxygen/inherit_graph_100.svg   |    17 +-
 docs/reference/api/doxygen/inherit_graph_101.svg   |    47 +-
 docs/reference/api/doxygen/inherit_graph_102.svg   |    14 +-
 docs/reference/api/doxygen/inherit_graph_103.svg   |    17 +-
 docs/reference/api/doxygen/inherit_graph_104.svg   |    14 +-
 docs/reference/api/doxygen/inherit_graph_105.svg   |    14 +-
 docs/reference/api/doxygen/inherit_graph_106.svg   |    18 +-
 docs/reference/api/doxygen/inherit_graph_107.svg   |    14 +-
 docs/reference/api/doxygen/inherit_graph_108.svg   |    15 +-
 docs/reference/api/doxygen/inherit_graph_109.svg   |    14 +-
 docs/reference/api/doxygen/inherit_graph_110.svg   |    29 +-
 docs/reference/api/doxygen/inherit_graph_111.svg   | 10627 +++++++++++++++++-
 docs/reference/api/doxygen/inherit_graph_112.svg   |    15 +-
 docs/reference/api/doxygen/inherit_graph_113.svg   |    15 +-
 docs/reference/api/doxygen/inherit_graph_114.svg   |    15 +-
 docs/reference/api/doxygen/inherit_graph_115.svg   |    15 +-
 docs/reference/api/doxygen/inherit_graph_116.svg   |    14 +-
 docs/reference/api/doxygen/inherit_graph_117.svg   |    29 +-
 docs/reference/api/doxygen/inherit_graph_118.svg   | 10690 +------------------
 docs/reference/api/doxygen/inherit_graph_119.svg   |    12 +-
 docs/reference/api/doxygen/inherit_graph_12.svg    |    16 +-
 docs/reference/api/doxygen/inherit_graph_120.svg   |  7535 ++++++++++++-
 docs/reference/api/doxygen/inherit_graph_121.svg   |    15 +-
 docs/reference/api/doxygen/inherit_graph_122.svg   |    14 +-
 docs/reference/api/doxygen/inherit_graph_123.svg   |     6 +-
 docs/reference/api/doxygen/inherit_graph_124.svg   |    14 +-
 docs/reference/api/doxygen/inherit_graph_125.svg   |    16 +-
 docs/reference/api/doxygen/inherit_graph_126.svg   |    15 +-
 docs/reference/api/doxygen/inherit_graph_127.svg   |  7631 +------------
 docs/reference/api/doxygen/inherit_graph_128.svg   |    14 +-
 docs/reference/api/doxygen/inherit_graph_129.svg   |    14 +-
 docs/reference/api/doxygen/inherit_graph_130.svg   |    17 +-
 docs/reference/api/doxygen/inherit_graph_131.svg   |    14 +-
 docs/reference/api/doxygen/inherit_graph_132.svg   |    17 +-
 docs/reference/api/doxygen/inherit_graph_133.svg   |    15 +-
 docs/reference/api/doxygen/inherit_graph_134.svg   |    14 +-
 docs/reference/api/doxygen/inherit_graph_135.svg   |    14 +-
 docs/reference/api/doxygen/inherit_graph_136.svg   |    17 +-
 docs/reference/api/doxygen/inherit_graph_137.svg   |    17 +-
 docs/reference/api/doxygen/inherit_graph_138.svg   |    14 +-
 docs/reference/api/doxygen/inherit_graph_139.svg   |    15 +-
 docs/reference/api/doxygen/inherit_graph_140.svg   |    12 +-
 docs/reference/api/doxygen/inherit_graph_141.svg   |    14 +-
 docs/reference/api/doxygen/inherit_graph_142.svg   |    61 +-
 docs/reference/api/doxygen/inherit_graph_143.svg   |    16 +-
 docs/reference/api/doxygen/inherit_graph_144.svg   |    20 +-
 docs/reference/api/doxygen/inherit_graph_145.svg   |    17 +-
 docs/reference/api/doxygen/inherit_graph_146.svg   |    18 +-
 docs/reference/api/doxygen/inherit_graph_147.svg   |    15 +-
 docs/reference/api/doxygen/inherit_graph_148.svg   |    17 +-
 docs/reference/api/doxygen/inherit_graph_149.svg   |    64 +-
 docs/reference/api/doxygen/inherit_graph_150.svg   |    17 +-
 docs/reference/api/doxygen/inherit_graph_151.svg   |    20 +-
 docs/reference/api/doxygen/inherit_graph_152.svg   |    17 +-
 docs/reference/api/doxygen/inherit_graph_153.svg   |    19 +-
 docs/reference/api/doxygen/inherit_graph_154.svg   |    12 +-
 docs/reference/api/doxygen/inherit_graph_155.svg   |    14 +-
 docs/reference/api/doxygen/inherit_graph_156.svg   |    21 +-
 docs/reference/api/doxygen/inherit_graph_157.svg   |    18 +-
 docs/reference/api/doxygen/inherit_graph_158.svg   |    18 +-
 docs/reference/api/doxygen/inherit_graph_159.svg   |     4 +-
 docs/reference/api/doxygen/inherit_graph_160.svg   |     4 +-
 docs/reference/api/doxygen/inherit_graph_161.svg   |     4 +-
 docs/reference/api/doxygen/inherit_graph_162.svg   |    22 +-
 docs/reference/api/doxygen/inherit_graph_163.svg   |    18 +-
 docs/reference/api/doxygen/inherit_graph_164.svg   |    21 +-
 docs/reference/api/doxygen/inherit_graph_165.svg   |    21 +-
 docs/reference/api/doxygen/inherit_graph_166.svg   |     4 +-
 docs/reference/api/doxygen/inherit_graph_167.svg   |    23 +-
 docs/reference/api/doxygen/inherit_graph_168.svg   |    21 +-
 docs/reference/api/doxygen/inherit_graph_169.svg   |    23 +-
 docs/reference/api/doxygen/inherit_graph_170.svg   |    18 +-
 docs/reference/api/doxygen/inherit_graph_171.svg   |    19 +-
 docs/reference/api/doxygen/inherit_graph_172.svg   |    19 +-
 docs/reference/api/doxygen/inherit_graph_173.svg   |     4 +-
 docs/reference/api/doxygen/inherit_graph_174.svg   |    23 +-
 docs/reference/api/doxygen/inherit_graph_175.svg   |    21 +-
 docs/reference/api/doxygen/inherit_graph_176.svg   |    17 +-
 docs/reference/api/doxygen/inherit_graph_177.svg   |    17 +-
 docs/reference/api/doxygen/inherit_graph_178.svg   |    15 +-
 docs/reference/api/doxygen/inherit_graph_179.svg   |    15 +-
 docs/reference/api/doxygen/inherit_graph_180.svg   |    14 +-
 docs/reference/api/doxygen/inherit_graph_181.svg   |    15 +-
 docs/reference/api/doxygen/inherit_graph_182.svg   |    15 +-
 docs/reference/api/doxygen/inherit_graph_183.svg   |    15 +-
 docs/reference/api/doxygen/inherit_graph_184.svg   |    15 +-
 docs/reference/api/doxygen/inherit_graph_185.svg   |    16 +-
 docs/reference/api/doxygen/inherit_graph_186.svg   |    15 +-
 docs/reference/api/doxygen/inherit_graph_187.svg   |    15 +-
 docs/reference/api/doxygen/inherit_graph_188.svg   |    30 +-
 docs/reference/api/doxygen/inherit_graph_189.svg   |    12 +-
 docs/reference/api/doxygen/inherit_graph_190.svg   |    29 +-
 docs/reference/api/doxygen/inherit_graph_191.svg   |    17 +-
 docs/reference/api/doxygen/inherit_graph_192.svg   |    15 +-
 docs/reference/api/doxygen/inherit_graph_193.svg   |    29 +-
 docs/reference/api/doxygen/inherit_graph_194.svg   |    12 +-
 docs/reference/api/doxygen/inherit_graph_195.svg   |    30 +-
 docs/reference/api/doxygen/inherit_graph_196.svg   |    15 +-
 docs/reference/api/doxygen/inherit_graph_197.svg   |    15 +-
 docs/reference/api/doxygen/inherit_graph_198.svg   |    15 +-
 docs/reference/api/doxygen/inherit_graph_199.svg   |    12 +-
 docs/reference/api/doxygen/inherit_graph_200.svg   |    15 +-
 docs/reference/api/doxygen/inherit_graph_201.svg   |    16 +-
 docs/reference/api/doxygen/inherit_graph_202.svg   |    15 +-
 docs/reference/api/doxygen/inherit_graph_203.svg   |    12 +-
 docs/reference/api/doxygen/inherit_graph_204.svg   |    15 +-
 docs/reference/api/doxygen/inherit_graph_205.svg   |    14 +-
 docs/reference/api/doxygen/inherit_graph_206.svg   |    16 +-
 docs/reference/api/doxygen/inherit_graph_207.svg   |   124 +-
 docs/reference/api/doxygen/inherit_graph_208.svg   |    78 +-
 docs/reference/api/doxygen/inherit_graph_209.svg   |    14 +-
 docs/reference/api/doxygen/inherit_graph_210.svg   |    15 +-
 docs/reference/api/doxygen/inherit_graph_211.svg   |    20 +-
 docs/reference/api/doxygen/inherit_graph_212.svg   |   124 +-
 docs/reference/api/doxygen/inherit_graph_213.svg   |    78 +-
 docs/reference/api/doxygen/inherit_graph_214.svg   |    14 +-
 docs/reference/api/doxygen/inherit_graph_215.svg   |    30 +-
 docs/reference/api/doxygen/inherit_graph_216.svg   |    30 +-
 docs/reference/api/doxygen/inherit_graph_217.svg   |    12 +-
 docs/reference/api/doxygen/inherit_graph_218.svg   |    12 +-
 docs/reference/api/doxygen/inherit_graph_219.svg   |    12 +-
 docs/reference/api/doxygen/inherit_graph_220.svg   |    15 +-
 docs/reference/api/doxygen/inherit_graph_221.svg   |    15 +-
 docs/reference/api/doxygen/inherit_graph_222.svg   |    12 +-
 docs/reference/api/doxygen/inherit_graph_223.svg   |    18 +-
 docs/reference/api/doxygen/inherit_graph_224.svg   |    15 +-
 docs/reference/api/doxygen/inherit_graph_225.svg   |    12 +-
 docs/reference/api/doxygen/inherit_graph_226.svg   |    15 +-
 docs/reference/api/doxygen/inherit_graph_227.svg   |    30 +-
 docs/reference/api/doxygen/inherit_graph_228.svg   |    30 +-
 docs/reference/api/doxygen/inherit_graph_229.svg   |    12 +-
 docs/reference/api/doxygen/inherit_graph_230.svg   |    12 +-
 docs/reference/api/doxygen/inherit_graph_231.svg   |    12 +-
 docs/reference/api/doxygen/inherit_graph_232.svg   |    12 +-
 docs/reference/api/doxygen/inherit_graph_233.svg   |    12 +-
 docs/reference/api/doxygen/inherit_graph_234.svg   |    22 -
 docs/reference/api/doxygen/inherit_graph_235.svg   |    22 -
 docs/reference/api/doxygen/inherit_graph_236.svg   |    22 -
 docs/reference/api/doxygen/inherit_graph_237.svg   |    22 -
 docs/reference/api/doxygen/inherit_graph_238.svg   |    22 -
 docs/reference/api/doxygen/inherit_graph_239.svg   |    22 -
 docs/reference/api/doxygen/inherit_graph_240.svg   |    22 -
 docs/reference/api/doxygen/inherit_graph_241.svg   |    22 -
 docs/reference/api/doxygen/inherit_graph_242.svg   |    22 -
 docs/reference/api/doxygen/inherit_graph_243.svg   |    22 -
 docs/reference/api/doxygen/inherit_graph_244.svg   |    22 -
 docs/reference/api/doxygen/inherit_graph_245.svg   |    22 -
 docs/reference/api/doxygen/inherit_graph_26.svg    |    12 +-
 docs/reference/api/doxygen/inherit_graph_27.svg    |    12 +-
 docs/reference/api/doxygen/inherit_graph_28.svg    |    12 +-
 docs/reference/api/doxygen/inherit_graph_29.svg    |    12 +-
 docs/reference/api/doxygen/inherit_graph_30.svg    |    15 +-
 docs/reference/api/doxygen/inherit_graph_31.svg    |    14 +-
 docs/reference/api/doxygen/inherit_graph_32.svg    |    15 +-
 docs/reference/api/doxygen/inherit_graph_33.svg    |    12 +-
 docs/reference/api/doxygen/inherit_graph_34.svg    |    15 +-
 docs/reference/api/doxygen/inherit_graph_35.svg    |    15 +-
 docs/reference/api/doxygen/inherit_graph_36.svg    |    15 +-
 docs/reference/api/doxygen/inherit_graph_37.svg    |    14 +-
 docs/reference/api/doxygen/inherit_graph_38.svg    |     4 +-
 docs/reference/api/doxygen/inherit_graph_39.svg    |    14 +-
 docs/reference/api/doxygen/inherit_graph_40.svg    |     4 +-
 docs/reference/api/doxygen/inherit_graph_41.svg    |    62 +-
 docs/reference/api/doxygen/inherit_graph_42.svg    |    62 +-
 docs/reference/api/doxygen/inherit_graph_43.svg    |     4 +-
 docs/reference/api/doxygen/inherit_graph_44.svg    |    31 +-
 docs/reference/api/doxygen/inherit_graph_45.svg    |    33 +-
 docs/reference/api/doxygen/inherit_graph_46.svg    |    28 +-
 docs/reference/api/doxygen/inherit_graph_47.svg    |    12 +-
 docs/reference/api/doxygen/inherit_graph_48.svg    |    17 +-
 docs/reference/api/doxygen/inherit_graph_49.svg    |     4 +-
 docs/reference/api/doxygen/inherit_graph_50.svg    |    17 +-
 docs/reference/api/doxygen/inherit_graph_51.svg    |    12 +-
 docs/reference/api/doxygen/inherit_graph_52.svg    |    14 +-
 docs/reference/api/doxygen/inherit_graph_53.svg    |    17 +-
 docs/reference/api/doxygen/inherit_graph_54.svg    |    17 +-
 docs/reference/api/doxygen/inherit_graph_55.svg    |    17 +-
 docs/reference/api/doxygen/inherit_graph_56.svg    |    14 +-
 docs/reference/api/doxygen/inherit_graph_57.svg    |    15 +-
 docs/reference/api/doxygen/inherit_graph_58.svg    |    14 +-
 docs/reference/api/doxygen/inherit_graph_59.svg    |     4 +-
 docs/reference/api/doxygen/inherit_graph_60.svg    |    17 +-
 docs/reference/api/doxygen/inherit_graph_61.svg    |    15 +-
 docs/reference/api/doxygen/inherit_graph_62.svg    |    15 +-
 docs/reference/api/doxygen/inherit_graph_63.svg    |    15 +-
 docs/reference/api/doxygen/inherit_graph_64.svg    |    15 +-
 docs/reference/api/doxygen/inherit_graph_65.svg    |    14 +-
 docs/reference/api/doxygen/inherit_graph_66.svg    |    14 +-
 docs/reference/api/doxygen/inherit_graph_67.svg    |    16 +-
 docs/reference/api/doxygen/inherit_graph_68.svg    |    12 +-
 docs/reference/api/doxygen/inherit_graph_69.svg    |    16 +-
 docs/reference/api/doxygen/inherit_graph_70.svg    |    12 +-
 docs/reference/api/doxygen/inherit_graph_71.svg    |    15 +-
 docs/reference/api/doxygen/inherit_graph_72.svg    |    14 +-
 docs/reference/api/doxygen/inherit_graph_73.svg    |    14 +-
 docs/reference/api/doxygen/inherit_graph_74.svg    |    15 +-
 docs/reference/api/doxygen/inherit_graph_75.svg    |    30 +-
 docs/reference/api/doxygen/inherit_graph_76.svg    |    16 +-
 docs/reference/api/doxygen/inherit_graph_77.svg    |    45 +-
 docs/reference/api/doxygen/inherit_graph_78.svg    |    15 +-
 docs/reference/api/doxygen/inherit_graph_79.svg    |    14 +-
 docs/reference/api/doxygen/inherit_graph_80.svg    |    44 +-
 docs/reference/api/doxygen/inherit_graph_81.svg    |    15 +-
 docs/reference/api/doxygen/inherit_graph_82.svg    |    30 +-
 docs/reference/api/doxygen/inherit_graph_83.svg    |    15 +-
 docs/reference/api/doxygen/inherit_graph_84.svg    |    39 +-
 docs/reference/api/doxygen/inherit_graph_85.svg    |    14 +-
 docs/reference/api/doxygen/inherit_graph_86.svg    |    29 +-
 docs/reference/api/doxygen/inherit_graph_87.svg    |    45 +-
 docs/reference/api/doxygen/inherit_graph_88.svg    |    12 +-
 docs/reference/api/doxygen/inherit_graph_89.svg    |    12 +-
 docs/reference/api/doxygen/inherit_graph_90.svg    |    14 +-
 docs/reference/api/doxygen/inherit_graph_91.svg    |    30 +-
 docs/reference/api/doxygen/inherit_graph_92.svg    |    15 +-
 docs/reference/api/doxygen/inherit_graph_93.svg    |    32 +-
 docs/reference/api/doxygen/inherit_graph_94.svg    |    46 +-
 docs/reference/api/doxygen/inherit_graph_95.svg    |    15 +-
 docs/reference/api/doxygen/inherit_graph_96.svg    |    16 +-
 docs/reference/api/doxygen/inherit_graph_97.svg    |    14 +-
 docs/reference/api/doxygen/inherit_graph_98.svg    |    15 +-
 docs/reference/api/doxygen/inherit_graph_99.svg    |    17 +-
 docs/reference/api/doxygen/inherits.html           |   418 +-
 docs/reference/api/doxygen/ir_2adt_8h.html         |     2 +-
 .../api/doxygen/ir_2adt_8h__dep__incl.svg          |  1028 +-
 .../api/doxygen/ir_2attrs_8h__dep__incl.svg        |   138 +-
 .../api/doxygen/ir_2expr_8h__dep__incl.svg         |   274 +-
 ...uilder_2ir_2frame_8h.html => ir_2frame_8h.html} |     6 +-
 ...__dep__incl.svg => ir_2frame_8h__dep__incl.svg} |     2 +-
 ..._2frame_8h__incl.svg => ir_2frame_8h__incl.svg} |     0
 .../reference/api/doxygen/ir_2frame_8h_source.html |    96 +
 docs/reference/api/doxygen/ir_2function_8h.html    |     2 +-
 .../api/doxygen/ir_2function_8h__dep__incl.svg     |   888 +-
 docs/reference/api/doxygen/ir_2ir_8h.html          |     2 +-
 docs/reference/api/doxygen/ir_2ir_8h__incl.svg     |     2 +-
 docs/reference/api/doxygen/ir_2ir_8h_source.html   |     4 +-
 docs/reference/api/doxygen/ir_2module_8h.html      |     2 +-
 .../api/doxygen/ir_2module_8h__dep__incl.svg       |  1070 +-
 .../api/doxygen/ir_2span_8h__dep__incl.svg         |   346 +-
 .../api/doxygen/ir_2type_8h__dep__incl.svg         |   378 +-
 .../doxygen/ir__builder_2ir_2frame_8h_source.html  |    96 -
 .../doxygen/ir__builder_2tir_2frame_8h_source.html |   206 -
 docs/reference/api/doxygen/ir__docsifier_8h.html   |    43 +-
 ...p__incl.svg => ir__docsifier_8h__dep__incl.svg} |    30 +-
 .../api/doxygen/ir__docsifier_8h__incl.svg         |  2496 +++--
 .../api/doxygen/ir__docsifier_8h_source.html       |    79 +-
 ...able_8h.html => ir__docsifier__functor_8h.html} |    30 +-
 .../ir__docsifier__functor_8h__dep__incl.svg       |    53 +
 .../doxygen/ir__docsifier__functor_8h__incl.svg    |  1269 +++
 .../doxygen/ir__docsifier__functor_8h_source.html  |    90 +
 .../api/doxygen/layer__norm_8h_source.html         |     4 +-
 .../doxygen/local__response__norm_8h_source.html   |     2 +-
 docs/reference/api/doxygen/map_8h.html             |     2 +-
 docs/reference/api/doxygen/map_8h__dep__incl.svg   |  1432 ++-
 docs/reference/api/doxygen/menudata.js             |    31 +-
 docs/reference/api/doxygen/namespacemembers_d.html |     9 +-
 .../api/doxygen/namespacemembers_func_g.html       |     7 +-
 .../api/doxygen/namespacemembers_func_m.html       |    15 +-
 .../api/doxygen/namespacemembers_func_r.html       |     5 +-
 .../api/doxygen/namespacemembers_func_s.html       |    13 +-
 docs/reference/api/doxygen/namespacemembers_g.html |    11 +-
 docs/reference/api/doxygen/namespacemembers_k.html |     5 +-
 docs/reference/api/doxygen/namespacemembers_m.html |    15 +-
 docs/reference/api/doxygen/namespacemembers_r.html |     7 +-
 docs/reference/api/doxygen/namespacemembers_s.html |    11 +-
 docs/reference/api/doxygen/namespacemembers_t.html |     3 +
 .../api/doxygen/namespacemembers_type.html         |    13 +-
 .../api/doxygen/namespacemembers_vars.html         |     3 -
 docs/reference/api/doxygen/namespacemembers_w.html |     5 +-
 docs/reference/api/doxygen/namespacetvm.html       |   520 +-
 .../api/doxygen/namespacetvm_1_1detail.html        |    12 -
 .../doxygen/namespacetvm_1_1script_1_1printer.html |   275 +-
 .../reference/api/doxygen/namespacetvm_1_1tir.html |    19 +
 docs/reference/api/doxygen/ndarray_8h.html         |     2 +-
 .../api/doxygen/ndarray_8h__dep__incl.svg          |  1275 ++-
 docs/reference/api/doxygen/nn_2bnn_8h_source.html  |     4 +-
 .../api/doxygen/nn_2pooling_8h_source.html         |     4 +-
 .../api/doxygen/nn_2softmax_8h_source.html         |     2 +-
 docs/reference/api/doxygen/node_8h.html            |     2 +-
 docs/reference/api/doxygen/node_8h__dep__incl.svg  |  1457 ++-
 docs/reference/api/doxygen/object_8h.html          |     2 +-
 .../reference/api/doxygen/object_8h__dep__incl.svg |  1823 ++--
 docs/reference/api/doxygen/object__path_8h.html    |     2 +-
 .../api/doxygen/object__path_8h__dep__incl.svg     |  1326 ++-
 docs/reference/api/doxygen/optional_8h.html        |     2 +-
 .../api/doxygen/optional_8h__dep__incl.svg         |  1885 ++--
 docs/reference/api/doxygen/packed__func_8h.html    |     2 +-
 .../api/doxygen/packed__func_8h__dep__incl.svg     |   350 +-
 docs/reference/api/doxygen/printer_2frame_8h.html  |   129 -
 .../api/doxygen/printer_2frame_8h__dep__incl.svg   |    58 -
 .../api/doxygen/printer_2frame_8h__incl.svg        |  1429 ---
 .../api/doxygen/printer_2frame_8h_source.html      |   103 -
 docs/reference/api/doxygen/printer_8h.html         |    26 +-
 docs/reference/api/doxygen/printer_8h__incl.svg    |  1592 +--
 docs/reference/api/doxygen/printer_8h_source.html  |    28 +-
 .../reference/api/doxygen/reduction_8h_source.html |     4 +-
 docs/reference/api/doxygen/reflection_8h.html      |     2 +-
 .../api/doxygen/reflection_8h__dep__incl.svg       |  1495 ++-
 .../api/doxygen/registry_8h__dep__incl.svg         |    24 +-
 docs/reference/api/doxygen/repr__printer_8h.html   |     2 +-
 .../api/doxygen/repr__printer_8h__dep__incl.svg    |  1487 ++-
 .../api/doxygen/runtime_2container_2adt_8h.html    |     2 +-
 .../runtime_2container_2adt_8h__dep__incl.svg      |   925 +-
 .../runtime_2container_2base_8h__dep__incl.svg     |   642 +-
 docs/reference/api/doxygen/runtime_2memory_8h.html |     2 +-
 .../api/doxygen/runtime_2memory_8h__dep__incl.svg  |  1742 +--
 docs/reference/api/doxygen/runtime_2module_8h.html |     2 +-
 .../api/doxygen/runtime_2module_8h__dep__incl.svg  |   350 +-
 .../script_2ir__builder_2base_8h__dep__incl.svg    |     4 +-
 docs/reference/api/doxygen/search/all_1.js         |     2 +-
 docs/reference/api/doxygen/search/all_10.js        |    24 +-
 docs/reference/api/doxygen/search/all_11.js        |     6 +-
 docs/reference/api/doxygen/search/all_13.js        |    21 +-
 docs/reference/api/doxygen/search/all_14.js        |    24 +-
 docs/reference/api/doxygen/search/all_15.js        |    37 +-
 docs/reference/api/doxygen/search/all_16.js        |     4 +-
 docs/reference/api/doxygen/search/all_17.js        |    18 +-
 docs/reference/api/doxygen/search/all_18.js        |     7 +-
 docs/reference/api/doxygen/search/all_2.js         |    10 +-
 docs/reference/api/doxygen/search/all_3.js         |     8 +-
 docs/reference/api/doxygen/search/all_4.js         |     5 +-
 docs/reference/api/doxygen/search/all_5.js         |    19 +-
 docs/reference/api/doxygen/search/all_6.js         |     8 +-
 docs/reference/api/doxygen/search/all_7.js         |     8 +-
 docs/reference/api/doxygen/search/all_8.js         |     9 +-
 docs/reference/api/doxygen/search/all_9.js         |     2 +-
 docs/reference/api/doxygen/search/all_a.js         |    17 +-
 docs/reference/api/doxygen/search/all_c.js         |     3 +-
 docs/reference/api/doxygen/search/all_e.js         |    11 +-
 docs/reference/api/doxygen/search/all_f.js         |     4 +-
 docs/reference/api/doxygen/search/classes_0.js     |     1 -
 docs/reference/api/doxygen/search/classes_1.js     |     2 +-
 docs/reference/api/doxygen/search/classes_10.js    |     6 +-
 docs/reference/api/doxygen/search/classes_11.js    |    22 +-
 docs/reference/api/doxygen/search/classes_13.js    |     9 +-
 docs/reference/api/doxygen/search/classes_2.js     |     1 -
 docs/reference/api/doxygen/search/classes_3.js     |     1 +
 docs/reference/api/doxygen/search/classes_7.js     |     2 +-
 docs/reference/api/doxygen/search/classes_8.js     |     1 +
 docs/reference/api/doxygen/search/classes_a.js     |     3 -
 docs/reference/api/doxygen/search/classes_f.js     |     4 +-
 docs/reference/api/doxygen/search/defines_8.js     |     1 +
 docs/reference/api/doxygen/search/files_11.js      |     1 -
 docs/reference/api/doxygen/search/files_3.js       |     1 -
 docs/reference/api/doxygen/search/files_5.js       |     2 +-
 docs/reference/api/doxygen/search/files_7.js       |     1 +
 docs/reference/api/doxygen/search/files_f.js       |     2 -
 docs/reference/api/doxygen/search/functions_1.js   |     9 +-
 docs/reference/api/doxygen/search/functions_10.js  |     2 +-
 docs/reference/api/doxygen/search/functions_12.js  |    11 +-
 docs/reference/api/doxygen/search/functions_13.js  |    12 +-
 docs/reference/api/doxygen/search/functions_14.js  |    14 +-
 docs/reference/api/doxygen/search/functions_15.js  |     2 +-
 docs/reference/api/doxygen/search/functions_16.js  |     6 +-
 docs/reference/api/doxygen/search/functions_17.js  |     2 -
 docs/reference/api/doxygen/search/functions_2.js   |     5 +-
 docs/reference/api/doxygen/search/functions_3.js   |     1 -
 docs/reference/api/doxygen/search/functions_4.js   |     9 +-
 docs/reference/api/doxygen/search/functions_5.js   |     8 +-
 docs/reference/api/doxygen/search/functions_6.js   |     3 +-
 docs/reference/api/doxygen/search/functions_7.js   |     9 +-
 docs/reference/api/doxygen/search/functions_9.js   |    10 +-
 docs/reference/api/doxygen/search/functions_d.js   |     4 +-
 docs/reference/api/doxygen/search/functions_e.js   |     2 +-
 docs/reference/api/doxygen/search/functions_f.js   |    22 +-
 docs/reference/api/doxygen/search/related_11.js    |     1 -
 docs/reference/api/doxygen/search/typedefs_10.js   |     2 +-
 docs/reference/api/doxygen/search/typedefs_11.js   |     3 -
 docs/reference/api/doxygen/search/typedefs_3.js    |     5 +-
 docs/reference/api/doxygen/search/typedefs_5.js    |     2 +-
 docs/reference/api/doxygen/search/typedefs_7.js    |     4 +-
 docs/reference/api/doxygen/search/typedefs_a.js    |     1 -
 docs/reference/api/doxygen/search/typedefs_b.js    |     1 -
 docs/reference/api/doxygen/search/typedefs_c.js    |     2 +-
 docs/reference/api/doxygen/search/typedefs_d.js    |     2 +-
 docs/reference/api/doxygen/search/typedefs_f.js    |     3 +-
 docs/reference/api/doxygen/search/variables_0.js   |     2 +-
 docs/reference/api/doxygen/search/variables_10.js  |     1 -
 docs/reference/api/doxygen/search/variables_11.js  |     2 +-
 docs/reference/api/doxygen/search/variables_14.js  |     2 +-
 docs/reference/api/doxygen/search/variables_2.js   |     1 +
 docs/reference/api/doxygen/search/variables_3.js   |     4 +-
 docs/reference/api/doxygen/search/variables_4.js   |     2 +
 docs/reference/api/doxygen/search/variables_6.js   |     1 +
 docs/reference/api/doxygen/search/variables_9.js   |     1 +
 docs/reference/api/doxygen/search/variables_a.js   |     1 -
 docs/reference/api/doxygen/search/variables_c.js   |     4 +-
 docs/reference/api/doxygen/search/variables_d.js   |     2 +-
 docs/reference/api/doxygen/search/variables_e.js   |     1 +
 docs/reference/api/doxygen/serializer_8h.html      |     2 +-
 .../api/doxygen/serializer_8h__dep__incl.svg       |  1261 ++-
 docs/reference/api/doxygen/shape__tuple_8h.html    |     2 +-
 .../api/doxygen/shape__tuple_8h__dep__incl.svg     |  1274 ++-
 docs/reference/api/doxygen/source__map_8h.html     |     2 +-
 .../api/doxygen/source__map_8h__dep__incl.svg      |  1072 +-
 docs/reference/api/doxygen/stmt_8h__dep__incl.svg  |     2 +-
 docs/reference/api/doxygen/stmt_8h_source.html     |     2 +-
 .../api/doxygen/strided__slice_8h_source.html      |     2 +-
 .../reference/api/doxygen/string_8h__dep__incl.svg |   414 +-
 ...m_1_1detail_1_1TracedObjectWrapperSelector.html |    88 -
 ..._01Array_3_01T_01_4_00_01true_01_4-members.html |    81 -
 ...lector_3_01Array_3_01T_01_4_00_01true_01_4.html |   114 -
 ...rray_3_01T_01_4_00_01true_01_4__coll__graph.svg |    25 -
 ...p_3_01K_00_01V_01_4_00_01true_01_4-members.html |    81 -
 ...r_3_01Map_3_01K_00_01V_01_4_00_01true_01_4.html |   114 -
 ...01K_00_01V_01_4_00_01true_01_4__coll__graph.svg |    25 -
 ...Optional_3_01T_01_4_00_01true_01_4-members.html |    81 -
 ...tor_3_01Optional_3_01T_01_4_00_01true_01_4.html |   114 -
 ...onal_3_01T_01_4_00_01true_01_4__coll__graph.svg |    25 -
 ...bjectWrapperSelector_3_01T_00_01false_01_4.html |   114 -
 ...Selector_3_01T_00_01false_01_4__coll__graph.svg |    24 -
 ...apperSelector_3_01T_00_01true_01_4-members.html |    81 -
 ...ObjectWrapperSelector_3_01T_00_01true_01_4.html |   114 -
 ...rSelector_3_01T_00_01true_01_4__coll__graph.svg |    24 -
 ...1_1TracedObjectWrapperSelector__coll__graph.svg |    24 -
 ...tructtvm_1_1runtime_1_1is__valid__iterator.html |     2 +-
 ...m_1_1script_1_1printer_1_1Default-members.html} |    18 +-
 .../structtvm_1_1script_1_1printer_1_1Default.html |   274 +
 ..._1script_1_1printer_1_1Default__coll__graph.svg |    67 +
 ..._1IRDocsifierNode_1_1VariableInfo-members.html} |     9 +-
 ...printer_1_1IRDocsifierNode_1_1VariableInfo.html |   139 +
 ...RDocsifierNode_1_1VariableInfo__coll__graph.svg |   120 +
 .../api/doxygen/structural__equal_8h.html          |     2 +-
 .../doxygen/structural__equal_8h__dep__incl.svg    |  1311 +--
 .../reference/api/doxygen/structural__hash_8h.html |     2 +-
 .../api/doxygen/structural__hash_8h__dep__incl.svg |  1311 +--
 .../api/doxygen/tir_2analysis_8h_source.html       |     2 +-
 ...lder_2tir_2frame_8h.html => tir_2frame_8h.html} |     8 +-
 ..._dep__incl.svg => tir_2frame_8h__dep__incl.svg} |     0
 ...2frame_8h__incl.svg => tir_2frame_8h__incl.svg} |     2 +-
 .../api/doxygen/tir_2frame_8h_source.html          |   206 +
 docs/reference/api/doxygen/tir_2ir_8h.html         |     2 +-
 docs/reference/api/doxygen/tir_2ir_8h__incl.svg    |     4 +-
 docs/reference/api/doxygen/tir_2ir_8h_source.html  |     4 +-
 docs/reference/api/doxygen/tir_2op_8h.html         |    28 +-
 docs/reference/api/doxygen/tir_2op_8h_source.html  |   116 +-
 .../api/doxygen/tir_2op__attr__types_8h.html       |     3 +
 .../doxygen/tir_2op__attr__types_8h_source.html    |     6 +-
 docs/reference/api/doxygen/topi_2nn_8h_source.html |     6 +-
 .../api/doxygen/topi_2transform_8h_source.html     |     4 +-
 docs/reference/api/doxygen/traced__object_8h.html  |   173 -
 .../api/doxygen/traced__object_8h__dep__incl.svg   |   153 -
 .../api/doxygen/traced__object_8h__incl.svg        |  1148 --
 .../api/doxygen/traced__object_8h_source.html      |   168 -
 .../api/doxygen/traced__object__functor_8h.html    |   145 -
 .../traced__object__functor_8h__dep__incl.svg      |    37 -
 .../doxygen/traced__object__functor_8h__incl.svg   |  1315 ---
 .../doxygen/traced__object__functor_8h_source.html |    99 -
 docs/reference/api/doxygen/var__table_8h__incl.svg |  1457 ---
 .../api/doxygen/var__table_8h_source.html          |   107 -
 docs/reference/api/doxygen/with_8h.html            |     5 +-
 docs/reference/api/doxygen/with_8h__dep__incl.svg  |   784 +-
 docs/reference/api/doxygen/with_8h_source.html     |     4 +-
 docs/reference/api/python/auto_scheduler.html      |     4 +-
 .../api/typedoc/classes/bytestreamreader.html      |    12 +-
 .../api/typedoc/classes/cachedcallstack.html       |    34 +-
 docs/reference/api/typedoc/classes/dldatatype.html |    12 +-
 docs/reference/api/typedoc/classes/dldevice.html   |    10 +-
 .../reference/api/typedoc/classes/environment.html |    12 +-
 docs/reference/api/typedoc/classes/ffilibrary.html |    20 +-
 .../api/typedoc/classes/graphexecutor.html         |    16 +-
 docs/reference/api/typedoc/classes/instance.html   |    40 +-
 docs/reference/api/typedoc/classes/memory.html     |    34 +-
 docs/reference/api/typedoc/classes/module.html     |    10 +-
 docs/reference/api/typedoc/classes/ndarray.html    |    22 +-
 .../api/typedoc/classes/packedfunccell.html        |     6 +-
 docs/reference/api/typedoc/classes/rpcserver.html  |    14 +-
 docs/reference/api/typedoc/classes/scalar.html     |     6 +-
 .../api/typedoc/classes/webgpucontext.html         |    12 +-
 docs/reference/api/typedoc/enums/argtypecode.html  |    30 +-
 .../api/typedoc/enums/aynccallbackcode.html        |     4 +-
 .../api/typedoc/enums/dldatatypecode.html          |     8 +-
 .../api/typedoc/enums/rpcserverstate.html          |    12 +-
 docs/reference/api/typedoc/enums/sizeof.html       |    18 +-
 docs/reference/api/typedoc/index.html              |   112 +-
 .../api/typedoc/interfaces/disposable.html         |     2 +-
 .../api/typedoc/interfaces/functioninfo.html       |     6 +-
 .../api/typedoc/interfaces/libraryprovider.html    |     4 +-
 docs/searchindex.js                                |     2 +-
 .../vta/tutorials/autotvm/sg_execution_times.html  |     4 +-
 .../tutorials/frontend/deploy_classification.html  |     2 +-
 .../vta/tutorials/frontend/deploy_detection.html   |     2 +-
 .../vta/tutorials/frontend/sg_execution_times.html |    10 +-
 .../vta/tutorials/optimize/sg_execution_times.html |     6 +-
 docs/topic/vta/tutorials/sg_execution_times.html   |     6 +-
 docs/tutorial/auto_scheduler_matmul_x86.html       |     4 +-
 docs/tutorial/autotvm_matmul_x86.html              |    20 +-
 docs/tutorial/autotvm_relay_x86.html               |   269 +-
 docs/tutorial/cross_compilation_and_rpc.html       |     2 +-
 docs/tutorial/intro_topi.html                      |     2 +-
 docs/tutorial/sg_execution_times.html              |    24 +-
 docs/tutorial/tensor_expr_get_started.html         |    44 +-
 862 files changed, 52204 insertions(+), 71015 deletions(-)

diff --git a/docs/_images/sphx_glr_micro_train_001.png b/docs/_images/sphx_glr_micro_train_001.png
index 749f250b96..58230570fb 100644
Binary files a/docs/_images/sphx_glr_micro_train_001.png and b/docs/_images/sphx_glr_micro_train_001.png differ
diff --git a/docs/_images/sphx_glr_micro_train_thumb.png b/docs/_images/sphx_glr_micro_train_thumb.png
index eb961b1b9c..c7f45c5bc4 100644
Binary files a/docs/_images/sphx_glr_micro_train_thumb.png and b/docs/_images/sphx_glr_micro_train_thumb.png differ
diff --git a/docs/_sources/how_to/compile_models/from_darknet.rst.txt b/docs/_sources/how_to/compile_models/from_darknet.rst.txt
index 85fd99b5ea..85b3c0b75f 100644
--- a/docs/_sources/how_to/compile_models/from_darknet.rst.txt
+++ b/docs/_sources/how_to/compile_models/from_darknet.rst.txt
@@ -319,7 +319,7 @@ The process is no different from other examples.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  13.164 seconds)
+   **Total running time of the script:** ( 1 minutes  9.125 seconds)
 
 
 .. _sphx_glr_download_how_to_compile_models_from_darknet.py:
diff --git a/docs/_sources/how_to/compile_models/from_keras.rst.txt b/docs/_sources/how_to/compile_models/from_keras.rst.txt
index 5acbcba3a4..4984c4f526 100644
--- a/docs/_sources/how_to/compile_models/from_keras.rst.txt
+++ b/docs/_sources/how_to/compile_models/from_keras.rst.txt
@@ -232,7 +232,7 @@ Look up prediction top 1 index in 1000 class synset.
  .. code-block:: none
 
     Relay top-1 id: 285, class name: Egyptian cat
-
    1/1 [==============================] - ETA: 0s
    1/1 [==============================] - 1s 1s/step
+
    1/1 [==============================] - ETA: 0s
    1/1 [==============================] - 1s 952ms/step
     Keras top-1 id: 285, class name: Egyptian cat
 
 
diff --git a/docs/_sources/how_to/compile_models/from_mxnet.rst.txt b/docs/_sources/how_to/compile_models/from_mxnet.rst.txt
index d95127f086..7b564a2e36 100644
--- a/docs/_sources/how_to/compile_models/from_mxnet.rst.txt
+++ b/docs/_sources/how_to/compile_models/from_mxnet.rst.txt
@@ -116,7 +116,7 @@ In this section, we download a pretrained imagenet model and classify an image.
 
  .. code-block:: none
 
-    Downloading /workspace/.mxnet/models/resnet18_v1-a0666292.zipbfa937ef-5d91-43d5-9078-74c75e265eca from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/resnet18_v1-a0666292.zip...
+    Downloading /workspace/.mxnet/models/resnet18_v1-a0666292.zip7f435239-a9d5-4a5c-bb6f-e647f08a3a07 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/resnet18_v1-a0666292.zip...
     x (1, 3, 224, 224)
 
 
diff --git a/docs/_sources/how_to/compile_models/from_oneflow.rst.txt b/docs/_sources/how_to/compile_models/from_oneflow.rst.txt
index 341c5635ca..7e52993cd0 100644
--- a/docs/_sources/how_to/compile_models/from_oneflow.rst.txt
+++ b/docs/_sources/how_to/compile_models/from_oneflow.rst.txt
@@ -121,7 +121,7 @@ Load a pretrained OneFlow model and save model
  .. code-block:: none
 
     Downloading: "https://oneflow-public.oss-cn-beijing.aliyuncs.com/model_zoo/flowvision/classification/ResNet/resnet18.zip" to /workspace/.oneflow/flowvision_cache/resnet18.zip
-
      0%|          | 0.00/41.5M [00:00<?, ?B/s]
     19%|#9        | 7.99M/41.5M [00:00<00:00, 52.1MB/s]
     35%|###4      | 14.3M/41.5M [00:00<00:00, 54.6MB/s]
     47%|####7     | 19.6M/41.5M [00:00<00:00, 40.5MB/s]
     58%|#####7    | 24.0M/41.5M [00:00<00:00, 34.3MB/s]
     77%|#######7  | 32.0M/41.5M [00:00<00:00, 36.8MB/s]
     92%|#########2| 38.3M/41.5M [00:00<00:00, 40.7MB/s]
    100%|##########| 41.5M/41.5M [00:01<00:00, 42.2MB/s]
+
      0%|          | 0.00/41.5M [00:00<?, ?B/s]
     19%|#9        | 7.99M/41.5M [00:00<00:00, 52.1MB/s]
     39%|###8      | 16.0M/41.5M [00:00<00:00, 57.8MB/s]
     58%|#####7    | 24.0M/41.5M [00:00<00:00, 65.0MB/s]
     87%|########6 | 36.0M/41.5M [00:00<00:00, 85.6MB/s]
    100%|##########| 41.5M/41.5M [00:00<00:00, 71.4MB/s]
 
 
 
diff --git a/docs/_sources/how_to/compile_models/from_pytorch.rst.txt b/docs/_sources/how_to/compile_models/from_pytorch.rst.txt
index 31c3125a6f..b28ef6f82c 100644
--- a/docs/_sources/how_to/compile_models/from_pytorch.rst.txt
+++ b/docs/_sources/how_to/compile_models/from_pytorch.rst.txt
@@ -102,7 +102,7 @@ Load a pretrained PyTorch model
     /venv/apache-tvm-py3.7/lib/python3.7/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and will be removed in 0.15. The current behavior is equivalent to passing `weights=ResNet18_Weights.IMAGENET1K_V1`. You can also use `weights=ResNet18_Weights.DEFAULT` to get the most up-to-date weights.
       warnings.warn(msg)
     Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /workspace/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
-
      0%|          | 0.00/44.7M [00:00<?, ?B/s]
     27%|##7       | 12.2M/44.7M [00:00<00:00, 128MB/s]
     55%|#####4    | 24.5M/44.7M [00:00<00:00, 98.9MB/s]
     81%|########1 | 36.2M/44.7M [00:00<00:00, 108MB/s] 
    100%|##########| 44.7M/44.7M [00:00<00:00, 106MB/s]
+
      0%|          | 0.00/44.7M [00:00<?, ?B/s]
     33%|###3      | 14.9M/44.7M [00:00<00:00, 156MB/s]
     67%|######6   | 29.8M/44.7M [00:00<00:00, 122MB/s]
     94%|#########3| 41.9M/44.7M [00:00<00:00, 109MB/s]
    100%|##########| 44.7M/44.7M [00:00<00:00, 97.2MB/s]
 
 
 
diff --git a/docs/_sources/how_to/compile_models/from_tensorflow.rst.txt b/docs/_sources/how_to/compile_models/from_tensorflow.rst.txt
index 2cb96a707b..cb2654a96f 100644
--- a/docs/_sources/how_to/compile_models/from_tensorflow.rst.txt
+++ b/docs/_sources/how_to/compile_models/from_tensorflow.rst.txt
@@ -425,7 +425,7 @@ Run the corresponding model on tensorflow
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  16.036 seconds)
+   **Total running time of the script:** ( 1 minutes  11.934 seconds)
 
 
 .. _sphx_glr_download_how_to_compile_models_from_tensorflow.py:
diff --git a/docs/_sources/how_to/compile_models/sg_execution_times.rst.txt b/docs/_sources/how_to/compile_models/sg_execution_times.rst.txt
index 6ac0ee7005..e687bf59a6 100644
--- a/docs/_sources/how_to/compile_models/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/compile_models/sg_execution_times.rst.txt
@@ -5,26 +5,26 @@
 
 Computation times
 =================
-**05:59.311** total execution time for **how_to_compile_models** files:
+**05:41.893** total execution time for **how_to_compile_models** files:
 
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_tensorflow.py` (``from_tensorflow.py``) | 01:16.036 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_tensorflow.py` (``from_tensorflow.py``) | 01:11.934 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_darknet.py` (``from_darknet.py``)       | 01:13.164 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_darknet.py` (``from_darknet.py``)       | 01:09.125 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_paddle.py` (``from_paddle.py``)         | 00:49.170 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_paddle.py` (``from_paddle.py``)         | 00:46.808 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_oneflow.py` (``from_oneflow.py``)       | 00:33.872 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_oneflow.py` (``from_oneflow.py``)       | 00:32.173 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_mxnet.py` (``from_mxnet.py``)           | 00:29.643 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_mxnet.py` (``from_mxnet.py``)           | 00:28.871 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_coreml.py` (``from_coreml.py``)         | 00:28.161 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_coreml.py` (``from_coreml.py``)         | 00:26.456 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_tflite.py` (``from_tflite.py``)         | 00:25.497 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_tflite.py` (``from_tflite.py``)         | 00:24.967 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_pytorch.py` (``from_pytorch.py``)       | 00:23.582 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_pytorch.py` (``from_pytorch.py``)       | 00:22.392 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_keras.py` (``from_keras.py``)           | 00:17.677 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_keras.py` (``from_keras.py``)           | 00:16.753 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_onnx.py` (``from_onnx.py``)             | 00:02.509 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_onnx.py` (``from_onnx.py``)             | 00:02.413 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/deploy_models/deploy_model_on_adreno.rst.txt b/docs/_sources/how_to/deploy_models/deploy_model_on_adreno.rst.txt
index 2423ba78d8..a83a5060ac 100644
--- a/docs/_sources/how_to/deploy_models/deploy_model_on_adreno.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_model_on_adreno.rst.txt
@@ -728,7 +728,7 @@ well as provides information about the model's performance
     Evaluate inference time cost...
     Execution time summary:
      mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)  
-     2549.2254    2548.3049    2558.3050    2546.9838      3.1227   
+     2691.7470    2691.0709    2695.0863    2689.2381      1.8076   
                
 
 
diff --git a/docs/_sources/how_to/deploy_models/deploy_model_on_android.rst.txt b/docs/_sources/how_to/deploy_models/deploy_model_on_android.rst.txt
index 5f538e8adf..dddecf7f16 100644
--- a/docs/_sources/how_to/deploy_models/deploy_model_on_android.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_model_on_android.rst.txt
@@ -437,7 +437,7 @@ Execute on TVM
     Evaluate inference time cost...
     Execution time summary:
      mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)  
-      16.5949      16.3838      17.2921      16.2440       0.4092   
+      16.2766      16.1532      16.9554      15.9501       0.3318   
                
 
 
diff --git a/docs/_sources/how_to/deploy_models/deploy_object_detection_pytorch.rst.txt b/docs/_sources/how_to/deploy_models/deploy_object_detection_pytorch.rst.txt
index 4620cd02b7..fd41481ec8 100644
--- a/docs/_sources/how_to/deploy_models/deploy_object_detection_pytorch.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_object_detection_pytorch.rst.txt
@@ -131,7 +131,7 @@ Load pre-trained maskrcnn from torchvision and do tracing
     /venv/apache-tvm-py3.7/lib/python3.7/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and will be removed in 0.15. The current behavior is equivalent to passing `weights=MaskRCNN_ResNet50_FPN_Weights.COCO_V1`. You can also use `weights=MaskRCNN_ResNet50_FPN_Weights.DEFAULT` to get the most up-to-date weights.
       warnings.warn(msg)
     Downloading: "https://download.pytorch.org/models/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth" to /workspace/.cache/torch/hub/checkpoints/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth
-
      0%|          | 0.00/170M [00:00<?, ?B/s]
      7%|6         | 11.4M/170M [00:00<00:01, 119MB/s]
     14%|#4        | 24.0M/170M [00:00<00:01, 109MB/s]
     20%|##        | 34.5M/170M [00:00<00:01, 95.7MB/s]
     28%|##8       | 48.1M/170M [00:00<00:01, 112MB/s] 
     35%|###4      | 59.1M/170M [00:00<00:01, 105MB/s]
     41%|####      | 69.3M/170M [00:00<00:01, 99.9MB/s]
     48%|####8     | 82.1M/170M [00:00<00:00, 108MB/s] 
     56%|#####5    | 94.3M/170M [00:00<00:00, 114MB/s]
     62%|######1   | 105M/170M [00:01<00:00, 96.9MB/s]
     68%|######7   | 115M/170M [00:01<00:00, 89.4MB/s]
     74%|#######4  | 126M/170M [00:01<00:00, 96.0MB/s]
     80%|########  | 136M/170M [00:01<00:00, 96.6MB/s]
     86%|########5 | 145M/170M [00:01<00:00, 94.2MB/s]
     94%|#########4| 160M/170M [00:01<00:00, 102MB/s] 
    100%|##########| 170M/170M [00:01<00:00, 104MB/s]
+
      0%|          | 0.00/170M [00:00<?, ?B/s]
      5%|4         | 7.99M/170M [00:00<00:02, 70.7MB/s]
     11%|#         | 18.1M/170M [00:00<00:01, 81.9MB/s]
     16%|#5        | 26.5M/170M [00:00<00:01, 84.6MB/s]
     20%|##        | 34.6M/170M [00:00<00:01, 81.2MB/s]
     25%|##4       | 42.4M/170M [00:00<00:02, 62.8MB/s]
     31%|###       | 52.1M/170M [00:00<00:01, 73.4MB/s]
     38%|###7      | 64.0M/170M [00:00<00:01, 74.0MB/s]
     43%|####3     | 73.4M/170M [00:01<00:01, 80.2MB/s]
     48%|####7     | 81.5M/170M [00:01<00:01, 79.8MB/s]
     53%|#####2    | 89.7M/170M [00:01<00:01, 81.5MB/s]
     58%|#####7    | 98.4M/170M [00:01<00:00, 84.4MB/s]
     64%|######3   | 108M/170M [00:01<00:00, 89.5MB/s] 
     71%|#######   | 120M/170M [00:01<00:00, 82.9MB/s]
     78%|#######7  | 132M/170M [00:01<00:00, 93.3MB/s]
     83%|########3 | 141M/170M [00:01<00:00, 83.3MB/s]
     89%|########9 | 152M/170M [00:01<00:00, 79.3MB/s]
     94%|#########4| 160M/170M [00:02<00:00, 80.0MB/s]
     99%|#########8| 168M/170M [00:02<00:00, 72.9MB/s]
    100%|##########| 170M/170M [00:02<00:00, 78.5MB/s]
     /venv/apache-tvm-py3.7/lib/python3.7/site-packages/torch/nn/functional.py:3897: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
       for i in range(dim)
     /venv/apache-tvm-py3.7/lib/python3.7/site-packages/torchvision/models/detection/anchor_utils.py:124: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
@@ -300,7 +300,7 @@ Get boxes with score larger than 0.9
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 3 minutes  32.060 seconds)
+   **Total running time of the script:** ( 3 minutes  17.253 seconds)
 
 
 .. _sphx_glr_download_how_to_deploy_models_deploy_object_detection_pytorch.py:
diff --git a/docs/_sources/how_to/deploy_models/deploy_prequantized.rst.txt b/docs/_sources/how_to/deploy_models/deploy_prequantized.rst.txt
index cbca39f11d..27e37abed1 100644
--- a/docs/_sources/how_to/deploy_models/deploy_prequantized.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_prequantized.rst.txt
@@ -240,7 +240,7 @@ training. Other models require a full post training calibration.
     /venv/apache-tvm-py3.7/lib/python3.7/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and will be removed in 0.15. The current behavior is equivalent to passing `weights=MobileNet_V2_Weights.IMAGENET1K_V1`. You can also use `weights=MobileNet_V2_Weights.DEFAULT` to get the most up-to-date weights.
       warnings.warn(msg)
     Downloading: "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth" to /workspace/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth
-
      0%|          | 0.00/13.6M [00:00<?, ?B/s]
     59%|#####8    | 7.99M/13.6M [00:00<00:00, 52.3MB/s]
     96%|#########5| 13.0M/13.6M [00:00<00:00, 46.5MB/s]
    100%|##########| 13.6M/13.6M [00:00<00:00, 49.0MB/s]
+
      0%|          | 0.00/13.6M [00:00<?, ?B/s]
     59%|#####8    | 7.99M/13.6M [00:00<00:00, 68.6MB/s]
    100%|##########| 13.6M/13.6M [00:00<00:00, 95.9MB/s]
 
 
 
@@ -422,7 +422,7 @@ Here we give an example of how to measure performance of TVM compiled models.
 
     Execution time summary:
      mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)  
-      90.4948      90.4305      91.5414      90.2734       0.2155   
+      90.5975      90.4758      94.0629      90.1740       0.5058   
                
 
 
@@ -471,7 +471,7 @@ TODO
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  10.336 seconds)
+   **Total running time of the script:** ( 1 minutes  6.859 seconds)
 
 
 .. _sphx_glr_download_how_to_deploy_models_deploy_prequantized.py:
diff --git a/docs/_sources/how_to/deploy_models/deploy_prequantized_tflite.rst.txt b/docs/_sources/how_to/deploy_models/deploy_prequantized_tflite.rst.txt
index 30feb9e84e..01b786b58c 100644
--- a/docs/_sources/how_to/deploy_models/deploy_prequantized_tflite.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_prequantized_tflite.rst.txt
@@ -436,7 +436,7 @@ Here we give an example of how to measure performance of TVM compiled models.
 
     Execution time summary:
      mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)  
-      121.9265     121.7962     124.2560     120.8635      0.6576   
+      120.1702     120.1580     121.6625     119.2851      0.4108   
                
 
 
@@ -473,7 +473,7 @@ Here we give an example of how to measure performance of TVM compiled models.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 2 minutes  26.227 seconds)
+   **Total running time of the script:** ( 2 minutes  28.878 seconds)
 
 
 .. _sphx_glr_download_how_to_deploy_models_deploy_prequantized_tflite.py:
diff --git a/docs/_sources/how_to/deploy_models/deploy_quantized.rst.txt b/docs/_sources/how_to/deploy_models/deploy_quantized.rst.txt
index e4be74286f..3af8f5ed63 100644
--- a/docs/_sources/how_to/deploy_models/deploy_quantized.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_quantized.rst.txt
@@ -257,7 +257,7 @@ We create a Relay VM to build and execute the model.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  34.204 seconds)
+   **Total running time of the script:** ( 1 minutes  22.062 seconds)
 
 
 .. _sphx_glr_download_how_to_deploy_models_deploy_quantized.py:
diff --git a/docs/_sources/how_to/deploy_models/deploy_ssd_gluoncv.rst.txt b/docs/_sources/how_to/deploy_models/deploy_ssd_gluoncv.rst.txt
index 24cb86797c..ee5e416b1a 100644
--- a/docs/_sources/how_to/deploy_models/deploy_ssd_gluoncv.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_ssd_gluoncv.rst.txt
@@ -170,7 +170,7 @@ Convert and compile model for CPU.
             data: None
       input_sym_arg_type = in_param.infer_type()[0]
     Downloading /workspace/.mxnet/models/ssd_512_resnet50_v1_voc-9c8b225a.zip from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/ssd_512_resnet50_v1_voc-9c8b225a.zip...
-
      0%|          | 0/132723 [00:00<?, ?KB/s]
      6%|5         | 7466/132723 [00:00<00:01, 74646.19KB/s]
     12%|#2        | 16203/132723 [00:00<00:01, 82113.91KB/s]
     19%|#8        | 24912/132723 [00:00<00:01, 84382.49KB/s]
     25%|##5       | 33686/132723 [00:00<00:01, 85706.02KB/s]
     32%|###1      | 42424/132723 [00:00<00:01, 86306.24KB/s]
     38%|###8      | 51055/132723 [00:00<00:01, 80413.71KB/s]
     45%|####5     | 59758/132723 [00:00<00:00, 82473.48KB/s]
     51%|#####1    | 68062/132723 [00:00<00:00, 81859.48KB/s]
     58%|#####7    | 76858/132723 [00:00<00:00, 83708.60KB/s]
     65%|######4   | 85625/132723 [00:01<00:00, 84905.56KB/s]
     71%|#######1  | 94323/132723 [00:01<00:00, 85528.31KB/s]
     78%|#######7  | 103128/132723 [00:01<00:00, 86286.46KB/s]
     84%|########4 | 111897/132723 [00:01<00:00, 86707.58KB/s]
     91%|######### | 120578/132723 [00:01<00:00, 61654.37KB/s]
     97%|#########7| 129242/132723 [00:01<00:00, 67496.29KB/s]
    100%|##########| 132723/132723 [00:01<00:00, 77975.82KB/s]
+
      0%|          | 0/132723 [00:00<?, ?KB/s]
      4%|4         | 5744/132723 [00:00<00:02, 57434.17KB/s]
     10%|#         | 13360/132723 [00:00<00:01, 68447.02KB/s]
     16%|#5        | 21050/132723 [00:00<00:01, 72302.70KB/s]
     22%|##1       | 28715/132723 [00:00<00:01, 74001.48KB/s]
     27%|##7       | 36427/132723 [00:00<00:01, 75124.31KB/s]
     33%|###3      | 44027/132723 [00:00<00:01, 75415.86KB/s]
     39%|###9      | 51810/132723 [00:00<00:01, 76203.04KB/s]
     45%|####4     | 59498/132723 [00:00<00:00, 76407.88KB/s]
     51%|#####     | 67235/132723 [00:00<00:00, 76706.62KB/s]
     56%|#####6    | 74906/132723 [00:01<00:00, 76451.74KB/s]
     62%|######2   | 82595/132723 [00:01<00:00, 76582.84KB/s]
     68%|######8   | 90293/132723 [00:01<00:00, 76701.75KB/s]
     74%|#######3  | 97997/132723 [00:01<00:00, 76800.41KB/s]
     80%|#######9  | 105731/132723 [00:01<00:00, 76960.83KB/s]
     85%|########5 | 113431/132723 [00:01<00:00, 76971.44KB/s]
     91%|#########1| 121129/132723 [00:01<00:00, 76872.58KB/s]
     97%|#########7| 128974/132723 [00:01<00:00, 77345.07KB/s]
    100%|##########| 132723/132723 [00:01<00:00, 75944.35KB/s]
 
 
 
@@ -246,7 +246,7 @@ Display result
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 3 minutes  16.119 seconds)
+   **Total running time of the script:** ( 3 minutes  7.395 seconds)
 
 
 .. _sphx_glr_download_how_to_deploy_models_deploy_ssd_gluoncv.py:
diff --git a/docs/_sources/how_to/deploy_models/sg_execution_times.rst.txt b/docs/_sources/how_to/deploy_models/sg_execution_times.rst.txt
index 7ef64c9c0c..9287c6e2f2 100644
--- a/docs/_sources/how_to/deploy_models/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/deploy_models/sg_execution_times.rst.txt
@@ -5,26 +5,26 @@
 
 Computation times
 =================
-**14:21.989** total execution time for **how_to_deploy_models** files:
+**13:42.282** total execution time for **how_to_deploy_models** files:
 
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_object_detection_pytorch.py` (``deploy_object_detection_pytorch.py``) | 03:32.060 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_object_detection_pytorch.py` (``deploy_object_detection_pytorch.py``) | 03:17.253 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_ssd_gluoncv.py` (``deploy_ssd_gluoncv.py``)                           | 03:16.119 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_ssd_gluoncv.py` (``deploy_ssd_gluoncv.py``)                           | 03:07.395 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_prequantized_tflite.py` (``deploy_prequantized_tflite.py``)           | 02:26.227 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_prequantized_tflite.py` (``deploy_prequantized_tflite.py``)           | 02:28.878 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_quantized.py` (``deploy_quantized.py``)                               | 01:34.204 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_quantized.py` (``deploy_quantized.py``)                               | 01:22.062 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_prequantized.py` (``deploy_prequantized.py``)                         | 01:10.336 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_prequantized.py` (``deploy_prequantized.py``)                         | 01:06.859 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_model_on_adreno.py` (``deploy_model_on_adreno.py``)                   | 00:52.749 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_model_on_adreno.py` (``deploy_model_on_adreno.py``)                   | 00:53.523 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_model_on_android.py` (``deploy_model_on_android.py``)                 | 00:38.152 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_model_on_android.py` (``deploy_model_on_android.py``)                 | 00:35.880 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_model_on_nano.py` (``deploy_model_on_nano.py``)                       | 00:26.323 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_model_on_nano.py` (``deploy_model_on_nano.py``)                       | 00:25.446 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_model_on_rasp.py` (``deploy_model_on_rasp.py``)                       | 00:25.812 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_model_on_rasp.py` (``deploy_model_on_rasp.py``)                       | 00:24.979 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_how_to_deploy_models_deploy_sparse.py` (``deploy_sparse.py``)                                     | 00:00.007 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/extend_tvm/bring_your_own_datatypes.rst.txt b/docs/_sources/how_to/extend_tvm/bring_your_own_datatypes.rst.txt
index b82286c603..43e48bb118 100644
--- a/docs/_sources/how_to/extend_tvm/bring_your_own_datatypes.rst.txt
+++ b/docs/_sources/how_to/extend_tvm/bring_your_own_datatypes.rst.txt
@@ -476,7 +476,7 @@ First let us define two helper functions to get the mobilenet model and a cat im
 
  .. code-block:: none
 
-    Downloading /workspace/.mxnet/models/mobilenet0.25-9f83e440.zip4655517f-a9e6-4359-9032-f03849cb94c5 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/mobilenet0.25-9f83e440.zip...
+    Downloading /workspace/.mxnet/models/mobilenet0.25-9f83e440.zip0df57d2a-3121-4336-98e0-d7239a445491 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/mobilenet0.25-9f83e440.zip...
 
 
 
diff --git a/docs/_sources/how_to/extend_tvm/sg_execution_times.rst.txt b/docs/_sources/how_to/extend_tvm/sg_execution_times.rst.txt
index 496b9183bd..1683d0d3c8 100644
--- a/docs/_sources/how_to/extend_tvm/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/extend_tvm/sg_execution_times.rst.txt
@@ -5,14 +5,14 @@
 
 Computation times
 =================
-**00:51.083** total execution time for **how_to_extend_tvm** files:
+**00:47.946** total execution time for **how_to_extend_tvm** files:
 
 +-------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_extend_tvm_bring_your_own_datatypes.py` (``bring_your_own_datatypes.py``) | 00:47.390 | 0.0 MB |
+| :ref:`sphx_glr_how_to_extend_tvm_bring_your_own_datatypes.py` (``bring_your_own_datatypes.py``) | 00:44.458 | 0.0 MB |
 +-------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_extend_tvm_use_pass_instrument.py` (``use_pass_instrument.py``)           | 00:02.587 | 0.0 MB |
+| :ref:`sphx_glr_how_to_extend_tvm_use_pass_instrument.py` (``use_pass_instrument.py``)           | 00:02.443 | 0.0 MB |
 +-------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_extend_tvm_use_pass_infra.py` (``use_pass_infra.py``)                     | 00:01.097 | 0.0 MB |
+| :ref:`sphx_glr_how_to_extend_tvm_use_pass_infra.py` (``use_pass_infra.py``)                     | 00:01.038 | 0.0 MB |
 +-------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_extend_tvm_low_level_custom_pass.py` (``low_level_custom_pass.py``)       | 00:00.008 | 0.0 MB |
+| :ref:`sphx_glr_how_to_extend_tvm_low_level_custom_pass.py` (``low_level_custom_pass.py``)       | 00:00.007 | 0.0 MB |
 +-------------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/extend_tvm/use_pass_instrument.rst.txt b/docs/_sources/how_to/extend_tvm/use_pass_instrument.rst.txt
index c48e83c59e..7ef8c2c525 100644
--- a/docs/_sources/how_to/extend_tvm/use_pass_instrument.rst.txt
+++ b/docs/_sources/how_to/extend_tvm/use_pass_instrument.rst.txt
@@ -220,10 +220,10 @@ profile the execution time of each passes.
  .. code-block:: none
 
     Printing results of timing profile...
-    InferType: 7602us [7602us] (46.05%; 46.05%)
-    FoldScaleAxis: 8905us [9us] (53.95%; 53.95%)
-            FoldConstant: 8896us [1769us] (53.89%; 99.90%)
-                    InferType: 7127us [7127us] (43.18%; 80.12%)
+    InferType: 7289us [7289us] (46.47%; 46.47%)
+    FoldScaleAxis: 8397us [7us] (53.53%; 53.53%)
+            FoldConstant: 8389us [1725us] (53.49%; 99.91%)
+                    InferType: 6664us [6664us] (42.49%; 79.44%)
 
 
 
@@ -262,10 +262,10 @@ Refer to following sections and :py:func:`tvm.instrument.pass_instrument` for th
  .. code-block:: none
 
     Printing results of timing profile...
-    InferType: 7147us [7147us] (44.78%; 44.78%)
-    FoldScaleAxis: 8812us [8us] (55.22%; 55.22%)
-            FoldConstant: 8805us [1797us] (55.17%; 99.91%)
-                    InferType: 7007us [7007us] (43.91%; 79.59%)
+    InferType: 6687us [6687us] (44.81%; 44.81%)
+    FoldScaleAxis: 8235us [5us] (55.19%; 55.19%)
+            FoldConstant: 8230us [1705us] (55.15%; 99.94%)
+                    InferType: 6525us [6525us] (43.73%; 79.29%)
 
 
 
diff --git a/docs/_sources/how_to/optimize_operators/opt_conv_cuda.rst.txt b/docs/_sources/how_to/optimize_operators/opt_conv_cuda.rst.txt
index 4b37375c61..8a24b12fd5 100644
--- a/docs/_sources/how_to/optimize_operators/opt_conv_cuda.rst.txt
+++ b/docs/_sources/how_to/optimize_operators/opt_conv_cuda.rst.txt
@@ -344,7 +344,7 @@ latency of convolution.
 
  .. code-block:: none
 
-    Convolution: 49.932254 ms
+    Convolution: 54.204128 ms
 
 
 
diff --git a/docs/_sources/how_to/optimize_operators/opt_conv_tensorcore.rst.txt b/docs/_sources/how_to/optimize_operators/opt_conv_tensorcore.rst.txt
index 7354e2bbe7..1534167fac 100644
--- a/docs/_sources/how_to/optimize_operators/opt_conv_tensorcore.rst.txt
+++ b/docs/_sources/how_to/optimize_operators/opt_conv_tensorcore.rst.txt
@@ -661,7 +661,7 @@ be able to run on our build server
 
  .. code-block:: none
 
-    conv2d with tensor core: 13.363818 ms
+    conv2d with tensor core: 13.386995 ms
 
 
 
diff --git a/docs/_sources/how_to/optimize_operators/opt_gemm.rst.txt b/docs/_sources/how_to/optimize_operators/opt_gemm.rst.txt
index ee63e26669..d6c5059d18 100644
--- a/docs/_sources/how_to/optimize_operators/opt_gemm.rst.txt
+++ b/docs/_sources/how_to/optimize_operators/opt_gemm.rst.txt
@@ -147,8 +147,8 @@ Then we write a baseline implementation, the simplest way to write a matrix mult
 
  .. code-block:: none
 
-    Numpy running time: 0.019448
-    Baseline: 3.529566
+    Numpy running time: 0.019434
+    Baseline: 3.455880
 
 
 
@@ -242,7 +242,7 @@ fill 32 * 32 * sizeof(float) which is 4KB in the cache whose total size is 32KB
 
  .. code-block:: none
 
-    Opt1: 0.337350
+    Opt1: 0.300253
 
 
 
@@ -344,7 +344,7 @@ In this tutorial, we chose to vectorize the inner loop row data since it is cach
 
  .. code-block:: none
 
-    Opt2: 0.357992
+    Opt2: 0.335396
 
 
 
@@ -439,7 +439,7 @@ the access pattern for A matrix is more cache friendly.
 
  .. code-block:: none
 
-    Opt3: 0.133945
+    Opt3: 0.117203
 
 
 
@@ -563,7 +563,7 @@ flattening.
 
  .. code-block:: none
 
-    Opt4: 0.110158
+    Opt4: 0.109657
 
 
 
@@ -684,7 +684,7 @@ write to C when all the block results are ready.
 
  .. code-block:: none
 
-    Opt5: 0.112511
+    Opt5: 0.111716
 
 
 
@@ -808,7 +808,7 @@ Furthermore, we can also utilize multi-core processors to do the thread-level pa
 
  .. code-block:: none
 
-    Opt6: 0.148622
+    Opt6: 0.147122
 
 
 
diff --git a/docs/_sources/how_to/optimize_operators/sg_execution_times.rst.txt b/docs/_sources/how_to/optimize_operators/sg_execution_times.rst.txt
index bc3eb53814..e4ee4a3ec2 100644
--- a/docs/_sources/how_to/optimize_operators/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/optimize_operators/sg_execution_times.rst.txt
@@ -5,12 +5,12 @@
 
 Computation times
 =================
-**00:36.570** total execution time for **how_to_optimize_operators** files:
+**00:35.181** total execution time for **how_to_optimize_operators** files:
 
 +-----------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_optimize_operators_opt_gemm.py` (``opt_gemm.py``)                       | 00:33.843 | 0.0 MB |
+| :ref:`sphx_glr_how_to_optimize_operators_opt_gemm.py` (``opt_gemm.py``)                       | 00:32.562 | 0.0 MB |
 +-----------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_optimize_operators_opt_conv_tensorcore.py` (``opt_conv_tensorcore.py``) | 00:01.572 | 0.0 MB |
+| :ref:`sphx_glr_how_to_optimize_operators_opt_conv_tensorcore.py` (``opt_conv_tensorcore.py``) | 00:01.521 | 0.0 MB |
 +-----------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_optimize_operators_opt_conv_cuda.py` (``opt_conv_cuda.py``)             | 00:01.155 | 0.0 MB |
+| :ref:`sphx_glr_how_to_optimize_operators_opt_conv_cuda.py` (``opt_conv_cuda.py``)             | 00:01.098 | 0.0 MB |
 +-----------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/tune_with_autoscheduler/sg_execution_times.rst.txt b/docs/_sources/how_to/tune_with_autoscheduler/sg_execution_times.rst.txt
index 08c79566d9..fd8c5edab8 100644
--- a/docs/_sources/how_to/tune_with_autoscheduler/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/tune_with_autoscheduler/sg_execution_times.rst.txt
@@ -5,18 +5,18 @@
 
 Computation times
 =================
-**09:22.202** total execution time for **how_to_tune_with_autoscheduler** files:
+**09:28.786** total execution time for **how_to_tune_with_autoscheduler** files:
 
 +----------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_conv2d_layer_cuda.py` (``tune_conv2d_layer_cuda.py``) | 05:48.035 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_conv2d_layer_cuda.py` (``tune_conv2d_layer_cuda.py``) | 05:42.208 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_x86.py` (``tune_network_x86.py``)             | 01:35.298 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_x86.py` (``tune_network_x86.py``)             | 01:32.179 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_cuda.py` (``tune_network_cuda.py``)           | 01:04.115 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_cuda.py` (``tune_network_cuda.py``)           | 01:01.921 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_sparse_x86.py` (``tune_sparse_x86.py``)               | 00:30.209 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_sparse_x86.py` (``tune_sparse_x86.py``)               | 00:49.065 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_arm.py` (``tune_network_arm.py``)             | 00:12.772 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_arm.py` (``tune_network_arm.py``)             | 00:12.089 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_mali.py` (``tune_network_mali.py``)           | 00:11.773 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_mali.py` (``tune_network_mali.py``)           | 00:11.324 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.rst.txt b/docs/_sources/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.rst.txt
index 1616100008..27675042a6 100644
--- a/docs/_sources/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.rst.txt
+++ b/docs/_sources/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.rst.txt
@@ -243,270 +243,183 @@ cooperative fetching, unrolling and operator fusion.
                  bias: Buffer(bias_2: Pointer(float32), float32, [1, 512, 1, 1], []),
                  compute: Buffer(compute_2: Pointer(float32), float32, [1, 512, 7, 7], [])}
       buffer_map = {data_1: data, kernel_1: kernel, bias_1: bias, compute_1: compute} {
-      attr [IterVar(blockIdx.x: int32, (nullptr), "ThreadIndex", "blockIdx.x")] "thread_extent" = 64;
-      allocate(conv2d_nchw: Pointer(local float32), float32, [7]), storage_scope = local;
-      allocate(pad_temp.shared: Pointer(shared float32), float32, [392]), storage_scope = shared;
-      allocate(kernel.shared: Pointer(shared float32), float32, [64]), storage_scope = shared;
-      attr [IterVar(threadIdx.x: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56 {
-        conv2d_nchw_1: Buffer(conv2d_nchw, float32, [7], [], scope="local", align=16)[0] = 0f32
-        conv2d_nchw_1[1] = 0f32
+      attr [IterVar(blockIdx.x: int32, (nullptr), "ThreadIndex", "blockIdx.x")] "thread_extent" = 128;
+      allocate(conv2d_nchw: Pointer(local float32), float32, [4]), storage_scope = local;
+      allocate(pad_temp.shared: Pointer(shared float32), float32, [2016]), storage_scope = shared;
+      allocate(kernel.shared: Pointer(shared float32), float32, [384]), storage_scope = shared;
+      attr [IterVar(threadIdx.x: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49 {
+        conv2d_nchw_1: Buffer(conv2d_nchw, float32, [4], [], scope="local", align=8)[0] = 0f32
         conv2d_nchw_1[2] = 0f32
+        conv2d_nchw_1[1] = 0f32
         conv2d_nchw_1[3] = 0f32
-        conv2d_nchw_1[4] = 0f32
-        conv2d_nchw_1[5] = 0f32
-        conv2d_nchw_1[6] = 0f32
-        for (rc.outer.outer: int32, 0, 64) {
-          for (ry.outer.outer: int32, 0, 3) {
-            attr [IterVar(threadIdx.x_1: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56 {
-              pad_temp.shared_1: Buffer(pad_temp.shared, float32, [392], [], scope="shared")[(threadIdx.x_1*4)] = @tir.if_then_else((((1 <= (floordiv(floormod((threadIdx.x_1*4), 49), 7) + ry.outer.outer)) && ((floordiv(floormod((threadIdx.x_1*4), 49), 7) + ry.outer.outer) < 8)) && (1 <= floormod((threadIdx.x_1*4), 7))), data_3: Buffer(data_2, float32, [25088], [])[((((rc.outer.outer*392) + (ry.outer.outer*7)) + (threadIdx.x_1*4)) - 8)], 0f32, dtype=float32)
-              pad_temp.shared_1[((threadIdx.x_1*4) + 1)] = @tir.if_then_else((((1 <= (floordiv(floormod(((threadIdx.x_1*4) + 1), 49), 7) + ry.outer.outer)) && ((floordiv(floormod(((threadIdx.x_1*4) + 1), 49), 7) + ry.outer.outer) < 8)) && (1 <= floormod(((threadIdx.x_1*4) + 1), 7))), data_3[((((rc.outer.outer*392) + (ry.outer.outer*7)) + (threadIdx.x_1*4)) - 7)], 0f32, dtype=float32)
-              pad_temp.shared_1[((threadIdx.x_1*4) + 2)] = @tir.if_then_else((((1 <= (floordiv(floormod(((threadIdx.x_1*4) + 2), 49), 7) + ry.outer.outer)) && ((floordiv(floormod(((threadIdx.x_1*4) + 2), 49), 7) + ry.outer.outer) < 8)) && (1 <= floormod(((threadIdx.x_1*4) + 2), 7))), data_3[((((rc.outer.outer*392) + (ry.outer.outer*7)) + (threadIdx.x_1*4)) - 6)], 0f32, dtype=float32)
-              pad_temp.shared_1[((threadIdx.x_1*4) + 3)] = @tir.if_then_else((((1 <= (floordiv(floormod(((threadIdx.x_1*4) + 3), 49), 7) + ry.outer.outer)) && ((floordiv(floormod(((threadIdx.x_1*4) + 3), 49), 7) + ry.outer.outer) < 8)) && (1 <= floormod(((threadIdx.x_1*4) + 3), 7))), data_3[((((rc.outer.outer*392) + (ry.outer.outer*7)) + (threadIdx.x_1*4)) - 5)], 0f32, dtype=float32)
-            }
-            attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56 {
-              if @tir.likely((threadIdx.x_1 < 42), dtype=bool) {
-                pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 224), 49)*49) + (floormod((floordiv((threadIdx.x_1*4), 7) + 4), 7)*7)) + floormod((threadIdx.x_1*4), 7))] = @tir.if_then_else((((1 <= (ry.outer.outer + floormod((floordiv((threadIdx.x_1*4), 7) + 4), 7))) && ((ry.outer.outer + floormod((floordiv((threadIdx.x_1*4), 7) + 4), 7)) < 8)) && (1 <= floormod((threadIdx.x_1*4), 7))), data_3[((((((rc.outer.outer*392) + (floordiv(((threadIdx.x_1*4) + 224), 49)*49)) + (ry.outer.outer* [...]
-              }
-              if @tir.likely((threadIdx.x_1 < 42), dtype=bool) {
-                pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 225), 49)*49) + (floormod((floordiv(((threadIdx.x_1*4) + 1), 7) + 4), 7)*7)) + floormod(((threadIdx.x_1*4) + 1), 7))] = @tir.if_then_else((((1 <= (ry.outer.outer + floormod((floordiv(((threadIdx.x_1*4) + 1), 7) + 4), 7))) && ((ry.outer.outer + floormod((floordiv(((threadIdx.x_1*4) + 1), 7) + 4), 7)) < 8)) && (1 <= floormod(((threadIdx.x_1*4) + 1), 7))), data_3[((((((rc.outer.outer*392) + (floordiv(((threadIdx.x_1*4) + 225 [...]
-              }
-              if @tir.likely((threadIdx.x_1 < 42), dtype=bool) {
-                pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 226), 49)*49) + (floormod((floordiv(((threadIdx.x_1*4) + 2), 7) + 4), 7)*7)) + floormod(((threadIdx.x_1*4) + 2), 7))] = @tir.if_then_else((((1 <= (ry.outer.outer + floormod((floordiv(((threadIdx.x_1*4) + 2), 7) + 4), 7))) && ((ry.outer.outer + floormod((floordiv(((threadIdx.x_1*4) + 2), 7) + 4), 7)) < 8)) && (1 <= floormod(((threadIdx.x_1*4) + 2), 7))), data_3[((((((rc.outer.outer*392) + (floordiv(((threadIdx.x_1*4) + 226 [...]
-              }
-              if @tir.likely((threadIdx.x_1 < 42), dtype=bool) {
-                pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 227), 49)*49) + (floormod((floordiv(((threadIdx.x_1*4) + 3), 7) + 4), 7)*7)) + floormod(((threadIdx.x_1*4) + 3), 7))] = @tir.if_then_else((((1 <= (ry.outer.outer + floormod((floordiv(((threadIdx.x_1*4) + 3), 7) + 4), 7))) && ((ry.outer.outer + floormod((floordiv(((threadIdx.x_1*4) + 3), 7) + 4), 7)) < 8)) && (1 <= floormod(((threadIdx.x_1*4) + 3), 7))), data_3[((((((rc.outer.outer*392) + (floordiv(((threadIdx.x_1*4) + 227 [...]
-              }
-            }
-            attr [IterVar(threadIdx.x_2: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
-            kernel.shared_1: Buffer(kernel.shared, float32, [64], [], scope="shared")[threadIdx.x_2] = kernel_3: Buffer(kernel_2, float32, [2359296], [])[(((((blockIdx.x*36864) + (floordiv(threadIdx.x_2, 8)*4608)) + (rc.outer.outer*72)) + (floormod(threadIdx.x_2, 8)*9)) + (ry.outer.outer*3))]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
-            if @tir.likely((threadIdx.x_2 < 8), dtype=bool) {
-              kernel.shared_1[(threadIdx.x_2 + 56)] = kernel_3[(((((blockIdx.x*36864) + (rc.outer.outer*72)) + (threadIdx.x_2*9)) + (ry.outer.outer*3)) + 32256)]
-            }
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[floormod(threadIdx.x, 7)]*kernel.shared_1[(floordiv(threadIdx.x, 7)*8)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 7)]*kernel.shared_1[(floordiv(threadIdx.x, 7)*8)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 14)]*kernel.shared_1[(floordiv(threadIdx.x, 7)*8)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 21)]*kernel.shared_1[(floordiv(threadIdx.x, 7)*8)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 28)]*kernel.shared_1[(floordiv(threadIdx.x, 7)*8)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 35)]*kernel.shared_1[(floordiv(threadIdx.x, 7)*8)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 42)]*kernel.shared_1[(floordiv(threadIdx.x, 7)*8)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 49)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 1)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 56)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 1)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 63)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 1)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 70)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 1)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 77)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 1)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 84)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 1)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 91)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 1)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 98)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 2)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 105)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 2)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 112)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 2)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 119)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 2)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 126)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 2)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 133)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 2)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 140)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 2)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 147)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 3)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 154)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 3)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 161)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 3)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 168)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 3)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 175)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 3)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 182)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 3)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 189)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 3)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 196)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 4)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 203)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 4)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 210)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 4)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 217)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 4)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 224)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 4)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 231)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 4)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 238)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 4)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 245)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 5)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 252)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 5)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 259)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 5)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 266)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 5)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 273)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 5)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 280)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 5)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 287)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 5)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 294)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 6)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 301)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 6)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 308)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 6)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 315)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 6)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 322)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 6)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 329)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 6)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 336)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 6)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 343)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 7)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 350)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 7)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 357)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 7)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 364)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 7)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 371)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 7)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 378)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 7)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 385)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 7)]))
-            attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56 {
-              pad_temp.shared_1[(threadIdx.x_1*4)] = @tir.if_then_else(((1 <= (floordiv(floormod((threadIdx.x_1*4), 49), 7) + ry.outer.outer)) && ((floordiv(floormod((threadIdx.x_1*4), 49), 7) + ry.outer.outer) < 8)), data_3[((((rc.outer.outer*392) + (ry.outer.outer*7)) + (threadIdx.x_1*4)) - 7)], 0f32, dtype=float32)
-              pad_temp.shared_1[((threadIdx.x_1*4) + 1)] = @tir.if_then_else(((1 <= (floordiv(floormod(((threadIdx.x_1*4) + 1), 49), 7) + ry.outer.outer)) && ((floordiv(floormod(((threadIdx.x_1*4) + 1), 49), 7) + ry.outer.outer) < 8)), data_3[((((rc.outer.outer*392) + (ry.outer.outer*7)) + (threadIdx.x_1*4)) - 6)], 0f32, dtype=float32)
-              pad_temp.shared_1[((threadIdx.x_1*4) + 2)] = @tir.if_then_else(((1 <= (floordiv(floormod(((threadIdx.x_1*4) + 2), 49), 7) + ry.outer.outer)) && ((floordiv(floormod(((threadIdx.x_1*4) + 2), 49), 7) + ry.outer.outer) < 8)), data_3[((((rc.outer.outer*392) + (ry.outer.outer*7)) + (threadIdx.x_1*4)) - 5)], 0f32, dtype=float32)
-              pad_temp.shared_1[((threadIdx.x_1*4) + 3)] = @tir.if_then_else(((1 <= (floordiv(floormod(((threadIdx.x_1*4) + 3), 49), 7) + ry.outer.outer)) && ((floordiv(floormod(((threadIdx.x_1*4) + 3), 49), 7) + ry.outer.outer) < 8)), data_3[((((rc.outer.outer*392) + (ry.outer.outer*7)) + (threadIdx.x_1*4)) - 4)], 0f32, dtype=float32)
-            }
-            attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56 {
-              if @tir.likely((threadIdx.x_1 < 42), dtype=bool) {
-                pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 224), 49)*49) + (floormod((floordiv((threadIdx.x_1*4), 7) + 4), 7)*7)) + floormod((threadIdx.x_1*4), 7))] = @tir.if_then_else(((1 <= (ry.outer.outer + floormod((floordiv((threadIdx.x_1*4), 7) + 4), 7))) && ((ry.outer.outer + floormod((floordiv((threadIdx.x_1*4), 7) + 4), 7)) < 8)), data_3[((((((rc.outer.outer*392) + (floordiv(((threadIdx.x_1*4) + 224), 49)*49)) + (ry.outer.outer*7)) + (floormod((floordiv((threadIdx.x_1*4) [...]
+        for (rc.outer.outer: int32, 0, 16) {
+          for (rx.outer.outer: int32, 0, 3) {
+            let cse_var_2: int32 = (rc.outer.outer*1568)
+            let cse_var_1: int32 = (rc.outer.outer*288)
+             {
+              attr [IterVar(threadIdx.x_1: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+              pad_temp.shared_1: Buffer(pad_temp.shared, float32, [2016], [], scope="shared")[threadIdx.x_1] = @tir.if_then_else((((7 <= threadIdx.x_1) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3: Buffer(data_2, float32, [25088], [])[(((cse_var_2 + threadIdx.x_1) + rx.outer.outer) - 8)], 0f32, dtype=float32)
+              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+              pad_temp.shared_1[(threadIdx.x_1 + 49)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 7), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 7), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 49), 63)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 7), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
+              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+              pad_temp.shared_1[(threadIdx.x_1 + 98)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 5), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 5), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 98), 63)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 5), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
+              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+              pad_temp.shared_1[(threadIdx.x_1 + 147)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 3), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 3), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 147), 63)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 3), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
+              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+              pad_temp.shared_1[(threadIdx.x_1 + 196)] = @tir.if_then_else(((1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 196), 63)*49)) + ((floordiv(threadIdx.x_1, 7) + 1)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
+              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+              pad_temp.shared_1[(threadIdx.x_1 + 245)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 8), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 8), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 245), 63)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 8), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
+              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+              pad_temp.shared_1[(threadIdx.x_1 + 294)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 6), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 6), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 294), 63)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 6), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
+              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+              pad_temp.shared_1[(threadIdx.x_1 + 343)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 4), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 4), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 343), 63)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 4), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
+              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+              pad_temp.shared_1[(threadIdx.x_1 + 392)] = @tir.if_then_else((((threadIdx.x_1 < 42) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 392), 63)*49)) + ((floordiv(threadIdx.x_1, 7) + 2)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
+              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+              pad_temp.shared_1[(threadIdx.x_1 + 441)] = @tir.if_then_else((((7 <= threadIdx.x_1) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((cse_var_2 + threadIdx.x_1) + rx.outer.outer) + 335)], 0f32, dtype=float32)
+              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+              pad_temp.shared_1[(threadIdx.x_1 + 490)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 7), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 7), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 490), 63)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 7), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
+              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+              pad_temp.shared_1[(threadIdx.x_1 + 539)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 5), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 5), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 539), 63)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 5), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
+              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+              pad_temp.shared_1[(threadIdx.x_1 + 588)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 3), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 3), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 588), 63)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 3), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
+              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+              pad_temp.shared_1[(threadIdx.x_1 + 637)] = @tir.if_then_else(((1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 637), 63)*49)) + ((floordiv(threadIdx.x_1, 7) + 1)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
+              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+              pad_temp.shared_1[(threadIdx.x_1 + 686)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 8), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 8), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 686), 63)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 8), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
+              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+              pad_temp.shared_1[(threadIdx.x_1 + 735)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 6), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 6), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 735), 63)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 6), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
+              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+              pad_temp.shared_1[(threadIdx.x_1 + 784)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 4), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 4), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 784), 63)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 4), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
+              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+              pad_temp.shared_1[(threadIdx.x_1 + 833)] = @tir.if_then_else((((threadIdx.x_1 < 42) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 833), 63)*49)) + ((floordiv(threadIdx.x_1, 7) + 2)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
+              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+              pad_temp.shared_1[(threadIdx.x_1 + 882)] = @tir.if_then_else((((7 <= threadIdx.x_1) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((cse_var_2 + threadIdx.x_1) + rx.outer.outer) + 678)], 0f32, dtype=float32)
+              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+              pad_temp.shared_1[(threadIdx.x_1 + 931)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 7), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 7), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 931), 63)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 7), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
+              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+              pad_temp.shared_1[(threadIdx.x_1 + 980)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 5), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 5), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 980), 63)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 5), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
+              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+              pad_temp.shared_1[(threadIdx.x_1 + 1029)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 3), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 3), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 1029), 63)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 3), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
+              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+              pad_temp.shared_1[(threadIdx.x_1 + 1078)] = @tir.if_then_else(((1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 1078), 63)*49)) + ((floordiv(threadIdx.x_1, 7) + 1)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
+              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+              pad_temp.shared_1[(threadIdx.x_1 + 1127)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 8), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 8), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 1127), 63)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 8), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
+              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+              pad_temp.shared_1[(threadIdx.x_1 + 1176)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 6), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 6), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 1176), 63)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 6), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
+              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+              pad_temp.shared_1[(threadIdx.x_1 + 1225)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 4), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 4), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 1225), 63)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 4), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
+              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+              pad_temp.shared_1[(threadIdx.x_1 + 1274)] = @tir.if_then_else((((threadIdx.x_1 < 42) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 1274), 63)*49)) + ((floordiv(threadIdx.x_1, 7) + 2)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
+              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+              pad_temp.shared_1[(threadIdx.x_1 + 1323)] = @tir.if_then_else((((7 <= threadIdx.x_1) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((cse_var_2 + threadIdx.x_1) + rx.outer.outer) + 1021)], 0f32, dtype=float32)
+              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+              pad_temp.shared_1[(threadIdx.x_1 + 1372)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 7), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 7), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 1372), 63)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 7), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
+              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+              pad_temp.shared_1[(threadIdx.x_1 + 1421)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 5), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 5), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 1421), 63)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 5), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
+              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+              pad_temp.shared_1[(threadIdx.x_1 + 1470)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 3), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 3), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 1470), 63)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 3), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
+              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+              pad_temp.shared_1[(threadIdx.x_1 + 1519)] = @tir.if_then_else(((1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 1519), 63)*49)) + ((floordiv(threadIdx.x_1, 7) + 1)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
+              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+              pad_temp.shared_1[(threadIdx.x_1 + 1568)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 8), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 8), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 1568), 63)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 8), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
+              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+              pad_temp.shared_1[(threadIdx.x_1 + 1617)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 6), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 6), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 1617), 63)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 6), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
+              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+              pad_temp.shared_1[(threadIdx.x_1 + 1666)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 4), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 4), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 1666), 63)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 4), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
+              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+              pad_temp.shared_1[(threadIdx.x_1 + 1715)] = @tir.if_then_else((((threadIdx.x_1 < 42) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 1715), 63)*49)) + ((floordiv(threadIdx.x_1, 7) + 2)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
+              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+              pad_temp.shared_1[(threadIdx.x_1 + 1764)] = @tir.if_then_else((((7 <= threadIdx.x_1) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((cse_var_2 + threadIdx.x_1) + rx.outer.outer) + 1364)], 0f32, dtype=float32)
+              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+              pad_temp.shared_1[(threadIdx.x_1 + 1813)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 7), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 7), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 1813), 63)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 7), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
+              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+              pad_temp.shared_1[(threadIdx.x_1 + 1862)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 5), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 5), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 1862), 63)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 5), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
+              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+              pad_temp.shared_1[(threadIdx.x_1 + 1911)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 3), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 3), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 1911), 63)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 3), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
+              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+              pad_temp.shared_1[(threadIdx.x_1 + 1960)] = @tir.if_then_else(((1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 1960), 63)*49)) + ((floordiv(threadIdx.x_1, 7) + 1)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
+              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+              if @tir.likely((threadIdx.x_1 < 7), dtype=bool) {
+                pad_temp.shared_1[(threadIdx.x_1 + 2009)] = 0f32
               }
-              if @tir.likely((threadIdx.x_1 < 42), dtype=bool) {
-                pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 225), 49)*49) + (floormod((floordiv(((threadIdx.x_1*4) + 1), 7) + 4), 7)*7)) + floormod(((threadIdx.x_1*4) + 1), 7))] = @tir.if_then_else(((1 <= (ry.outer.outer + floormod((floordiv(((threadIdx.x_1*4) + 1), 7) + 4), 7))) && ((ry.outer.outer + floormod((floordiv(((threadIdx.x_1*4) + 1), 7) + 4), 7)) < 8)), data_3[((((((rc.outer.outer*392) + (floordiv(((threadIdx.x_1*4) + 225), 49)*49)) + (ry.outer.outer*7)) + (floormod((fl [...]
+              attr [IterVar(threadIdx.x_2: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+              kernel.shared_1: Buffer(kernel.shared, float32, [384], [], scope="shared")[threadIdx.x_2] = kernel_3: Buffer(kernel_2, float32, [2359296], [])[((((blockIdx.x*18432) + cse_var_1) + (threadIdx.x_2*3)) + rx.outer.outer)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+              kernel.shared_1[(threadIdx.x_2 + 49)] = kernel_3[((((((blockIdx.x*18432) + (floordiv((threadIdx.x_2 + 49), 96)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 49), 96), 3)*9)) + (floormod((threadIdx.x_2 + 1), 3)*3)) + rx.outer.outer)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+              kernel.shared_1[(threadIdx.x_2 + 98)] = kernel_3[(((((blockIdx.x*18432) + (floordiv((threadIdx.x_2 + 98), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 2), 96)*3)) + rx.outer.outer)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+              kernel.shared_1[(threadIdx.x_2 + 147)] = kernel_3[((((((blockIdx.x*18432) + (floordiv((threadIdx.x_2 + 147), 96)*4608)) + cse_var_1) + (floormod((floordiv(threadIdx.x_2, 3) + 17), 32)*9)) + (floormod(threadIdx.x_2, 3)*3)) + rx.outer.outer)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+              kernel.shared_1[(threadIdx.x_2 + 196)] = kernel_3[((((((blockIdx.x*18432) + (floordiv((threadIdx.x_2 + 196), 96)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 4), 96), 3)*9)) + (floormod((threadIdx.x_2 + 1), 3)*3)) + rx.outer.outer)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+              kernel.shared_1[(threadIdx.x_2 + 245)] = kernel_3[((((((blockIdx.x*18432) + (floordiv((threadIdx.x_2 + 245), 96)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 53), 96), 3)*9)) + (floormod((threadIdx.x_2 + 2), 3)*3)) + rx.outer.outer)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+              kernel.shared_1[(threadIdx.x_2 + 294)] = kernel_3[((((((blockIdx.x*18432) + (floordiv((threadIdx.x_2 + 294), 96)*4608)) + cse_var_1) + ((floordiv(threadIdx.x_2, 3) + 2)*9)) + (floormod(threadIdx.x_2, 3)*3)) + rx.outer.outer)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+              if @tir.likely((threadIdx.x_2 < 41), dtype=bool) {
+                kernel.shared_1[(threadIdx.x_2 + 343)] = kernel_3[((((((blockIdx.x*18432) + (floordiv((threadIdx.x_2 + 343), 96)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 55), 96), 3)*9)) + (floormod((threadIdx.x_2 + 1), 3)*3)) + rx.outer.outer)]
               }
-              if @tir.likely((threadIdx.x_1 < 42), dtype=bool) {
-                pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 226), 49)*49) + (floormod((floordiv(((threadIdx.x_1*4) + 2), 7) + 4), 7)*7)) + floormod(((threadIdx.x_1*4) + 2), 7))] = @tir.if_then_else(((1 <= (ry.outer.outer + floormod((floordiv(((threadIdx.x_1*4) + 2), 7) + 4), 7))) && ((ry.outer.outer + floormod((floordiv(((threadIdx.x_1*4) + 2), 7) + 4), 7)) < 8)), data_3[((((((rc.outer.outer*392) + (floordiv(((threadIdx.x_1*4) + 226), 49)*49)) + (ry.outer.outer*7)) + (floormod((fl [...]
+              for (rc.outer.inner: int32, 0, 8) {
+                let cse_var_3: int32 = (rc.outer.inner*12)
+                 {
+                  conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[((rc.outer.inner*252) + threadIdx.x)]*kernel.shared_1[cse_var_3]))
+                  conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[((rc.outer.inner*252) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 192)]))
+                  conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[((rc.outer.inner*252) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 96)]))
+                  conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[((rc.outer.inner*252) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 288)]))
+                  conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 3)]))
+                  conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 195)]))
+                  conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 99)]))
+                  conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 291)]))
+                  conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 6)]))
+                  conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 198)]))
+                  conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 102)]))
+                  conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 294)]))
+                  conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 9)]))
+                  conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 201)]))
+                  conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 105)]))
+                  conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 297)]))
+                  conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 1)]))
+                  conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 193)]))
+                  conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 97)]))
+                  conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 289)]))
+                  conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 4)]))
+                  conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 196)]))
+                  conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 100)]))
+                  conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 292)]))
+                  conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 7)]))
+                  conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 199)]))
+                  conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 103)]))
+                  conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 295)]))
+                  conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 10)]))
+                  conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 202)]))
+                  conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 106)]))
+                  conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 298)]))
+                  conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 2)]))
+                  conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 194)]))
+                  conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 98)]))
+                  conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 290)]))
+                  conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 5)]))
+                  conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 197)]))
+                  conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 101)]))
+                  conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 293)]))
+                  conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 8)]))
+                  conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 200)]))
+                  conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 104)]))
+                  conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 296)]))
+                  conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 11)]))
+                  conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 203)]))
+                  conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 107)]))
+                  conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 299)]))
+                }
               }
-              if @tir.likely((threadIdx.x_1 < 42), dtype=bool) {
-                pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 227), 49)*49) + (floormod((floordiv(((threadIdx.x_1*4) + 3), 7) + 4), 7)*7)) + floormod(((threadIdx.x_1*4) + 3), 7))] = @tir.if_then_else(((1 <= (ry.outer.outer + floormod((floordiv(((threadIdx.x_1*4) + 3), 7) + 4), 7))) && ((ry.outer.outer + floormod((floordiv(((threadIdx.x_1*4) + 3), 7) + 4), 7)) < 8)), data_3[((((((rc.outer.outer*392) + (floordiv(((threadIdx.x_1*4) + 227), 49)*49)) + (ry.outer.outer*7)) + (floormod((fl [...]
-              }
-            }
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
-            kernel.shared_1[threadIdx.x_2] = kernel_3[((((((blockIdx.x*36864) + (floordiv(threadIdx.x_2, 8)*4608)) + (rc.outer.outer*72)) + (floormod(threadIdx.x_2, 8)*9)) + (ry.outer.outer*3)) + 1)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
-            if @tir.likely((threadIdx.x_2 < 8), dtype=bool) {
-              kernel.shared_1[(threadIdx.x_2 + 56)] = kernel_3[(((((blockIdx.x*36864) + (rc.outer.outer*72)) + (threadIdx.x_2*9)) + (ry.outer.outer*3)) + 32257)]
-            }
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[floormod(threadIdx.x, 7)]*kernel.shared_1[(floordiv(threadIdx.x, 7)*8)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 7)]*kernel.shared_1[(floordiv(threadIdx.x, 7)*8)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 14)]*kernel.shared_1[(floordiv(threadIdx.x, 7)*8)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 21)]*kernel.shared_1[(floordiv(threadIdx.x, 7)*8)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 28)]*kernel.shared_1[(floordiv(threadIdx.x, 7)*8)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 35)]*kernel.shared_1[(floordiv(threadIdx.x, 7)*8)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 42)]*kernel.shared_1[(floordiv(threadIdx.x, 7)*8)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 49)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 1)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 56)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 1)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 63)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 1)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 70)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 1)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 77)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 1)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 84)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 1)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 91)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 1)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 98)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 2)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 105)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 2)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 112)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 2)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 119)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 2)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 126)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 2)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 133)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 2)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 140)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 2)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 147)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 3)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 154)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 3)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 161)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 3)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 168)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 3)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 175)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 3)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 182)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 3)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 189)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 3)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 196)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 4)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 203)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 4)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 210)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 4)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 217)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 4)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 224)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 4)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 231)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 4)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 238)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 4)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 245)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 5)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 252)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 5)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 259)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 5)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 266)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 5)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 273)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 5)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 280)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 5)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 287)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 5)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 294)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 6)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 301)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 6)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 308)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 6)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 315)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 6)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 322)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 6)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 329)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 6)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 336)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 6)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 343)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 7)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 350)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 7)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 357)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 7)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 364)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 7)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 371)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 7)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 378)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 7)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 385)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 7)]))
-            attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56 {
-              pad_temp.shared_1[(threadIdx.x_1*4)] = @tir.if_then_else((((1 <= (floordiv(floormod((threadIdx.x_1*4), 49), 7) + ry.outer.outer)) && ((floordiv(floormod((threadIdx.x_1*4), 49), 7) + ry.outer.outer) < 8)) && (floormod((threadIdx.x_1*4), 7) < 6)), data_3[((((rc.outer.outer*392) + (ry.outer.outer*7)) + (threadIdx.x_1*4)) - 6)], 0f32, dtype=float32)
-              pad_temp.shared_1[((threadIdx.x_1*4) + 1)] = @tir.if_then_else((((1 <= (floordiv(floormod(((threadIdx.x_1*4) + 1), 49), 7) + ry.outer.outer)) && ((floordiv(floormod(((threadIdx.x_1*4) + 1), 49), 7) + ry.outer.outer) < 8)) && (floormod(((threadIdx.x_1*4) + 1), 7) < 6)), data_3[((((rc.outer.outer*392) + (ry.outer.outer*7)) + (threadIdx.x_1*4)) - 5)], 0f32, dtype=float32)
-              pad_temp.shared_1[((threadIdx.x_1*4) + 2)] = @tir.if_then_else((((1 <= (floordiv(floormod(((threadIdx.x_1*4) + 2), 49), 7) + ry.outer.outer)) && ((floordiv(floormod(((threadIdx.x_1*4) + 2), 49), 7) + ry.outer.outer) < 8)) && (floormod(((threadIdx.x_1*4) + 2), 7) < 6)), data_3[((((rc.outer.outer*392) + (ry.outer.outer*7)) + (threadIdx.x_1*4)) - 4)], 0f32, dtype=float32)
-              pad_temp.shared_1[((threadIdx.x_1*4) + 3)] = @tir.if_then_else((((1 <= (floordiv(floormod(((threadIdx.x_1*4) + 3), 49), 7) + ry.outer.outer)) && ((floordiv(floormod(((threadIdx.x_1*4) + 3), 49), 7) + ry.outer.outer) < 8)) && (floormod(((threadIdx.x_1*4) + 3), 7) < 6)), data_3[((((rc.outer.outer*392) + (ry.outer.outer*7)) + (threadIdx.x_1*4)) - 3)], 0f32, dtype=float32)
-            }
-            attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56 {
-              if @tir.likely((threadIdx.x_1 < 42), dtype=bool) {
-                pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 224), 49)*49) + (floormod((floordiv((threadIdx.x_1*4), 7) + 4), 7)*7)) + floormod((threadIdx.x_1*4), 7))] = @tir.if_then_else((((1 <= (ry.outer.outer + floormod((floordiv((threadIdx.x_1*4), 7) + 4), 7))) && ((ry.outer.outer + floormod((floordiv((threadIdx.x_1*4), 7) + 4), 7)) < 8)) && (floormod((threadIdx.x_1*4), 7) < 6)), data_3[((((((rc.outer.outer*392) + (floordiv(((threadIdx.x_1*4) + 224), 49)*49)) + (ry.outer.outer*7 [...]
-              }
-              if @tir.likely((threadIdx.x_1 < 42), dtype=bool) {
-                pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 225), 49)*49) + (floormod((floordiv(((threadIdx.x_1*4) + 1), 7) + 4), 7)*7)) + floormod(((threadIdx.x_1*4) + 1), 7))] = @tir.if_then_else((((1 <= (ry.outer.outer + floormod((floordiv(((threadIdx.x_1*4) + 1), 7) + 4), 7))) && ((ry.outer.outer + floormod((floordiv(((threadIdx.x_1*4) + 1), 7) + 4), 7)) < 8)) && (floormod(((threadIdx.x_1*4) + 1), 7) < 6)), data_3[((((((rc.outer.outer*392) + (floordiv(((threadIdx.x_1*4) + 225) [...]
-              }
-              if @tir.likely((threadIdx.x_1 < 42), dtype=bool) {
-                pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 226), 49)*49) + (floormod((floordiv(((threadIdx.x_1*4) + 2), 7) + 4), 7)*7)) + floormod(((threadIdx.x_1*4) + 2), 7))] = @tir.if_then_else((((1 <= (ry.outer.outer + floormod((floordiv(((threadIdx.x_1*4) + 2), 7) + 4), 7))) && ((ry.outer.outer + floormod((floordiv(((threadIdx.x_1*4) + 2), 7) + 4), 7)) < 8)) && (floormod(((threadIdx.x_1*4) + 2), 7) < 6)), data_3[((((((rc.outer.outer*392) + (floordiv(((threadIdx.x_1*4) + 226) [...]
-              }
-              if @tir.likely((threadIdx.x_1 < 42), dtype=bool) {
-                pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 227), 49)*49) + (floormod((floordiv(((threadIdx.x_1*4) + 3), 7) + 4), 7)*7)) + floormod(((threadIdx.x_1*4) + 3), 7))] = @tir.if_then_else((((1 <= (ry.outer.outer + floormod((floordiv(((threadIdx.x_1*4) + 3), 7) + 4), 7))) && ((ry.outer.outer + floormod((floordiv(((threadIdx.x_1*4) + 3), 7) + 4), 7)) < 8)) && (floormod(((threadIdx.x_1*4) + 3), 7) < 6)), data_3[((((((rc.outer.outer*392) + (floordiv(((threadIdx.x_1*4) + 227) [...]
-              }
-            }
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
-            kernel.shared_1[threadIdx.x_2] = kernel_3[((((((blockIdx.x*36864) + (floordiv(threadIdx.x_2, 8)*4608)) + (rc.outer.outer*72)) + (floormod(threadIdx.x_2, 8)*9)) + (ry.outer.outer*3)) + 2)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
-            if @tir.likely((threadIdx.x_2 < 8), dtype=bool) {
-              kernel.shared_1[(threadIdx.x_2 + 56)] = kernel_3[(((((blockIdx.x*36864) + (rc.outer.outer*72)) + (threadIdx.x_2*9)) + (ry.outer.outer*3)) + 32258)]
             }
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[floormod(threadIdx.x, 7)]*kernel.shared_1[(floordiv(threadIdx.x, 7)*8)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 7)]*kernel.shared_1[(floordiv(threadIdx.x, 7)*8)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 14)]*kernel.shared_1[(floordiv(threadIdx.x, 7)*8)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 21)]*kernel.shared_1[(floordiv(threadIdx.x, 7)*8)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 28)]*kernel.shared_1[(floordiv(threadIdx.x, 7)*8)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 35)]*kernel.shared_1[(floordiv(threadIdx.x, 7)*8)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 42)]*kernel.shared_1[(floordiv(threadIdx.x, 7)*8)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 49)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 1)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 56)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 1)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 63)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 1)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 70)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 1)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 77)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 1)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 84)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 1)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 91)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 1)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 98)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 2)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 105)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 2)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 112)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 2)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 119)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 2)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 126)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 2)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 133)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 2)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 140)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 2)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 147)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 3)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 154)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 3)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 161)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 3)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 168)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 3)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 175)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 3)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 182)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 3)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 189)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 3)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 196)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 4)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 203)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 4)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 210)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 4)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 217)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 4)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 224)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 4)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 231)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 4)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 238)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 4)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 245)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 5)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 252)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 5)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 259)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 5)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 266)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 5)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 273)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 5)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 280)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 5)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 287)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 5)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 294)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 6)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 301)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 6)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 308)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 6)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 315)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 6)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 322)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 6)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 329)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 6)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 336)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 6)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 343)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 7)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 350)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 7)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 357)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 7)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 364)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 7)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 371)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 7)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 378)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 7)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 385)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 7)]))
           }
         }
-        for (i2.inner: int32, 0, 7) {
-          compute_3: Buffer(compute_2, float32, [25088], [])[((((blockIdx.x*392) + (floordiv(threadIdx.x, 7)*49)) + (i2.inner*7)) + floormod(threadIdx.x, 7))] = max((conv2d_nchw_1[i2.inner] + bias_3: Buffer(bias_2, float32, [512], [])[((blockIdx.x*8) + floordiv(threadIdx.x, 7))]), 0f32)
+        for (i1.inner: int32, 0, 2) {
+          compute_3: Buffer(compute_2, float32, [25088], [])[(((blockIdx.x*196) + (i1.inner*49)) + threadIdx.x)] = max((conv2d_nchw_1[i1.inner] + bias_3: Buffer(bias_2, float32, [512], [])[((blockIdx.x*4) + i1.inner)]), 0f32)
+          compute_3[((((blockIdx.x*196) + (i1.inner*49)) + threadIdx.x) + 98)] = max((conv2d_nchw_1[(i1.inner + 2)] + bias_3[(((blockIdx.x*4) + i1.inner) + 2)]), 0f32)
         }
       }
     }
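
    For orientation, lowered TIR like the listing above is what tvm.lower prints for a
    scheduled compute. A minimal, self-contained sketch on a toy workload (the names and
    the trivial A + 1 compute are illustrative, not the tuned conv2d of this tutorial):

 .. code-block:: python

    # Minimal sketch: print lowered TIR for a toy TE compute. The output has the
    # same flavor as the listing above (buffers, loops, stores), but this is not
    # the tuned conv2d schedule itself.
    import tvm
    from tvm import te

    n = 8
    A = te.placeholder((n,), name="A")
    B = te.compute((n,), lambda i: A[i] + 1.0, name="B")
    s = te.create_schedule(B.op)
    print(tvm.lower(s, [A, B], simple_mode=True))
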
@@ -561,7 +474,7 @@ We build the binary and check its correctness and performance.
 
  .. code-block:: none
 
-    Execution time of this operator: 0.552 ms
+    Execution time of this operator: 0.244 ms
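
    For reference, timings like the one above are normally collected with the runtime
    module's time_evaluator and reported as a median over repeated runs. A minimal sketch
    on a toy kernel (CPU target, illustrative names); the number above comes from the
    tuned CUDA conv2d module, not from this sketch:

 .. code-block:: python

    # Minimal sketch: build a toy kernel and report its median runtime in the
    # same style as the line above. Target and shapes are illustrative.
    import numpy as np
    import tvm
    from tvm import te

    n = 1 << 20
    A = te.placeholder((n,), name="A")
    B = te.compute((n,), lambda i: A[i] + 1.0, name="B")
    s = te.create_schedule(B.op)
    func = tvm.build(s, [A, B], target="llvm")

    dev = tvm.cpu()
    a = tvm.nd.array(np.random.uniform(size=n).astype("float32"), dev)
    b = tvm.nd.empty((n,), "float32", dev)

    evaluator = func.time_evaluator(func.entry_name, dev, min_repeat_ms=500)
    print("Execution time of this operator: %.3f ms"
          % (np.median(evaluator(a, b).results) * 1000))
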
 
 
 
@@ -609,33 +522,33 @@ They can be used for debugging and learning the behavior of the auto-scheduler.
     conv2d_nchw_nn_o_o_i, conv2d_nchw_nn_o_i = s[conv2d_nchw].split(conv2d_nchw_nn_o_i, factor=1)
     conv2d_nchw_nn_o_o_o_i, conv2d_nchw_nn_o_o_i = s[conv2d_nchw].split(conv2d_nchw_nn_o_o_i, factor=1)
     conv2d_nchw_nn_o_o_o_o, conv2d_nchw_nn_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_nn_o_o_o_i, factor=1)
-    conv2d_nchw_ff_o_i, conv2d_nchw_ff_i = s[conv2d_nchw].split(conv2d_nchw_ff, factor=1)
+    conv2d_nchw_ff_o_i, conv2d_nchw_ff_i = s[conv2d_nchw].split(conv2d_nchw_ff, factor=2)
     conv2d_nchw_ff_o_o_i, conv2d_nchw_ff_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_i, factor=1)
-    conv2d_nchw_ff_o_o_o_i, conv2d_nchw_ff_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_i, factor=8)
-    conv2d_nchw_ff_o_o_o_o, conv2d_nchw_ff_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_o_i, factor=1)
-    conv2d_nchw_yy_o_i, conv2d_nchw_yy_i = s[conv2d_nchw].split(conv2d_nchw_yy, factor=7)
+    conv2d_nchw_ff_o_o_o_i, conv2d_nchw_ff_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_i, factor=1)
+    conv2d_nchw_ff_o_o_o_o, conv2d_nchw_ff_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_o_i, factor=2)
+    conv2d_nchw_yy_o_i, conv2d_nchw_yy_i = s[conv2d_nchw].split(conv2d_nchw_yy, factor=1)
     conv2d_nchw_yy_o_o_i, conv2d_nchw_yy_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_i, factor=1)
-    conv2d_nchw_yy_o_o_o_i, conv2d_nchw_yy_o_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_o_i, factor=1)
+    conv2d_nchw_yy_o_o_o_i, conv2d_nchw_yy_o_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_o_i, factor=7)
     conv2d_nchw_yy_o_o_o_o, conv2d_nchw_yy_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_o_o_i, factor=1)
     conv2d_nchw_xx_o_i, conv2d_nchw_xx_i = s[conv2d_nchw].split(conv2d_nchw_xx, factor=1)
     conv2d_nchw_xx_o_o_i, conv2d_nchw_xx_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_i, factor=1)
     conv2d_nchw_xx_o_o_o_i, conv2d_nchw_xx_o_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_o_i, factor=7)
     conv2d_nchw_xx_o_o_o_o, conv2d_nchw_xx_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_o_o_i, factor=1)
     conv2d_nchw_rc_o_i, conv2d_nchw_rc_i = s[conv2d_nchw].split(conv2d_nchw_rc, factor=4)
-    conv2d_nchw_rc_o_o, conv2d_nchw_rc_o_i = s[conv2d_nchw].split(conv2d_nchw_rc_o_i, factor=2)
+    conv2d_nchw_rc_o_o, conv2d_nchw_rc_o_i = s[conv2d_nchw].split(conv2d_nchw_rc_o_i, factor=8)
     conv2d_nchw_ry_o_i, conv2d_nchw_ry_i = s[conv2d_nchw].split(conv2d_nchw_ry, factor=1)
-    conv2d_nchw_ry_o_o, conv2d_nchw_ry_o_i = s[conv2d_nchw].split(conv2d_nchw_ry_o_i, factor=1)
+    conv2d_nchw_ry_o_o, conv2d_nchw_ry_o_i = s[conv2d_nchw].split(conv2d_nchw_ry_o_i, factor=3)
     conv2d_nchw_rx_o_i, conv2d_nchw_rx_i = s[conv2d_nchw].split(conv2d_nchw_rx, factor=1)
     conv2d_nchw_rx_o_o, conv2d_nchw_rx_o_i = s[conv2d_nchw].split(conv2d_nchw_rx_o_i, factor=1)
     s[conv2d_nchw].reorder(conv2d_nchw_nn_o_o_o_o, conv2d_nchw_ff_o_o_o_o, conv2d_nchw_yy_o_o_o_o, conv2d_nchw_xx_o_o_o_o, conv2d_nchw_nn_o_o_o_i, conv2d_nchw_ff_o_o_o_i, conv2d_nchw_yy_o_o_o_i, conv2d_nchw_xx_o_o_o_i, conv2d_nchw_nn_o_o_i, conv2d_nchw_ff_o_o_i, conv2d_nchw_yy_o_o_i, conv2d_nchw_xx_o_o_i, conv2d_nchw_rc_o_o, conv2d_nchw_ry_o_o, conv2d_nchw_rx_o_o, conv2d_nchw_rc_o_i, conv2d_nchw_ry_o_i, conv2d_nchw_rx_o_i, conv2d_nchw_nn_o_i, conv2d_nchw_ff_o_i, conv2d_nchw_yy_o_i, conv2 [...]
     compute_i0_o_i, compute_i0_i = s[compute].split(compute_i0, factor=1)
     compute_i0_o_o_i, compute_i0_o_i = s[compute].split(compute_i0_o_i, factor=1)
     compute_i0_o_o_o, compute_i0_o_o_i = s[compute].split(compute_i0_o_o_i, factor=1)
-    compute_i1_o_i, compute_i1_i = s[compute].split(compute_i1, factor=1)
-    compute_i1_o_o_i, compute_i1_o_i = s[compute].split(compute_i1_o_i, factor=8)
-    compute_i1_o_o_o, compute_i1_o_o_i = s[compute].split(compute_i1_o_o_i, factor=1)
-    compute_i2_o_i, compute_i2_i = s[compute].split(compute_i2, factor=7)
-    compute_i2_o_o_i, compute_i2_o_i = s[compute].split(compute_i2_o_i, factor=1)
+    compute_i1_o_i, compute_i1_i = s[compute].split(compute_i1, factor=2)
+    compute_i1_o_o_i, compute_i1_o_i = s[compute].split(compute_i1_o_i, factor=1)
+    compute_i1_o_o_o, compute_i1_o_o_i = s[compute].split(compute_i1_o_o_i, factor=2)
+    compute_i2_o_i, compute_i2_i = s[compute].split(compute_i2, factor=1)
+    compute_i2_o_o_i, compute_i2_o_i = s[compute].split(compute_i2_o_i, factor=7)
     compute_i2_o_o_o, compute_i2_o_o_i = s[compute].split(compute_i2_o_o_i, factor=1)
     compute_i3_o_i, compute_i3_i = s[compute].split(compute_i3, factor=1)
     compute_i3_o_o_i, compute_i3_o_i = s[compute].split(compute_i3_o_i, factor=7)
@@ -658,14 +571,14 @@ They can be used for debugging and learning the behavior of the auto-scheduler.
     kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused = s[kernel_shared].fuse(kernel_shared_ax0, kernel_shared_ax1, kernel_shared_ax2, kernel_shared_ax3)
     kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=1)
     s[kernel_shared].vectorize(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i)
-    kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=56)
+    kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=49)
     s[kernel_shared].bind(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i, te.thread_axis("threadIdx.x"))
     pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused = s[pad_temp_shared].fuse(pad_temp_shared_ax0, pad_temp_shared_ax1, pad_temp_shared_ax2, pad_temp_shared_ax3)
-    pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=4)
+    pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=1)
     s[pad_temp_shared].vectorize(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i)
-    pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=56)
+    pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=49)
     s[pad_temp_shared].bind(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i, te.thread_axis("threadIdx.x"))
-    s[conv2d_nchw].pragma(conv2d_nchw_nn_o_o_o_o, "auto_unroll_max_step", 512)
+    s[conv2d_nchw].pragma(conv2d_nchw_nn_o_o_o_o, "auto_unroll_max_step", 64)
     s[conv2d_nchw].pragma(conv2d_nchw_nn_o_o_o_o, "unroll_explicit", True)
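
    The schedule commands above and the CUDA source below are the two print modes of the
    auto-scheduler's best-record printer. A minimal sketch of the surrounding calls on a
    toy registered workload (the workload, task arguments, and log-file name are
    illustrative assumptions, and the log is assumed to already hold a measured record):

 .. code-block:: python

    # Minimal sketch: print the best record of a tuning log as schedule commands
    # or as CUDA source, then rebuild a module from it. The workload and the log
    # file "add_one.json" are illustrative assumptions.
    import tvm
    from tvm import te, auto_scheduler

    @auto_scheduler.register_workload
    def add_one(n):
        A = te.placeholder((n,), name="A")
        B = te.compute((n,), lambda i: A[i] + 1.0, name="B")
        return [A, B]

    task = auto_scheduler.SearchTask(func=add_one, args=(1024,), target="cuda")
    log_file = "add_one.json"  # assumed to contain at least one measured record

    print(task.print_best(log_file, print_mode="schedule"))
    print(task.print_best(log_file, print_mode="cuda"))

    sch, args = task.apply_best(log_file)
    func = tvm.build(sch, args, target="cuda")
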
 
     CUDA source code:
@@ -683,257 +596,127 @@ They can be used for debugging and learning the behavior of the auto-scheduler.
       #define int64_t long long
       #define uint64_t unsigned long long
     #endif
-    extern "C" __global__ void __launch_bounds__(56) default_function_kernel0(float* __restrict__ data, float* __restrict__ kernel, float* __restrict__ compute, float* __restrict__ bias) {
-      float conv2d_nchw[7];
-      __shared__ float pad_temp_shared[392];
-      __shared__ float kernel_shared[64];
+    extern "C" __global__ void __launch_bounds__(49) default_function_kernel0(float* __restrict__ data, float* __restrict__ kernel, float* __restrict__ compute, float* __restrict__ bias) {
+      float conv2d_nchw[4];
+      __shared__ float pad_temp_shared[2016];
+      __shared__ float kernel_shared[384];
       conv2d_nchw[0] = 0.000000e+00f;
-      conv2d_nchw[1] = 0.000000e+00f;
       conv2d_nchw[2] = 0.000000e+00f;
+      conv2d_nchw[1] = 0.000000e+00f;
       conv2d_nchw[3] = 0.000000e+00f;
-      conv2d_nchw[4] = 0.000000e+00f;
-      conv2d_nchw[5] = 0.000000e+00f;
-      conv2d_nchw[6] = 0.000000e+00f;
-      for (int rc_outer_outer = 0; rc_outer_outer < 64; ++rc_outer_outer) {
-        for (int ry_outer_outer = 0; ry_outer_outer < 3; ++ry_outer_outer) {
-          __syncthreads();
-          pad_temp_shared[(((int)threadIdx.x) * 4)] = ((((1 <= ((((((int)threadIdx.x) * 4) % 49) / 7) + ry_outer_outer)) && (((((((int)threadIdx.x) * 4) % 49) / 7) + ry_outer_outer) < 8)) && (1 <= ((((int)threadIdx.x) * 4) % 7))) ? data[((((rc_outer_outer * 392) + (ry_outer_outer * 7)) + (((int)threadIdx.x) * 4)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[((((int)threadIdx.x) * 4) + 1)] = ((((1 <= (((((((int)threadIdx.x) * 4) + 1) % 49) / 7) + ry_outer_outer)) && ((((((((int)threadIdx.x) * 4) + 1) % 49) / 7) + ry_outer_outer) < 8)) && (1 <= (((((int)threadIdx.x) * 4) + 1) % 7))) ? data[((((rc_outer_outer * 392) + (ry_outer_outer * 7)) + (((int)threadIdx.x) * 4)) - 7)] : 0.000000e+00f);
-          pad_temp_shared[((((int)threadIdx.x) * 4) + 2)] = ((((1 <= (((((((int)threadIdx.x) * 4) + 2) % 49) / 7) + ry_outer_outer)) && ((((((((int)threadIdx.x) * 4) + 2) % 49) / 7) + ry_outer_outer) < 8)) && (1 <= (((((int)threadIdx.x) * 4) + 2) % 7))) ? data[((((rc_outer_outer * 392) + (ry_outer_outer * 7)) + (((int)threadIdx.x) * 4)) - 6)] : 0.000000e+00f);
-          pad_temp_shared[((((int)threadIdx.x) * 4) + 3)] = ((((1 <= (((((((int)threadIdx.x) * 4) + 3) % 49) / 7) + ry_outer_outer)) && ((((((((int)threadIdx.x) * 4) + 3) % 49) / 7) + ry_outer_outer) < 8)) && (1 <= (((((int)threadIdx.x) * 4) + 3) % 7))) ? data[((((rc_outer_outer * 392) + (ry_outer_outer * 7)) + (((int)threadIdx.x) * 4)) - 5)] : 0.000000e+00f);
-          if (((int)threadIdx.x) < 42) {
-            pad_temp_shared[((((((((int)threadIdx.x) * 4) + 224) / 49) * 49) + (((((((int)threadIdx.x) * 4) / 7) + 4) % 7) * 7)) + ((((int)threadIdx.x) * 4) % 7))] = ((((1 <= (ry_outer_outer + ((((((int)threadIdx.x) * 4) / 7) + 4) % 7))) && ((ry_outer_outer + ((((((int)threadIdx.x) * 4) / 7) + 4) % 7)) < 8)) && (1 <= ((((int)threadIdx.x) * 4) % 7))) ? data[((((((rc_outer_outer * 392) + ((((((int)threadIdx.x) * 4) + 224) / 49) * 49)) + (ry_outer_outer * 7)) + (((((((int)threadIdx.x) * 4)  [...]
-          }
-          if (((int)threadIdx.x) < 42) {
-            pad_temp_shared[((((((((int)threadIdx.x) * 4) + 225) / 49) * 49) + ((((((((int)threadIdx.x) * 4) + 1) / 7) + 4) % 7) * 7)) + (((((int)threadIdx.x) * 4) + 1) % 7))] = ((((1 <= (ry_outer_outer + (((((((int)threadIdx.x) * 4) + 1) / 7) + 4) % 7))) && ((ry_outer_outer + (((((((int)threadIdx.x) * 4) + 1) / 7) + 4) % 7)) < 8)) && (1 <= (((((int)threadIdx.x) * 4) + 1) % 7))) ? data[((((((rc_outer_outer * 392) + ((((((int)threadIdx.x) * 4) + 225) / 49) * 49)) + (ry_outer_outer * 7)) + [...]
-          }
-          if (((int)threadIdx.x) < 42) {
-            pad_temp_shared[((((((((int)threadIdx.x) * 4) + 226) / 49) * 49) + ((((((((int)threadIdx.x) * 4) + 2) / 7) + 4) % 7) * 7)) + (((((int)threadIdx.x) * 4) + 2) % 7))] = ((((1 <= (ry_outer_outer + (((((((int)threadIdx.x) * 4) + 2) / 7) + 4) % 7))) && ((ry_outer_outer + (((((((int)threadIdx.x) * 4) + 2) / 7) + 4) % 7)) < 8)) && (1 <= (((((int)threadIdx.x) * 4) + 2) % 7))) ? data[((((((rc_outer_outer * 392) + ((((((int)threadIdx.x) * 4) + 226) / 49) * 49)) + (ry_outer_outer * 7)) + [...]
-          }
-          if (((int)threadIdx.x) < 42) {
-            pad_temp_shared[((((((((int)threadIdx.x) * 4) + 227) / 49) * 49) + ((((((((int)threadIdx.x) * 4) + 3) / 7) + 4) % 7) * 7)) + (((((int)threadIdx.x) * 4) + 3) % 7))] = ((((1 <= (ry_outer_outer + (((((((int)threadIdx.x) * 4) + 3) / 7) + 4) % 7))) && ((ry_outer_outer + (((((((int)threadIdx.x) * 4) + 3) / 7) + 4) % 7)) < 8)) && (1 <= (((((int)threadIdx.x) * 4) + 3) % 7))) ? data[((((((rc_outer_outer * 392) + ((((((int)threadIdx.x) * 4) + 227) / 49) * 49)) + (ry_outer_outer * 7)) + [...]
-          }
-          kernel_shared[((int)threadIdx.x)] = kernel[(((((((int)blockIdx.x) * 36864) + ((((int)threadIdx.x) >> 3) * 4608)) + (rc_outer_outer * 72)) + ((((int)threadIdx.x) & 7) * 9)) + (ry_outer_outer * 3))];
-          if (((int)threadIdx.x) < 8) {
-            kernel_shared[(((int)threadIdx.x) + 56)] = kernel[(((((((int)blockIdx.x) * 36864) + (rc_outer_outer * 72)) + (((int)threadIdx.x) * 9)) + (ry_outer_outer * 3)) + 32256)];
-          }
-          __syncthreads();
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((int)threadIdx.x) % 7)] * kernel_shared[((((int)threadIdx.x) / 7) * 8)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 7)] * kernel_shared[((((int)threadIdx.x) / 7) * 8)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 14)] * kernel_shared[((((int)threadIdx.x) / 7) * 8)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 21)] * kernel_shared[((((int)threadIdx.x) / 7) * 8)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 28)] * kernel_shared[((((int)threadIdx.x) / 7) * 8)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 35)] * kernel_shared[((((int)threadIdx.x) / 7) * 8)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 42)] * kernel_shared[((((int)threadIdx.x) / 7) * 8)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 49)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 1)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 56)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 1)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 63)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 1)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 70)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 1)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 77)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 1)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 84)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 1)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 91)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 1)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 98)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 2)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 105)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 2)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 112)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 2)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 119)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 2)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 126)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 2)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 133)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 2)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 140)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 2)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 147)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 3)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 154)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 3)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 161)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 3)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 168)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 3)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 175)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 3)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 182)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 3)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 189)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 3)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 196)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 4)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 203)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 4)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 210)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 4)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 217)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 4)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 224)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 4)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 231)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 4)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 238)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 4)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 245)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 5)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 252)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 5)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 259)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 5)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 266)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 5)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 273)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 5)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 280)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 5)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 287)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 5)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 294)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 6)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 301)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 6)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 308)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 6)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 315)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 6)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 322)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 6)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 329)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 6)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 336)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 6)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 343)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 7)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 350)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 7)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 357)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 7)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 364)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 7)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 371)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 7)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 378)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 7)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 385)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 7)]));
+      for (int rc_outer_outer = 0; rc_outer_outer < 16; ++rc_outer_outer) {
+        for (int rx_outer_outer = 0; rx_outer_outer < 3; ++rx_outer_outer) {
           __syncthreads();
-          pad_temp_shared[(((int)threadIdx.x) * 4)] = (((1 <= ((((((int)threadIdx.x) * 4) % 49) / 7) + ry_outer_outer)) && (((((((int)threadIdx.x) * 4) % 49) / 7) + ry_outer_outer) < 8)) ? data[((((rc_outer_outer * 392) + (ry_outer_outer * 7)) + (((int)threadIdx.x) * 4)) - 7)] : 0.000000e+00f);
-          pad_temp_shared[((((int)threadIdx.x) * 4) + 1)] = (((1 <= (((((((int)threadIdx.x) * 4) + 1) % 49) / 7) + ry_outer_outer)) && ((((((((int)threadIdx.x) * 4) + 1) % 49) / 7) + ry_outer_outer) < 8)) ? data[((((rc_outer_outer * 392) + (ry_outer_outer * 7)) + (((int)threadIdx.x) * 4)) - 6)] : 0.000000e+00f);
-          pad_temp_shared[((((int)threadIdx.x) * 4) + 2)] = (((1 <= (((((((int)threadIdx.x) * 4) + 2) % 49) / 7) + ry_outer_outer)) && ((((((((int)threadIdx.x) * 4) + 2) % 49) / 7) + ry_outer_outer) < 8)) ? data[((((rc_outer_outer * 392) + (ry_outer_outer * 7)) + (((int)threadIdx.x) * 4)) - 5)] : 0.000000e+00f);
-          pad_temp_shared[((((int)threadIdx.x) * 4) + 3)] = (((1 <= (((((((int)threadIdx.x) * 4) + 3) % 49) / 7) + ry_outer_outer)) && ((((((((int)threadIdx.x) * 4) + 3) % 49) / 7) + ry_outer_outer) < 8)) ? data[((((rc_outer_outer * 392) + (ry_outer_outer * 7)) + (((int)threadIdx.x) * 4)) - 4)] : 0.000000e+00f);
-          if (((int)threadIdx.x) < 42) {
-            pad_temp_shared[((((((((int)threadIdx.x) * 4) + 224) / 49) * 49) + (((((((int)threadIdx.x) * 4) / 7) + 4) % 7) * 7)) + ((((int)threadIdx.x) * 4) % 7))] = (((1 <= (ry_outer_outer + ((((((int)threadIdx.x) * 4) / 7) + 4) % 7))) && ((ry_outer_outer + ((((((int)threadIdx.x) * 4) / 7) + 4) % 7)) < 8)) ? data[((((((rc_outer_outer * 392) + ((((((int)threadIdx.x) * 4) + 224) / 49) * 49)) + (ry_outer_outer * 7)) + (((((((int)threadIdx.x) * 4) / 7) + 4) % 7) * 7)) + ((((int)threadIdx.x) [...]
+          pad_temp_shared[((int)threadIdx.x)] = ((((7 <= ((int)threadIdx.x)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((rc_outer_outer * 1568) + ((int)threadIdx.x)) + rx_outer_outer) - 8)] : 0.000000e+00f);
+          pad_temp_shared[(((int)threadIdx.x) + 49)] = (((((1 <= (((((int)threadIdx.x) / 7) + 7) % 9)) && ((((((int)threadIdx.x) / 7) + 7) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 49) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 7) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
+          pad_temp_shared[(((int)threadIdx.x) + 98)] = (((((1 <= (((((int)threadIdx.x) / 7) + 5) % 9)) && ((((((int)threadIdx.x) / 7) + 5) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 98) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 5) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
+          pad_temp_shared[(((int)threadIdx.x) + 147)] = (((((1 <= (((((int)threadIdx.x) / 7) + 3) % 9)) && ((((((int)threadIdx.x) / 7) + 3) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 147) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 3) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
+          pad_temp_shared[(((int)threadIdx.x) + 196)] = (((1 <= (rx_outer_outer + (((int)threadIdx.x) % 7))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[(((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 196) / 63) * 49)) + ((int)threadIdx.x)) + rx_outer_outer) - 1)] : 0.000000e+00f);
+          pad_temp_shared[(((int)threadIdx.x) + 245)] = (((((1 <= (((((int)threadIdx.x) / 7) + 8) % 9)) && ((((((int)threadIdx.x) / 7) + 8) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 245) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 8) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
+          pad_temp_shared[(((int)threadIdx.x) + 294)] = (((((1 <= (((((int)threadIdx.x) / 7) + 6) % 9)) && ((((((int)threadIdx.x) / 7) + 6) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 294) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 6) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
+          pad_temp_shared[(((int)threadIdx.x) + 343)] = (((((1 <= (((((int)threadIdx.x) / 7) + 4) % 9)) && ((((((int)threadIdx.x) / 7) + 4) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 343) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 4) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
+          pad_temp_shared[(((int)threadIdx.x) + 392)] = ((((((int)threadIdx.x) < 42) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[(((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 392) / 63) * 49)) + ((int)threadIdx.x)) + rx_outer_outer) + 6)] : 0.000000e+00f);
+          pad_temp_shared[(((int)threadIdx.x) + 441)] = ((((7 <= ((int)threadIdx.x)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((rc_outer_outer * 1568) + ((int)threadIdx.x)) + rx_outer_outer) + 335)] : 0.000000e+00f);
+          pad_temp_shared[(((int)threadIdx.x) + 490)] = (((((1 <= (((((int)threadIdx.x) / 7) + 7) % 9)) && ((((((int)threadIdx.x) / 7) + 7) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 490) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 7) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
+          pad_temp_shared[(((int)threadIdx.x) + 539)] = (((((1 <= (((((int)threadIdx.x) / 7) + 5) % 9)) && ((((((int)threadIdx.x) / 7) + 5) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 539) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 5) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
+          pad_temp_shared[(((int)threadIdx.x) + 588)] = (((((1 <= (((((int)threadIdx.x) / 7) + 3) % 9)) && ((((((int)threadIdx.x) / 7) + 3) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 588) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 3) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
+          pad_temp_shared[(((int)threadIdx.x) + 637)] = (((1 <= (rx_outer_outer + (((int)threadIdx.x) % 7))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[(((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 637) / 63) * 49)) + ((int)threadIdx.x)) + rx_outer_outer) - 1)] : 0.000000e+00f);
+          pad_temp_shared[(((int)threadIdx.x) + 686)] = (((((1 <= (((((int)threadIdx.x) / 7) + 8) % 9)) && ((((((int)threadIdx.x) / 7) + 8) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 686) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 8) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
+          pad_temp_shared[(((int)threadIdx.x) + 735)] = (((((1 <= (((((int)threadIdx.x) / 7) + 6) % 9)) && ((((((int)threadIdx.x) / 7) + 6) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 735) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 6) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
+          pad_temp_shared[(((int)threadIdx.x) + 784)] = (((((1 <= (((((int)threadIdx.x) / 7) + 4) % 9)) && ((((((int)threadIdx.x) / 7) + 4) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 784) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 4) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
+          pad_temp_shared[(((int)threadIdx.x) + 833)] = ((((((int)threadIdx.x) < 42) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[(((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 833) / 63) * 49)) + ((int)threadIdx.x)) + rx_outer_outer) + 6)] : 0.000000e+00f);
+          pad_temp_shared[(((int)threadIdx.x) + 882)] = ((((7 <= ((int)threadIdx.x)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((rc_outer_outer * 1568) + ((int)threadIdx.x)) + rx_outer_outer) + 678)] : 0.000000e+00f);
+          pad_temp_shared[(((int)threadIdx.x) + 931)] = (((((1 <= (((((int)threadIdx.x) / 7) + 7) % 9)) && ((((((int)threadIdx.x) / 7) + 7) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 931) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 7) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
+          pad_temp_shared[(((int)threadIdx.x) + 980)] = (((((1 <= (((((int)threadIdx.x) / 7) + 5) % 9)) && ((((((int)threadIdx.x) / 7) + 5) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 980) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 5) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
+          pad_temp_shared[(((int)threadIdx.x) + 1029)] = (((((1 <= (((((int)threadIdx.x) / 7) + 3) % 9)) && ((((((int)threadIdx.x) / 7) + 3) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1029) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 3) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
+          pad_temp_shared[(((int)threadIdx.x) + 1078)] = (((1 <= (rx_outer_outer + (((int)threadIdx.x) % 7))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[(((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1078) / 63) * 49)) + ((int)threadIdx.x)) + rx_outer_outer) - 1)] : 0.000000e+00f);
+          pad_temp_shared[(((int)threadIdx.x) + 1127)] = (((((1 <= (((((int)threadIdx.x) / 7) + 8) % 9)) && ((((((int)threadIdx.x) / 7) + 8) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1127) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 8) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
+          pad_temp_shared[(((int)threadIdx.x) + 1176)] = (((((1 <= (((((int)threadIdx.x) / 7) + 6) % 9)) && ((((((int)threadIdx.x) / 7) + 6) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1176) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 6) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
+          pad_temp_shared[(((int)threadIdx.x) + 1225)] = (((((1 <= (((((int)threadIdx.x) / 7) + 4) % 9)) && ((((((int)threadIdx.x) / 7) + 4) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1225) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 4) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
+          pad_temp_shared[(((int)threadIdx.x) + 1274)] = ((((((int)threadIdx.x) < 42) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[(((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1274) / 63) * 49)) + ((int)threadIdx.x)) + rx_outer_outer) + 6)] : 0.000000e+00f);
+          pad_temp_shared[(((int)threadIdx.x) + 1323)] = ((((7 <= ((int)threadIdx.x)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((rc_outer_outer * 1568) + ((int)threadIdx.x)) + rx_outer_outer) + 1021)] : 0.000000e+00f);
+          pad_temp_shared[(((int)threadIdx.x) + 1372)] = (((((1 <= (((((int)threadIdx.x) / 7) + 7) % 9)) && ((((((int)threadIdx.x) / 7) + 7) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1372) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 7) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
+          pad_temp_shared[(((int)threadIdx.x) + 1421)] = (((((1 <= (((((int)threadIdx.x) / 7) + 5) % 9)) && ((((((int)threadIdx.x) / 7) + 5) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1421) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 5) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
+          pad_temp_shared[(((int)threadIdx.x) + 1470)] = (((((1 <= (((((int)threadIdx.x) / 7) + 3) % 9)) && ((((((int)threadIdx.x) / 7) + 3) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1470) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 3) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
+          pad_temp_shared[(((int)threadIdx.x) + 1519)] = (((1 <= (rx_outer_outer + (((int)threadIdx.x) % 7))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[(((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1519) / 63) * 49)) + ((int)threadIdx.x)) + rx_outer_outer) - 1)] : 0.000000e+00f);
+          pad_temp_shared[(((int)threadIdx.x) + 1568)] = (((((1 <= (((((int)threadIdx.x) / 7) + 8) % 9)) && ((((((int)threadIdx.x) / 7) + 8) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1568) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 8) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
+          pad_temp_shared[(((int)threadIdx.x) + 1617)] = (((((1 <= (((((int)threadIdx.x) / 7) + 6) % 9)) && ((((((int)threadIdx.x) / 7) + 6) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1617) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 6) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
+          pad_temp_shared[(((int)threadIdx.x) + 1666)] = (((((1 <= (((((int)threadIdx.x) / 7) + 4) % 9)) && ((((((int)threadIdx.x) / 7) + 4) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1666) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 4) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
+          pad_temp_shared[(((int)threadIdx.x) + 1715)] = ((((((int)threadIdx.x) < 42) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[(((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1715) / 63) * 49)) + ((int)threadIdx.x)) + rx_outer_outer) + 6)] : 0.000000e+00f);
+          pad_temp_shared[(((int)threadIdx.x) + 1764)] = ((((7 <= ((int)threadIdx.x)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((rc_outer_outer * 1568) + ((int)threadIdx.x)) + rx_outer_outer) + 1364)] : 0.000000e+00f);
+          pad_temp_shared[(((int)threadIdx.x) + 1813)] = (((((1 <= (((((int)threadIdx.x) / 7) + 7) % 9)) && ((((((int)threadIdx.x) / 7) + 7) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1813) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 7) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
+          pad_temp_shared[(((int)threadIdx.x) + 1862)] = (((((1 <= (((((int)threadIdx.x) / 7) + 5) % 9)) && ((((((int)threadIdx.x) / 7) + 5) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1862) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 5) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
+          pad_temp_shared[(((int)threadIdx.x) + 1911)] = (((((1 <= (((((int)threadIdx.x) / 7) + 3) % 9)) && ((((((int)threadIdx.x) / 7) + 3) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1911) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 3) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
+          pad_temp_shared[(((int)threadIdx.x) + 1960)] = (((1 <= (rx_outer_outer + (((int)threadIdx.x) % 7))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[(((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1960) / 63) * 49)) + ((int)threadIdx.x)) + rx_outer_outer) - 1)] : 0.000000e+00f);
+          if (((int)threadIdx.x) < 7) {
+            pad_temp_shared[(((int)threadIdx.x) + 2009)] = 0.000000e+00f;
           }
-          if (((int)threadIdx.x) < 42) {
-            pad_temp_shared[((((((((int)threadIdx.x) * 4) + 225) / 49) * 49) + ((((((((int)threadIdx.x) * 4) + 1) / 7) + 4) % 7) * 7)) + (((((int)threadIdx.x) * 4) + 1) % 7))] = (((1 <= (ry_outer_outer + (((((((int)threadIdx.x) * 4) + 1) / 7) + 4) % 7))) && ((ry_outer_outer + (((((((int)threadIdx.x) * 4) + 1) / 7) + 4) % 7)) < 8)) ? data[((((((rc_outer_outer * 392) + ((((((int)threadIdx.x) * 4) + 225) / 49) * 49)) + (ry_outer_outer * 7)) + ((((((((int)threadIdx.x) * 4) + 1) / 7) + 4) % 7 [...]
-          }
-          if (((int)threadIdx.x) < 42) {
-            pad_temp_shared[((((((((int)threadIdx.x) * 4) + 226) / 49) * 49) + ((((((((int)threadIdx.x) * 4) + 2) / 7) + 4) % 7) * 7)) + (((((int)threadIdx.x) * 4) + 2) % 7))] = (((1 <= (ry_outer_outer + (((((((int)threadIdx.x) * 4) + 2) / 7) + 4) % 7))) && ((ry_outer_outer + (((((((int)threadIdx.x) * 4) + 2) / 7) + 4) % 7)) < 8)) ? data[((((((rc_outer_outer * 392) + ((((((int)threadIdx.x) * 4) + 226) / 49) * 49)) + (ry_outer_outer * 7)) + ((((((((int)threadIdx.x) * 4) + 2) / 7) + 4) % 7 [...]
-          }
-          if (((int)threadIdx.x) < 42) {
-            pad_temp_shared[((((((((int)threadIdx.x) * 4) + 227) / 49) * 49) + ((((((((int)threadIdx.x) * 4) + 3) / 7) + 4) % 7) * 7)) + (((((int)threadIdx.x) * 4) + 3) % 7))] = (((1 <= (ry_outer_outer + (((((((int)threadIdx.x) * 4) + 3) / 7) + 4) % 7))) && ((ry_outer_outer + (((((((int)threadIdx.x) * 4) + 3) / 7) + 4) % 7)) < 8)) ? data[((((((rc_outer_outer * 392) + ((((((int)threadIdx.x) * 4) + 227) / 49) * 49)) + (ry_outer_outer * 7)) + ((((((((int)threadIdx.x) * 4) + 3) / 7) + 4) % 7 [...]
-          }
-          kernel_shared[((int)threadIdx.x)] = kernel[((((((((int)blockIdx.x) * 36864) + ((((int)threadIdx.x) >> 3) * 4608)) + (rc_outer_outer * 72)) + ((((int)threadIdx.x) & 7) * 9)) + (ry_outer_outer * 3)) + 1)];
-          if (((int)threadIdx.x) < 8) {
-            kernel_shared[(((int)threadIdx.x) + 56)] = kernel[(((((((int)blockIdx.x) * 36864) + (rc_outer_outer * 72)) + (((int)threadIdx.x) * 9)) + (ry_outer_outer * 3)) + 32257)];
+          kernel_shared[((int)threadIdx.x)] = kernel[((((((int)blockIdx.x) * 18432) + (rc_outer_outer * 288)) + (((int)threadIdx.x) * 3)) + rx_outer_outer)];
+          kernel_shared[(((int)threadIdx.x) + 49)] = kernel[((((((((int)blockIdx.x) * 18432) + (((((int)threadIdx.x) + 49) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((((int)threadIdx.x) + 49) % 96) / 3) * 9)) + (((((int)threadIdx.x) + 1) % 3) * 3)) + rx_outer_outer)];
+          kernel_shared[(((int)threadIdx.x) + 98)] = kernel[(((((((int)blockIdx.x) * 18432) + (((((int)threadIdx.x) + 98) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 2) * 3)) + rx_outer_outer)];
+          kernel_shared[(((int)threadIdx.x) + 147)] = kernel[((((((((int)blockIdx.x) * 18432) + (((((int)threadIdx.x) + 147) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((((int)threadIdx.x) / 3) + 17) & 31) * 9)) + ((((int)threadIdx.x) % 3) * 3)) + rx_outer_outer)];
+          kernel_shared[(((int)threadIdx.x) + 196)] = kernel[((((((((int)blockIdx.x) * 18432) + (((((int)threadIdx.x) + 196) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 4) / 3) * 9)) + (((((int)threadIdx.x) + 1) % 3) * 3)) + rx_outer_outer)];
+          kernel_shared[(((int)threadIdx.x) + 245)] = kernel[((((((((int)blockIdx.x) * 18432) + (((((int)threadIdx.x) + 245) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((((int)threadIdx.x) + 53) % 96) / 3) * 9)) + (((((int)threadIdx.x) + 2) % 3) * 3)) + rx_outer_outer)];
+          kernel_shared[(((int)threadIdx.x) + 294)] = kernel[((((((((int)blockIdx.x) * 18432) + (((((int)threadIdx.x) + 294) / 96) * 4608)) + (rc_outer_outer * 288)) + (((int)threadIdx.x) * 3)) + rx_outer_outer) + 18)];
+          if (((int)threadIdx.x) < 41) {
+            kernel_shared[(((int)threadIdx.x) + 343)] = kernel[((((((((int)blockIdx.x) * 18432) + (((((int)threadIdx.x) + 343) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 55) / 3) * 9)) + (((((int)threadIdx.x) + 1) % 3) * 3)) + rx_outer_outer)];
           }
           __syncthreads();
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((int)threadIdx.x) % 7)] * kernel_shared[((((int)threadIdx.x) / 7) * 8)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 7)] * kernel_shared[((((int)threadIdx.x) / 7) * 8)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 14)] * kernel_shared[((((int)threadIdx.x) / 7) * 8)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 21)] * kernel_shared[((((int)threadIdx.x) / 7) * 8)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 28)] * kernel_shared[((((int)threadIdx.x) / 7) * 8)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 35)] * kernel_shared[((((int)threadIdx.x) / 7) * 8)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 42)] * kernel_shared[((((int)threadIdx.x) / 7) * 8)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 49)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 1)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 56)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 1)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 63)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 1)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 70)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 1)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 77)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 1)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 84)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 1)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 91)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 1)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 98)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 2)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 105)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 2)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 112)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 2)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 119)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 2)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 126)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 2)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 133)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 2)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 140)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 2)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 147)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 3)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 154)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 3)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 161)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 3)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 168)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 3)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 175)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 3)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 182)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 3)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 189)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 3)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 196)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 4)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 203)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 4)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 210)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 4)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 217)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 4)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 224)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 4)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 231)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 4)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 238)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 4)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 245)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 5)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 252)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 5)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 259)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 5)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 266)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 5)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 273)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 5)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 280)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 5)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 287)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 5)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 294)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 6)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 301)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 6)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 308)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 6)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 315)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 6)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 322)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 6)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 329)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 6)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 336)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 6)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 343)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 7)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 350)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 7)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 357)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 7)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 364)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 7)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 371)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 7)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 378)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 7)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 385)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 7)]));
-          __syncthreads();
-          pad_temp_shared[(((int)threadIdx.x) * 4)] = ((((1 <= ((((((int)threadIdx.x) * 4) % 49) / 7) + ry_outer_outer)) && (((((((int)threadIdx.x) * 4) % 49) / 7) + ry_outer_outer) < 8)) && (((((int)threadIdx.x) * 4) % 7) < 6)) ? data[((((rc_outer_outer * 392) + (ry_outer_outer * 7)) + (((int)threadIdx.x) * 4)) - 6)] : 0.000000e+00f);
-          pad_temp_shared[((((int)threadIdx.x) * 4) + 1)] = ((((1 <= (((((((int)threadIdx.x) * 4) + 1) % 49) / 7) + ry_outer_outer)) && ((((((((int)threadIdx.x) * 4) + 1) % 49) / 7) + ry_outer_outer) < 8)) && ((((((int)threadIdx.x) * 4) + 1) % 7) < 6)) ? data[((((rc_outer_outer * 392) + (ry_outer_outer * 7)) + (((int)threadIdx.x) * 4)) - 5)] : 0.000000e+00f);
-          pad_temp_shared[((((int)threadIdx.x) * 4) + 2)] = ((((1 <= (((((((int)threadIdx.x) * 4) + 2) % 49) / 7) + ry_outer_outer)) && ((((((((int)threadIdx.x) * 4) + 2) % 49) / 7) + ry_outer_outer) < 8)) && ((((((int)threadIdx.x) * 4) + 2) % 7) < 6)) ? data[((((rc_outer_outer * 392) + (ry_outer_outer * 7)) + (((int)threadIdx.x) * 4)) - 4)] : 0.000000e+00f);
-          pad_temp_shared[((((int)threadIdx.x) * 4) + 3)] = ((((1 <= (((((((int)threadIdx.x) * 4) + 3) % 49) / 7) + ry_outer_outer)) && ((((((((int)threadIdx.x) * 4) + 3) % 49) / 7) + ry_outer_outer) < 8)) && ((((((int)threadIdx.x) * 4) + 3) % 7) < 6)) ? data[((((rc_outer_outer * 392) + (ry_outer_outer * 7)) + (((int)threadIdx.x) * 4)) - 3)] : 0.000000e+00f);
-          if (((int)threadIdx.x) < 42) {
-            pad_temp_shared[((((((((int)threadIdx.x) * 4) + 224) / 49) * 49) + (((((((int)threadIdx.x) * 4) / 7) + 4) % 7) * 7)) + ((((int)threadIdx.x) * 4) % 7))] = ((((1 <= (ry_outer_outer + ((((((int)threadIdx.x) * 4) / 7) + 4) % 7))) && ((ry_outer_outer + ((((((int)threadIdx.x) * 4) / 7) + 4) % 7)) < 8)) && (((((int)threadIdx.x) * 4) % 7) < 6)) ? data[((((((rc_outer_outer * 392) + ((((((int)threadIdx.x) * 4) + 224) / 49) * 49)) + (ry_outer_outer * 7)) + (((((((int)threadIdx.x) * 4) / [...]
-          }
-          if (((int)threadIdx.x) < 42) {
-            pad_temp_shared[((((((((int)threadIdx.x) * 4) + 225) / 49) * 49) + ((((((((int)threadIdx.x) * 4) + 1) / 7) + 4) % 7) * 7)) + (((((int)threadIdx.x) * 4) + 1) % 7))] = ((((1 <= (ry_outer_outer + (((((((int)threadIdx.x) * 4) + 1) / 7) + 4) % 7))) && ((ry_outer_outer + (((((((int)threadIdx.x) * 4) + 1) / 7) + 4) % 7)) < 8)) && ((((((int)threadIdx.x) * 4) + 1) % 7) < 6)) ? data[((((((rc_outer_outer * 392) + ((((((int)threadIdx.x) * 4) + 225) / 49) * 49)) + (ry_outer_outer * 7)) +  [...]
+          for (int rc_outer_inner = 0; rc_outer_inner < 8; ++rc_outer_inner) {
+            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((rc_outer_inner * 252) + ((int)threadIdx.x))] * kernel_shared[(rc_outer_inner * 12)]));
+            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[((rc_outer_inner * 252) + ((int)threadIdx.x))] * kernel_shared[((rc_outer_inner * 12) + 192)]));
+            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[((rc_outer_inner * 252) + ((int)threadIdx.x))] * kernel_shared[((rc_outer_inner * 12) + 96)]));
+            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[((rc_outer_inner * 252) + ((int)threadIdx.x))] * kernel_shared[((rc_outer_inner * 12) + 288)]));
+            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 12) + 3)]));
+            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 12) + 195)]));
+            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 12) + 99)]));
+            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 12) + 291)]));
+            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 12) + 6)]));
+            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 12) + 198)]));
+            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 12) + 102)]));
+            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 12) + 294)]));
+            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 12) + 9)]));
+            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 12) + 201)]));
+            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 12) + 105)]));
+            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 12) + 297)]));
+            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 12) + 1)]));
+            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 12) + 193)]));
+            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 12) + 97)]));
+            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 12) + 289)]));
+            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 12) + 4)]));
+            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 12) + 196)]));
+            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 12) + 100)]));
+            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 12) + 292)]));
+            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 12) + 7)]));
+            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 12) + 199)]));
+            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 12) + 103)]));
+            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 12) + 295)]));
+            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 12) + 10)]));
+            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 12) + 202)]));
+            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 12) + 106)]));
+            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 12) + 298)]));
+            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 12) + 2)]));
+            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 12) + 194)]));
+            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 12) + 98)]));
+            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 12) + 290)]));
+            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 12) + 5)]));
+            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 12) + 197)]));
+            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 12) + 101)]));
+            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 12) + 293)]));
+            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 12) + 8)]));
+            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 12) + 200)]));
+            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 12) + 104)]));
+            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 12) + 296)]));
+            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 12) + 11)]));
+            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 12) + 203)]));
+            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 12) + 107)]));
+            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 12) + 299)]));
           }
-          if (((int)threadIdx.x) < 42) {
-            pad_temp_shared[((((((((int)threadIdx.x) * 4) + 226) / 49) * 49) + ((((((((int)threadIdx.x) * 4) + 2) / 7) + 4) % 7) * 7)) + (((((int)threadIdx.x) * 4) + 2) % 7))] = ((((1 <= (ry_outer_outer + (((((((int)threadIdx.x) * 4) + 2) / 7) + 4) % 7))) && ((ry_outer_outer + (((((((int)threadIdx.x) * 4) + 2) / 7) + 4) % 7)) < 8)) && ((((((int)threadIdx.x) * 4) + 2) % 7) < 6)) ? data[((((((rc_outer_outer * 392) + ((((((int)threadIdx.x) * 4) + 226) / 49) * 49)) + (ry_outer_outer * 7)) +  [...]
-          }
-          if (((int)threadIdx.x) < 42) {
-            pad_temp_shared[((((((((int)threadIdx.x) * 4) + 227) / 49) * 49) + ((((((((int)threadIdx.x) * 4) + 3) / 7) + 4) % 7) * 7)) + (((((int)threadIdx.x) * 4) + 3) % 7))] = ((((1 <= (ry_outer_outer + (((((((int)threadIdx.x) * 4) + 3) / 7) + 4) % 7))) && ((ry_outer_outer + (((((((int)threadIdx.x) * 4) + 3) / 7) + 4) % 7)) < 8)) && ((((((int)threadIdx.x) * 4) + 3) % 7) < 6)) ? data[((((((rc_outer_outer * 392) + ((((((int)threadIdx.x) * 4) + 227) / 49) * 49)) + (ry_outer_outer * 7)) +  [...]
-          }
-          kernel_shared[((int)threadIdx.x)] = kernel[((((((((int)blockIdx.x) * 36864) + ((((int)threadIdx.x) >> 3) * 4608)) + (rc_outer_outer * 72)) + ((((int)threadIdx.x) & 7) * 9)) + (ry_outer_outer * 3)) + 2)];
-          if (((int)threadIdx.x) < 8) {
-            kernel_shared[(((int)threadIdx.x) + 56)] = kernel[(((((((int)blockIdx.x) * 36864) + (rc_outer_outer * 72)) + (((int)threadIdx.x) * 9)) + (ry_outer_outer * 3)) + 32258)];
-          }
-          __syncthreads();
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((int)threadIdx.x) % 7)] * kernel_shared[((((int)threadIdx.x) / 7) * 8)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 7)] * kernel_shared[((((int)threadIdx.x) / 7) * 8)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 14)] * kernel_shared[((((int)threadIdx.x) / 7) * 8)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 21)] * kernel_shared[((((int)threadIdx.x) / 7) * 8)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 28)] * kernel_shared[((((int)threadIdx.x) / 7) * 8)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 35)] * kernel_shared[((((int)threadIdx.x) / 7) * 8)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 42)] * kernel_shared[((((int)threadIdx.x) / 7) * 8)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 49)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 1)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 56)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 1)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 63)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 1)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 70)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 1)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 77)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 1)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 84)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 1)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 91)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 1)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 98)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 2)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 105)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 2)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 112)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 2)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 119)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 2)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 126)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 2)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 133)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 2)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 140)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 2)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 147)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 3)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 154)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 3)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 161)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 3)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 168)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 3)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 175)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 3)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 182)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 3)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 189)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 3)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 196)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 4)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 203)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 4)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 210)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 4)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 217)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 4)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 224)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 4)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 231)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 4)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 238)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 4)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 245)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 5)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 252)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 5)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 259)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 5)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 266)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 5)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 273)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 5)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 280)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 5)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 287)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 5)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 294)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 6)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 301)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 6)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 308)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 6)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 315)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 6)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 322)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 6)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 329)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 6)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 336)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 6)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 343)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 7)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 350)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 7)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 357)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 7)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 364)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 7)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 371)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 7)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 378)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 7)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 385)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 7)]));
         }
       }
-      for (int i2_inner = 0; i2_inner < 7; ++i2_inner) {
-        compute[((((((int)blockIdx.x) * 392) + ((((int)threadIdx.x) / 7) * 49)) + (i2_inner * 7)) + (((int)threadIdx.x) % 7))] = max((conv2d_nchw[i2_inner] + bias[((((int)blockIdx.x) * 8) + (((int)threadIdx.x) / 7))]), 0.000000e+00f);
+      for (int i1_inner = 0; i1_inner < 2; ++i1_inner) {
+        compute[(((((int)blockIdx.x) * 196) + (i1_inner * 49)) + ((int)threadIdx.x))] = max((conv2d_nchw[i1_inner] + bias[((((int)blockIdx.x) * 4) + i1_inner)]), 0.000000e+00f);
+        compute[((((((int)blockIdx.x) * 196) + (i1_inner * 49)) + ((int)threadIdx.x)) + 98)] = max((conv2d_nchw[(i1_inner + 2)] + bias[(((((int)blockIdx.x) * 4) + i1_inner) + 2)]), 0.000000e+00f);
       }
     }
 
@@ -995,7 +778,7 @@ In the example below we resume the status and do 5 more trials.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 5 minutes  48.035 seconds)
+   **Total running time of the script:** ( 5 minutes  42.208 seconds)
 
 
 .. _sphx_glr_download_how_to_tune_with_autoscheduler_tune_conv2d_layer_cuda.py:
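
The hunk above refers to the tutorial step that resumes tuning from the existing log file for 5 more trials. The snippet below is only a minimal sketch of that flow with the `tvm.auto_scheduler` API; it assumes `task` is the `auto_scheduler.SearchTask` created earlier in that tutorial and that `conv2d.json` already holds its measurement records.

    from tvm import auto_scheduler

    # Assumption: `task` was built earlier in the tutorial; "conv2d.json" is its log.
    log_file = "conv2d.json"

    # Warm-start the cost model and the search policy from the existing records.
    cost_model = auto_scheduler.XGBModel()
    cost_model.update_from_file(log_file)
    search_policy = auto_scheduler.SketchPolicy(
        task,
        cost_model,
        init_search_callbacks=[auto_scheduler.PreloadMeasuredStates(log_file)],
    )

    # Run 5 more measurement trials and append them to the same log.
    tune_option = auto_scheduler.TuningOptions(
        num_measure_trials=5,
        measure_callbacks=[auto_scheduler.RecordToFile(log_file)],
    )
    task.tune(tune_option, search_policy=search_policy)
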
diff --git a/docs/_sources/how_to/tune_with_autoscheduler/tune_network_cuda.rst.txt b/docs/_sources/how_to/tune_with_autoscheduler/tune_network_cuda.rst.txt
index fa282629aa..c99229c605 100644
--- a/docs/_sources/how_to/tune_with_autoscheduler/tune_network_cuda.rst.txt
+++ b/docs/_sources/how_to/tune_with_autoscheduler/tune_network_cuda.rst.txt
@@ -647,7 +647,7 @@ so we can read the log file and load the best schedules.
     Evaluate inference time cost...
     Execution time summary:
      mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)  
-       7.8815       7.8801       7.8879       7.8764       0.0048   
+       7.8874       7.8938       7.8943       7.8741       0.0094   
                
 
 
@@ -675,7 +675,7 @@ Other Tips
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  4.115 seconds)
+   **Total running time of the script:** ( 1 minutes  1.921 seconds)
 
 
 .. _sphx_glr_download_how_to_tune_with_autoscheduler_tune_network_cuda.py:
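
The "read the log file and load the best schedules" step quoted in the hunk above amounts to compiling the network under `ApplyHistoryBest` and then benchmarking the resulting module. This is a minimal sketch, assuming `mod`, `params`, `target`, and `log_file` come from the extraction and tuning steps earlier in that tutorial.

    import tvm
    from tvm import auto_scheduler, relay
    from tvm.contrib import graph_executor

    # Assumptions: `mod`, `params`, `target`, and `log_file` are defined earlier.
    with auto_scheduler.ApplyHistoryBest(log_file):
        with tvm.transform.PassContext(
            opt_level=3, config={"relay.backend.use_auto_scheduler": True}
        ):
            lib = relay.build(mod, target=target, params=params)

    # Evaluate inference time on the compiled module.
    dev = tvm.device(str(target), 0)
    module = graph_executor.GraphModule(lib["default"](dev))
    print(module.benchmark(dev, repeat=3, min_repeat_ms=500))
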
diff --git a/docs/_sources/how_to/tune_with_autoscheduler/tune_network_x86.rst.txt b/docs/_sources/how_to/tune_with_autoscheduler/tune_network_x86.rst.txt
index 0a4cb5c0f4..e96ac64cff 100644
--- a/docs/_sources/how_to/tune_with_autoscheduler/tune_network_x86.rst.txt
+++ b/docs/_sources/how_to/tune_with_autoscheduler/tune_network_x86.rst.txt
@@ -666,7 +666,7 @@ so we can read the log file and load the best schedules.
     Evaluate inference time cost...
     Execution time summary:
      mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)  
-      762.8631     763.0774     764.4738     761.0381      1.4108   
+      749.6636     749.6433     749.9993     749.3483      0.2661   
                
 
 
@@ -694,7 +694,7 @@ Other Tips
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  35.298 seconds)
+   **Total running time of the script:** ( 1 minutes  32.179 seconds)
 
 
 .. _sphx_glr_download_how_to_tune_with_autoscheduler_tune_network_x86.py:
diff --git a/docs/_sources/how_to/tune_with_autoscheduler/tune_sparse_x86.rst.txt b/docs/_sources/how_to/tune_with_autoscheduler/tune_sparse_x86.rst.txt
index 8c6da0711a..8585841a08 100644
--- a/docs/_sources/how_to/tune_with_autoscheduler/tune_sparse_x86.rst.txt
+++ b/docs/_sources/how_to/tune_with_autoscheduler/tune_sparse_x86.rst.txt
@@ -390,102 +390,29 @@ layout transformation, parallelization, vectorization, unrolling, and operator f
                  placeholder_4: Buffer(placeholder_14: Pointer(float32), float32, [128, 512], []),
                  compute: Buffer(compute_2: Pointer(float32), float32, [128, 512], [])}
       buffer_map = {placeholder_5: placeholder, placeholder_6: placeholder_1, placeholder_7: placeholder_2, placeholder_8: placeholder_3, placeholder_9: placeholder_4, compute_1: compute} {
-      for (i0.outer.i1.outer.fused: int32, 0, 32) "parallel" {
-        allocate(compute_3: Pointer(global float32), float32, [2048]), storage_scope = global {
-          for (i.outer.inner: int32, 0, 2) {
-            for (i.inner.init: int32, 0, 64) {
-              let cse_var_1: int32 = ((i.outer.inner*1024) + (i.inner.init*16))
-               {
-                compute_4: Buffer(compute_3, float32, [2048], [])[cse_var_1] = 0f32
-                compute_4[(cse_var_1 + 1)] = 0f32
-                compute_4[(cse_var_1 + 2)] = 0f32
-                compute_4[(cse_var_1 + 3)] = 0f32
-                compute_4[(cse_var_1 + 4)] = 0f32
-                compute_4[(cse_var_1 + 5)] = 0f32
-                compute_4[(cse_var_1 + 6)] = 0f32
-                compute_4[(cse_var_1 + 7)] = 0f32
-                compute_4[(cse_var_1 + 8)] = 0f32
-                compute_4[(cse_var_1 + 9)] = 0f32
-                compute_4[(cse_var_1 + 10)] = 0f32
-                compute_4[(cse_var_1 + 11)] = 0f32
-                compute_4[(cse_var_1 + 12)] = 0f32
-                compute_4[(cse_var_1 + 13)] = 0f32
-                compute_4[(cse_var_1 + 14)] = 0f32
-                compute_4[(cse_var_1 + 15)] = 0f32
-              }
-            }
-            for (elem_idx: int32, 0, (placeholder_15: Buffer(placeholder_13, int32, [33], [])[(i0.outer.i1.outer.fused + 1)] - placeholder_15[i0.outer.i1.outer.fused])) {
-              for (i.inner: int32, 0, 64) {
-                if @tir.likely((elem_idx < (placeholder_15[(i0.outer.i1.outer.fused + 1)] - placeholder_15[i0.outer.i1.outer.fused])), dtype=bool) {
-                  let cse_var_2: int32 = ((i.outer.inner*1024) + (i.inner*16))
-                  compute_4[cse_var_2] = (compute_4[cse_var_2] + (placeholder_16: Buffer(placeholder_11, float32, [78656], [])[((placeholder_15[i0.outer.i1.outer.fused]*16) + (elem_idx*16))]*max(placeholder_17: Buffer(placeholder_10, float32, [32768], [])[(((i.outer.inner*16384) + (i.inner*256)) + placeholder_18: Buffer(placeholder_12, int32, [4916], [])[(placeholder_15[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
-                }
-                if @tir.likely((elem_idx < (placeholder_15[(i0.outer.i1.outer.fused + 1)] - placeholder_15[i0.outer.i1.outer.fused])), dtype=bool) {
-                  let cse_var_3: int32 = (((i.outer.inner*1024) + (i.inner*16)) + 1)
-                  compute_4[cse_var_3] = (compute_4[cse_var_3] + (placeholder_16[(((placeholder_15[i0.outer.i1.outer.fused]*16) + (elem_idx*16)) + 1)]*max(placeholder_17[(((i.outer.inner*16384) + (i.inner*256)) + placeholder_18[(placeholder_15[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
-                }
-                if @tir.likely((elem_idx < (placeholder_15[(i0.outer.i1.outer.fused + 1)] - placeholder_15[i0.outer.i1.outer.fused])), dtype=bool) {
-                  let cse_var_4: int32 = (((i.outer.inner*1024) + (i.inner*16)) + 2)
-                  compute_4[cse_var_4] = (compute_4[cse_var_4] + (placeholder_16[(((placeholder_15[i0.outer.i1.outer.fused]*16) + (elem_idx*16)) + 2)]*max(placeholder_17[(((i.outer.inner*16384) + (i.inner*256)) + placeholder_18[(placeholder_15[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
-                }
-                if @tir.likely((elem_idx < (placeholder_15[(i0.outer.i1.outer.fused + 1)] - placeholder_15[i0.outer.i1.outer.fused])), dtype=bool) {
-                  let cse_var_5: int32 = (((i.outer.inner*1024) + (i.inner*16)) + 3)
-                  compute_4[cse_var_5] = (compute_4[cse_var_5] + (placeholder_16[(((placeholder_15[i0.outer.i1.outer.fused]*16) + (elem_idx*16)) + 3)]*max(placeholder_17[(((i.outer.inner*16384) + (i.inner*256)) + placeholder_18[(placeholder_15[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
-                }
-                if @tir.likely((elem_idx < (placeholder_15[(i0.outer.i1.outer.fused + 1)] - placeholder_15[i0.outer.i1.outer.fused])), dtype=bool) {
-                  let cse_var_6: int32 = (((i.outer.inner*1024) + (i.inner*16)) + 4)
-                  compute_4[cse_var_6] = (compute_4[cse_var_6] + (placeholder_16[(((placeholder_15[i0.outer.i1.outer.fused]*16) + (elem_idx*16)) + 4)]*max(placeholder_17[(((i.outer.inner*16384) + (i.inner*256)) + placeholder_18[(placeholder_15[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
-                }
-                if @tir.likely((elem_idx < (placeholder_15[(i0.outer.i1.outer.fused + 1)] - placeholder_15[i0.outer.i1.outer.fused])), dtype=bool) {
-                  let cse_var_7: int32 = (((i.outer.inner*1024) + (i.inner*16)) + 5)
-                  compute_4[cse_var_7] = (compute_4[cse_var_7] + (placeholder_16[(((placeholder_15[i0.outer.i1.outer.fused]*16) + (elem_idx*16)) + 5)]*max(placeholder_17[(((i.outer.inner*16384) + (i.inner*256)) + placeholder_18[(placeholder_15[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
-                }
-                if @tir.likely((elem_idx < (placeholder_15[(i0.outer.i1.outer.fused + 1)] - placeholder_15[i0.outer.i1.outer.fused])), dtype=bool) {
-                  let cse_var_8: int32 = (((i.outer.inner*1024) + (i.inner*16)) + 6)
-                  compute_4[cse_var_8] = (compute_4[cse_var_8] + (placeholder_16[(((placeholder_15[i0.outer.i1.outer.fused]*16) + (elem_idx*16)) + 6)]*max(placeholder_17[(((i.outer.inner*16384) + (i.inner*256)) + placeholder_18[(placeholder_15[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
-                }
-                if @tir.likely((elem_idx < (placeholder_15[(i0.outer.i1.outer.fused + 1)] - placeholder_15[i0.outer.i1.outer.fused])), dtype=bool) {
-                  let cse_var_9: int32 = (((i.outer.inner*1024) + (i.inner*16)) + 7)
-                  compute_4[cse_var_9] = (compute_4[cse_var_9] + (placeholder_16[(((placeholder_15[i0.outer.i1.outer.fused]*16) + (elem_idx*16)) + 7)]*max(placeholder_17[(((i.outer.inner*16384) + (i.inner*256)) + placeholder_18[(placeholder_15[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
+      for (i0.outer.i1.outer.fused: int32, 0, 64) "parallel" {
+        allocate(compute_3: Pointer(global float32), float32, [1024]), storage_scope = global {
+          for (i.outer.inner: int32, 0, 16) {
+            for (nb_j.inner: int32, 0, 2) {
+              for (i.inner.init: int32, 0, 2) {
+                for (j.init: int32, 0, 16) {
+                  compute_4: Buffer(compute_3, float32, [1024], [])[((((i.outer.inner*64) + (i.inner.init*32)) + (nb_j.inner*16)) + j.init)] = 0f32
                 }
-                if @tir.likely((elem_idx < (placeholder_15[(i0.outer.i1.outer.fused + 1)] - placeholder_15[i0.outer.i1.outer.fused])), dtype=bool) {
-                  let cse_var_10: int32 = (((i.outer.inner*1024) + (i.inner*16)) + 8)
-                  compute_4[cse_var_10] = (compute_4[cse_var_10] + (placeholder_16[(((placeholder_15[i0.outer.i1.outer.fused]*16) + (elem_idx*16)) + 8)]*max(placeholder_17[(((i.outer.inner*16384) + (i.inner*256)) + placeholder_18[(placeholder_15[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
-                }
-                if @tir.likely((elem_idx < (placeholder_15[(i0.outer.i1.outer.fused + 1)] - placeholder_15[i0.outer.i1.outer.fused])), dtype=bool) {
-                  let cse_var_11: int32 = (((i.outer.inner*1024) + (i.inner*16)) + 9)
-                  compute_4[cse_var_11] = (compute_4[cse_var_11] + (placeholder_16[(((placeholder_15[i0.outer.i1.outer.fused]*16) + (elem_idx*16)) + 9)]*max(placeholder_17[(((i.outer.inner*16384) + (i.inner*256)) + placeholder_18[(placeholder_15[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
-                }
-                if @tir.likely((elem_idx < (placeholder_15[(i0.outer.i1.outer.fused + 1)] - placeholder_15[i0.outer.i1.outer.fused])), dtype=bool) {
-                  let cse_var_12: int32 = (((i.outer.inner*1024) + (i.inner*16)) + 10)
-                  compute_4[cse_var_12] = (compute_4[cse_var_12] + (placeholder_16[(((placeholder_15[i0.outer.i1.outer.fused]*16) + (elem_idx*16)) + 10)]*max(placeholder_17[(((i.outer.inner*16384) + (i.inner*256)) + placeholder_18[(placeholder_15[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
-                }
-                if @tir.likely((elem_idx < (placeholder_15[(i0.outer.i1.outer.fused + 1)] - placeholder_15[i0.outer.i1.outer.fused])), dtype=bool) {
-                  let cse_var_13: int32 = (((i.outer.inner*1024) + (i.inner*16)) + 11)
-                  compute_4[cse_var_13] = (compute_4[cse_var_13] + (placeholder_16[(((placeholder_15[i0.outer.i1.outer.fused]*16) + (elem_idx*16)) + 11)]*max(placeholder_17[(((i.outer.inner*16384) + (i.inner*256)) + placeholder_18[(placeholder_15[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
-                }
-                if @tir.likely((elem_idx < (placeholder_15[(i0.outer.i1.outer.fused + 1)] - placeholder_15[i0.outer.i1.outer.fused])), dtype=bool) {
-                  let cse_var_14: int32 = (((i.outer.inner*1024) + (i.inner*16)) + 12)
-                  compute_4[cse_var_14] = (compute_4[cse_var_14] + (placeholder_16[(((placeholder_15[i0.outer.i1.outer.fused]*16) + (elem_idx*16)) + 12)]*max(placeholder_17[(((i.outer.inner*16384) + (i.inner*256)) + placeholder_18[(placeholder_15[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
-                }
-                if @tir.likely((elem_idx < (placeholder_15[(i0.outer.i1.outer.fused + 1)] - placeholder_15[i0.outer.i1.outer.fused])), dtype=bool) {
-                  let cse_var_15: int32 = (((i.outer.inner*1024) + (i.inner*16)) + 13)
-                  compute_4[cse_var_15] = (compute_4[cse_var_15] + (placeholder_16[(((placeholder_15[i0.outer.i1.outer.fused]*16) + (elem_idx*16)) + 13)]*max(placeholder_17[(((i.outer.inner*16384) + (i.inner*256)) + placeholder_18[(placeholder_15[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
-                }
-                if @tir.likely((elem_idx < (placeholder_15[(i0.outer.i1.outer.fused + 1)] - placeholder_15[i0.outer.i1.outer.fused])), dtype=bool) {
-                  let cse_var_16: int32 = (((i.outer.inner*1024) + (i.inner*16)) + 14)
-                  compute_4[cse_var_16] = (compute_4[cse_var_16] + (placeholder_16[(((placeholder_15[i0.outer.i1.outer.fused]*16) + (elem_idx*16)) + 14)]*max(placeholder_17[(((i.outer.inner*16384) + (i.inner*256)) + placeholder_18[(placeholder_15[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
-                }
-                if @tir.likely((elem_idx < (placeholder_15[(i0.outer.i1.outer.fused + 1)] - placeholder_15[i0.outer.i1.outer.fused])), dtype=bool) {
-                  let cse_var_17: int32 = (((i.outer.inner*1024) + (i.inner*16)) + 15)
-                  compute_4[cse_var_17] = (compute_4[cse_var_17] + (placeholder_16[(((placeholder_15[i0.outer.i1.outer.fused]*16) + (elem_idx*16)) + 15)]*max(placeholder_17[(((i.outer.inner*16384) + (i.inner*256)) + placeholder_18[(placeholder_15[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
+              }
+              for (elem_idx: int32, 0, let cse_var_1: int32 = ((floormod(i0.outer.i1.outer.fused, 16)*2) + nb_j.inner) in (placeholder_15: Buffer(placeholder_13, int32, [33], [])[(cse_var_1 + 1)] - placeholder_15[cse_var_1])) {
+                for (i.inner: int32, 0, 2) {
+                  for (j: int32, 0, 16) {
+                    let cse_var_3: int32 = ((floormod(i0.outer.i1.outer.fused, 16)*2) + nb_j.inner)
+                    let cse_var_2: int32 = ((((i.outer.inner*64) + (i.inner*32)) + (nb_j.inner*16)) + j)
+                    compute_4[cse_var_2] = (compute_4[cse_var_2] + (placeholder_16: Buffer(placeholder_11, float32, [78656], [])[(((placeholder_15[cse_var_3]*16) + (elem_idx*16)) + j)]*max(placeholder_17: Buffer(placeholder_10, float32, [32768], [])[((((floordiv(i0.outer.i1.outer.fused, 16)*8192) + (i.outer.inner*512)) + (i.inner*256)) + placeholder_18: Buffer(placeholder_12, int32, [4916], [])[(placeholder_15[cse_var_3] + elem_idx)])], 0f32)))
+                  }
                 }
               }
             }
           }
-          for (i0.inner: int32, 0, 128) {
-            let cse_var_18: int32 = ((i0.inner*512) + (i0.outer.i1.outer.fused*16))
-            compute_5: Buffer(compute_2, float32, [65536], [])[ramp(cse_var_18, 1, 16)] = max((compute_4[ramp((i0.inner*16), 1, 16)] + placeholder_19: Buffer(placeholder_14, float32, [65536], [])[ramp(cse_var_18, 1, 16)]), broadcast(0f32, 16))
+          for (i0.inner: int32, 0, 32) {
+            let cse_var_4: int32 = (((floordiv(i0.outer.i1.outer.fused, 16)*16384) + (i0.inner*512)) + (floormod(i0.outer.i1.outer.fused, 16)*32))
+            compute_5: Buffer(compute_2, float32, [65536], [])[ramp(cse_var_4, 1, 32)] = max((compute_4[ramp((i0.inner*32), 1, 32)] + placeholder_19: Buffer(placeholder_14, float32, [65536], [])[ramp(cse_var_4, 1, 32)]), broadcast(0f32, 32))
           }
         }
       }
@@ -541,7 +468,7 @@ We build the binary and check its correctness and performance.
 
  .. code-block:: none
 
-    Execution time of this operator: 1.841 ms
+    Execution time of this operator: 2.130 ms
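
The "build the binary and check its correctness and performance" step that produced the number above boils down to `tvm.build` plus a `time_evaluator` run on the best schedule. The following is only a rough sketch; `task`, `log_file`, and the NumPy reference buffers (`X_np`, the scipy sparse weight `W_sp_np`, `B_np`, `Y_np`) are assumed to come from earlier in the sparse tutorial, and the buffer order must match `args`.

    import numpy as np
    import tvm

    # Assumption: `task` and `log_file` come from the tuning steps above.
    sch, args = task.apply_best(log_file)
    func = tvm.build(sch, args, target="llvm")
    dev = tvm.cpu()

    # Wrap the NumPy buffers as TVM arrays in the same order as `args`,
    # then time the compiled kernel.
    buffers = [
        tvm.nd.array(x, device=dev)
        for x in (X_np, W_sp_np.data, W_sp_np.indices,
                  W_sp_np.indptr, B_np, np.zeros_like(Y_np))
    ]
    evaluator = func.time_evaluator(func.entry_name, dev, min_repeat_ms=500)
    print("Execution time of this operator: %.3f ms"
          % (np.median(evaluator(*buffers).results) * 1000))
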
 
 
 
diff --git a/docs/_sources/how_to/tune_with_autotvm/sg_execution_times.rst.txt b/docs/_sources/how_to/tune_with_autotvm/sg_execution_times.rst.txt
index 16b591b828..1f402e7066 100644
--- a/docs/_sources/how_to/tune_with_autotvm/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/tune_with_autotvm/sg_execution_times.rst.txt
@@ -5,16 +5,16 @@
 
 Computation times
 =================
-**00:30.708** total execution time for **how_to_tune_with_autotvm** files:
+**00:33.178** total execution time for **how_to_tune_with_autotvm** files:
 
 +--------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autotvm_tune_conv2d_cuda.py` (``tune_conv2d_cuda.py``)           | 00:30.671 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autotvm_tune_conv2d_cuda.py` (``tune_conv2d_cuda.py``)           | 00:33.143 | 0.0 MB |
 +--------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autotvm_tune_relay_x86.py` (``tune_relay_x86.py``)               | 00:00.021 | 0.0 MB |
-+--------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autotvm_tune_relay_cuda.py` (``tune_relay_cuda.py``)             | 00:00.006 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autotvm_tune_relay_x86.py` (``tune_relay_x86.py``)               | 00:00.020 | 0.0 MB |
 +--------------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_how_to_tune_with_autotvm_tune_relay_arm.py` (``tune_relay_arm.py``)               | 00:00.005 | 0.0 MB |
 +--------------------------------------------------------------------------------------------------+-----------+--------+
+| :ref:`sphx_glr_how_to_tune_with_autotvm_tune_relay_cuda.py` (``tune_relay_cuda.py``)             | 00:00.005 | 0.0 MB |
++--------------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_how_to_tune_with_autotvm_tune_relay_mobile_gpu.py` (``tune_relay_mobile_gpu.py``) | 00:00.005 | 0.0 MB |
 +--------------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/tune_with_autotvm/tune_conv2d_cuda.rst.txt b/docs/_sources/how_to/tune_with_autotvm/tune_conv2d_cuda.rst.txt
index e0d3658993..7564983168 100644
--- a/docs/_sources/how_to/tune_with_autotvm/tune_conv2d_cuda.rst.txt
+++ b/docs/_sources/how_to/tune_with_autotvm/tune_conv2d_cuda.rst.txt
@@ -391,7 +391,7 @@ for this template
       File "tvm/_ffi/_cython/./packed_func.pxi", line 56, in tvm._ffi._cy3.core.tvm_callback
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 875, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
-    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 32, 2, 2]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 8, 2]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 0), ('unroll_explicit', 1)],None,6048089
+    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 1, 16, 1]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 64, 4]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 0)],None,2218954
     No: 2   GFLOPS: 0.00/0.00       result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 592, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
@@ -514,7 +514,7 @@ for this template
       File "tvm/_ffi/_cython/./packed_func.pxi", line 56, in tvm._ffi._cy3.core.tvm_callback
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 875, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
-    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 32, 2, 2]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 8, 2]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 1)],None,7984969
+    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 2, 2, 8]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 256, 1]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 0)],None,4094784
     No: 3   GFLOPS: 0.00/0.00       result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 592, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
@@ -637,10 +637,8 @@ for this template
       File "tvm/_ffi/_cython/./packed_func.pxi", line 56, in tvm._ffi._cy3.core.tvm_callback
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 875, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
-    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 1, 4, 8]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 1, 1, 7]), ('tile_rc', [-1, 1, 512]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 0), ('unroll_explicit', 1)],None,6001309
-    No: 4   GFLOPS: 27.51/27.51     result: MeasureResult(costs=(0.008415714000000001,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.996152639389038, timestamp=1673074612.875652) [('tile_f', [-1, 8, 16, 4]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 1, 2]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 512), ('unroll_explicit', 1)],None,8167849
-    No: 5   GFLOPS: 1158.14/1158.14 result: MeasureResult(costs=(0.0001998908324022346,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.7755820751190186, timestamp=1673074614.818961)       [('tile_f', [-1, 1, 8, 1]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 2, 4]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,9365647
-    No: 6   GFLOPS: 0.00/1158.14    result: Traceback (most recent call last):
+    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 2, 32, 2]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 16, 2]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 0)],None,2373231
+    No: 4   GFLOPS: 0.00/0.00       result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 592, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 544, in _build_func_common
@@ -762,8 +760,8 @@ for this template
       File "tvm/_ffi/_cython/./packed_func.pxi", line 56, in tvm._ffi._cy3.core.tvm_callback
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 875, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
-    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 2, 1, 256]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 4, 64]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 0), ('unroll_explicit', 0)],None,1134537
-    No: 7   GFLOPS: 0.00/1158.14    result: Traceback (most recent call last):
+    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 2, 1, 64]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 1, 16]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 0), ('unroll_explicit', 1)],None,6703601
+    No: 5   GFLOPS: 0.00/0.00       result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 592, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 544, in _build_func_common
@@ -885,9 +883,8 @@ for this template
       File "tvm/_ffi/_cython/./packed_func.pxi", line 56, in tvm._ffi._cy3.core.tvm_callback
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 875, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
-    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 4, 2, 64]), ('tile_y', [-1, 7, 1, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 1, 256]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,9476266
-    No: 8   GFLOPS: 942.65/1158.14  result: MeasureResult(costs=(0.00024558444140625,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.3535137176513672, timestamp=1673074617.8424668)        [('tile_f', [-1, 1, 4, 1]), ('tile_y', [-1, 7, 1, 1]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 2, 2]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 0), ('unroll_explicit', 0)],None,621519
-    No: 9   GFLOPS: 0.00/1158.14    result: Traceback (most recent call last):
+    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 128, 2, 2]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 4, 4]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 0)],None,3754811
+    No: 6   GFLOPS: 0.00/0.00       result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 592, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 544, in _build_func_common
@@ -1009,9 +1006,10 @@ for this template
       File "tvm/_ffi/_cython/./packed_func.pxi", line 56, in tvm._ffi._cy3.core.tvm_callback
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 875, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
-    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 1, 8, 16]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 32, 1]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 0), ('unroll_explicit', 1)],None,6794659
-    No: 10  GFLOPS: 6.28/1158.14    result: MeasureResult(costs=(0.036839053999999996,), error_no=MeasureErrorNo.NO_ERROR, all_cost=5.837235689163208, timestamp=1673074623.8672774)        [('tile_f', [-1, 8, 8, 2]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 1, 1, 7]), ('tile_rc', [-1, 2, 16]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 0)],None,1868982
-    No: 11  GFLOPS: 0.00/1158.14    result: Traceback (most recent call last):
+    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 4, 2, 1]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 1, 1, 7]), ('tile_rc', [-1, 8, 64]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 0)],None,2688852
+    No: 7   GFLOPS: 81.13/81.13     result: MeasureResult(costs=(0.002853425575,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.593247652053833, timestamp=1673207944.3081067)      [('tile_f', [-1, 1, 64, 4]), ('tile_y', [-1, 7, 1, 1]), ('tile_x', [-1, 1, 1, 7]), ('tile_rc', [-1, 4, 1]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 0)],None,3494833
+    No: 8   GFLOPS: 565.24/565.24   result: MeasureResult(costs=(0.00040955998465473144,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.5637280941009521, timestamp=1673207945.3138244)     [('tile_f', [-1, 4, 2, 2]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 8, 2]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 0)],None,5081626
+    No: 9   GFLOPS: 0.00/565.24     result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 592, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 544, in _build_func_common
@@ -1133,8 +1131,8 @@ for this template
       File "tvm/_ffi/_cython/./packed_func.pxi", line 56, in tvm._ffi._cy3.core.tvm_callback
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 875, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
-    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 64, 2, 2]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 1, 7]), ('tile_rc', [-1, 32, 2]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 512), ('unroll_explicit', 1)],None,8380310
-    No: 12  GFLOPS: 0.00/1158.14    result: Traceback (most recent call last):
+    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 16, 8, 2]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 8, 32]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 512), ('unroll_explicit', 1)],None,8284183
+    No: 10  GFLOPS: 0.00/565.24     result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 592, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 544, in _build_func_common
@@ -1256,8 +1254,10 @@ for this template
       File "tvm/_ffi/_cython/./packed_func.pxi", line 56, in tvm._ffi._cy3.core.tvm_callback
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 875, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
-    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 2, 8, 2]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 128, 4]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 512), ('unroll_explicit', 1)],None,8224340
-    No: 13  GFLOPS: 0.00/1158.14    result: Traceback (most recent call last):
+    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 8, 32, 1]), ('tile_y', [-1, 7, 1, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 256, 2]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 0)],None,2580423
+    No: 11  GFLOPS: 6.14/565.24     result: MeasureResult(costs=(0.03770536025,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.2243459224700928, timestamp=1673207948.3442025)      [('tile_f', [-1, 64, 8, 1]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 4, 1]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 0)],None,2331153
+    No: 12  GFLOPS: 212.14/565.24   result: MeasureResult(costs=(0.0010912774489795919,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.7804789543151855, timestamp=1673207949.3513148)      [('tile_f', [-1, 2, 32, 1]), ('tile_y', [-1, 7, 1, 1]), ('tile_x', [-1, 1, 1, 7]), ('tile_rc', [-1, 16, 4]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 1)],None,7053461
+    No: 13  GFLOPS: 0.00/565.24     result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 592, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 544, in _build_func_common
@@ -1379,8 +1379,8 @@ for this template
       File "tvm/_ffi/_cython/./packed_func.pxi", line 56, in tvm._ffi._cy3.core.tvm_callback
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 875, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
-    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 4, 4, 32]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 128, 2]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,9547096
-    No: 14  GFLOPS: 0.00/1158.14    result: Traceback (most recent call last):
+    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 32, 2, 4]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 1, 1, 7]), ('tile_rc', [-1, 2, 128]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 0)],None,3857813
+    No: 14  GFLOPS: 0.00/565.24     result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 592, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 544, in _build_func_common
@@ -1502,254 +1502,161 @@ for this template
       File "tvm/_ffi/_cython/./packed_func.pxi", line 56, in tvm._ffi._cy3.core.tvm_callback
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 875, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
-    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 4, 16, 8]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 8, 32]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 1)],None,7121560
-    No: 15  GFLOPS: 0.00/1158.14    result: Traceback (most recent call last):
-      File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 592, in __call__
-        func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
-      File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 544, in _build_func_common
-        func = build(s, args, target_host=task.target_host, runtime=runtime)
-      File "/workspace/python/tvm/driver/build_module.py", line 227, in build
-        input_mod = lower(inputs, args, name=name, binds=binds)
-      File "/workspace/python/tvm/driver/build_module.py", line 134, in lower
-        return ffi.lower_schedule(inp, args, name, binds, simple_mode)
+    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 1, 1, 16]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 32, 16]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 1)],None,7109464
+    No: 15  GFLOPS: 0.00/565.24     result: Traceback (most recent call last):
+      File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 742, in __call__
+        yield remote, remote.load_module(os.path.split(build_result.filename)[1])
+      File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 706, in run_through_rpc
+        costs = time_f(*args).results
+      File "/workspace/python/tvm/runtime/module.py", line 357, in evaluator
+        blob = feval(*args)
       File "tvm/_ffi/_cython/./packed_func.pxi", line 331, in tvm._ffi._cy3.core.PackedFuncBase.__call__
-      File "tvm/_ffi/_cython/./packed_func.pxi", line 276, in tvm._ffi._cy3.core.FuncCall
+      File "tvm/_ffi/_cython/./packed_func.pxi", line 262, in tvm._ffi._cy3.core.FuncCall
+      File "tvm/_ffi/_cython/./packed_func.pxi", line 251, in tvm._ffi._cy3.core.FuncCall3
       File "tvm/_ffi/_cython/./base.pxi", line 181, in tvm._ffi._cy3.core.CHECK_CALL
     tvm._ffi.base.TVMError: Traceback (most recent call last):
-      24: TVMFuncCall
+      4: TVMFuncCall
             at ../src/runtime/c_runtime_api.cc:477
-      23: tvm::runtime::PackedFuncObj::CallPacked(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const
-            at ../include/tvm/runtime/packed_func.h:1217
-      22: Call
-            at ../include/tvm/runtime/packed_func.h:1213
-      21: operator()
-            at ../include/tvm/runtime/packed_func.h:1730
-      20: unpack_call<tvm::IRModule, 5, tvm::<lambda(tvm::te::Schedule, const tvm::runtime::Array<tvm::runtime::ObjectRef>&, const tvm::runtime::String&, const tvm::runtime::Map<tvm::te::Tensor, tvm::tir::Buffer>&, bool)> >
-            at ../include/tvm/runtime/packed_func.h:1670
-      19: run<>
-            at ../include/tvm/runtime/packed_func.h:1630
-      18: run<tvm::runtime::TVMMovableArgValueWithContext_>
-            at ../include/tvm/runtime/packed_func.h:1630
-      17: run<tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_>
-            at ../include/tvm/runtime/packed_func.h:1630
-      16: run<tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_>
-            at ../include/tvm/runtime/packed_func.h:1630
-      15: run<tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_>
-            at ../include/tvm/runtime/packed_func.h:1630
-      14: run<tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_>
-            at ../include/tvm/runtime/packed_func.h:1645
-      13: operator()
-            at ../src/driver/driver_api.cc:395
-      12: tvm::LowerSchedule(tvm::te::Schedule, tvm::runtime::Array<tvm::runtime::ObjectRef, void> const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::unordered_map<tvm::te::Tensor, tvm::tir::Buffer, std::hash<tvm::te::Tensor>, std::equal_to<tvm::te::Tensor>, std::allocator<std::pair<tvm::te::Tensor const, tvm::tir::Buffer> > > const&, tvm::GlobalVarSupply, bool)
-            at ../src/driver/driver_api.cc:381
-      11: tvm::LowerWithPassList(tvm::IRModule, tvm::runtime::Array<tvm::transform::Pass, void>)
-            at ../src/driver/driver_api.cc:276
-      10: tvm::transform::Pass::operator()(tvm::IRModule) const
-            at ../src/ir/transform.cc:258
-      9: tvm::transform::Pass::operator()(tvm::IRModule, tvm::transform::PassContext const&) const
-            at ../src/ir/transform.cc:274
-      8: tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const
-            at ../src/ir/transform.cc:454
-      7: tvm::transform::Pass::operator()(tvm::IRModule, tvm::transform::PassContext const&) const
-            at ../src/ir/transform.cc:274
-      6: tvm::tir::transform::PrimFuncPassNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const
-            at ../src/tir/ir/transform.cc:100
-      5: tvm::runtime::TypedPackedFunc<tvm::tir::PrimFunc (tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext)>::operator()(tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext) const
-            at ../include/tvm/runtime/packed_func.h:1749
-      4: tvm::tir::PrimFunc tvm::runtime::detail::typed_packed_call_dispatcher<tvm::tir::PrimFunc>::run<tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext>(tvm::runtime::PackedFunc const&, tvm::tir::PrimFunc&&, tvm::IRModule&&, tvm::transform::PassContext&&)
-            at ../include/tvm/runtime/packed_func.h:1693
-      3: tvm::runtime::TVMRetValue tvm::runtime::PackedFunc::operator()<tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext>(tvm::tir::PrimFunc&&, tvm::IRModule&&, tvm::transform::PassContext&&) const
-            at ../include/tvm/runtime/packed_func.h:1617
-      2: tvm::runtime::PackedFuncObj::CallPacked(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const
-            at ../include/tvm/runtime/packed_func.h:1217
-      1: Call
-            at ../include/tvm/runtime/packed_func.h:1213
-      0: operator()
-            at ../src/runtime/c_runtime_api.cc:534
-      File "tvm/_ffi/_cython/./packed_func.pxi", line 56, in tvm._ffi._cy3.core.tvm_callback
-      File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 875, in verify_pass
-        raise InstantiationError("Skipped because of invalid gpu kernel")
-    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel
+      3: tvm::runtime::PackedFuncObj::CallPacked(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const
+            at ../include/tvm/runtime/packed_func.h:1217
+      2: tvm::runtime::RPCWrappedFunc::operator()(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const
+            at ../src/runtime/rpc/rpc_module.cc:129
+      1: tvm::runtime::RPCClientSession::CallFunc(void*, TVMValue const*, int const*, int, std::function<void (tvm::runtime::TVMArgs)> const&)
+            at ../src/runtime/rpc/rpc_endpoint.cc:1012
+      0: tvm::runtime::RPCEndpoint::CallFunc(void*, TVMValue const*, int const*, int, std::function<void (tvm::runtime::TVMArgs)>)
+            at ../src/runtime/rpc/rpc_endpoint.cc:804
+      File "../src/runtime/rpc/rpc_endpoint.cc", line 804
+    TVMError: 
+    ---------------------------------------------------------------
+    An error occurred during the execution of TVM.
+    For more information, please see: https://tvm.apache.org/docs/errors.html
+    ---------------------------------------------------------------
+      Check failed: (code == RPCCode::kReturn) is false: code=kShutdown
+
+    During handling of the above exception, another exception occurred:
 
     Traceback (most recent call last):
-      24: TVMFuncCall
-            at ../src/runtime/c_runtime_api.cc:477
-      23: tvm::runtime::PackedFuncObj::CallPacked(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const
-            at ../include/tvm/runtime/packed_func.h:1217
-      22: Call
-            at ../include/tvm/runtime/packed_func.h:1213
-      21: operator()
-            at ../include/tvm/runtime/packed_func.h:1730
-      20: unpack_call<tvm::IRModule, 5, tvm::<lambda(tvm::te::Schedule, const tvm::runtime::Array<tvm::runtime::ObjectRef>&, const tvm::runtime::String&, const tvm::runtime::Map<tvm::te::Tensor, tvm::tir::Buffer>&, bool)> >
-            at ../include/tvm/runtime/packed_func.h:1670
-      19: run<>
-            at ../include/tvm/runtime/packed_func.h:1630
-      18: run<tvm::runtime::TVMMovableArgValueWithContext_>
-            at ../include/tvm/runtime/packed_func.h:1630
-      17: run<tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_>
-            at ../include/tvm/runtime/packed_func.h:1630
-      16: run<tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_>
-            at ../include/tvm/runtime/packed_func.h:1630
-      15: run<tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_>
-            at ../include/tvm/runtime/packed_func.h:1630
-      14: run<tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_>
-            at ../include/tvm/runtime/packed_func.h:1645
-      13: operator()
-            at ../src/driver/driver_api.cc:395
-      12: tvm::LowerSchedule(tvm::te::Schedule, tvm::runtime::Array<tvm::runtime::ObjectRef, void> const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::unordered_map<tvm::te::Tensor, tvm::tir::Buffer, std::hash<tvm::te::Tensor>, std::equal_to<tvm::te::Tensor>, std::allocator<std::pair<tvm::te::Tensor const, tvm::tir::Buffer> > > const&, tvm::GlobalVarSupply, bool)
-            at ../src/driver/driver_api.cc:381
-      11: tvm::LowerWithPassList(tvm::IRModule, tvm::runtime::Array<tvm::transform::Pass, void>)
-            at ../src/driver/driver_api.cc:276
-      10: tvm::transform::Pass::operator()(tvm::IRModule) const
-            at ../src/ir/transform.cc:258
-      9: tvm::transform::Pass::operator()(tvm::IRModule, tvm::transform::PassContext const&) const
-            at ../src/ir/transform.cc:274
-      8: tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const
-            at ../src/ir/transform.cc:454
-      7: tvm::transform::Pass::operator()(tvm::IRModule, tvm::transform::PassContext const&) const
-            at ../src/ir/transform.cc:274
-      6: tvm::tir::transform::PrimFuncPassNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const
-            at ../src/tir/ir/transform.cc:100
-      5: tvm::runtime::TypedPackedFunc<tvm::tir::PrimFunc (tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext)>::operator()(tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext) const
-            at ../include/tvm/runtime/packed_func.h:1749
-      4: tvm::tir::PrimFunc tvm::runtime::detail::typed_packed_call_dispatcher<tvm::tir::PrimFunc>::run<tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext>(tvm::runtime::PackedFunc const&, tvm::tir::PrimFunc&&, tvm::IRModule&&, tvm::transform::PassContext&&)
-            at ../include/tvm/runtime/packed_func.h:1693
-      3: tvm::runtime::TVMRetValue tvm::runtime::PackedFunc::operator()<tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext>(tvm::tir::PrimFunc&&, tvm::IRModule&&, tvm::transform::PassContext&&) const
-            at ../include/tvm/runtime/packed_func.h:1617
-      2: tvm::runtime::PackedFuncObj::CallPacked(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const
-            at ../include/tvm/runtime/packed_func.h:1217
-      1: Call
-            at ../include/tvm/runtime/packed_func.h:1213
-      0: operator()
-            at ../src/runtime/c_runtime_api.cc:534
-      File "tvm/_ffi/_cython/./packed_func.pxi", line 56, in tvm._ffi._cy3.core.tvm_callback
-      File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 875, in verify_pass
-        raise InstantiationError("Skipped because of invalid gpu kernel")
-    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 2, 64, 4]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 1, 7]), ('tile_rc', [-1, 4, 2]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 0)],None,2174614
-    No: 16  GFLOPS: 0.00/1158.14    result: Traceback (most recent call last):
-      File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 592, in __call__
-        func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
-      File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 544, in _build_func_common
-        func = build(s, args, target_host=task.target_host, runtime=runtime)
-      File "/workspace/python/tvm/driver/build_module.py", line 227, in build
-        input_mod = lower(inputs, args, name=name, binds=binds)
-      File "/workspace/python/tvm/driver/build_module.py", line 134, in lower
-        return ffi.lower_schedule(inp, args, name, binds, simple_mode)
-      File "tvm/_ffi/_cython/./packed_func.pxi", line 331, in tvm._ffi._cy3.core.PackedFuncBase.__call__
-      File "tvm/_ffi/_cython/./packed_func.pxi", line 276, in tvm._ffi._cy3.core.FuncCall
-      File "tvm/_ffi/_cython/./base.pxi", line 181, in tvm._ffi._cy3.core.CHECK_CALL
+      File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 706, in run_through_rpc
+        costs = time_f(*args).results
+      File "/usr/lib/python3.7/contextlib.py", line 130, in __exit__
+        self.gen.throw(type, value, traceback)
+      File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 746, in __call__
+        remote.remove(build_result.filename)
+      File "/workspace/python/tvm/rpc/client.py", line 144, in remove
+        self._remote_funcs["remove"] = self.get_function("tvm.rpc.server.remove")
+      File "/workspace/python/tvm/rpc/client.py", line 72, in get_function
+        return self._sess.get_function(name)
+      File "/workspace/python/tvm/runtime/module.py", line 171, in get_function
+        self.handle, c_str(name), ctypes.c_int(query_imports), ctypes.byref(ret_handle)
+      File "/workspace/python/tvm/_ffi/base.py", line 348, in check_call
+        raise get_last_ffi_error()
     tvm._ffi.base.TVMError: Traceback (most recent call last):
-      24: TVMFuncCall
-            at ../src/runtime/c_runtime_api.cc:477
-      23: tvm::runtime::PackedFuncObj::CallPacked(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const
-            at ../include/tvm/runtime/packed_func.h:1217
-      22: Call
-            at ../include/tvm/runtime/packed_func.h:1213
-      21: operator()
-            at ../include/tvm/runtime/packed_func.h:1730
-      20: unpack_call<tvm::IRModule, 5, tvm::<lambda(tvm::te::Schedule, const tvm::runtime::Array<tvm::runtime::ObjectRef>&, const tvm::runtime::String&, const tvm::runtime::Map<tvm::te::Tensor, tvm::tir::Buffer>&, bool)> >
-            at ../include/tvm/runtime/packed_func.h:1670
-      19: run<>
-            at ../include/tvm/runtime/packed_func.h:1630
-      18: run<tvm::runtime::TVMMovableArgValueWithContext_>
-            at ../include/tvm/runtime/packed_func.h:1630
-      17: run<tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_>
-            at ../include/tvm/runtime/packed_func.h:1630
-      16: run<tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_>
-            at ../include/tvm/runtime/packed_func.h:1630
-      15: run<tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_>
-            at ../include/tvm/runtime/packed_func.h:1630
-      14: run<tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_>
-            at ../include/tvm/runtime/packed_func.h:1645
-      13: operator()
-            at ../src/driver/driver_api.cc:395
-      12: tvm::LowerSchedule(tvm::te::Schedule, tvm::runtime::Array<tvm::runtime::ObjectRef, void> const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::unordered_map<tvm::te::Tensor, tvm::tir::Buffer, std::hash<tvm::te::Tensor>, std::equal_to<tvm::te::Tensor>, std::allocator<std::pair<tvm::te::Tensor const, tvm::tir::Buffer> > > const&, tvm::GlobalVarSupply, bool)
-            at ../src/driver/driver_api.cc:381
-      11: tvm::LowerWithPassList(tvm::IRModule, tvm::runtime::Array<tvm::transform::Pass, void>)
-            at ../src/driver/driver_api.cc:276
-      10: tvm::transform::Pass::operator()(tvm::IRModule) const
-            at ../src/ir/transform.cc:258
-      9: tvm::transform::Pass::operator()(tvm::IRModule, tvm::transform::PassContext const&) const
-            at ../src/ir/transform.cc:274
-      8: tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const
-            at ../src/ir/transform.cc:454
-      7: tvm::transform::Pass::operator()(tvm::IRModule, tvm::transform::PassContext const&) const
-            at ../src/ir/transform.cc:274
-      6: tvm::tir::transform::PrimFuncPassNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const
-            at ../src/tir/ir/transform.cc:100
-      5: tvm::runtime::TypedPackedFunc<tvm::tir::PrimFunc (tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext)>::operator()(tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext) const
-            at ../include/tvm/runtime/packed_func.h:1749
-      4: tvm::tir::PrimFunc tvm::runtime::detail::typed_packed_call_dispatcher<tvm::tir::PrimFunc>::run<tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext>(tvm::runtime::PackedFunc const&, tvm::tir::PrimFunc&&, tvm::IRModule&&, tvm::transform::PassContext&&)
-            at ../include/tvm/runtime/packed_func.h:1693
-      3: tvm::runtime::TVMRetValue tvm::runtime::PackedFunc::operator()<tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext>(tvm::tir::PrimFunc&&, tvm::IRModule&&, tvm::transform::PassContext&&) const
+      52: 0xffffffffffffffff
+      51: _start
+      50: __libc_start_main
+      49: _Py_UnixMain
+      48: 0x0000000000650da0
+      47: 0x0000000000650afa
+      46: _PyFunction_FastCallDict
+      45: _PyEval_EvalCodeWithName
+      44: _PyEval_EvalFrameDefault
+      43: _PyFunction_FastCallKeywords
+      42: _PyEval_EvalCodeWithName
+      41: _PyEval_EvalFrameDefault
+      40: _PyMethodDef_RawFastCallKeywords
+      39: 0x0000000000546369
+      38: _PyEval_EvalCodeWithName
+      37: _PyEval_EvalFrameDefault
+      36: _PyFunction_FastCallKeywords
+      35: _PyEval_EvalCodeWithName
+      34: _PyEval_EvalFrameDefault
+      33: _PyFunction_FastCallDict
+      32: _PyEval_EvalCodeWithName
+      31: _PyEval_EvalFrameDefault
+      30: _PyObject_FastCallDict
+      29: 0x00000000004c06e1
+      28: _PyFunction_FastCallDict
+      27: _PyEval_EvalFrameDefault
+      26: _PyMethodDescr_FastCallKeywords
+      25: 0x00000000005dcb58
+      24: 0x00000000005dc83f
+      23: 0x00000000004ba127
+      22: _PyEval_EvalFrameDefault
+      21: _PyFunction_FastCallKeywords
+      20: _PyEval_EvalFrameDefault
+      19: _PyFunction_FastCallKeywords
+      18: _PyEval_EvalFrameDefault
+      17: _PyFunction_FastCallKeywords
+      16: _PyEval_EvalCodeWithName
+      15: _PyEval_EvalFrameDefault
+      14: 0x0000000000537c30
+      13: _PyObject_FastCallKeywords
+      12: 0x00007f9e4d319fa2
+      11: _ctypes_callproc
+      10: ffi_call
+      9: ffi_call_unix64
+      8: TVMModGetFunction
+            at ../src/runtime/c_runtime_api.cc:408
+      7: tvm::runtime::ModuleNode::GetFunction(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, bool)
+            at ../src/runtime/module.cc:66
+      6: tvm::runtime::RPCModuleNode::GetFunction(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, tvm::runtime::ObjectPtr<tvm::runtime::Object> const&)
+            at ../src/runtime/rpc/rpc_module.cc:185
+      5: tvm::runtime::RPCClientSession::GetFunction(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&)
+            at ../src/runtime/rpc/rpc_endpoint.cc:1007
+      4: tvm::runtime::TVMRetValue tvm::runtime::RPCEndpoint::SysCallRemote<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&>(tvm::runtime::RPCCode, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&)
+            at ../src/runtime/rpc/rpc_endpoint.h:223
+      3: tvm::runtime::TVMRetValue tvm::runtime::PackedFunc::operator()<int, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&>(int&&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&) const
             at ../include/tvm/runtime/packed_func.h:1617
       2: tvm::runtime::PackedFuncObj::CallPacked(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const
             at ../include/tvm/runtime/packed_func.h:1217
       1: Call
             at ../include/tvm/runtime/packed_func.h:1213
       0: operator()
-            at ../src/runtime/c_runtime_api.cc:534
-      File "tvm/_ffi/_cython/./packed_func.pxi", line 56, in tvm._ffi._cy3.core.tvm_callback
-      File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 875, in verify_pass
-        raise InstantiationError("Skipped because of invalid gpu kernel")
-    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel
+            at ../src/runtime/rpc/rpc_endpoint.cc:684
+      File "../src/runtime/rpc/rpc_endpoint.cc", line 684
+    TVMError: 
+    ---------------------------------------------------------------
+    An error occurred during the execution of TVM.
+    For more information, please see: https://tvm.apache.org/docs/errors.html
+    ---------------------------------------------------------------
+      Check failed: (code == RPCCode::kReturn) is false: code=1
 
     Traceback (most recent call last):
-      24: TVMFuncCall
-            at ../src/runtime/c_runtime_api.cc:477
-      23: tvm::runtime::PackedFuncObj::CallPacked(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const
-            at ../include/tvm/runtime/packed_func.h:1217
-      22: Call
-            at ../include/tvm/runtime/packed_func.h:1213
-      21: operator()
-            at ../include/tvm/runtime/packed_func.h:1730
-      20: unpack_call<tvm::IRModule, 5, tvm::<lambda(tvm::te::Schedule, const tvm::runtime::Array<tvm::runtime::ObjectRef>&, const tvm::runtime::String&, const tvm::runtime::Map<tvm::te::Tensor, tvm::tir::Buffer>&, bool)> >
-            at ../include/tvm/runtime/packed_func.h:1670
-      19: run<>
-            at ../include/tvm/runtime/packed_func.h:1630
-      18: run<tvm::runtime::TVMMovableArgValueWithContext_>
-            at ../include/tvm/runtime/packed_func.h:1630
-      17: run<tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_>
-            at ../include/tvm/runtime/packed_func.h:1630
-      16: run<tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_>
-            at ../include/tvm/runtime/packed_func.h:1630
-      15: run<tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_>
-            at ../include/tvm/runtime/packed_func.h:1630
-      14: run<tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_>
-            at ../include/tvm/runtime/packed_func.h:1645
-      13: operator()
-            at ../src/driver/driver_api.cc:395
-      12: tvm::LowerSchedule(tvm::te::Schedule, tvm::runtime::Array<tvm::runtime::ObjectRef, void> const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::unordered_map<tvm::te::Tensor, tvm::tir::Buffer, std::hash<tvm::te::Tensor>, std::equal_to<tvm::te::Tensor>, std::allocator<std::pair<tvm::te::Tensor const, tvm::tir::Buffer> > > const&, tvm::GlobalVarSupply, bool)
-            at ../src/driver/driver_api.cc:381
-      11: tvm::LowerWithPassList(tvm::IRModule, tvm::runtime::Array<tvm::transform::Pass, void>)
-            at ../src/driver/driver_api.cc:276
-      10: tvm::transform::Pass::operator()(tvm::IRModule) const
-            at ../src/ir/transform.cc:258
-      9: tvm::transform::Pass::operator()(tvm::IRModule, tvm::transform::PassContext const&) const
-            at ../src/ir/transform.cc:274
-      8: tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const
-            at ../src/ir/transform.cc:454
-      7: tvm::transform::Pass::operator()(tvm::IRModule, tvm::transform::PassContext const&) const
-            at ../src/ir/transform.cc:274
-      6: tvm::tir::transform::PrimFuncPassNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const
-            at ../src/tir/ir/transform.cc:100
-      5: tvm::runtime::TypedPackedFunc<tvm::tir::PrimFunc (tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext)>::operator()(tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext) const
-            at ../include/tvm/runtime/packed_func.h:1749
-      4: tvm::tir::PrimFunc tvm::runtime::detail::typed_packed_call_dispatcher<tvm::tir::PrimFunc>::run<tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext>(tvm::runtime::PackedFunc const&, tvm::tir::PrimFunc&&, tvm::IRModule&&, tvm::transform::PassContext&&)
-            at ../include/tvm/runtime/packed_func.h:1693
-      3: tvm::runtime::TVMRetValue tvm::runtime::PackedFunc::operator()<tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext>(tvm::tir::PrimFunc&&, tvm::IRModule&&, tvm::transform::PassContext&&) const
-            at ../include/tvm/runtime/packed_func.h:1617
-      2: tvm::runtime::PackedFuncObj::CallPacked(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const
-            at ../include/tvm/runtime/packed_func.h:1217
-      1: Call
-            at ../include/tvm/runtime/packed_func.h:1213
-      0: operator()
-            at ../src/runtime/c_runtime_api.cc:534
-      File "tvm/_ffi/_cython/./packed_func.pxi", line 56, in tvm._ffi._cy3.core.tvm_callback
-      File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 875, in verify_pass
-        raise InstantiationError("Skipped because of invalid gpu kernel")
-    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 1, 4, 32]), ('tile_y', [-1, 7, 1, 1]), ('tile_x', [-1, 1, 1, 7]), ('tile_rc', [-1, 2, 16]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 0), ('unroll_explicit', 1)],None,6321454
-    No: 17  GFLOPS: 0.00/1158.14    result: Traceback (most recent call last):
+      52: 0xffffffffffffffff
+      51: _start
+      50: __libc_start_main
+      49: _Py_UnixMain
+      48: 0x0000000000650da0
+      47: 0x0000000000650afa
+      46: _PyFunction_FastCallDict
+      45: _PyEval_EvalCodeWithName
+      44: _PyEval_EvalFrameDefault
+      43: _PyFunction_FastCallKeywords
+      42: _PyEval_EvalCodeWithName
+      41: _PyEval_EvalFrameDefault
+      40: _PyMethodDef_RawFastCallKeywords
+      39: 0x0000000000546369
+      38: _PyEval_EvalCodeWithName
+      37: _PyEval_EvalFrameDefault
+      36: _PyFunction_FastCallKeywords
+      35: _PyEval_EvalCodeWithName
+      34: _PyEval_EvalFrameDefault
+      33: _PyFunction_FastCallDict
+      32: _PyEval_EvalCodeWithName
+      31: _PyEval_EvalFrameDefault
+      30: _PyObject_FastCallDict
+      29: 0x00000000004c06e1
+      28: _PyFunction_FastCallDict
+      27: _PyEval_EvalFrameDefault
+      26: _PyMethodDescr_FastCallKeywords
+      25: 0x00000000005dcb58
+      24: 0x00000000005dc83f
+      23: 0x00000000004ba127
+      22: _PyEval_EvalFrameDefault
+      21: _PyFunction_FastCallKeywords
+      20: _PyEval_EvalFrameDefault
+      19: _PyFunction_FastCall      [('tile_f', [-1, 32, 1, 16]), ('tile_y', [-1, 7, 1, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 1, 2]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 0), ('unroll_explicit', 1)],None,5456389
+    No: 16  GFLOPS: 0.00/565.24     result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 592, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 544, in _build_func_common
@@ -1871,8 +1778,8 @@ for this template
       File "tvm/_ffi/_cython/./packed_func.pxi", line 56, in tvm._ffi._cy3.core.tvm_callback
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 875, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
-    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 1, 4, 64]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 2, 32]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 0)],None,2080967
-    No: 18  GFLOPS: 0.00/1158.14    result: Traceback (most recent call last):
+    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 2, 8, 4]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 1, 32]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 0), ('unroll_explicit', 1)],None,6723322
+    No: 17  GFLOPS: 0.00/565.24     result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 592, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 544, in _build_func_common
@@ -1994,8 +1901,8 @@ for this template
       File "tvm/_ffi/_cython/./packed_func.pxi", line 56, in tvm._ffi._cy3.core.tvm_callback
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 875, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
-    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 4, 128, 1]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 2, 128]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 0), ('unroll_explicit', 0)],None,176931
-    No: 19  GFLOPS: 0.00/1158.14    result: Traceback (most recent call last):
+    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 1, 1, 4]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 4, 32]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 512), ('unroll_explicit', 0)],None,3441340
+    No: 18  GFLOPS: 0.00/565.24     result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 592, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 544, in _build_func_common
@@ -2117,8 +2024,8 @@ for this template
       File "tvm/_ffi/_cython/./packed_func.pxi", line 56, in tvm._ffi._cy3.core.tvm_callback
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 875, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
-    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 32, 1, 16]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 2, 16]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,9612189
-    No: 20  GFLOPS: 0.00/1158.14    result: Traceback (most recent call last):
+    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 8, 64, 1]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 16, 1]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 0), ('unroll_explicit', 0)],None,982788
+    No: 19  GFLOPS: 0.00/565.24     result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 592, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 544, in _build_func_common
@@ -2240,7 +2147,8 @@ for this template
       File "tvm/_ffi/_cython/./packed_func.pxi", line 56, in tvm._ffi._cy3.core.tvm_callback
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 875, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
-    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 8, 1, 16]), ('tile_y', [-1, 7, 1, 1]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 8, 32]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 0)],None,3831907
+    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 1, 1, 32]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 128, 1]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 512), ('unroll_explicit', 0)],None,2930145
+    No: 20  GFLOPS: 5.25/565.24     result: MeasureResult(costs=(0.044077056749999996,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.035950183868408, timestamp=1673207957.5529802)        [('tile_f', [-1, 1, 2, 32]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 2, 8]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 0), ('unroll_explicit', 1)],None,5326610
 
 
 
@@ -2295,9 +2203,9 @@ and measure running time.
     Finish loading 20 records
 
     Best config:
-    [('tile_f', [-1, 1, 8, 1]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 2, 4]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,9365647
+    [('tile_f', [-1, 4, 2, 2]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 8, 2]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 0)],None,5081626
     Finish loading 20 records
-    Time cost of this operator: 0.000514
+    Time cost of this operator: 0.000721
 
 
 
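Note on the tuning log above: the ``Best config`` and ``Time cost of this operator`` lines are derived from the 20 records autotvm wrote to its log file; entries that failed to measure (such as the RPC ``kShutdown`` failures earlier) carry a non-zero ``error_no`` and are ignored. A minimal sketch of one way to inspect such a log offline and report its fastest valid entry follows; the file name ``conv2d.log`` is an assumption, and the format of each record (``MeasureResult(costs=..., error_no=..., ...)``) matches what is printed in the log above.

.. code-block:: python

    # Sketch: scan an autotvm tuning log and report the fastest valid record.
    # "conv2d.log" is a placeholder for the log file the tuner actually wrote.
    from tvm import autotvm

    best_config, best_cost = None, float("inf")
    for inp, res in autotvm.record.load_from_file("conv2d.log"):
        if res.error_no != 0:      # skip failed builds and failed RPC runs
            continue
        cost = sum(res.costs) / len(res.costs)
        if cost < best_cost:
            best_config, best_cost = inp.config, cost

    if best_config is not None:
        print("Best config:", best_config)
        print("Measured time of this operator: %f s" % best_cost)

This only reads the costs already stored in the log; the tutorial itself goes one step further and rebuilds the operator with the best configuration before re-timing it.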
diff --git a/docs/_sources/how_to/work_with_microtvm/micro_autotune.rst.txt b/docs/_sources/how_to/work_with_microtvm/micro_autotune.rst.txt
index 0bbc620835..b8432065f6 100644
--- a/docs/_sources/how_to/work_with_microtvm/micro_autotune.rst.txt
+++ b/docs/_sources/how_to/work_with_microtvm/micro_autotune.rst.txt
@@ -368,10 +368,10 @@ Timing the untuned program
     ########## Build without Autotuning ##########
     Node Name                                     Ops                                           Time(us)  Time(%)  Shape              Inputs  Outputs  Measurements(us)  
     ---------                                     ---                                           --------  -------  -----              ------  -------  ----------------  
-    tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  309.7     98.724   (1, 2, 10, 10, 3)  2       1        [309.7]           
-    tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       3.027     0.965    (1, 6, 10, 10)     1       1        [3.027]           
-    tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.976     0.311    (1, 1, 10, 10, 3)  1       1        [0.976]           
-    Total_time                                    -                                             313.703   -        -                  -       -        -                 
+    tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  310.6     98.715   (1, 2, 10, 10, 3)  2       1        [310.6]           
+    tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       3.076     0.978    (1, 6, 10, 10)     1       1        [3.076]           
+    tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.969     0.308    (1, 1, 10, 10, 3)  1       1        [0.969]           
+    Total_time                                    -                                             314.644   -        -                  -       -        -                 
 
 
 
@@ -436,10 +436,10 @@ Timing the tuned program
     ########## Build with Autotuning ##########
     Node Name                                     Ops                                           Time(us)  Time(%)  Shape              Inputs  Outputs  Measurements(us)  
     ---------                                     ---                                           --------  -------  -----              ------  -------  ----------------  
-    tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  103.0     97.385   (1, 6, 10, 10, 1)  2       1        [103.0]           
-    tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       1.807     1.708    (1, 6, 10, 10)     1       1        [1.807]           
-    tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.959     0.906    (1, 1, 10, 10, 3)  1       1        [0.959]           
-    Total_time                                    -                                             105.765   -        -                  -       -        -                 
+    tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  105.1     97.451   (1, 6, 10, 10, 1)  2       1        [105.1]           
+    tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       1.787     1.657    (1, 6, 10, 10)     1       1        [1.787]           
+    tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.963     0.893    (1, 1, 10, 10, 3)  1       1        [0.963]           
+    Total_time                                    -                                             107.849   -        -                  -       -        -                 
 
 
 
diff --git a/docs/_sources/how_to/work_with_microtvm/micro_pytorch.rst.txt b/docs/_sources/how_to/work_with_microtvm/micro_pytorch.rst.txt
index 7f72f68af1..3ee1ff8d88 100644
--- a/docs/_sources/how_to/work_with_microtvm/micro_pytorch.rst.txt
+++ b/docs/_sources/how_to/work_with_microtvm/micro_pytorch.rst.txt
@@ -117,7 +117,7 @@ download a cat image and preprocess it to use as the model input.
     /venv/apache-tvm-py3.7/lib/python3.7/site-packages/torch/ao/quantization/utils.py:281: UserWarning: must run observer before calling calculate_qparams. Returning default values.
       "must run observer before calling calculate_qparams. " +
     Downloading: "https://download.pytorch.org/models/quantized/mobilenet_v2_qnnpack_37f702c5.pth" to /workspace/.cache/torch/hub/checkpoints/mobilenet_v2_qnnpack_37f702c5.pth
-
      0%|          | 0.00/3.42M [00:00<?, ?B/s]
    100%|##########| 3.42M/3.42M [00:00<00:00, 59.4MB/s]
+
      0%|          | 0.00/3.42M [00:00<?, ?B/s]
    100%|##########| 3.42M/3.42M [00:00<00:00, 114MB/s]
     /workspace/python/tvm/relay/frontend/pytorch_utils.py:47: DeprecationWarning: distutils Version classes are deprecated. Use packaging.version instead.
       return LooseVersion(torch_ver) > ver
     /venv/apache-tvm-py3.7/lib/python3.7/site-packages/setuptools/_distutils/version.py:346: DeprecationWarning: distutils Version classes are deprecated. Use packaging.version instead.
@@ -322,7 +322,7 @@ Look up prediction top 1 index in 1000 class synset.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  7.728 seconds)
+   **Total running time of the script:** ( 1 minutes  4.488 seconds)
 
 
 .. _sphx_glr_download_how_to_work_with_microtvm_micro_pytorch.py:
diff --git a/docs/_sources/how_to/work_with_microtvm/micro_train.rst.txt b/docs/_sources/how_to/work_with_microtvm/micro_train.rst.txt
index 4de7f5f81b..c2c07a86dc 100644
--- a/docs/_sources/how_to/work_with_microtvm/micro_train.rst.txt
+++ b/docs/_sources/how_to/work_with_microtvm/micro_train.rst.txt
@@ -218,7 +218,7 @@ take about **2 minutes** to download the Stanford Cars, while COCO 2017 validati
  .. code-block:: none
 
 
-    '/tmp/tmpehl9pho3/images/random'
+    '/tmp/tmpd4y05yt_/images/random'
 
 
 
@@ -309,7 +309,7 @@ objects to other stuff? We can display some examples from our datasets using ``m
 
 
 .. image-sg:: /how_to/work_with_microtvm/images/sphx_glr_micro_train_001.png
-   :alt: [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0]
+   :alt: [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0]
    :srcset: /how_to/work_with_microtvm/images/sphx_glr_micro_train_001.png
    :class: sphx-glr-single-img
 
@@ -318,8 +318,8 @@ objects to other stuff? We can display some examples from our datasets using ``m
 
  .. code-block:: none
 
-    /tmp/tmpehl9pho3/images/target contains 8144 images
-    /tmp/tmpehl9pho3/images/random contains 5000 images
+    /tmp/tmpd4y05yt_/images/target contains 8144 images
+    /tmp/tmpd4y05yt_/images/random contains 5000 images
 
 
 
@@ -494,13 +494,13 @@ the time on our validation set).
  .. code-block:: none
 
     Epoch 1/3
-    328/328 - 49s - loss: 0.2185 - accuracy: 0.9234 - val_loss: 0.1424 - val_accuracy: 0.9543 - 49s/epoch - 149ms/step
+    328/328 - 47s - loss: 0.1971 - accuracy: 0.9311 - val_loss: 0.2416 - val_accuracy: 0.9139 - 47s/epoch - 143ms/step
     Epoch 2/3
-    328/328 - 45s - loss: 0.0920 - accuracy: 0.9654 - val_loss: 0.1235 - val_accuracy: 0.9581 - 45s/epoch - 136ms/step
+    328/328 - 44s - loss: 0.0919 - accuracy: 0.9670 - val_loss: 0.1066 - val_accuracy: 0.9698 - 44s/epoch - 133ms/step
     Epoch 3/3
-    328/328 - 44s - loss: 0.0635 - accuracy: 0.9770 - val_loss: 0.0954 - val_accuracy: 0.9687 - 44s/epoch - 136ms/step
+    328/328 - 43s - loss: 0.0586 - accuracy: 0.9781 - val_loss: 0.1025 - val_accuracy: 0.9690 - 43s/epoch - 133ms/step
 
-    <keras.callbacks.History object at 0x7faf4b537f90>
+    <keras.callbacks.History object at 0x7f2f09e55910>
 
 
 
@@ -857,7 +857,7 @@ Arduino tutorial for how to do that `on GitHub <https://github.com/guberti/tvm-a
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 5 minutes  44.695 seconds)
+   **Total running time of the script:** ( 4 minutes  17.521 seconds)
 
 
 .. _sphx_glr_download_how_to_work_with_microtvm_micro_train.py:
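The per-epoch lines in the training log above (``loss`` / ``accuracy`` / ``val_loss`` / ``val_accuracy``) are the standard summaries Keras prints from ``model.fit`` with ``verbose=2``. A minimal, self-contained sketch that produces output in the same format is below; the toy two-class model and random data are stand-ins, not the tutorial's MobileNetV2-based network.

.. code-block:: python

    # Sketch: reproduce the style of the epoch summaries above with toy data.
    import numpy as np
    import tensorflow as tf

    x = np.random.rand(1024, 32).astype("float32")
    y = tf.keras.utils.to_categorical(np.random.randint(0, 2, size=1024), 2)

    model = tf.keras.Sequential([
        tf.keras.layers.Dense(16, activation="relu", input_shape=(32,)),
        tf.keras.layers.Dense(2, activation="softmax"),
    ])
    model.compile(optimizer="adam",
                  loss="categorical_crossentropy",
                  metrics=["accuracy"])

    # verbose=2 prints one "loss / accuracy / val_loss / val_accuracy" line
    # per epoch, the same format shown in the training log above.
    model.fit(x, y, validation_split=0.2, epochs=3, verbose=2)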
diff --git a/docs/_sources/how_to/work_with_microtvm/sg_execution_times.rst.txt b/docs/_sources/how_to/work_with_microtvm/sg_execution_times.rst.txt
index 4359d17609..30258fd961 100644
--- a/docs/_sources/how_to/work_with_microtvm/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/work_with_microtvm/sg_execution_times.rst.txt
@@ -5,20 +5,20 @@
 
 Computation times
 =================
-**07:59.299** total execution time for **how_to_work_with_microtvm** files:
+**06:25.983** total execution time for **how_to_work_with_microtvm** files:
 
 +---------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_microtvm_micro_train.py` (``micro_train.py``)               | 05:44.695 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_microtvm_micro_train.py` (``micro_train.py``)               | 04:17.521 | 0.0 MB |
 +---------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_microtvm_micro_pytorch.py` (``micro_pytorch.py``)           | 01:07.728 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_microtvm_micro_pytorch.py` (``micro_pytorch.py``)           | 01:04.488 | 0.0 MB |
 +---------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_microtvm_micro_autotune.py` (``micro_autotune.py``)         | 00:54.499 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_microtvm_micro_autotune.py` (``micro_autotune.py``)         | 00:52.163 | 0.0 MB |
 +---------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_microtvm_micro_aot.py` (``micro_aot.py``)                   | 00:08.319 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_microtvm_micro_aot.py` (``micro_aot.py``)                   | 00:07.974 | 0.0 MB |
 +---------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_microtvm_micro_tflite.py` (``micro_tflite.py``)             | 00:04.056 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_microtvm_micro_tflite.py` (``micro_tflite.py``)             | 00:03.834 | 0.0 MB |
 +---------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_microtvm_micro_reference_vm.py` (``micro_reference_vm.py``) | 00:00.002 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_microtvm_micro_reference_vm.py` (``micro_reference_vm.py``) | 00:00.001 | 0.0 MB |
 +---------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_how_to_work_with_microtvm_micro_ethosu.py` (``micro_ethosu.py``)             | 00:00.001 | 0.0 MB |
 +---------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/work_with_relay/sg_execution_times.rst.txt b/docs/_sources/how_to/work_with_relay/sg_execution_times.rst.txt
index ab2bf1ac5b..cf645a9a15 100644
--- a/docs/_sources/how_to/work_with_relay/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/work_with_relay/sg_execution_times.rst.txt
@@ -5,14 +5,14 @@
 
 Computation times
 =================
-**00:46.494** total execution time for **how_to_work_with_relay** files:
+**00:44.747** total execution time for **how_to_work_with_relay** files:
 
 +----------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_relay_using_pipeline_executor.py` (``using_pipeline_executor.py``) | 00:34.269 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_relay_using_pipeline_executor.py` (``using_pipeline_executor.py``) | 00:32.865 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_relay_using_external_lib.py` (``using_external_lib.py``)           | 00:10.355 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_relay_using_external_lib.py` (``using_external_lib.py``)           | 00:10.206 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_relay_build_gcn.py` (``build_gcn.py``)                             | 00:01.863 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_relay_build_gcn.py` (``build_gcn.py``)                             | 00:01.669 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_how_to_work_with_relay_using_relay_viz.py` (``using_relay_viz.py``)                 | 00:00.007 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/work_with_schedules/intrin_math.rst.txt b/docs/_sources/how_to/work_with_schedules/intrin_math.rst.txt
index ed5228d6f6..ca7e35f5ca 100644
--- a/docs/_sources/how_to/work_with_schedules/intrin_math.rst.txt
+++ b/docs/_sources/how_to/work_with_schedules/intrin_math.rst.txt
@@ -265,7 +265,7 @@ The following example customizes CUDA lowering rule for :code:`exp`.
  .. code-block:: none
 
 
-    <function my_cuda_math_rule at 0x7faeea543320>
+    <function my_cuda_math_rule at 0x7f2f0a77d8c0>
 
 
 
diff --git a/docs/_sources/how_to/work_with_schedules/sg_execution_times.rst.txt b/docs/_sources/how_to/work_with_schedules/sg_execution_times.rst.txt
index 0149979cd1..c2f19dbbc4 100644
--- a/docs/_sources/how_to/work_with_schedules/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/work_with_schedules/sg_execution_times.rst.txt
@@ -5,22 +5,22 @@
 
 Computation times
 =================
-**00:07.390** total execution time for **how_to_work_with_schedules** files:
+**00:07.699** total execution time for **how_to_work_with_schedules** files:
 
 +------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_schedules_intrin_math.py` (``intrin_math.py``)                 | 00:04.827 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_schedules_intrin_math.py` (``intrin_math.py``)                 | 00:05.211 | 0.0 MB |
 +------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_schedules_tensorize.py` (``tensorize.py``)                     | 00:01.153 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_schedules_tensorize.py` (``tensorize.py``)                     | 00:01.120 | 0.0 MB |
 +------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_schedules_reduction.py` (``reduction.py``)                     | 00:00.602 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_schedules_reduction.py` (``reduction.py``)                     | 00:00.581 | 0.0 MB |
 +------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_schedules_scan.py` (``scan.py``)                               | 00:00.581 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_schedules_scan.py` (``scan.py``)                               | 00:00.569 | 0.0 MB |
 +------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_schedules_extern_op.py` (``extern_op.py``)                     | 00:00.118 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_schedules_extern_op.py` (``extern_op.py``)                     | 00:00.116 | 0.0 MB |
 +------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_schedules_schedule_primitives.py` (``schedule_primitives.py``) | 00:00.055 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_schedules_schedule_primitives.py` (``schedule_primitives.py``) | 00:00.050 | 0.0 MB |
 +------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_schedules_tedd.py` (``tedd.py``)                               | 00:00.030 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_schedules_tedd.py` (``tedd.py``)                               | 00:00.029 | 0.0 MB |
 +------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_schedules_tuple_inputs.py` (``tuple_inputs.py``)               | 00:00.025 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_schedules_tuple_inputs.py` (``tuple_inputs.py``)               | 00:00.024 | 0.0 MB |
 +------------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/work_with_schedules/tensorize.rst.txt b/docs/_sources/how_to/work_with_schedules/tensorize.rst.txt
index daa97a48c6..bcc8c2ee8c 100644
--- a/docs/_sources/how_to/work_with_schedules/tensorize.rst.txt
+++ b/docs/_sources/how_to/work_with_schedules/tensorize.rst.txt
@@ -347,7 +347,7 @@ The importing needs to happen before the tensorized GEMV being executed.
                  B: Buffer(B_2: Pointer(float32), float32, [512, 64], []),
                  C: Buffer(C_2: Pointer(float32), float32, [1024, 512], [])}
       buffer_map = {A_1: A, B_1: B, C_1: C} {
-      attr [IterVar(i: int32, (nullptr), "DataPar", "")] "pragma_import_llvm" = "; ModuleID = '/tmp/tmpw4tmhn7f/input0.cc'\nsource_filename = \"/tmp/tmpw4tmhn7f/input0.cc\"\ntarget datalayout = \"e-m:e-i64:64-f80:128-n8:16:32:64-S128\"\ntarget triple = \"x86_64-pc-linux-gnu\"\n\n; Function Attrs: noinline nounwind optnone uwtable\ndefine dso_local i32 @gemv_update(float*, float*, float*, i32, i32, i32) #0 {\n  %7 = alloca float*, align 8\n  %8 = alloca float*, align 8\n  %9 = alloca floa [...]
+      attr [IterVar(i: int32, (nullptr), "DataPar", "")] "pragma_import_llvm" = "; ModuleID = '/tmp/tmp1re09rxh/input0.cc'\nsource_filename = \"/tmp/tmp1re09rxh/input0.cc\"\ntarget datalayout = \"e-m:e-i64:64-f80:128-n8:16:32:64-S128\"\ntarget triple = \"x86_64-pc-linux-gnu\"\n\n; Function Attrs: noinline nounwind optnone uwtable\ndefine dso_local i32 @gemv_update(float*, float*, float*, i32, i32, i32) #0 {\n  %7 = alloca float*, align 8\n  %8 = alloca float*, align 8\n  %9 = alloca floa [...]
       for (i, 0, 1024) {
         for (j.outer: int32, 0, 32) {
           @tir.call_extern("gemv_update", @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), C_2, ((i*512) + (j.outer*16)), 16, 2, dtype=handle), @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), A_2, (i*64), 64, 1, dtype=handle), @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), B_2, (j.outer*1024), 1024, 1, dtype=handle), 16, 64, 64, dtype=int32)
diff --git a/docs/_sources/topic/vta/tutorials/autotvm/sg_execution_times.rst.txt b/docs/_sources/topic/vta/tutorials/autotvm/sg_execution_times.rst.txt
index e72889e723..566908b267 100644
--- a/docs/_sources/topic/vta/tutorials/autotvm/sg_execution_times.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/autotvm/sg_execution_times.rst.txt
@@ -5,10 +5,10 @@
 
 Computation times
 =================
-**00:28.097** total execution time for **topic_vta_tutorials_autotvm** files:
+**00:26.490** total execution time for **topic_vta_tutorials_autotvm** files:
 
 +---------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_topic_vta_tutorials_autotvm_tune_relay_vta.py` (``tune_relay_vta.py``) | 00:28.091 | 0.0 MB |
+| :ref:`sphx_glr_topic_vta_tutorials_autotvm_tune_relay_vta.py` (``tune_relay_vta.py``) | 00:26.484 | 0.0 MB |
 +---------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_topic_vta_tutorials_autotvm_tune_alu_vta.py` (``tune_alu_vta.py``)     | 00:00.007 | 0.0 MB |
 +---------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/topic/vta/tutorials/frontend/deploy_classification.rst.txt b/docs/_sources/topic/vta/tutorials/frontend/deploy_classification.rst.txt
index 81c2118604..bbe53ce80a 100644
--- a/docs/_sources/topic/vta/tutorials/frontend/deploy_classification.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/frontend/deploy_classification.rst.txt
@@ -293,7 +293,7 @@ The compilation steps are:
       DeprecationWarning,
     /workspace/vta/tutorials/frontend/deploy_classification.py:213: DeprecationWarning: legacy graph executor behavior of producing json / lib / params will be removed in the next release. Please see documents of tvm.contrib.graph_executor.GraphModule for the  new recommended usage.
       relay_prog, target=tvm.target.Target(target, host=env.target_host), params=params
-    resnet18_v1 inference graph built in 31.40s!
+    resnet18_v1 inference graph built in 29.20s!
 
 
 
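The ``resnet18_v1 inference graph built in ...`` line above is printed once ``relay.build`` finishes. As a hedged illustration of that compile-then-run flow (deliberately not the VTA-specific pipeline of this tutorial), the sketch below builds a tiny hand-written Relay function for a plain ``llvm`` target and runs it through the graph executor; the graph, names, and shapes are made up for the example.

.. code-block:: python

    # Sketch: generic relay.build + graph_executor flow on a toy graph (CPU).
    import numpy as np
    import tvm
    from tvm import relay
    from tvm.contrib import graph_executor

    x = relay.var("x", shape=(1, 3, 224, 224), dtype="float32")
    w = relay.var("w", shape=(16, 3, 3, 3), dtype="float32")
    y = relay.nn.relu(relay.nn.conv2d(x, w, padding=(1, 1)))
    mod = tvm.IRModule.from_expr(relay.Function([x, w], y))

    target = tvm.target.Target("llvm")
    with tvm.transform.PassContext(opt_level=3):
        lib = relay.build(mod, target=target)

    dev = tvm.device(str(target), 0)
    m = graph_executor.GraphModule(lib["default"](dev))
    m.set_input("x", np.random.uniform(size=(1, 3, 224, 224)).astype("float32"))
    m.set_input("w", np.random.uniform(size=(16, 3, 3, 3)).astype("float32"))
    m.run()
    print(m.get_output(0).numpy().shape)

Passing a ``tvm.IRModule`` (rather than a bare ``relay.Function``) also avoids the deprecation warning visible in the build log above.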
diff --git a/docs/_sources/topic/vta/tutorials/frontend/deploy_detection.rst.txt b/docs/_sources/topic/vta/tutorials/frontend/deploy_detection.rst.txt
index 7f10eccb97..6099be0853 100644
--- a/docs/_sources/topic/vta/tutorials/frontend/deploy_detection.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/frontend/deploy_detection.rst.txt
@@ -337,7 +337,7 @@ The compilation steps are:
 
     /workspace/python/tvm/relay/build_module.py:348: DeprecationWarning: Please use input parameter mod (tvm.IRModule) instead of deprecated parameter mod (tvm.relay.function.Function)
       DeprecationWarning,
-    yolov3-tiny inference graph built in 21.07s!
+    yolov3-tiny inference graph built in 19.78s!
 
 
 
diff --git a/docs/_sources/topic/vta/tutorials/frontend/sg_execution_times.rst.txt b/docs/_sources/topic/vta/tutorials/frontend/sg_execution_times.rst.txt
index a38dee0cb5..0e0ffd59f8 100644
--- a/docs/_sources/topic/vta/tutorials/frontend/sg_execution_times.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/frontend/sg_execution_times.rst.txt
@@ -5,10 +5,10 @@
 
 Computation times
 =================
-**01:36.386** total execution time for **topic_vta_tutorials_frontend** files:
+**01:32.432** total execution time for **topic_vta_tutorials_frontend** files:
 
 +------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_topic_vta_tutorials_frontend_deploy_classification.py` (``deploy_classification.py``) | 00:48.381 | 0.0 MB |
+| :ref:`sphx_glr_topic_vta_tutorials_frontend_deploy_detection.py` (``deploy_detection.py``)           | 00:46.372 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_topic_vta_tutorials_frontend_deploy_detection.py` (``deploy_detection.py``)           | 00:48.005 | 0.0 MB |
+| :ref:`sphx_glr_topic_vta_tutorials_frontend_deploy_classification.py` (``deploy_classification.py``) | 00:46.060 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/topic/vta/tutorials/optimize/sg_execution_times.rst.txt b/docs/_sources/topic/vta/tutorials/optimize/sg_execution_times.rst.txt
index bef4d073b9..08e3e43e61 100644
--- a/docs/_sources/topic/vta/tutorials/optimize/sg_execution_times.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/optimize/sg_execution_times.rst.txt
@@ -5,10 +5,10 @@
 
 Computation times
 =================
-**00:03.221** total execution time for **topic_vta_tutorials_optimize** files:
+**00:03.193** total execution time for **topic_vta_tutorials_optimize** files:
 
 +--------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_topic_vta_tutorials_optimize_convolution_opt.py` (``convolution_opt.py``)         | 00:02.741 | 0.0 MB |
+| :ref:`sphx_glr_topic_vta_tutorials_optimize_convolution_opt.py` (``convolution_opt.py``)         | 00:02.729 | 0.0 MB |
 +--------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_topic_vta_tutorials_optimize_matrix_multiply_opt.py` (``matrix_multiply_opt.py``) | 00:00.480 | 0.0 MB |
+| :ref:`sphx_glr_topic_vta_tutorials_optimize_matrix_multiply_opt.py` (``matrix_multiply_opt.py``) | 00:00.464 | 0.0 MB |
 +--------------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/topic/vta/tutorials/sg_execution_times.rst.txt b/docs/_sources/topic/vta/tutorials/sg_execution_times.rst.txt
index c18ab7c6f3..379cc980a6 100644
--- a/docs/_sources/topic/vta/tutorials/sg_execution_times.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/sg_execution_times.rst.txt
@@ -5,10 +5,10 @@
 
 Computation times
 =================
-**00:00.852** total execution time for **topic_vta_tutorials** files:
+**00:00.824** total execution time for **topic_vta_tutorials** files:
 
 +---------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_topic_vta_tutorials_matrix_multiply.py` (``matrix_multiply.py``) | 00:00.450 | 0.0 MB |
+| :ref:`sphx_glr_topic_vta_tutorials_matrix_multiply.py` (``matrix_multiply.py``) | 00:00.442 | 0.0 MB |
 +---------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_topic_vta_tutorials_vta_get_started.py` (``vta_get_started.py``) | 00:00.402 | 0.0 MB |
+| :ref:`sphx_glr_topic_vta_tutorials_vta_get_started.py` (``vta_get_started.py``) | 00:00.382 | 0.0 MB |
 +---------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/tutorial/auto_scheduler_matmul_x86.rst.txt b/docs/_sources/tutorial/auto_scheduler_matmul_x86.rst.txt
index d85e47c43f..29cc07c9aa 100644
--- a/docs/_sources/tutorial/auto_scheduler_matmul_x86.rst.txt
+++ b/docs/_sources/tutorial/auto_scheduler_matmul_x86.rst.txt
@@ -329,7 +329,7 @@ We build the binary and check its correctness and performance.
 
  .. code-block:: none
 
-    Execution time of this operator: 99.445 ms
+    Execution time of this operator: 93.969 ms
 
 
 
@@ -447,7 +447,7 @@ operations.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  26.089 seconds)
+   **Total running time of the script:** ( 1 minutes  7.591 seconds)
 
 
 .. _sphx_glr_download_tutorial_auto_scheduler_matmul_x86.py:
diff --git a/docs/_sources/tutorial/autotvm_matmul_x86.rst.txt b/docs/_sources/tutorial/autotvm_matmul_x86.rst.txt
index 652135c7fe..bc2c291ba9 100644
--- a/docs/_sources/tutorial/autotvm_matmul_x86.rst.txt
+++ b/docs/_sources/tutorial/autotvm_matmul_x86.rst.txt
@@ -454,16 +454,16 @@ reduce variance, we take 5 measurements and average them.
     waiting for device...
     device available
     Get devices for measurement successfully!
-    No: 1   GFLOPS: 0.45/0.45       result: MeasureResult(costs=(0.5989130603999999,), error_no=MeasureErrorNo.NO_ERROR, all_cost=9.826404094696045, timestamp=1673073127.0982652)  [('tile_y', [-1, 512]), ('tile_x', [-1, 1])],None,9
-    No: 2   GFLOPS: 1.58/1.58       result: MeasureResult(costs=(0.1695935918,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.945117235183716, timestamp=1673073130.0652544)        [('tile_y', [-1, 1]), ('tile_x', [-1, 4])],None,20
-    No: 3   GFLOPS: 2.69/2.69       result: MeasureResult(costs=(0.09983295839999999,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.814579725265503, timestamp=1673073132.742834)  [('tile_y', [-1, 16]), ('tile_x', [-1, 4])],None,24
-    No: 4   GFLOPS: 10.49/10.49     result: MeasureResult(costs=(0.025590576799999997,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.6441330909729004, timestamp=1673073134.226831)        [('tile_y', [-1, 2]), ('tile_x', [-1, 64])],None,61
-    No: 5   GFLOPS: 2.99/10.49      result: MeasureResult(costs=(0.0896580874,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.685713529586792, timestamp=1673073136.0650449)        [('tile_y', [-1, 2]), ('tile_x', [-1, 16])],None,41
-    No: 6   GFLOPS: 1.60/10.49      result: MeasureResult(costs=(0.1672929772,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.921504497528076, timestamp=1673073138.9985132)        [('tile_y', [-1, 32]), ('tile_x', [-1, 4])],None,25
-    No: 7   GFLOPS: 2.09/10.49      result: MeasureResult(costs=(0.1284203072,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.2968268394470215, timestamp=1673073142.1255877)       [('tile_y', [-1, 256]), ('tile_x', [-1, 4])],None,28
-    No: 8   GFLOPS: 11.67/11.67     result: MeasureResult(costs=(0.0229939246,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.6503844261169434, timestamp=1673073142.753008)        [('tile_y', [-1, 16]), ('tile_x', [-1, 256])],None,84
-    No: 9   GFLOPS: 1.51/11.67      result: MeasureResult(costs=(0.1773730808,), error_no=MeasureErrorNo.NO_ERROR, all_cost=3.05289626121521, timestamp=1673073145.9764822) [('tile_y', [-1, 1]), ('tile_x', [-1, 1])],None,0
-    No: 10  GFLOPS: 10.50/11.67     result: MeasureResult(costs=(0.025562763,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.684990406036377, timestamp=1673073146.6434777) [('tile_y', [-1, 512]), ('tile_x', [-1, 64])],None,69
+    No: 1   GFLOPS: 9.62/9.62       result: MeasureResult(costs=(0.027898718,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.6814179420471191, timestamp=1673206541.7217193)        [('tile_y', [-1, 1]), ('tile_x', [-1, 256])],None,80
+    No: 2   GFLOPS: 3.31/9.62       result: MeasureResult(costs=(0.081053556,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.5457041263580322, timestamp=1673206543.2708719)        [('tile_y', [-1, 64]), ('tile_x', [-1, 8])],None,36
+    No: 3   GFLOPS: 1.81/9.62       result: MeasureResult(costs=(0.1486419228,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.6199307441711426, timestamp=1673206546.717056)        [('tile_y', [-1, 512]), ('tile_x', [-1, 4])],None,29
+    No: 4   GFLOPS: 11.74/11.74     result: MeasureResult(costs=(0.0228617026,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.5938427448272705, timestamp=1673206547.3465004)       [('tile_y', [-1, 256]), ('tile_x', [-1, 512])],None,98
+    No: 5   GFLOPS: 1.86/11.74      result: MeasureResult(costs=(0.144347768,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.5399982929229736, timestamp=1673206550.092905) [('tile_y', [-1, 512]), ('tile_x', [-1, 8])],None,39
+    No: 6   GFLOPS: 2.71/11.74      result: MeasureResult(costs=(0.0989268586,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.8124949932098389, timestamp=1673206552.6885347)       [('tile_y', [-1, 2]), ('tile_x', [-1, 8])],None,31
+    No: 7   GFLOPS: 12.73/12.73     result: MeasureResult(costs=(0.021078582399999997,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.5693621635437012, timestamp=1673206554.0728147)       [('tile_y', [-1, 32]), ('tile_x', [-1, 512])],None,95
+    No: 8   GFLOPS: 12.68/12.73     result: MeasureResult(costs=(0.021176568200000002,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.652878999710083, timestamp=1673206554.6630762)        [('tile_y', [-1, 1]), ('tile_x', [-1, 64])],None,60
+    No: 9   GFLOPS: 12.24/12.73     result: MeasureResult(costs=(0.0219353664,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.5713191032409668, timestamp=1673206555.346897)        [('tile_y', [-1, 8]), ('tile_x', [-1, 256])],None,83
+    No: 10  GFLOPS: 1.75/12.73      result: MeasureResult(costs=(0.15359519800000002,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.6658265590667725, timestamp=1673206558.0598083)        [('tile_y', [-1, 16]), ('tile_x', [-1, 2])],None,14
 
 
 
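Each ``No: k GFLOPS: ...`` line above is one AutoTVM ``MeasureResult`` for a point in the ``tile_y``/``tile_x`` search space of the tutorial's matmul template. A minimal sketch of the template and tuning loop that produce such a log, following ``autotvm_matmul_x86.py`` (sizes and trial count are assumptions):

.. code-block:: python

    import tvm
    from tvm import te, autotvm

    @autotvm.template("tutorial/matmul")
    def matmul(N, L, M, dtype):
        A = te.placeholder((N, L), name="A", dtype=dtype)
        B = te.placeholder((L, M), name="B", dtype=dtype)
        k = te.reduce_axis((0, L), name="k")
        C = te.compute((N, M), lambda y, x: te.sum(A[y, k] * B[k, x], axis=k), name="C")

        s = te.create_schedule(C.op)
        y, x = s[C].op.axis

        # These two knobs are what appear as [('tile_y', ...), ('tile_x', ...)] above.
        cfg = autotvm.get_config()
        cfg.define_split("tile_y", y, num_outputs=2)
        cfg.define_split("tile_x", x, num_outputs=2)
        yo, yi = cfg["tile_y"].apply(s, C, y)
        xo, xi = cfg["tile_x"].apply(s, C, x)
        s[C].reorder(yo, xo, k, yi, xi)
        return s, [A, B, C]

    task = autotvm.task.create("tutorial/matmul", args=(1024, 1024, 1024, "float32"), target="llvm")
    measure_option = autotvm.measure_option(builder="local", runner=autotvm.LocalRunner(number=5))
    tuner = autotvm.tuner.RandomTuner(task)
    tuner.tune(
        n_trial=10,
        measure_option=measure_option,
        callbacks=[autotvm.callback.log_to_file("matmul.log")],
    )
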
diff --git a/docs/_sources/tutorial/autotvm_relay_x86.rst.txt b/docs/_sources/tutorial/autotvm_relay_x86.rst.txt
index 2c38807840..d6d9b3700b 100644
--- a/docs/_sources/tutorial/autotvm_relay_x86.rst.txt
+++ b/docs/_sources/tutorial/autotvm_relay_x86.rst.txt
@@ -324,7 +324,7 @@ standard deviation.
 
  .. code-block:: none
 
-    {'mean': 523.7592217600013, 'median': 523.6211977000039, 'std': 1.0037636096764102}
+    {'mean': 512.9814760500005, 'median': 513.116963899995, 'std': 2.1695880671952343}
 
 
 
@@ -558,30 +558,32 @@ the tuning data to.
 
  .. code-block:: none
 
-
    [Task  1/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  1/25]  Current/Best:   11.03/  17.91 GFLOPS | Progress: (4/20) | 8.84 s
    [Task  1/25]  Current/Best:   17.35/  17.91 GFLOPS | Progress: (8/20) | 13.99 s
    [Task  1/25]  Current/Best:   10.50/  17.91 GFLOPS | Progress: (12/20) | 16.77 s
    [Task  1/25]  Current/Best:   10.94/  17.91 GFLOPS | Progress: (16/20) | 19.56 s
    [Task  1/25]  Current/Best:   12.57/  17.91 GFLOPS | Progress: (20/20) | 22.53 s Done.
-
    [Task  2/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  2/25]  Current/Best:   20.94/  20.94 GFLOPS | Progress: (4/20) | 3.27 s
    [Task  2/25]  Current/Best:   18.42/  22.66 GFLOPS | Progress: (8/20) | 5.61 s
    [Task  2/25]  Current/Best:    5.53/  22.66 GFLOPS | Progress: (12/20) | 8.05 s
    [Task  2/25]  Current/Best:    6.35/  22.66 GFLOPS | Progress: (16/20) | 9.49 s
    [Task  2/25]  Current/Best:   11.22/  22.66 GFLOPS | Progress: (20/20) | 11.17 s Done.
-
    [Task  3/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  3/25]  Current/Best:   12.56/  23.03 GFLOPS | Progress: (4/20) | 4.09 s
    [Task  3/25]  Current/Best:   10.78/  23.03 GFLOPS | Progress: (8/20) | 6.61 s
    [Task  3/25]  Current/Best:   14.77/  23.03 GFLOPS | Progress: (12/20) | 8.57 s
    [Task  3/25]  Current/Best:   15.27/  23.03 GFLOPS | Progress: (16/20) | 11.20 s
    [Task  3/25]  Current/Best:   14.86/  23.03 GFLOPS | Progress: (20/20) | 14.73 s Done.
-
    [Task  4/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  4/25]  Current/Best:    6.75/  15.17 GFLOPS | Progress: (4/20) | 4.25 s
    [Task  4/25]  Current/Best:   14.34/  15.91 GFLOPS | Progress: (8/20) | 12.84 s
    [Task  4/25]  Current/Best:   11.79/  15.91 GFLOPS | Progress: (12/20) | 15.63 s
    [Task  4/25]  Current/Best:   21.71/  21.71 GFLOPS | Progress: (16/20) | 17.33 s
    [Task  4/25]  Current/Best:   17.85/  21.71 GFLOPS | Progress: (20/20) | 21.63 s Done.
-
    [Task  5/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  5/25]  Current/Best:   16.90/  16.90 GFLOPS | Progress: (4/20) | 4.03 s
    [Task  5/25]  Current/Best:    6.00/  21.59 GFLOPS | Progress: (8/20) | 6.38 s
    [Task  5/25]  Current/Best:   21.41/  21.59 GFLOPS | Progress: (12/20) | 8.59 s
    [Task  5/25]  Current/Best:   13.20/  21.59 GFLOPS | Progress: (16/20) | 10.94 s
    [Task  5/25]  Current/Best:   12.76/  21.59 GFLOPS | Progress: (20/20) | 12.99 s Done.
-
    [Task  6/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  6/25]  Current/Best:    2.92/  14.45 GFLOPS | Progress: (4/20) | 6.40 s
    [Task  6/25]  Current/Best:    8.37/  20.02 GFLOPS | Progress: (8/20) | 9.13 s
    [Task  6/25]  Current/Best:   13.37/  20.02 GFLOPS | Progress: (12/20) | 15.65 s
    [Task  6/25]  Current/Best:    4.81/  20.02 GFLOPS | Progress: (16/20) | 18.34 s
    [Task  6/25]  Current/Best:    9.57/  20.02 GFLOPS | Progress: (20/20) | 21.07 s Done.
-
    [Task  7/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  7/25]  Current/Best:    6.06/  16.19 GFLOPS | Progress: (4/20) | 4.51 s
    [Task  7/25]  Current/Best:   18.20/  18.20 GFLOPS | Progress: (8/20) | 7.17 s
    [Task  7/25]  Current/Best:   11.57/  18.20 GFLOPS | Progress: (12/20) | 9.70 s
    [Task  7/25]  Current/Best:   18.28/  18.28 GFLOPS | Progress: (16/20) | 12.26 s
    [Task  7/25]  Current/Best:   13.25/  19.94 GFLOPS | Progress: (20/20) | 14.98 s Done.
-
    [Task  8/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  8/25]  Current/Best:   13.49/  13.49 GFLOPS | Progress: (4/20) | 6.15 s
    [Task  8/25]  Current/Best:    7.94/  20.05 GFLOPS | Progress: (8/20) | 8.97 s
    [Task  8/25]  Current/Best:    3.20/  21.27 GFLOPS | Progress: (12/20) | 12.47 s
    [Task  8/25]  Current/Best:   11.92/  21.27 GFLOPS | Progress: (16/20) | 14.94 s
    [Task  8/25]  Current/Best:   11.77/  21.27 GFLOPS | Progress: (20/20) | 19.08 s Done.
-
    [Task  9/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  9/25]  Current/Best:   12.80/  16.65 GFLOPS | Progress: (4/20) | 9.49 s
    [Task  9/25]  Current/Best:   10.86/  19.42 GFLOPS | Progress: (8/20) | 13.99 s
    [Task  9/25]  Current/Best:   18.54/  19.42 GFLOPS | Progress: (12/20) | 23.09 s
    [Task  9/25]  Current/Best:    6.59/  19.42 GFLOPS | Progress: (16/20) | 25.94 s
    [Task  9/25]  Current/Best:   12.21/  19.42 GFLOPS | Progress: (20/20) | 31.53 s Done.
-
    [Task 10/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 10/25]  Current/Best:    8.13/   8.13 GFLOPS | Progress: (4/20) | 4.09 s
    [Task 10/25]  Current/Best:   16.30/  16.30 GFLOPS | Progress: (8/20) | 5.90 s
    [Task 10/25]  Current/Best:    6.12/  20.04 GFLOPS | Progress: (12/20) | 7.63 s
    [Task 10/25]  Current/Best:    6.83/  20.04 GFLOPS | Progress: (16/20) | 9.66 s
    [Task 10/25]  Current/Best:   13.35/  20.04 GFLOPS | Progress: (20/20) | 11.37 s Done.
-
    [Task 11/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 11/25]  Current/Best:   11.27/  18.00 GFLOPS | Progress: (4/20) | 5.49 s
    [Task 11/25]  Current/Best:   19.44/  19.44 GFLOPS | Progress: (8/20) | 7.29 s
    [Task 11/25]  Current/Best:   19.66/  19.66 GFLOPS | Progress: (12/20) | 10.03 s
    [Task 11/25]  Current/Best:   15.59/  19.66 GFLOPS | Progress: (16/20) | 13.13 s
    [Task 11/25]  Current/Best:   16.46/  19.66 GFLOPS | Progress: (20/20) | 15.27 s Done.
-
    [Task 12/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 12/25]  Current/Best:   14.84/  14.84 GFLOPS | Progress: (4/20) | 4.42 s
    [Task 12/25]  Current/Best:   13.81/  15.50 GFLOPS | Progress: (8/20) | 6.72 s
    [Task 12/25]  Current/Best:   11.21/  15.50 GFLOPS | Progress: (12/20) | 9.87 s
    [Task 12/25]  Current/Best:   10.85/  18.48 GFLOPS | Progress: (16/20) | 12.18 s
    [Task 12/25]  Current/Best:   15.44/  18.48 GFLOPS | Progress: (20/20) | 16.21 s Done.
-
    [Task 13/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 13/25]  Current/Best:   17.30/  17.30 GFLOPS | Progress: (4/20) | 4.38 s
    [Task 13/25]  Current/Best:    8.98/  20.60 GFLOPS | Progress: (8/20) | 7.22 s
    [Task 13/25]  Current/Best:    5.90/  20.60 GFLOPS | Progress: (12/20) | 10.30 s
    [Task 13/25]  Current/Best:   18.12/  20.60 GFLOPS | Progress: (16/20) | 13.55 s
    [Task 13/25]  Current/Best:    6.01/  20.60 GFLOPS | Progress: (20/20) | 16.77 s Done.
-
    [Task 14/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 14/25]  Current/Best:   19.30/  19.30 GFLOPS | Progress: (4/20) | 4.11 s
    [Task 14/25]  Current/Best:   16.14/  19.30 GFLOPS | Progress: (8/20) | 7.23 s
    [Task 14/25]  Current/Best:   14.05/  19.30 GFLOPS | Progress: (12/20) | 10.53 s
    [Task 14/25]  Current/Best:    7.02/  19.30 GFLOPS | Progress: (16/20) | 13.15 s
    [Task 14/25]  Current/Best:    9.91/  19.30 GFLOPS | Progress: (20/20) | 19.31 s
    [Task 15/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 15/25]  Current/Best:   14.31/  14.53 GFLOPS | Progress: (4/20) | 4.46 s
    [Task 15/25]  Current/Best:   16.20/  16.20 GFLOPS | Progress: (8/20) | 6.25 s
    [Task 15/25]  Current/Best:   15.08/  18.02 GFLOPS | Progress: (12/20) | 7.58 s
    [Task 15/25]  Current/Best:   16.30/  18.10 GFLOPS | Progress: (16/20) | 9.97 s Done.
-
    [Task 15/25]  Current/Best:   16.42/  18.10 GFLOPS | Progress: (20/20) | 11.85 s Done.
-
    [Task 16/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 16/25]  Current/Best:   17.57/  17.57 GFLOPS | Progress: (4/20) | 3.80 s
    [Task 16/25]  Current/Best:   13.49/  21.53 GFLOPS | Progress: (8/20) | 6.02 s
    [Task 16/25]  Current/Best:    8.42/  21.53 GFLOPS | Progress: (12/20) | 8.20 s
    [Task 16/25]  Current/Best:   14.65/  21.53 GFLOPS | Progress: (16/20) | 9.99 s
    [Task 16/25]  Current/Best:   15.91/  21.53 GFLOPS | Progress: (20/20) | 11.65 s Done.
-
    [Task 17/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 17/25]  Current/Best:   16.77/  16.77 GFLOPS | Progress: (4/20) | 4.60 s
    [Task 17/25]  Current/Best:   19.22/  22.20 GFLOPS | Progress: (8/20) | 6.72 s
    [Task 17/25]  Current/Best:   11.69/  22.20 GFLOPS | Progress: (12/20) | 9.11 s
    [Task 17/25]  Current/Best:   22.32/  22.32 GFLOPS | Progress: (16/20) | 11.78 s
    [Task 17/25]  Current/Best:   21.92/  22.32 GFLOPS | Progress: (20/20) | 14.97 s Done.
-
    [Task 18/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 18/25]  Current/Best:   18.47/  18.47 GFLOPS | Progress: (4/20) | 3.89 s
    [Task 18/25]  Current/Best:    6.23/  18.47 GFLOPS | Progress: (8/20) | 6.19 s
    [Task 18/25]  Current/Best:   13.03/  18.47 GFLOPS | Progress: (12/20) | 10.28 s
    [Task 18/25]  Current/Best:   10.57/  18.47 GFLOPS | Progress: (16/20) | 12.39 s
    [Task 18/25]  Current/Best:   15.90/  19.38 GFLOPS | Progress: (20/20) | 17.35 s Done.
-
    [Task 19/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 19/25]  Current/Best:    6.06/  17.96 GFLOPS | Progress: (4/20) | 7.39 s
    [Task 19/25]  Current/Best:    8.63/  17.96 GFLOPS | Progress: (8/20) | 10.15 s
    [Task 19/25]  Current/Best:   19.20/  19.82 GFLOPS | Progress: (12/20) | 12.34 s
    [Task 19/25]  Current/Best:    7.41/  19.82 GFLOPS | Progress: (16/20) | 15.96 s
    [Task 19/25]  Current/Best:   12.08/  19.82 GFLOPS | Progress: (20/20) | 18.31 s Done.
-
    [Task 20/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 20/25]  Current/Best:   10.62/  13.34 GFLOPS | Progress: (4/20) | 4.25 s
    [Task 20/25]  Current/Best:   10.33/  13.34 GFLOPS | Progress: (8/20) | 6.21 s
    [Task 20/25]  Current/Best:   14.58/  14.58 GFLOPS | Progress: (12/20) | 10.40 s
    [Task 20/25]  Current/Best:    4.94/  19.72 GFLOPS | Progress: (16/20) | 13.06 s
    [Task 20/25]  Current/Best:   10.95/  19.72 GFLOPS | Progress: (20/20) | 15.72 s
    [Task 21/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 21/25]  Current/Best:    6.88/   7.20 GFLOPS | Progress: (4/20) | 7.09 s Done.
-
    [Task 21/25]  Current/Best:    8.09/   8.09 GFLOPS | Progress: (8/20) | 10.00 s
    [Task 21/25]  Current/Best:   15.96/  15.96 GFLOPS | Progress: (12/20) | 12.64 s
    [Task 21/25]  Current/Best:   11.63/  17.50 GFLOPS | Progress: (16/20) | 14.66 s
    [Task 21/25]  Current/Best:   13.98/  18.22 GFLOPS | Progress: (20/20) | 16.42 s
    [Task 22/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 22/25]  Current/Best:   10.68/  10.68 GFLOPS | Progress: (4/20) | 6.96 s
    [Task 22/25]  Current/Best:    5.27/  15.92 GFLOPS | Progress: (8/20) | 8.79 s
    [Task 22/25]  Current/Best:   11.71/  16.67 GFLOPS | Progress: (12/20) | 10.68 s
    [Task 22/25]  Current/Best:    1.55/  20.85 GFLOPS | Progress: (16/20) | 13.02 s
    [Task 22/25]  Current/Best:   16.03/  20.85 GFLOPS | Progress: (20/20) | 14.64 s Done.
-
    [Task 23/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 23/25]  Current/Best:   18.50/  18.68 GFLOPS | Progress: (4/20) | 3.98 s
    [Task 23/25]  Current/Best:    6.12/  19.47 GFLOPS | Progress: (8/20) | 7.28 s
    [Task 23/25]  Current/Best:   10.98/  19.67 GFLOPS | Progress: (12/20) | 11.07 s
    [Task 23/25]  Current/Best:   11.86/  19.67 GFLOPS | Progress: (16/20) | 13.85 s
    [Task 23/25]  Current/Best:   18.14/  19.67 GFLOPS | Progress: (20/20) | 17.56 s Done.
-
    [Task 24/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 24/25]  Current/Best:    8.81/   8.81 GFLOPS | Progress: (4/20) | 12.57 s
    [Task 24/25]  Current/Best:    7.37/   8.81 GFLOPS | Progress: (8/20) | 15.73 s
    [Task 24/25]  Current/Best:    2.86/   8.81 GFLOPS | Progress: (12/20) | 19.86 s
    [Task 24/25]  Current/Best:    3.35/   8.81 GFLOPS | Progress: (16/20) | 21.30 s
    [Task 24/25]  Current/Best:    2.86/   8.81 GFLOPS | Progress: (20/20) | 32.29 s
    [Task 25/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 25/25]  Current/Best:    7.24/   8.11 GFLOPS | Progress: (4/20) | 4.30 s Done.
-
    [Task 25/25]  Current/Best:    2.93/   8.44 GFLOPS | Progress: (8/20) | 9.48 s
    [Task 25/25]  Current/Best:    2.96/   8.80 GFLOPS | Progress: (12/20) | 20.43 s
    [Task 25/25]  Current/Best:    7.78/   9.46 GFLOPS | Progress: (16/20) | 26.35 s
    [Task 25/25]  Current/Best:    5.60/   9.46 GFLOPS | Progress: (20/20) | 37.30 s
+
    [Task  1/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  1/25]  Current/Best:    9.44/  16.64 GFLOPS | Progress: (4/20) | 7.85 s
    [Task  1/25]  Current/Best:   11.19/  16.64 GFLOPS | Progress: (8/20) | 11.81 s
    [Task  1/25]  Current/Best:   14.30/  22.29 GFLOPS | Progress: (12/20) | 14.30 s
    [Task  1/25]  Current/Best:   11.04/  22.29 GFLOPS | Progress: (16/20) | 17.25 s
    [Task  1/25]  Current/Best:   23.94/  23.94 GFLOPS | Progress: (20/20) | 19.21 s Done.
+
    [Task  2/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  2/25]  Current/Best:   12.87/  12.92 GFLOPS | Progress: (4/20) | 4.11 s
    [Task  2/25]  Current/Best:   11.01/  18.58 GFLOPS | Progress: (8/20) | 5.81 s
    [Task  2/25]  Current/Best:   15.19/  23.00 GFLOPS | Progress: (12/20) | 7.51 s
    [Task  2/25]  Current/Best:   19.23/  23.00 GFLOPS | Progress: (16/20) | 9.20 s
    [Task  2/25]  Current/Best:   12.68/  23.00 GFLOPS | Progress: (20/20) | 10.61 s Done.
+
    [Task  3/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  3/25]  Current/Best:   15.86/  22.40 GFLOPS | Progress: (4/20) | 3.88 s
    [Task  3/25]  Current/Best:   23.37/  23.37 GFLOPS | Progress: (8/20) | 6.04 s
    [Task  3/25]  Current/Best:    3.17/  23.37 GFLOPS | Progress: (12/20) | 10.15 s
    [Task  3/25]  Current/Best:    3.14/  23.37 GFLOPS | Progress: (16/20) | 13.40 s
    [Task  3/25]  Current/Best:   19.33/  23.37 GFLOPS | Progress: (20/20) | 15.40 s Done.
+
    [Task  4/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  4/25]  Current/Best:   13.25/  16.98 GFLOPS | Progress: (4/20) | 4.00 s
    [Task  4/25]  Current/Best:    8.45/  20.64 GFLOPS | Progress: (8/20) | 6.02 s
    [Task  4/25]  Current/Best:   16.85/  20.64 GFLOPS | Progress: (12/20) | 8.04 s
    [Task  4/25]  Current/Best:   13.77/  20.64 GFLOPS | Progress: (16/20) | 11.11 s
    [Task  4/25]  Current/Best:   11.21/  20.64 GFLOPS | Progress: (20/20) | 14.23 s Done.
+
    [Task  5/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  5/25]  Current/Best:   10.67/  11.44 GFLOPS | Progress: (4/20) | 4.20 s
    [Task  5/25]  Current/Best:   13.26/  13.26 GFLOPS | Progress: (8/20) | 6.94 s
    [Task  5/25]  Current/Best:    6.01/  13.26 GFLOPS | Progress: (12/20) | 9.21 s
    [Task  5/25]  Current/Best:    9.55/  13.49 GFLOPS | Progress: (16/20) | 11.37 s
    [Task  5/25]  Current/Best:    3.62/  18.22 GFLOPS | Progress: (20/20) | 13.84 s Done.
+
    [Task  6/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  6/25]  Current/Best:   13.17/  14.65 GFLOPS | Progress: (4/20) | 4.35 s
    [Task  6/25]  Current/Best:    9.66/  19.63 GFLOPS | Progress: (8/20) | 6.63 s
    [Task  6/25]  Current/Best:    8.86/  19.63 GFLOPS | Progress: (12/20) | 8.97 s
    [Task  6/25]  Current/Best:   11.10/  19.63 GFLOPS | Progress: (16/20) | 12.36 s
    [Task  6/25]  Current/Best:    6.08/  19.63 GFLOPS | Progress: (20/20) | 15.42 s Done.
+
    [Task  7/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  7/25]  Current/Best:   11.75/  20.02 GFLOPS | Progress: (4/20) | 4.79 s
    [Task  7/25]  Current/Best:   13.52/  20.02 GFLOPS | Progress: (8/20) | 7.79 s
    [Task  7/25]  Current/Best:    8.81/  20.02 GFLOPS | Progress: (12/20) | 10.28 s
    [Task  7/25]  Current/Best:    6.11/  20.02 GFLOPS | Progress: (16/20) | 12.86 s
    [Task  7/25]  Current/Best:   18.44/  20.02 GFLOPS | Progress: (20/20) | 15.39 s Done.
+
    [Task  8/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  8/25]  Current/Best:    8.86/  13.74 GFLOPS | Progress: (4/20) | 4.86 s
    [Task  8/25]  Current/Best:    2.10/  18.54 GFLOPS | Progress: (8/20) | 8.07 s
    [Task  8/25]  Current/Best:   10.50/  18.54 GFLOPS | Progress: (12/20) | 20.06 s
    [Task  8/25]  Current/Best:    2.77/  18.54 GFLOPS | Progress: (16/20) | 23.05 s
    [Task  8/25]  Current/Best:   12.51/  18.54 GFLOPS | Progress: (20/20) | 34.43 s
    [Task  9/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  9/25]  Current/Best:   11.53/  21.54 GFLOPS | Progress: (4/20) | 8.03 s
    [Task  9/25]  Current/Best:    6.49/  21.54 GFLOPS | Progress: (8/20) | 9.82 s
    [Task  9/25]  Current/Best:   14.49/  21.54 GFLOPS | Progress: (12/20) | 14.09 s
    [Task  9/25]  Current/Best:   11.36/  21.54 GFLOPS | Progress: (16/20) | 16.31 s
   [Task  9/25]  Current/Best:   16.49/  21.54 GFLOPS | Progress: (20/20) | 18.17 s Done.
+
    [Task 10/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 10/25]  Current/Best:    4.27/  12.90 GFLOPS | Progress: (4/20) | 3.84 s
    [Task 10/25]  Current/Best:   11.56/  16.10 GFLOPS | Progress: (8/20) | 6.40 s
    [Task 10/25]  Current/Best:   15.00/  16.10 GFLOPS | Progress: (12/20) | 9.89 s
    [Task 10/25]  Current/Best:   14.47/  16.10 GFLOPS | Progress: (16/20) | 12.17 s
    [Task 10/25]  Current/Best:   10.07/  17.27 GFLOPS | Progress: (20/20) | 14.95 s Done.
+
    [Task 11/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 11/25]  Current/Best:    7.05/  23.97 GFLOPS | Progress: (4/20) | 4.22 s
    [Task 11/25]  Current/Best:   11.56/  23.97 GFLOPS | Progress: (8/20) | 8.07 s
    [Task 11/25]  Current/Best:    3.13/  23.97 GFLOPS | Progress: (12/20) | 10.93 s
    [Task 11/25]  Current/Best:   15.95/  23.97 GFLOPS | Progress: (16/20) | 13.79 s
    [Task 11/25]  Current/Best:   17.29/  23.97 GFLOPS | Progress: (20/20) | 16.05 s Done.
+
    [Task 12/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 12/25]  Current/Best:    4.46/  15.48 GFLOPS | Progress: (4/20) | 8.06 s
    [Task 12/25]  Current/Best:   13.49/  15.48 GFLOPS | Progress: (8/20) | 11.96 s
    [Task 12/25]  Current/Best:   13.07/  17.99 GFLOPS | Progress: (12/20) | 15.00 s
    [Task 12/25]  Current/Best:   17.50/  18.36 GFLOPS | Progress: (16/20) | 17.30 s
    [Task 12/25]  Current/Best:    2.94/  18.36 GFLOPS | Progress: (20/20) | 22.28 s Done.
+
    [Task 13/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 13/25]  Current/Best:   13.96/  19.63 GFLOPS | Progress: (4/20) | 4.50 s
    [Task 13/25]  Current/Best:    6.38/  19.63 GFLOPS | Progress: (8/20) | 7.19 s
    [Task 13/25]  Current/Best:   15.51/  21.63 GFLOPS | Progress: (12/20) | 9.52 s
    [Task 13/25]  Current/Best:   19.83/  21.63 GFLOPS | Progress: (16/20) | 12.86 s
    [Task 13/25]  Current/Best:    6.21/  21.63 GFLOPS | Progress: (20/20) | 16.54 s Done.
+
    [Task 14/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 14/25]  Current/Best:    9.86/  13.25 GFLOPS | Progress: (4/20) | 5.13 s
    [Task 14/25]  Current/Best:   12.93/  16.26 GFLOPS | Progress: (8/20) | 8.76 s
    [Task 14/25]  Current/Best:   17.65/  17.65 GFLOPS | Progress: (12/20) | 10.55 s
    [Task 14/25]  Current/Best:    6.16/  17.65 GFLOPS | Progress: (16/20) | 14.31 s
    [Task 14/25]  Current/Best:   19.69/  19.69 GFLOPS | Progress: (20/20) | 17.56 s
    [Task 15/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 15/25]  Current/Best:    6.63/  15.46 GFLOPS | Progress: (4/20) | 7.98 s Done.
+     Done.
+
    [Task 15/25]  Current/Best:    6.41/  15.46 GFLOPS | Progress: (8/20) | 13.32 s
    [Task 15/25]  Current/Best:    9.40/  20.68 GFLOPS | Progress: (12/20) | 15.64 s
    [Task 15/25]  Current/Best:   13.80/  20.68 GFLOPS | Progress: (16/20) | 17.21 s
    [Task 15/25]  Current/Best:   22.44/  22.44 GFLOPS | Progress: (20/20) | 19.06 s Done.
+
    [Task 16/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 16/25]  Current/Best:    8.90/  19.15 GFLOPS | Progress: (4/20) | 3.36 s
    [Task 16/25]  Current/Best:   18.50/  19.15 GFLOPS | Progress: (8/20) | 5.81 s
    [Task 16/25]  Current/Best:   18.30/  19.15 GFLOPS | Progress: (12/20) | 7.43 s
    [Task 16/25]  Current/Best:   14.68/  19.15 GFLOPS | Progress: (16/20) | 9.77 s
    [Task 16/25]  Current/Best:   11.12/  19.15 GFLOPS | Progress: (20/20) | 11.43 s Done.
+
    [Task 17/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 17/25]  Current/Best:   17.11/  17.94 GFLOPS | Progress: (4/20) | 4.51 s
    [Task 17/25]  Current/Best:   17.67/  23.04 GFLOPS | Progress: (8/20) | 6.45 s
    [Task 17/25]  Current/Best:   13.11/  23.04 GFLOPS | Progress: (12/20) | 8.33 s
    [Task 17/25]  Current/Best:   11.98/  23.04 GFLOPS | Progress: (16/20) | 10.87 s
    [Task 17/25]  Current/Best:   14.08/  23.04 GFLOPS | Progress: (20/20) | 13.63 s Done.
+
    [Task 18/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 18/25]  Current/Best:    4.34/  17.17 GFLOPS | Progress: (4/20) | 4.25 s
    [Task 18/25]  Current/Best:   19.33/  19.86 GFLOPS | Progress: (8/20) | 7.18 s
    [Task 18/25]  Current/Best:    8.96/  19.86 GFLOPS | Progress: (12/20) | 9.95 s
    [Task 18/25]  Current/Best:    5.61/  19.86 GFLOPS | Progress: (16/20) | 16.09 s
    [Task 18/25]  Current/Best:   15.71/  19.86 GFLOPS | Progress: (20/20) | 18.67 s Done.
+
    [Task 19/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 19/25]  Current/Best:   11.34/  11.34 GFLOPS | Progress: (4/20) | 5.79 s
    [Task 19/25]  Current/Best:   17.77/  17.77 GFLOPS | Progress: (8/20) | 10.95 s
    [Task 19/25]  Current/Best:    5.28/  19.19 GFLOPS | Progress: (12/20) | 14.44 s
    [Task 19/25]  Current/Best:    8.94/  23.19 GFLOPS | Progress: (16/20) | 17.37 s
    [Task 19/25]  Current/Best:    5.33/  23.19 GFLOPS | Progress: (20/20) | 20.07 s Done.
+
    [Task 20/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 20/25]  Current/Best:    8.85/  12.88 GFLOPS | Progress: (4/20) | 5.36 s
    [Task 20/25]  Current/Best:    4.78/  15.08 GFLOPS | Progress: (8/20) | 9.11 s
    [Task 20/25]  Current/Best:    1.58/  16.87 GFLOPS | Progress: (12/20) | 12.50 s
    [Task 20/25]  Current/Best:   15.85/  16.87 GFLOPS | Progress: (16/20) | 16.26 s
    [Task 20/25]  Current/Best:    7.34/  19.61 GFLOPS | Progress: (20/20) | 19.08 s
    [Task 21/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 21/25]  Current/Best:    7.99/  16.32 GFLOPS | Progress: (4/20) | 3.87 s
    [Task 21/25]  Current/Best:   13.72/  21.63 GFLOPS | Progress: (8/20) | 6.35 s
    [Task 21/25]  Current/Best:    5.31/  21.63 GFLOPS | Progress: (12/20) | 8.59 s
    [Task 21/25]  Current/Best:   15.03/  21.63 GFLOPS | Progress: (16/20) | 11.85 s
   [Task 21/25]  Current/Best:    8.25/  21.63 GFLOPS | Progress: (20/20) | 14.34 s
    [Task 22/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 22/25]  Current/Best:   12.82/  12.82 GFLOPS | Progress: (4/20) | 4.36 s Done.
+     Done.
+
    [Task 22/25]  Current/Best:    5.30/  15.88 GFLOPS | Progress: (8/20) | 9.28 s
    [Task 22/25]  Current/Best:   11.36/  18.43 GFLOPS | Progress: (12/20) | 11.41 s
    [Task 22/25]  Current/Best:   14.04/  22.02 GFLOPS | Progress: (16/20) | 13.52 s
    [Task 22/25]  Current/Best:   15.99/  22.02 GFLOPS | Progress: (20/20) | 15.22 s Done.
+
    [Task 23/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 23/25]  Current/Best:   14.25/  14.25 GFLOPS | Progress: (4/20) | 5.16 s
    [Task 23/25]  Current/Best:   10.59/  22.87 GFLOPS | Progress: (8/20) | 7.67 s
    [Task 23/25]  Current/Best:   18.50/  22.87 GFLOPS | Progress: (12/20) | 9.81 s
    [Task 23/25]  Current/Best:    6.13/  22.87 GFLOPS | Progress: (16/20) | 13.76 s
    [Task 23/25]  Current/Best:   10.69/  22.87 GFLOPS | Progress: (20/20) | 17.43 s Done.
+
    [Task 24/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 24/25]  Current/Best:    3.50/   3.50 GFLOPS | Progress: (4/20) | 10.28 s
    [Task 24/25]  Current/Best:    7.50/   7.50 GFLOPS | Progress: (8/20) | 17.66 s
    [Task 24/25]  Current/Best:    6.85/   7.50 GFLOPS | Progress: (12/20) | 28.59 s
    [Task 24/25]  Current/Best:    0.68/   7.50 GFLOPS | Progress: (16/20) | 39.58 s
    [Task 24/25]  Current/Best:    9.07/   9.07 GFLOPS | Progress: (20/20) | 47.55 s
    [Task 25/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 25/25]  Current/Best:    3.49/   8.60 GFLOPS | Progress: (4/20) | 7.30 s
    [Task 25/25]  Current/Best:    2.86/   8.60 GFLOPS | Progress: (8/20) | 9.06 s
    [Task 25/25]  Current/Best:    0.00/   8.60 GFLOPS | Progress: (12/20) | 19.15 s Done.
+
    [Task 25/25]  Current/Best:    2.87/   8.60 GFLOPS | Progress: (16/20) | 22.03 s
    [Task 25/25]  Current/Best:    1.55/   8.60 GFLOPS | Progress: (20/20) | 23.42 s Done.
+
 
 
 
@@ -677,9 +679,9 @@ Verify that the optimized model runs and produces the same results:
 
  .. code-block:: none
 
-    class='n02123045 tabby, tabby cat' with probability=0.621103
-    class='n02123159 tiger cat' with probability=0.356379
-    class='n02124075 Egyptian cat' with probability=0.019712
+    class='n02123045 tabby, tabby cat' with probability=0.621104
+    class='n02123159 tiger cat' with probability=0.356377
+    class='n02124075 Egyptian cat' with probability=0.019713
     class='n02129604 tiger, Panthera tigris' with probability=0.001215
     class='n04040759 radiator' with probability=0.000262
 
@@ -735,8 +737,8 @@ improvement in comparing the optimized model to the unoptimized model.
 
  .. code-block:: none
 
-    optimized: {'mean': 427.8582285500056, 'median': 428.07140465000657, 'std': 1.4402819413372328}
-    unoptimized: {'mean': 523.7592217600013, 'median': 523.6211977000039, 'std': 1.0037636096764102}
+    optimized: {'mean': 408.15722969999797, 'median': 408.19497814999295, 'std': 2.4655740798439547}
+    unoptimized: {'mean': 512.9814760500005, 'median': 513.116963899995, 'std': 2.1695880671952343}
 
 
 
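The ``optimized`` / ``unoptimized`` dictionaries above are computed by repeating the graph-executor run under ``timeit`` and summarizing the per-run milliseconds with NumPy. A hedged sketch, assuming ``module`` is the compiled ``GraphModule`` with its input already set as in ``autotvm_relay_x86.py``:

.. code-block:: python

    import timeit
    import numpy as np

    timing_number = 10   # runs per measurement
    timing_repeat = 10   # measurements

    # `module` is assumed to be a tvm.contrib.graph_executor.GraphModule.
    per_run_ms = (
        np.array(timeit.Timer(lambda: module.run()).repeat(repeat=timing_repeat, number=timing_number))
        * 1000
        / timing_number
    )
    print({"mean": np.mean(per_run_ms), "median": np.median(per_run_ms), "std": np.std(per_run_ms)})
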
@@ -759,7 +761,7 @@ profiling/benchmarking.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 11 minutes  14.966 seconds)
+   **Total running time of the script:** ( 11 minutes  11.378 seconds)
 
 
 .. _sphx_glr_download_tutorial_autotvm_relay_x86.py:
diff --git a/docs/_sources/tutorial/cross_compilation_and_rpc.rst.txt b/docs/_sources/tutorial/cross_compilation_and_rpc.rst.txt
index b126d785b9..003bc4c93c 100644
--- a/docs/_sources/tutorial/cross_compilation_and_rpc.rst.txt
+++ b/docs/_sources/tutorial/cross_compilation_and_rpc.rst.txt
@@ -274,7 +274,7 @@ device and returns the measured cost. Network overhead is excluded.
 
  .. code-block:: none
 
-    1.255e-07 secs/op
+    1.245e-07 secs/op
 
 
 
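The ``1.245e-07 secs/op`` figure is the mean cost reported by a ``time_evaluator`` running the kernel on the remote device over RPC, with network overhead excluded. Roughly, assuming ``remote``, ``func`` and the input arrays ``a``/``b`` from ``cross_compilation_and_rpc.py``:

.. code-block:: python

    # remote is the RPC session and func the module uploaded to the device,
    # as established earlier in cross_compilation_and_rpc.py (assumed here).
    dev = remote.cpu()
    time_f = func.time_evaluator(func.entry_name, dev, number=10)
    cost = time_f(a, b).mean
    print("%g secs/op" % cost)
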
diff --git a/docs/_sources/tutorial/intro_topi.rst.txt b/docs/_sources/tutorial/intro_topi.rst.txt
index 53d55b44c0..75040d1713 100644
--- a/docs/_sources/tutorial/intro_topi.rst.txt
+++ b/docs/_sources/tutorial/intro_topi.rst.txt
@@ -264,7 +264,7 @@ As you can see, scheduled stages of computation have been accumulated and we can
 
  .. code-block:: none
 
-    [stage(a, placeholder(a, 0x2034cc90)), stage(b, placeholder(b, 0x19d01570)), stage(T_add, compute(T_add, body=[(a[ax0, ax1, ax2] + b[ax1, ax2])], axis=[iter_var(ax0, range(min=0, ext=100)), iter_var(ax1, range(min=0, ext=10)), iter_var(ax2, range(min=0, ext=10))], reduce_axis=[], tag=broadcast, attrs={})), stage(T_multiply, compute(T_multiply, body=[(a[ax0, ax1, ax2]*b[ax1, ax2])], axis=[iter_var(ax0, range(min=0, ext=100)), iter_var(ax1, range(min=0, ext=10)), iter_var(ax2, range(mi [...]
+    [stage(a, placeholder(a, 0x17e034a0)), stage(b, placeholder(b, 0x5306300)), stage(T_add, compute(T_add, body=[(a[ax0, ax1, ax2] + b[ax1, ax2])], axis=[iter_var(ax0, range(min=0, ext=100)), iter_var(ax1, range(min=0, ext=10)), iter_var(ax2, range(min=0, ext=10))], reduce_axis=[], tag=broadcast, attrs={})), stage(T_multiply, compute(T_multiply, body=[(a[ax0, ax1, ax2]*b[ax1, ax2])], axis=[iter_var(ax0, range(min=0, ext=100)), iter_var(ax1, range(min=0, ext=10)), iter_var(ax2, range(min [...]
 
 
 
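The list of ``stage(...)`` objects above is what ``intro_topi.py`` prints once several TOPI operators have been fused under one generic schedule. A condensed sketch of that setup (the CUDA reduce schedule is illustrative, not the only choice):

.. code-block:: python

    import tvm
    from tvm import te, topi

    x, y = 100, 10
    a = te.placeholder((x, y, y), name="a")
    b = te.placeholder((y, y), name="b")

    # Broadcast add/multiply, element-wise sum, then a reduction.
    c = a + b
    d = a * b
    e = topi.elemwise_sum([c, d])
    f = e / 2.0
    g = topi.sum(f)

    with tvm.target.cuda():
        sg = topi.cuda.schedule_reduce(g)
        print(sg.stages)  # accumulated stages: a, b, T_add, T_multiply, ...
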
diff --git a/docs/_sources/tutorial/sg_execution_times.rst.txt b/docs/_sources/tutorial/sg_execution_times.rst.txt
index 2bd41a636e..78d901d8da 100644
--- a/docs/_sources/tutorial/sg_execution_times.rst.txt
+++ b/docs/_sources/tutorial/sg_execution_times.rst.txt
@@ -5,32 +5,32 @@
 
 Computation times
 =================
-**14:56.659** total execution time for **tutorial** files:
+**14:19.345** total execution time for **tutorial** files:
 
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_autotvm_relay_x86.py` (``autotvm_relay_x86.py``)                 | 11:14.966 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_autotvm_relay_x86.py` (``autotvm_relay_x86.py``)                 | 11:11.378 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_auto_scheduler_matmul_x86.py` (``auto_scheduler_matmul_x86.py``) | 01:26.089 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_auto_scheduler_matmul_x86.py` (``auto_scheduler_matmul_x86.py``) | 01:07.591 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_tensor_expr_get_started.py` (``tensor_expr_get_started.py``)     | 01:04.020 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_tensor_expr_get_started.py` (``tensor_expr_get_started.py``)     | 01:02.668 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_relay_quick_start.py` (``relay_quick_start.py``)                 | 00:35.241 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_relay_quick_start.py` (``relay_quick_start.py``)                 | 00:34.021 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_autotvm_matmul_x86.py` (``autotvm_matmul_x86.py``)               | 00:33.941 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_autotvm_matmul_x86.py` (``autotvm_matmul_x86.py``)               | 00:21.240 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_tensor_ir_blitz_course.py` (``tensor_ir_blitz_course.py``)       | 00:01.336 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_tensor_ir_blitz_course.py` (``tensor_ir_blitz_course.py``)       | 00:01.439 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_intro_topi.py` (``intro_topi.py``)                               | 00:00.861 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_intro_topi.py` (``intro_topi.py``)                               | 00:00.818 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_cross_compilation_and_rpc.py` (``cross_compilation_and_rpc.py``) | 00:00.194 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_cross_compilation_and_rpc.py` (``cross_compilation_and_rpc.py``) | 00:00.179 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_introduction.py` (``introduction.py``)                           | 00:00.007 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_introduction.py` (``introduction.py``)                           | 00:00.006 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_tutorial_uma.py` (``uma.py``)                                             | 00:00.002 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_tvmc_python.py` (``tvmc_python.py``)                             | 00:00.001 | 0.0 MB |
-+------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_tutorial_tvmc_command_line_driver.py` (``tvmc_command_line_driver.py``)   | 00:00.001 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
+| :ref:`sphx_glr_tutorial_tvmc_python.py` (``tvmc_python.py``)                             | 00:00.001 | 0.0 MB |
++------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_tutorial_install.py` (``install.py``)                                     | 00:00.001 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/tutorial/tensor_expr_get_started.rst.txt b/docs/_sources/tutorial/tensor_expr_get_started.rst.txt
index d426909047..c1938a5acf 100644
--- a/docs/_sources/tutorial/tensor_expr_get_started.rst.txt
+++ b/docs/_sources/tutorial/tensor_expr_get_started.rst.txt
@@ -298,7 +298,7 @@ helper function to run a profile of the TVM generated code.
 
  .. code-block:: none
 
-    Numpy running time: 0.000007
+    Numpy running time: 0.000008
     naive: 0.000007
 
 
@@ -452,7 +452,7 @@ factor to be the number of threads on your CPU.
 
  .. code-block:: none
 
-    vector: 0.000046
+    vector: 0.000025
     @main = primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
       attr = {"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}
       buffers = {A: Buffer(A_2: Pointer(float32), float32, [n: int32], [stride: int32], type="auto"),
@@ -503,10 +503,10 @@ We can now compare the different schedules
  .. code-block:: none
 
                 Operator                  Timing             Performance
-                   numpy    7.403479999084084e-06                    1.0
-                   naive    6.6970000000000004e-06    0.9045746055677215
-                parallel    6.278200000000001e-06     0.8480066132111796
-                  vector              4.5648e-05       6.165749080925093
+                   numpy    7.756029999654856e-06                    1.0
+                   naive              6.6697e-06      0.8599373649014769
+                parallel    6.0653999999999996e-06    0.7820237931351361
+                  vector    2.4616699999999998e-05    3.1738789046838964
 
 
 
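The numpy/naive/parallel/vector rows compare one vector-add compute under different TE schedules. A minimal sketch of the ``vector`` variant (split factor and target are assumptions; the naive and parallel variants differ only in the scheduling calls):

.. code-block:: python

    import tvm
    from tvm import te

    n = te.var("n")
    A = te.placeholder((n,), name="A")
    B = te.placeholder((n,), name="B")
    C = te.compute(A.shape, lambda i: A[i] + B[i], name="C")

    s = te.create_schedule(C.op)
    outer, inner = s[C].split(C.op.axis[0], factor=4)
    s[C].parallel(outer)
    s[C].vectorize(inner)
    fadd_vector = tvm.build(s, [A, B, C], "llvm", name="myadd_vector")
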
@@ -927,7 +927,7 @@ matrix multiplication.
 
  .. code-block:: none
 
-    Numpy running time: 0.019703
+    Numpy running time: 0.018343
 
 
 
@@ -985,7 +985,7 @@ optimizations.
 
  .. code-block:: none
 
-    none: 3.542855
+    none: 3.535982
 
 
 
@@ -1087,7 +1087,7 @@ schedule.
 
  .. code-block:: none
 
-    blocking: 0.340276
+    blocking: 0.299280
 
 
 
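The ``blocking:`` timing corresponds to the tiled matmul schedule. A hedged sketch of that schedule for the 1024x1024 case (block size and split factor follow the tutorial's defaults):

.. code-block:: python

    import tvm
    from tvm import te

    M = K = N = 1024
    bn = 32
    A = te.placeholder((M, K), name="A")
    B = te.placeholder((K, N), name="B")
    k = te.reduce_axis((0, K), name="k")
    C = te.compute((M, N), lambda x, y: te.sum(A[x, k] * B[k, y], axis=k), name="C")

    s = te.create_schedule(C.op)
    # Tile the output into bn x bn blocks and split the reduction axis,
    # then reorder so each block stays hot in cache.
    xo, yo, xi, yi = s[C].tile(C.op.axis[0], C.op.axis[1], bn, bn)
    (kaxis,) = s[C].op.reduce_axis
    ko, ki = s[C].split(kaxis, factor=4)
    s[C].reorder(xo, yo, ko, ki, xi, yi)

    func = tvm.build(s, [A, B, C], target="llvm", name="mmult")
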
@@ -1182,7 +1182,7 @@ already cache friendly from our previous optimizations.
 
  .. code-block:: none
 
-    vectorization: 0.357398
+    vectorization: 0.335652
     @main = primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
       attr = {"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}
       buffers = {A: Buffer(A_2: Pointer(float32), float32, [1024, 1024], []),
@@ -1255,7 +1255,7 @@ more cache friendly.
 
  .. code-block:: none
 
-    loop permutation: 0.137701
+    loop permutation: 0.119530
     @main = primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
       attr = {"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}
       buffers = {A: Buffer(A_2: Pointer(float32), float32, [1024, 1024], []),
@@ -1353,7 +1353,7 @@ optimized schedule.
 
  .. code-block:: none
 
-    array packing: 0.110424
+    array packing: 0.109487
     @main = primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
       attr = {"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}
       buffers = {A: Buffer(A_2: Pointer(float32), float32, [1024, 1024], []),
@@ -1445,7 +1445,7 @@ to `C` when all the block results are ready.
 
  .. code-block:: none
 
-    block caching: 0.113347
+    block caching: 0.110940
     @main = primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
       attr = {"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}
       buffers = {A: Buffer(A_2: Pointer(float32), float32, [1024, 1024], []),
@@ -1530,7 +1530,7 @@ of thread-level parallelization.
 
  .. code-block:: none
 
-    parallelization: 0.147643
+    parallelization: 0.146024
     @main = primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
       attr = {"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}
       buffers = {A: Buffer(A_2: Pointer(float32), float32, [1024, 1024], []),
@@ -1610,13 +1610,13 @@ working, we can compare the results.
  .. code-block:: none
 
                 Operator                  Timing             Performance
-                    none      3.5428551887000004                     1.0
-                blocking            0.3402764115     0.09604581428710879
-           vectorization     0.35739753139999997     0.10087839111796772
-        loop permutation            0.1377012306      0.0388673042689412
-           array packing            0.1104235586     0.03116795711893557
-           block caching     0.11334720440000001    0.031993180178948026
-         parallelization            0.1476433858     0.04167355930067681
+                    none             3.535982266                     1.0
+                blocking            0.2992804805     0.08463856942318726
+           vectorization            0.3356515825          0.094924566146
+        loop permutation     0.11952960940000001    0.033803792103068206
+           array packing            0.1094872316     0.03096373888884204
+           block caching     0.11093986420000002     0.03137455333606586
+         parallelization            0.1460238378     0.04129654133282353
 
 
 
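Each row of the Operator/Timing/Performance table above comes from timing one schedule with a ``time_evaluator`` and normalizing against the numpy baseline. A sketch of a helper for that, assuming ``a``, ``b``, ``c`` are the tvm.nd input/output arrays and ``target`` is a string such as ``"llvm"``:

.. code-block:: python

    import tvm

    def evaluate_operation(s, vars, target, optimization, log):
        # Build the schedule, time it, and record (name, mean seconds);
        # the comparison table is then printed from `log`.
        func = tvm.build(s, vars, target=target, name="mmult")
        dev = tvm.device(target, 0)
        evaluator = func.time_evaluator(func.entry_name, dev, number=10)
        mean_time = evaluator(a, b, c).mean
        print("%s: %f" % (optimization, mean_time))
        log.append((optimization, mean_time))
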
@@ -1658,7 +1658,7 @@ the computation for specific platforms.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  4.020 seconds)
+   **Total running time of the script:** ( 1 minutes  2.668 seconds)
 
 
 .. _sphx_glr_download_tutorial_tensor_expr_get_started.py:
diff --git a/docs/commit_hash b/docs/commit_hash
index b531254725..8dbd312f1e 100644
--- a/docs/commit_hash
+++ b/docs/commit_hash
@@ -1 +1 @@
-875296c762f4654da7cd560674485dabdadcfdb6
+a99f0c15458653896c0bbe00ebf91d144c37aff2
diff --git a/docs/how_to/compile_models/from_darknet.html b/docs/how_to/compile_models/from_darknet.html
index 5d9bde1e92..845eaa3660 100644
--- a/docs/how_to/compile_models/from_darknet.html
+++ b/docs/how_to/compile_models/from_darknet.html
@@ -585,7 +585,7 @@ class:[&#39;truck 0.9266&#39;] left:471 top:83 right:689 bottom:169
 class:[&#39;bicycle 0.9984&#39;] left:111 top:113 right:577 bottom:447
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  13.164 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  9.125 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-compile-models-from-darknet-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/7716f96385bd5abb6e822041e285be54/from_darknet.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">from_darknet.py</span></code></a></p>
diff --git a/docs/how_to/compile_models/from_keras.html b/docs/how_to/compile_models/from_keras.html
index 8ad5364484..835b1db04f 100644
--- a/docs/how_to/compile_models/from_keras.html
+++ b/docs/how_to/compile_models/from_keras.html
@@ -506,7 +506,7 @@ Tensorflow is also required since it’s used as the default backend of keras.</
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Relay top-1 id: 285, class name: Egyptian cat
 
 1/1 [==============================] - ETA: 0s
-1/1 [==============================] - 1s 1s/step
+1/1 [==============================] - 1s 952ms/step
 Keras top-1 id: 285, class name: Egyptian cat
 </pre></div>
 </div>
diff --git a/docs/how_to/compile_models/from_mxnet.html b/docs/how_to/compile_models/from_mxnet.html
index 176b1ddcab..8386b6be2a 100644
--- a/docs/how_to/compile_models/from_mxnet.html
+++ b/docs/how_to/compile_models/from_mxnet.html
@@ -439,7 +439,7 @@
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;x&quot;</span><span class="p">,</span> <a href="https://docs.python.org/3/library/stdtypes.html#tuple" title="builtins.tuple" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">x</span><span class="o">.</span><span class="n">shape</span></a><span class="p">)</span>
 </pre></div>
 </div>
-<img src="../../_images/sphx_glr_from_mxnet_001.png" srcset="../../_images/sphx_glr_from_mxnet_001.png" alt="from mxnet" class = "sphx-glr-single-img"/><div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading /workspace/.mxnet/models/resnet18_v1-a0666292.zipbfa937ef-5d91-43d5-9078-74c75e265eca from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/resnet18_v1-a0666292.zip...
+<img src="../../_images/sphx_glr_from_mxnet_001.png" srcset="../../_images/sphx_glr_from_mxnet_001.png" alt="from mxnet" class = "sphx-glr-single-img"/><div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading /workspace/.mxnet/models/resnet18_v1-a0666292.zip7f435239-a9d5-4a5c-bb6f-e647f08a3a07 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/resnet18_v1-a0666292.zip...
 x (1, 3, 224, 224)
 </pre></div>
 </div>
diff --git a/docs/how_to/compile_models/from_oneflow.html b/docs/how_to/compile_models/from_oneflow.html
index 9e729a45a6..76679c11c1 100644
--- a/docs/how_to/compile_models/from_oneflow.html
+++ b/docs/how_to/compile_models/from_oneflow.html
@@ -450,12 +450,10 @@ Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdo
 
   0%|          | 0.00/41.5M [00:00&lt;?, ?B/s]
  19%|#9        | 7.99M/41.5M [00:00&lt;00:00, 52.1MB/s]
- 35%|###4      | 14.3M/41.5M [00:00&lt;00:00, 54.6MB/s]
- 47%|####7     | 19.6M/41.5M [00:00&lt;00:00, 40.5MB/s]
- 58%|#####7    | 24.0M/41.5M [00:00&lt;00:00, 34.3MB/s]
- 77%|#######7  | 32.0M/41.5M [00:00&lt;00:00, 36.8MB/s]
- 92%|#########2| 38.3M/41.5M [00:00&lt;00:00, 40.7MB/s]
-100%|##########| 41.5M/41.5M [00:01&lt;00:00, 42.2MB/s]
+ 39%|###8      | 16.0M/41.5M [00:00&lt;00:00, 57.8MB/s]
+ 58%|#####7    | 24.0M/41.5M [00:00&lt;00:00, 65.0MB/s]
+ 87%|########6 | 36.0M/41.5M [00:00&lt;00:00, 85.6MB/s]
+100%|##########| 41.5M/41.5M [00:00&lt;00:00, 71.4MB/s]
 </pre></div>
 </div>
 </div>
diff --git a/docs/how_to/compile_models/from_pytorch.html b/docs/how_to/compile_models/from_pytorch.html
index bad949d4fe..a4bc9afb6d 100644
--- a/docs/how_to/compile_models/from_pytorch.html
+++ b/docs/how_to/compile_models/from_pytorch.html
@@ -432,10 +432,10 @@ be unstable.</p>
 Downloading: &quot;https://download.pytorch.org/models/resnet18-f37072fd.pth&quot; to /workspace/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
 
   0%|          | 0.00/44.7M [00:00&lt;?, ?B/s]
- 27%|##7       | 12.2M/44.7M [00:00&lt;00:00, 128MB/s]
- 55%|#####4    | 24.5M/44.7M [00:00&lt;00:00, 98.9MB/s]
- 81%|########1 | 36.2M/44.7M [00:00&lt;00:00, 108MB/s]
-100%|##########| 44.7M/44.7M [00:00&lt;00:00, 106MB/s]
+ 33%|###3      | 14.9M/44.7M [00:00&lt;00:00, 156MB/s]
+ 67%|######6   | 29.8M/44.7M [00:00&lt;00:00, 122MB/s]
+ 94%|#########3| 41.9M/44.7M [00:00&lt;00:00, 109MB/s]
+100%|##########| 44.7M/44.7M [00:00&lt;00:00, 97.2MB/s]
 </pre></div>
 </div>
 </div>
diff --git a/docs/how_to/compile_models/from_tensorflow.html b/docs/how_to/compile_models/from_tensorflow.html
index 4a5e81f18e..eca91944a9 100644
--- a/docs/how_to/compile_models/from_tensorflow.html
+++ b/docs/how_to/compile_models/from_tensorflow.html
@@ -649,7 +649,7 @@ banana (score = 0.00022)
 desk (score = 0.00019)
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  16.036 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  11.934 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-compile-models-from-tensorflow-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/7f1d3d1b878694c201c614c807cdebc8/from_tensorflow.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">from_tensorflow.py</span></code></a></p>
diff --git a/docs/how_to/compile_models/sg_execution_times.html b/docs/how_to/compile_models/sg_execution_times.html
index f6980ff727..bbcc72ce74 100644
--- a/docs/how_to/compile_models/sg_execution_times.html
+++ b/docs/how_to/compile_models/sg_execution_times.html
@@ -340,7 +340,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-compile-models-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>05:59.311</strong> total execution time for <strong>how_to_compile_models</strong> files:</p>
+<p><strong>05:41.893</strong> total execution time for <strong>how_to_compile_models</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 81%" />
@@ -349,43 +349,43 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="from_tensorflow.html#sphx-glr-how-to-compile-models-from-tensorflow-py"><span class="std std-ref">Compile Tensorflow Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_tensorflow.py</span></code>)</p></td>
-<td><p>01:16.036</p></td>
+<td><p>01:11.934</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="from_darknet.html#sphx-glr-how-to-compile-models-from-darknet-py"><span class="std std-ref">Compile YOLO-V2 and YOLO-V3 in DarkNet Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_darknet.py</span></code>)</p></td>
-<td><p>01:13.164</p></td>
+<td><p>01:09.125</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="from_paddle.html#sphx-glr-how-to-compile-models-from-paddle-py"><span class="std std-ref">Compile PaddlePaddle Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_paddle.py</span></code>)</p></td>
-<td><p>00:49.170</p></td>
+<td><p>00:46.808</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="from_oneflow.html#sphx-glr-how-to-compile-models-from-oneflow-py"><span class="std std-ref">Compile OneFlow Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_oneflow.py</span></code>)</p></td>
-<td><p>00:33.872</p></td>
+<td><p>00:32.173</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="from_mxnet.html#sphx-glr-how-to-compile-models-from-mxnet-py"><span class="std std-ref">Compile MXNet Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_mxnet.py</span></code>)</p></td>
-<td><p>00:29.643</p></td>
+<td><p>00:28.871</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="from_coreml.html#sphx-glr-how-to-compile-models-from-coreml-py"><span class="std std-ref">Compile CoreML Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_coreml.py</span></code>)</p></td>
-<td><p>00:28.161</p></td>
+<td><p>00:26.456</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="from_tflite.html#sphx-glr-how-to-compile-models-from-tflite-py"><span class="std std-ref">Compile TFLite Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_tflite.py</span></code>)</p></td>
-<td><p>00:25.497</p></td>
+<td><p>00:24.967</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="from_pytorch.html#sphx-glr-how-to-compile-models-from-pytorch-py"><span class="std std-ref">Compile PyTorch Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_pytorch.py</span></code>)</p></td>
-<td><p>00:23.582</p></td>
+<td><p>00:22.392</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="from_keras.html#sphx-glr-how-to-compile-models-from-keras-py"><span class="std std-ref">Compile Keras Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_keras.py</span></code>)</p></td>
-<td><p>00:17.677</p></td>
+<td><p>00:16.753</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="from_onnx.html#sphx-glr-how-to-compile-models-from-onnx-py"><span class="std std-ref">Compile ONNX Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_onnx.py</span></code>)</p></td>
-<td><p>00:02.509</p></td>
+<td><p>00:02.413</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 </tbody>
diff --git a/docs/how_to/deploy_models/deploy_model_on_adreno.html b/docs/how_to/deploy_models/deploy_model_on_adreno.html
index 47443b69f3..8a39df1689 100644
--- a/docs/how_to/deploy_models/deploy_model_on_adreno.html
+++ b/docs/how_to/deploy_models/deploy_model_on_adreno.html
@@ -920,7 +920,7 @@ Top5 predictions:
 Evaluate inference time cost...
 Execution time summary:
  mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)
- 2549.2254    2548.3049    2558.3050    2546.9838      3.1227
+ 2691.7470    2691.0709    2695.0863    2689.2381      1.8076
 </pre></div>
 </div>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-deploy-models-deploy-model-on-adreno-py">
diff --git a/docs/how_to/deploy_models/deploy_model_on_android.html b/docs/how_to/deploy_models/deploy_model_on_android.html
index 72e75f3367..9fe428e5d2 100644
--- a/docs/how_to/deploy_models/deploy_model_on_android.html
+++ b/docs/how_to/deploy_models/deploy_model_on_android.html
@@ -662,7 +662,7 @@ to the remote android device.</p>
 Evaluate inference time cost...
 Execution time summary:
  mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)
-  16.5949      16.3838      17.2921      16.2440       0.4092
+  16.2766      16.1532      16.9554      15.9501       0.3318
 </pre></div>
 </div>
 </div>
diff --git a/docs/how_to/deploy_models/deploy_object_detection_pytorch.html b/docs/how_to/deploy_models/deploy_object_detection_pytorch.html
index ca63630630..3cf684dbdd 100644
--- a/docs/how_to/deploy_models/deploy_object_detection_pytorch.html
+++ b/docs/how_to/deploy_models/deploy_object_detection_pytorch.html
@@ -454,21 +454,25 @@ be unstable.</p>
 Downloading: &quot;https://download.pytorch.org/models/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth&quot; to /workspace/.cache/torch/hub/checkpoints/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth
 
   0%|          | 0.00/170M [00:00&lt;?, ?B/s]
-  7%|6         | 11.4M/170M [00:00&lt;00:01, 119MB/s]
- 14%|#4        | 24.0M/170M [00:00&lt;00:01, 109MB/s]
- 20%|##        | 34.5M/170M [00:00&lt;00:01, 95.7MB/s]
- 28%|##8       | 48.1M/170M [00:00&lt;00:01, 112MB/s]
- 35%|###4      | 59.1M/170M [00:00&lt;00:01, 105MB/s]
- 41%|####      | 69.3M/170M [00:00&lt;00:01, 99.9MB/s]
- 48%|####8     | 82.1M/170M [00:00&lt;00:00, 108MB/s]
- 56%|#####5    | 94.3M/170M [00:00&lt;00:00, 114MB/s]
- 62%|######1   | 105M/170M [00:01&lt;00:00, 96.9MB/s]
- 68%|######7   | 115M/170M [00:01&lt;00:00, 89.4MB/s]
- 74%|#######4  | 126M/170M [00:01&lt;00:00, 96.0MB/s]
- 80%|########  | 136M/170M [00:01&lt;00:00, 96.6MB/s]
- 86%|########5 | 145M/170M [00:01&lt;00:00, 94.2MB/s]
- 94%|#########4| 160M/170M [00:01&lt;00:00, 102MB/s]
-100%|##########| 170M/170M [00:01&lt;00:00, 104MB/s]
+  5%|4         | 7.99M/170M [00:00&lt;00:02, 70.7MB/s]
+ 11%|#         | 18.1M/170M [00:00&lt;00:01, 81.9MB/s]
+ 16%|#5        | 26.5M/170M [00:00&lt;00:01, 84.6MB/s]
+ 20%|##        | 34.6M/170M [00:00&lt;00:01, 81.2MB/s]
+ 25%|##4       | 42.4M/170M [00:00&lt;00:02, 62.8MB/s]
+ 31%|###       | 52.1M/170M [00:00&lt;00:01, 73.4MB/s]
+ 38%|###7      | 64.0M/170M [00:00&lt;00:01, 74.0MB/s]
+ 43%|####3     | 73.4M/170M [00:01&lt;00:01, 80.2MB/s]
+ 48%|####7     | 81.5M/170M [00:01&lt;00:01, 79.8MB/s]
+ 53%|#####2    | 89.7M/170M [00:01&lt;00:01, 81.5MB/s]
+ 58%|#####7    | 98.4M/170M [00:01&lt;00:00, 84.4MB/s]
+ 64%|######3   | 108M/170M [00:01&lt;00:00, 89.5MB/s]
+ 71%|#######   | 120M/170M [00:01&lt;00:00, 82.9MB/s]
+ 78%|#######7  | 132M/170M [00:01&lt;00:00, 93.3MB/s]
+ 83%|########3 | 141M/170M [00:01&lt;00:00, 83.3MB/s]
+ 89%|########9 | 152M/170M [00:01&lt;00:00, 79.3MB/s]
+ 94%|#########4| 160M/170M [00:02&lt;00:00, 80.0MB/s]
+ 99%|#########8| 168M/170M [00:02&lt;00:00, 72.9MB/s]
+100%|##########| 170M/170M [00:02&lt;00:00, 78.5MB/s]
 /venv/apache-tvm-py3.7/lib/python3.7/site-packages/torch/nn/functional.py:3897: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
   for i in range(dim)
 /venv/apache-tvm-py3.7/lib/python3.7/site-packages/torchvision/models/detection/anchor_utils.py:124: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the &#39;trunc&#39; function NOT &#39;floor&#39;). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode=&#39;trunc&#39;), or for actual floor division, use torch.div(a, b, rounding_mode=& [...]
@@ -566,7 +570,7 @@ torchvision rcnn models.</p>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Get 9 valid boxes
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 3 minutes  32.060 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 3 minutes  17.253 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-deploy-models-deploy-object-detection-pytorch-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/7795da4b258c8feff986668b95ef57ad/deploy_object_detection_pytorch.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">deploy_object_detection_pytorch.py</span></code></a></p>
diff --git a/docs/how_to/deploy_models/deploy_prequantized.html b/docs/how_to/deploy_models/deploy_prequantized.html
index eed337a7ff..d15ccb8011 100644
--- a/docs/how_to/deploy_models/deploy_prequantized.html
+++ b/docs/how_to/deploy_models/deploy_prequantized.html
@@ -498,9 +498,8 @@ training. Other models require a full post training calibration.</p>
 Downloading: &quot;https://download.pytorch.org/models/mobilenet_v2-b0353104.pth&quot; to /workspace/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth
 
   0%|          | 0.00/13.6M [00:00&lt;?, ?B/s]
- 59%|#####8    | 7.99M/13.6M [00:00&lt;00:00, 52.3MB/s]
- 96%|#########5| 13.0M/13.6M [00:00&lt;00:00, 46.5MB/s]
-100%|##########| 13.6M/13.6M [00:00&lt;00:00, 49.0MB/s]
+ 59%|#####8    | 7.99M/13.6M [00:00&lt;00:00, 68.6MB/s]
+100%|##########| 13.6M/13.6M [00:00&lt;00:00, 95.9MB/s]
 </pre></div>
 </div>
 </div>
@@ -591,7 +590,7 @@ output values are identical out of 1000 outputs from mobilenet v2.</p>
 </div>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Execution time summary:
  mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)
-  90.4948      90.4305      91.5414      90.2734       0.2155
+  90.5975      90.4758      94.0629      90.1740       0.5058
 </pre></div>
 </div>
 <div class="admonition note">
@@ -630,7 +629,7 @@ This includes support for the VNNI 8 bit dot product instruction (CascadeLake or
 <div class="section" id="deploy-a-quantized-tflite-model">
 <h2>Deploy a quantized TFLite Model<a class="headerlink" href="#deploy-a-quantized-tflite-model" title="Permalink to this headline">¶</a></h2>
 <p>TODO</p>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  10.336 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  6.859 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-deploy-models-deploy-prequantized-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/fb8217c13f4351224c6cf3aacf1a87fc/deploy_prequantized.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">deploy_prequantized.py</span></code></a></p>
diff --git a/docs/how_to/deploy_models/deploy_prequantized_tflite.html b/docs/how_to/deploy_models/deploy_prequantized_tflite.html
index 559ea41084..fa4cdd1f13 100644
--- a/docs/how_to/deploy_models/deploy_prequantized_tflite.html
+++ b/docs/how_to/deploy_models/deploy_prequantized_tflite.html
@@ -583,7 +583,7 @@ TFLite Top-5 labels: [387 102 386 341 349]
 </div>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Execution time summary:
  mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)
-  121.9265     121.7962     124.2560     120.8635      0.6576
+  120.1702     120.1580     121.6625     119.2851      0.4108
 </pre></div>
 </div>
 <div class="admonition note">
@@ -611,7 +611,7 @@ network for ARM CPU</span></a>.</p></li>
 </ul>
 </div></blockquote>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 2 minutes  26.227 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 2 minutes  28.878 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-deploy-models-deploy-prequantized-tflite-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/56691c7a27d45da61d112276334640d3/deploy_prequantized_tflite.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">deploy_prequantized_tflite.py</span></code></a></p>
diff --git a/docs/how_to/deploy_models/deploy_quantized.html b/docs/how_to/deploy_models/deploy_quantized.html
index d9f8624baa..c2d6457360 100644
--- a/docs/how_to/deploy_models/deploy_quantized.html
+++ b/docs/how_to/deploy_models/deploy_quantized.html
@@ -521,7 +521,7 @@ for calibration. But the accuracy might be impacted.</p>
   DeprecationWarning,
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  34.204 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  22.062 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-deploy-models-deploy-quantized-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/7810ecf51bfc05f7d5e8a400ac3e815d/deploy_quantized.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">deploy_quantized.py</span></code></a></p>
diff --git a/docs/how_to/deploy_models/deploy_ssd_gluoncv.html b/docs/how_to/deploy_models/deploy_ssd_gluoncv.html
index 19ecabc357..4bb6e8237e 100644
--- a/docs/how_to/deploy_models/deploy_ssd_gluoncv.html
+++ b/docs/how_to/deploy_models/deploy_ssd_gluoncv.html
@@ -463,22 +463,24 @@ to your device.</p>
 Downloading /workspace/.mxnet/models/ssd_512_resnet50_v1_voc-9c8b225a.zip from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/ssd_512_resnet50_v1_voc-9c8b225a.zip...
 
   0%|          | 0/132723 [00:00&lt;?, ?KB/s]
-  6%|5         | 7466/132723 [00:00&lt;00:01, 74646.19KB/s]
- 12%|#2        | 16203/132723 [00:00&lt;00:01, 82113.91KB/s]
- 19%|#8        | 24912/132723 [00:00&lt;00:01, 84382.49KB/s]
- 25%|##5       | 33686/132723 [00:00&lt;00:01, 85706.02KB/s]
- 32%|###1      | 42424/132723 [00:00&lt;00:01, 86306.24KB/s]
- 38%|###8      | 51055/132723 [00:00&lt;00:01, 80413.71KB/s]
- 45%|####5     | 59758/132723 [00:00&lt;00:00, 82473.48KB/s]
- 51%|#####1    | 68062/132723 [00:00&lt;00:00, 81859.48KB/s]
- 58%|#####7    | 76858/132723 [00:00&lt;00:00, 83708.60KB/s]
- 65%|######4   | 85625/132723 [00:01&lt;00:00, 84905.56KB/s]
- 71%|#######1  | 94323/132723 [00:01&lt;00:00, 85528.31KB/s]
- 78%|#######7  | 103128/132723 [00:01&lt;00:00, 86286.46KB/s]
- 84%|########4 | 111897/132723 [00:01&lt;00:00, 86707.58KB/s]
- 91%|######### | 120578/132723 [00:01&lt;00:00, 61654.37KB/s]
- 97%|#########7| 129242/132723 [00:01&lt;00:00, 67496.29KB/s]
-100%|##########| 132723/132723 [00:01&lt;00:00, 77975.82KB/s]
+  4%|4         | 5744/132723 [00:00&lt;00:02, 57434.17KB/s]
+ 10%|#         | 13360/132723 [00:00&lt;00:01, 68447.02KB/s]
+ 16%|#5        | 21050/132723 [00:00&lt;00:01, 72302.70KB/s]
+ 22%|##1       | 28715/132723 [00:00&lt;00:01, 74001.48KB/s]
+ 27%|##7       | 36427/132723 [00:00&lt;00:01, 75124.31KB/s]
+ 33%|###3      | 44027/132723 [00:00&lt;00:01, 75415.86KB/s]
+ 39%|###9      | 51810/132723 [00:00&lt;00:01, 76203.04KB/s]
+ 45%|####4     | 59498/132723 [00:00&lt;00:00, 76407.88KB/s]
+ 51%|#####     | 67235/132723 [00:00&lt;00:00, 76706.62KB/s]
+ 56%|#####6    | 74906/132723 [00:01&lt;00:00, 76451.74KB/s]
+ 62%|######2   | 82595/132723 [00:01&lt;00:00, 76582.84KB/s]
+ 68%|######8   | 90293/132723 [00:01&lt;00:00, 76701.75KB/s]
+ 74%|#######3  | 97997/132723 [00:01&lt;00:00, 76800.41KB/s]
+ 80%|#######9  | 105731/132723 [00:01&lt;00:00, 76960.83KB/s]
+ 85%|########5 | 113431/132723 [00:01&lt;00:00, 76971.44KB/s]
+ 91%|#########1| 121129/132723 [00:01&lt;00:00, 76872.58KB/s]
+ 97%|#########7| 128974/132723 [00:01&lt;00:00, 77345.07KB/s]
+100%|##########| 132723/132723 [00:01&lt;00:00, 75944.35KB/s]
 </pre></div>
 </div>
 <p>Create TVM runtime and do inference
@@ -517,7 +519,7 @@ Downloading /workspace/.mxnet/models/ssd_512_resnet50_v1_voc-9c8b225a.zip from h
 <span class="n">plt</span><span class="o">.</span><span class="n">show</span><span class="p">()</span>
 </pre></div>
 </div>
-<img src="../../_images/sphx_glr_deploy_ssd_gluoncv_001.png" srcset="../../_images/sphx_glr_deploy_ssd_gluoncv_001.png" alt="deploy ssd gluoncv" class = "sphx-glr-single-img"/><p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 3 minutes  16.119 seconds)</p>
+<img src="../../_images/sphx_glr_deploy_ssd_gluoncv_001.png" srcset="../../_images/sphx_glr_deploy_ssd_gluoncv_001.png" alt="deploy ssd gluoncv" class = "sphx-glr-single-img"/><p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 3 minutes  7.395 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-deploy-models-deploy-ssd-gluoncv-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/cccb17d28e5e8b2e94ea8cd5ec59f6ed/deploy_ssd_gluoncv.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">deploy_ssd_gluoncv.py</span></code></a></p>
diff --git a/docs/how_to/deploy_models/sg_execution_times.html b/docs/how_to/deploy_models/sg_execution_times.html
index 4ee06f6a00..f098497271 100644
--- a/docs/how_to/deploy_models/sg_execution_times.html
+++ b/docs/how_to/deploy_models/sg_execution_times.html
@@ -340,7 +340,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-deploy-models-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>14:21.989</strong> total execution time for <strong>how_to_deploy_models</strong> files:</p>
+<p><strong>13:42.282</strong> total execution time for <strong>how_to_deploy_models</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 86%" />
@@ -349,39 +349,39 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="deploy_object_detection_pytorch.html#sphx-glr-how-to-deploy-models-deploy-object-detection-pytorch-py"><span class="std std-ref">Compile PyTorch Object Detection Models</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_object_detection_pytorch.py</span></code>)</p></td>
-<td><p>03:32.060</p></td>
+<td><p>03:17.253</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="deploy_ssd_gluoncv.html#sphx-glr-how-to-deploy-models-deploy-ssd-gluoncv-py"><span class="std std-ref">Deploy Single Shot Multibox Detector(SSD) model</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_ssd_gluoncv.py</span></code>)</p></td>
-<td><p>03:16.119</p></td>
+<td><p>03:07.395</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="deploy_prequantized_tflite.html#sphx-glr-how-to-deploy-models-deploy-prequantized-tflite-py"><span class="std std-ref">Deploy a Framework-prequantized Model with TVM - Part 3 (TFLite)</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_prequantized_tflite.py</span></code>)</p></td>
-<td><p>02:26.227</p></td>
+<td><p>02:28.878</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="deploy_quantized.html#sphx-glr-how-to-deploy-models-deploy-quantized-py"><span class="std std-ref">Deploy a Quantized Model on Cuda</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_quantized.py</span></code>)</p></td>
-<td><p>01:34.204</p></td>
+<td><p>01:22.062</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="deploy_prequantized.html#sphx-glr-how-to-deploy-models-deploy-prequantized-py"><span class="std std-ref">Deploy a Framework-prequantized Model with TVM</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_prequantized.py</span></code>)</p></td>
-<td><p>01:10.336</p></td>
+<td><p>01:06.859</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="deploy_model_on_adreno.html#sphx-glr-how-to-deploy-models-deploy-model-on-adreno-py"><span class="std std-ref">Deploy the Pretrained Model on Adreno</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_model_on_adreno.py</span></code>)</p></td>
-<td><p>00:52.749</p></td>
+<td><p>00:53.523</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="deploy_model_on_android.html#sphx-glr-how-to-deploy-models-deploy-model-on-android-py"><span class="std std-ref">Deploy the Pretrained Model on Android</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_model_on_android.py</span></code>)</p></td>
-<td><p>00:38.152</p></td>
+<td><p>00:35.880</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="deploy_model_on_nano.html#sphx-glr-how-to-deploy-models-deploy-model-on-nano-py"><span class="std std-ref">Deploy the Pretrained Model on Jetson Nano</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_model_on_nano.py</span></code>)</p></td>
-<td><p>00:26.323</p></td>
+<td><p>00:25.446</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="deploy_model_on_rasp.html#sphx-glr-how-to-deploy-models-deploy-model-on-rasp-py"><span class="std std-ref">Deploy the Pretrained Model on Raspberry Pi</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_model_on_rasp.py</span></code>)</p></td>
-<td><p>00:25.812</p></td>
+<td><p>00:24.979</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="deploy_sparse.html#sphx-glr-how-to-deploy-models-deploy-sparse-py"><span class="std std-ref">Deploy a Hugging Face Pruned Model on CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_sparse.py</span></code>)</p></td>
diff --git a/docs/how_to/extend_tvm/bring_your_own_datatypes.html b/docs/how_to/extend_tvm/bring_your_own_datatypes.html
index 8706afb3a7..907cd2a754 100644
--- a/docs/how_to/extend_tvm/bring_your_own_datatypes.html
+++ b/docs/how_to/extend_tvm/bring_your_own_datatypes.html
@@ -622,7 +622,7 @@ In this alpha state of the Bring Your Own Datatypes framework, we have not imple
 <span class="n">module</span><span class="p">,</span> <a href="https://docs.python.org/3/library/stdtypes.html#dict" title="builtins.dict" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">params</span></a> <span class="o">=</span> <span class="n">get_mobilenet</span><span class="p">()</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading /workspace/.mxnet/models/mobilenet0.25-9f83e440.zip4655517f-a9e6-4359-9032-f03849cb94c5 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/mobilenet0.25-9f83e440.zip...
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading /workspace/.mxnet/models/mobilenet0.25-9f83e440.zip0df57d2a-3121-4336-98e0-d7239a445491 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/mobilenet0.25-9f83e440.zip...
 </pre></div>
 </div>
 <p>It’s easy to execute MobileNet with native TVM:</p>
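[Editor's sketch] The context line just above ("It's easy to execute MobileNet with native TVM") precedes code that is unchanged in this diff. For readers without the rendered page, a minimal sketch of that native-TVM run, assuming `module` and `params` are the Relay IRModule and parameter dict returned by the tutorial's get_mobilenet(), and that the input tensor name and shape are "data" and (1, 3, 224, 224) (all assumptions):

    import numpy as np
    import tvm
    from tvm import relay
    from tvm.contrib import graph_executor

    # Assumptions: `module` and `params` come from get_mobilenet() in the tutorial;
    # the input name/shape below are illustrative, not taken from this diff.
    target = "llvm"
    dev = tvm.device(target, 0)
    with tvm.transform.PassContext(opt_level=3):
        lib = relay.build(module, target=target, params=params)
    rt = graph_executor.GraphModule(lib["default"](dev))
    rt.set_input("data", tvm.nd.array(np.random.rand(1, 3, 224, 224).astype("float32")))
    rt.run()
    print(rt.get_output(0).numpy().shape)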
diff --git a/docs/how_to/extend_tvm/sg_execution_times.html b/docs/how_to/extend_tvm/sg_execution_times.html
index 165dc6f515..25fdefc4d3 100644
--- a/docs/how_to/extend_tvm/sg_execution_times.html
+++ b/docs/how_to/extend_tvm/sg_execution_times.html
@@ -340,7 +340,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-extend-tvm-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>00:51.083</strong> total execution time for <strong>how_to_extend_tvm</strong> files:</p>
+<p><strong>00:47.946</strong> total execution time for <strong>how_to_extend_tvm</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 84%" />
@@ -349,19 +349,19 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="bring_your_own_datatypes.html#sphx-glr-how-to-extend-tvm-bring-your-own-datatypes-py"><span class="std std-ref">Bring Your Own Datatypes to TVM</span></a> (<code class="docutils literal notranslate"><span class="pre">bring_your_own_datatypes.py</span></code>)</p></td>
-<td><p>00:47.390</p></td>
+<td><p>00:44.458</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="use_pass_instrument.html#sphx-glr-how-to-extend-tvm-use-pass-instrument-py"><span class="std std-ref">How to Use TVM Pass Instrument</span></a> (<code class="docutils literal notranslate"><span class="pre">use_pass_instrument.py</span></code>)</p></td>
-<td><p>00:02.587</p></td>
+<td><p>00:02.443</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="use_pass_infra.html#sphx-glr-how-to-extend-tvm-use-pass-infra-py"><span class="std std-ref">How to Use TVM Pass Infra</span></a> (<code class="docutils literal notranslate"><span class="pre">use_pass_infra.py</span></code>)</p></td>
-<td><p>00:01.097</p></td>
+<td><p>00:01.038</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="low_level_custom_pass.html#sphx-glr-how-to-extend-tvm-low-level-custom-pass-py"><span class="std std-ref">Writing a Customized Pass</span></a> (<code class="docutils literal notranslate"><span class="pre">low_level_custom_pass.py</span></code>)</p></td>
-<td><p>00:00.008</p></td>
+<td><p>00:00.007</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 </tbody>
diff --git a/docs/how_to/extend_tvm/use_pass_instrument.html b/docs/how_to/extend_tvm/use_pass_instrument.html
index c3db40bfee..df3d7844b6 100644
--- a/docs/how_to/extend_tvm/use_pass_instrument.html
+++ b/docs/how_to/extend_tvm/use_pass_instrument.html
@@ -526,10 +526,10 @@ profile the execution time of each passes.</p>
 </pre></div>
 </div>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Printing results of timing profile...
-InferType: 7602us [7602us] (46.05%; 46.05%)
-FoldScaleAxis: 8905us [9us] (53.95%; 53.95%)
-        FoldConstant: 8896us [1769us] (53.89%; 99.90%)
-                InferType: 7127us [7127us] (43.18%; 80.12%)
+InferType: 7289us [7289us] (46.47%; 46.47%)
+FoldScaleAxis: 8397us [7us] (53.53%; 53.53%)
+        FoldConstant: 8389us [1725us] (53.49%; 99.91%)
+                InferType: 6664us [6664us] (42.49%; 79.44%)
 </pre></div>
 </div>
 </div>
@@ -551,10 +551,10 @@ Refer to following sections and <a class="reference internal" href="../../refere
 </pre></div>
 </div>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Printing results of timing profile...
-InferType: 7147us [7147us] (44.78%; 44.78%)
-FoldScaleAxis: 8812us [8us] (55.22%; 55.22%)
-        FoldConstant: 8805us [1797us] (55.17%; 99.91%)
-                InferType: 7007us [7007us] (43.91%; 79.59%)
+InferType: 6687us [6687us] (44.81%; 44.81%)
+FoldScaleAxis: 8235us [5us] (55.19%; 55.19%)
+        FoldConstant: 8230us [1705us] (55.15%; 99.94%)
+                InferType: 6525us [6525us] (43.73%; 79.29%)
 </pre></div>
 </div>
 <p>Register empty list to clear existing instruments.</p>
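[Editor's sketch] The two timing profiles updated above (InferType / FoldScaleAxis / FoldConstant with percentages) are what a PassTimingInstrument renders when Relay passes run inside an instrumented PassContext. A minimal sketch of that setup, assuming a resnet-18 Relay workload as the module being transformed (an assumption here; any IRModule works):

    import tvm
    import tvm.relay as relay
    from tvm.ir.instrument import PassTimingInstrument
    from tvm.relay.testing import resnet

    # Assumption: a resnet-18 workload stands in for whatever module the tutorial profiles.
    mod, params = resnet.get_workload(num_layers=18, batch_size=1)

    timing_inst = PassTimingInstrument()
    with tvm.transform.PassContext(instruments=[timing_inst]):
        mod = relay.transform.InferType()(mod)
        mod = relay.transform.FoldScaleAxis()(mod)
        # render() must be called while the instrumented PassContext is still active.
        profiles = timing_inst.render()

    print("Printing results of timing profile...")
    print(profiles)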
diff --git a/docs/how_to/optimize_operators/opt_conv_cuda.html b/docs/how_to/optimize_operators/opt_conv_cuda.html
index ff498072f6..624f7ba2eb 100644
--- a/docs/how_to/optimize_operators/opt_conv_cuda.html
+++ b/docs/how_to/optimize_operators/opt_conv_cuda.html
@@ -578,7 +578,7 @@ latency of convolution.</p>
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Convolution: </span><span class="si">%f</span><span class="s2"> ms&quot;</span> <span class="o">%</span> <span class="p">(</span><span class="n">evaluator</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">w</span><span class="p">,</span> <span class="n">b</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span> <span class="o">*</span> <span cl [...]
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Convolution: 49.932254 ms
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Convolution: 54.204128 ms
 </pre></div>
 </div>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-optimize-operators-opt-conv-cuda-py">
diff --git a/docs/how_to/optimize_operators/opt_conv_tensorcore.html b/docs/how_to/optimize_operators/opt_conv_tensorcore.html
index 82396d9de1..debde1df3f 100644
--- a/docs/how_to/optimize_operators/opt_conv_tensorcore.html
+++ b/docs/how_to/optimize_operators/opt_conv_tensorcore.html
@@ -915,7 +915,7 @@ be able to run on our build server</p>
     <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;conv2d with tensor core: </span><span class="si">%f</span><span class="s2"> ms&quot;</span> <span class="o">%</span> <span class="p">(</span><span class="n">evaluator</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">w</span><span class="p">,</span> <span class="n">c</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span> <span class="o">* [...]
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>conv2d with tensor core: 13.363818 ms
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>conv2d with tensor core: 13.386995 ms
 </pre></div>
 </div>
 </div>
diff --git a/docs/how_to/optimize_operators/opt_gemm.html b/docs/how_to/optimize_operators/opt_gemm.html
index f9d5e6fa56..4b3887cb51 100644
--- a/docs/how_to/optimize_operators/opt_gemm.html
+++ b/docs/how_to/optimize_operators/opt_gemm.html
@@ -475,8 +475,8 @@ Then we write a baseline implementation, the simplest way to write a matrix mult
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Baseline: </span><span class="si">%f</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="n">evaluator</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">,</span> <span class="n">c</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Numpy running time: 0.019448
-Baseline: 3.529566
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Numpy running time: 0.019434
+Baseline: 3.455880
 </pre></div>
 </div>
 <p>In TVM, we can always inspect lower level IR to debug or optimize our schedule.
@@ -535,7 +535,7 @@ fill 32 * 32 * sizeof(float) which is 4KB in the cache whose total size is 32KB
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Opt1: </span><span class="si">%f</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="n">evaluator</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">,</span> <span class="n">c</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt1: 0.337350
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt1: 0.300253
 </pre></div>
 </div>
 <p>Here is the generated IR after blocking.</p>
@@ -601,7 +601,7 @@ vastly.</p>
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Opt2: </span><span class="si">%f</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="n">evaluator</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">,</span> <span class="n">c</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt2: 0.357992
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt2: 0.335396
 </pre></div>
 </div>
 <p>Here is the generated IR after vectorization.</p>
@@ -661,7 +661,7 @@ the access pattern for A matrix is more cache friendly.</p>
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Opt3: </span><span class="si">%f</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="n">evaluator</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">,</span> <span class="n">c</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt3: 0.133945
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt3: 0.117203
 </pre></div>
 </div>
 <p>Here is the generated IR after loop permutation.</p>
@@ -743,7 +743,7 @@ flattening.</p>
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Opt4: </span><span class="si">%f</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="n">evaluator</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">,</span> <span class="n">c</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt4: 0.110158
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt4: 0.109657
 </pre></div>
 </div>
 <p>Here is the generated IR after array packing.</p>
@@ -828,7 +828,7 @@ write to C when all the block results are ready.</p>
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Opt5: </span><span class="si">%f</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="n">evaluator</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">,</span> <span class="n">c</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt5: 0.112511
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt5: 0.111716
 </pre></div>
 </div>
 <p>Here is the generated IR after blocking.</p>
@@ -917,7 +917,7 @@ write to C when all the block results are ready.</p>
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Opt6: </span><span class="si">%f</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="n">opt6_time</span><span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt6: 0.148622
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt6: 0.147122
 </pre></div>
 </div>
 <p>Here is the generated IR after parallelization.</p>
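[Editor's sketch] The Opt1-Opt6 figures changed in this hunk come from re-timing the same 1024x1024 TE matmul after each scheduling step (blocking, vectorization, loop permutation, array packing, write caching, parallelization). A minimal sketch of just the blocking + vectorization step with the standard TE schedule primitives; the sizes and split factors are illustrative:

    import numpy as np
    import tvm
    from tvm import te

    M = K = N = 1024
    k = te.reduce_axis((0, K), "k")
    A = te.placeholder((M, K), name="A")
    B = te.placeholder((K, N), name="B")
    C = te.compute((M, N), lambda m, n: te.sum(A[m, k] * B[k, n], axis=k), name="C")

    s = te.create_schedule(C.op)
    bn = 32  # block both output axes into 32x32 tiles
    mo, no, mi, ni = s[C].tile(C.op.axis[0], C.op.axis[1], bn, bn)
    ko, ki = s[C].split(k, factor=4)
    s[C].reorder(mo, no, ko, ki, mi, ni)
    s[C].vectorize(ni)  # vectorize the innermost output axis

    target = "llvm"
    dev = tvm.device(target, 0)
    func = tvm.build(s, [A, B, C], target=target, name="mmult")

    a = tvm.nd.array(np.random.rand(M, K).astype("float32"), dev)
    b = tvm.nd.array(np.random.rand(K, N).astype("float32"), dev)
    c = tvm.nd.array(np.zeros((M, N), dtype="float32"), dev)
    evaluator = func.time_evaluator(func.entry_name, dev, number=10)
    print("blocked+vectorized: %f s" % evaluator(a, b, c).mean)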
diff --git a/docs/how_to/optimize_operators/sg_execution_times.html b/docs/how_to/optimize_operators/sg_execution_times.html
index 3f055e2890..f045d7952a 100644
--- a/docs/how_to/optimize_operators/sg_execution_times.html
+++ b/docs/how_to/optimize_operators/sg_execution_times.html
@@ -340,7 +340,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-optimize-operators-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>00:36.570</strong> total execution time for <strong>how_to_optimize_operators</strong> files:</p>
+<p><strong>00:35.181</strong> total execution time for <strong>how_to_optimize_operators</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 83%" />
@@ -349,15 +349,15 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="opt_gemm.html#sphx-glr-how-to-optimize-operators-opt-gemm-py"><span class="std std-ref">How to optimize GEMM on CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">opt_gemm.py</span></code>)</p></td>
-<td><p>00:33.843</p></td>
+<td><p>00:32.562</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="opt_conv_tensorcore.html#sphx-glr-how-to-optimize-operators-opt-conv-tensorcore-py"><span class="std std-ref">How to optimize convolution using TensorCores</span></a> (<code class="docutils literal notranslate"><span class="pre">opt_conv_tensorcore.py</span></code>)</p></td>
-<td><p>00:01.572</p></td>
+<td><p>00:01.521</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="opt_conv_cuda.html#sphx-glr-how-to-optimize-operators-opt-conv-cuda-py"><span class="std std-ref">How to optimize convolution on GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">opt_conv_cuda.py</span></code>)</p></td>
-<td><p>00:01.155</p></td>
+<td><p>00:01.098</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 </tbody>
diff --git a/docs/how_to/tune_with_autoscheduler/sg_execution_times.html b/docs/how_to/tune_with_autoscheduler/sg_execution_times.html
index a662611e97..d1a67c2ee6 100644
--- a/docs/how_to/tune_with_autoscheduler/sg_execution_times.html
+++ b/docs/how_to/tune_with_autoscheduler/sg_execution_times.html
@@ -340,7 +340,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-tune-with-autoscheduler-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>09:22.202</strong> total execution time for <strong>how_to_tune_with_autoscheduler</strong> files:</p>
+<p><strong>09:28.786</strong> total execution time for <strong>how_to_tune_with_autoscheduler</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 85%" />
@@ -349,27 +349,27 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="tune_conv2d_layer_cuda.html#sphx-glr-how-to-tune-with-autoscheduler-tune-conv2d-layer-cuda-py"><span class="std std-ref">Auto-scheduling a Convolution Layer for GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_conv2d_layer_cuda.py</span></code>)</p></td>
-<td><p>05:48.035</p></td>
+<td><p>05:42.208</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="tune_network_x86.html#sphx-glr-how-to-tune-with-autoscheduler-tune-network-x86-py"><span class="std std-ref">Auto-scheduling a Neural Network for x86 CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_network_x86.py</span></code>)</p></td>
-<td><p>01:35.298</p></td>
+<td><p>01:32.179</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="tune_network_cuda.html#sphx-glr-how-to-tune-with-autoscheduler-tune-network-cuda-py"><span class="std std-ref">Auto-scheduling a Neural Network for NVIDIA GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_network_cuda.py</span></code>)</p></td>
-<td><p>01:04.115</p></td>
+<td><p>01:01.921</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="tune_sparse_x86.html#sphx-glr-how-to-tune-with-autoscheduler-tune-sparse-x86-py"><span class="std std-ref">Auto-scheduling Sparse Matrix Multiplication on CPU with Custom Sketch Rule</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_sparse_x86.py</span></code>)</p></td>
-<td><p>00:30.209</p></td>
+<td><p>00:49.065</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="tune_network_arm.html#sphx-glr-how-to-tune-with-autoscheduler-tune-network-arm-py"><span class="std std-ref">Auto-scheduling a Neural Network for ARM CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_network_arm.py</span></code>)</p></td>
-<td><p>00:12.772</p></td>
+<td><p>00:12.089</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="tune_network_mali.html#sphx-glr-how-to-tune-with-autoscheduler-tune-network-mali-py"><span class="std std-ref">Auto-scheduling a Neural Network for mali GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_network_mali.py</span></code>)</p></td>
-<td><p>00:11.773</p></td>
+<td><p>00:11.324</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 </tbody>
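[Editor's sketch] The next file in the diff, tune_conv2d_layer_cuda.html, replaces the lowered TIR of the best conv2d schedule that auto-scheduler found. A minimal sketch of how such a search is driven for that workload (a 512-to-512-channel 7x7 conv2d with a 3x3 kernel, as in the tutorial); the trial count here is deliberately tiny, and a CUDA-capable GPU is assumed for measurement:

    import tvm
    from tvm import te, topi, auto_scheduler

    @auto_scheduler.register_workload
    def conv2d_layer(N, H, W, CO, CI, KH, KW, stride, padding):
        data = te.placeholder((N, CI, H, W), name="data")
        kernel = te.placeholder((CO, CI, KH, KW), name="kernel")
        bias = te.placeholder((1, CO, 1, 1), name="bias")
        conv = topi.nn.conv2d_nchw(data, kernel, stride, padding, dilation=1, out_dtype="float32")
        out = topi.nn.relu(conv + bias)
        return [data, kernel, bias, out]

    target = tvm.target.Target("cuda")
    task = auto_scheduler.SearchTask(
        func=conv2d_layer, args=(1, 7, 7, 512, 512, 3, 3, (1, 1), (1, 1)), target=target
    )

    log_file = "conv2d.json"
    tune_option = auto_scheduler.TuningOptions(
        num_measure_trials=10,  # the tutorial uses a much larger budget
        measure_callbacks=[auto_scheduler.RecordToFile(log_file)],
    )
    task.tune(tune_option)
    sch, args = task.apply_best(log_file)
    # Prints TIR comparable to the dump diffed below.
    print(tvm.lower(sch, args, simple_mode=True))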
diff --git a/docs/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.html b/docs/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.html
index 2aa8d87653..c37dc3b655 100644
--- a/docs/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.html
+++ b/docs/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.html
@@ -504,270 +504,183 @@ cooperative fetching, unrolling and operator fusion.</p>
              bias: Buffer(bias_2: Pointer(float32), float32, [1, 512, 1, 1], []),
              compute: Buffer(compute_2: Pointer(float32), float32, [1, 512, 7, 7], [])}
   buffer_map = {data_1: data, kernel_1: kernel, bias_1: bias, compute_1: compute} {
-  attr [IterVar(blockIdx.x: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;blockIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-  allocate(conv2d_nchw: Pointer(local float32), float32, [7]), storage_scope = local;
-  allocate(pad_temp.shared: Pointer(shared float32), float32, [392]), storage_scope = shared;
-  allocate(kernel.shared: Pointer(shared float32), float32, [64]), storage_scope = shared;
-  attr [IterVar(threadIdx.x: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56 {
-    conv2d_nchw_1: Buffer(conv2d_nchw, float32, [7], [], scope=&quot;local&quot;, align=16)[0] = 0f32
-    conv2d_nchw_1[1] = 0f32
+  attr [IterVar(blockIdx.x: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;blockIdx.x&quot;)] &quot;thread_extent&quot; = 128;
+  allocate(conv2d_nchw: Pointer(local float32), float32, [4]), storage_scope = local;
+  allocate(pad_temp.shared: Pointer(shared float32), float32, [2016]), storage_scope = shared;
+  allocate(kernel.shared: Pointer(shared float32), float32, [384]), storage_scope = shared;
+  attr [IterVar(threadIdx.x: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49 {
+    conv2d_nchw_1: Buffer(conv2d_nchw, float32, [4], [], scope=&quot;local&quot;, align=8)[0] = 0f32
     conv2d_nchw_1[2] = 0f32
+    conv2d_nchw_1[1] = 0f32
     conv2d_nchw_1[3] = 0f32
-    conv2d_nchw_1[4] = 0f32
-    conv2d_nchw_1[5] = 0f32
-    conv2d_nchw_1[6] = 0f32
-    for (rc.outer.outer: int32, 0, 64) {
-      for (ry.outer.outer: int32, 0, 3) {
-        attr [IterVar(threadIdx.x_1: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56 {
-          pad_temp.shared_1: Buffer(pad_temp.shared, float32, [392], [], scope=&quot;shared&quot;)[(threadIdx.x_1*4)] = @tir.if_then_else((((1 &lt;= (floordiv(floormod((threadIdx.x_1*4), 49), 7) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod((threadIdx.x_1*4), 49), 7) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1*4), 7))), data_3: Buffer(data_2, float32, [25088], [])[((((rc.outer.outer*392) + (ry.outer.outer*7)) + (threadIdx.x_1*4)) - 8)], 0f32, dtype=float32)
-          pad_temp.shared_1[((threadIdx.x_1*4) + 1)] = @tir.if_then_else((((1 &lt;= (floordiv(floormod(((threadIdx.x_1*4) + 1), 49), 7) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod(((threadIdx.x_1*4) + 1), 49), 7) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod(((threadIdx.x_1*4) + 1), 7))), data_3[((((rc.outer.outer*392) + (ry.outer.outer*7)) + (threadIdx.x_1*4)) - 7)], 0f32, dtype=float32)
-          pad_temp.shared_1[((threadIdx.x_1*4) + 2)] = @tir.if_then_else((((1 &lt;= (floordiv(floormod(((threadIdx.x_1*4) + 2), 49), 7) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod(((threadIdx.x_1*4) + 2), 49), 7) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod(((threadIdx.x_1*4) + 2), 7))), data_3[((((rc.outer.outer*392) + (ry.outer.outer*7)) + (threadIdx.x_1*4)) - 6)], 0f32, dtype=float32)
-          pad_temp.shared_1[((threadIdx.x_1*4) + 3)] = @tir.if_then_else((((1 &lt;= (floordiv(floormod(((threadIdx.x_1*4) + 3), 49), 7) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod(((threadIdx.x_1*4) + 3), 49), 7) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod(((threadIdx.x_1*4) + 3), 7))), data_3[((((rc.outer.outer*392) + (ry.outer.outer*7)) + (threadIdx.x_1*4)) - 5)], 0f32, dtype=float32)
-        }
-        attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56 {
-          if @tir.likely((threadIdx.x_1 &lt; 42), dtype=bool) {
-            pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 224), 49)*49) + (floormod((floordiv((threadIdx.x_1*4), 7) + 4), 7)*7)) + floormod((threadIdx.x_1*4), 7))] = @tir.if_then_else((((1 &lt;= (ry.outer.outer + floormod((floordiv((threadIdx.x_1*4), 7) + 4), 7))) &amp;&amp; ((ry.outer.outer + floormod((floordiv((threadIdx.x_1*4), 7) + 4), 7)) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1*4), 7))), data_3[((((((rc.outer.outer*392) + (floordiv(((threadIdx.x_1*4) + 224), 49)*49 [...]
-          }
-          if @tir.likely((threadIdx.x_1 &lt; 42), dtype=bool) {
-            pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 225), 49)*49) + (floormod((floordiv(((threadIdx.x_1*4) + 1), 7) + 4), 7)*7)) + floormod(((threadIdx.x_1*4) + 1), 7))] = @tir.if_then_else((((1 &lt;= (ry.outer.outer + floormod((floordiv(((threadIdx.x_1*4) + 1), 7) + 4), 7))) &amp;&amp; ((ry.outer.outer + floormod((floordiv(((threadIdx.x_1*4) + 1), 7) + 4), 7)) &lt; 8)) &amp;&amp; (1 &lt;= floormod(((threadIdx.x_1*4) + 1), 7))), data_3[((((((rc.outer.outer*392) + (floordiv(((t [...]
-          }
-          if @tir.likely((threadIdx.x_1 &lt; 42), dtype=bool) {
-            pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 226), 49)*49) + (floormod((floordiv(((threadIdx.x_1*4) + 2), 7) + 4), 7)*7)) + floormod(((threadIdx.x_1*4) + 2), 7))] = @tir.if_then_else((((1 &lt;= (ry.outer.outer + floormod((floordiv(((threadIdx.x_1*4) + 2), 7) + 4), 7))) &amp;&amp; ((ry.outer.outer + floormod((floordiv(((threadIdx.x_1*4) + 2), 7) + 4), 7)) &lt; 8)) &amp;&amp; (1 &lt;= floormod(((threadIdx.x_1*4) + 2), 7))), data_3[((((((rc.outer.outer*392) + (floordiv(((t [...]
-          }
-          if @tir.likely((threadIdx.x_1 &lt; 42), dtype=bool) {
-            pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 227), 49)*49) + (floormod((floordiv(((threadIdx.x_1*4) + 3), 7) + 4), 7)*7)) + floormod(((threadIdx.x_1*4) + 3), 7))] = @tir.if_then_else((((1 &lt;= (ry.outer.outer + floormod((floordiv(((threadIdx.x_1*4) + 3), 7) + 4), 7))) &amp;&amp; ((ry.outer.outer + floormod((floordiv(((threadIdx.x_1*4) + 3), 7) + 4), 7)) &lt; 8)) &amp;&amp; (1 &lt;= floormod(((threadIdx.x_1*4) + 3), 7))), data_3[((((((rc.outer.outer*392) + (floordiv(((t [...]
-          }
-        }
-        attr [IterVar(threadIdx.x_2: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
-        kernel.shared_1: Buffer(kernel.shared, float32, [64], [], scope=&quot;shared&quot;)[threadIdx.x_2] = kernel_3: Buffer(kernel_2, float32, [2359296], [])[(((((blockIdx.x*36864) + (floordiv(threadIdx.x_2, 8)*4608)) + (rc.outer.outer*72)) + (floormod(threadIdx.x_2, 8)*9)) + (ry.outer.outer*3))]
-        attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
-        if @tir.likely((threadIdx.x_2 &lt; 8), dtype=bool) {
-          kernel.shared_1[(threadIdx.x_2 + 56)] = kernel_3[(((((blockIdx.x*36864) + (rc.outer.outer*72)) + (threadIdx.x_2*9)) + (ry.outer.outer*3)) + 32256)]
-        }
-        conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[floormod(threadIdx.x, 7)]*kernel.shared_1[(floordiv(threadIdx.x, 7)*8)]))
-        conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 7)]*kernel.shared_1[(floordiv(threadIdx.x, 7)*8)]))
-        conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 14)]*kernel.shared_1[(floordiv(threadIdx.x, 7)*8)]))
-        conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 21)]*kernel.shared_1[(floordiv(threadIdx.x, 7)*8)]))
-        conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 28)]*kernel.shared_1[(floordiv(threadIdx.x, 7)*8)]))
-        conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 35)]*kernel.shared_1[(floordiv(threadIdx.x, 7)*8)]))
-        conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 42)]*kernel.shared_1[(floordiv(threadIdx.x, 7)*8)]))
-        conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 49)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 1)]))
-        conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 56)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 1)]))
-        conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 63)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 1)]))
-        conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 70)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 1)]))
-        conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 77)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 1)]))
-        conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 84)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 1)]))
-        conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 91)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 1)]))
-        conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 98)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 2)]))
-        conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 105)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 2)]))
-        conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 112)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 2)]))
-        conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 119)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 2)]))
-        conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 126)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 2)]))
-        conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 133)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 2)]))
-        conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 140)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 2)]))
-        conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 147)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 3)]))
-        conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 154)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 3)]))
-        conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 161)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 3)]))
-        conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 168)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 3)]))
-        conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 175)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 3)]))
-        conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 182)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 3)]))
-        conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 189)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 3)]))
-        conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 196)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 4)]))
-        conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 203)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 4)]))
-        conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 210)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 4)]))
-        conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 217)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 4)]))
-        conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 224)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 4)]))
-        conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 231)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 4)]))
-        conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 238)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 4)]))
-        conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 245)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 5)]))
-        conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 252)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 5)]))
-        conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 259)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 5)]))
-        conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 266)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 5)]))
-        conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 273)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 5)]))
-        conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 280)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 5)]))
-        conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 287)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 5)]))
-        conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 294)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 6)]))
-        conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 301)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 6)]))
-        conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 308)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 6)]))
-        conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 315)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 6)]))
-        conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 322)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 6)]))
-        conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 329)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 6)]))
-        conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 336)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 6)]))
-        conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 343)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 7)]))
-        conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 350)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 7)]))
-        conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 357)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 7)]))
-        conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 364)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 7)]))
-        conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 371)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 7)]))
-        conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 378)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 7)]))
-        conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 385)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 7)]))
-        attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56 {
-          pad_temp.shared_1[(threadIdx.x_1*4)] = @tir.if_then_else(((1 <= (floordiv(floormod((threadIdx.x_1*4), 49), 7) + ry.outer.outer)) && ((floordiv(floormod((threadIdx.x_1*4), 49), 7) + ry.outer.outer) < 8)), data_3[((((rc.outer.outer*392) + (ry.outer.outer*7)) + (threadIdx.x_1*4)) - 7)], 0f32, dtype=float32)
-          pad_temp.shared_1[((threadIdx.x_1*4) + 1)] = @tir.if_then_else(((1 <= (floordiv(floormod(((threadIdx.x_1*4) + 1), 49), 7) + ry.outer.outer)) && ((floordiv(floormod(((threadIdx.x_1*4) + 1), 49), 7) + ry.outer.outer) < 8)), data_3[((((rc.outer.outer*392) + (ry.outer.outer*7)) + (threadIdx.x_1*4)) - 6)], 0f32, dtype=float32)
-          pad_temp.shared_1[((threadIdx.x_1*4) + 2)] = @tir.if_then_else(((1 <= (floordiv(floormod(((threadIdx.x_1*4) + 2), 49), 7) + ry.outer.outer)) && ((floordiv(floormod(((threadIdx.x_1*4) + 2), 49), 7) + ry.outer.outer) < 8)), data_3[((((rc.outer.outer*392) + (ry.outer.outer*7)) + (threadIdx.x_1*4)) - 5)], 0f32, dtype=float32)
-          pad_temp.shared_1[((threadIdx.x_1*4) + 3)] = @tir.if_then_else(((1 <= (floordiv(floormod(((threadIdx.x_1*4) + 3), 49), 7) + ry.outer.outer)) && ((floordiv(floormod(((threadIdx.x_1*4) + 3), 49), 7) + ry.outer.outer) < 8)), data_3[((((rc.outer.outer*392) + (ry.outer.outer*7)) + (threadIdx.x_1*4)) - 4)], 0f32, dtype=float32)
-        }
-        attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56 {
-          if @tir.likely((threadIdx.x_1 < 42), dtype=bool) {
-            pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 224), 49)*49) + (floormod((floordiv((threadIdx.x_1*4), 7) + 4), 7)*7)) + floormod((threadIdx.x_1*4), 7))] = @tir.if_then_else(((1 <= (ry.outer.outer + floormod((floordiv((threadIdx.x_1*4), 7) + 4), 7))) && ((ry.outer.outer + floormod((floordiv((threadIdx.x_1*4), 7) + 4), 7)) < 8)), data_3[((((((rc.outer.outer*392) + (floordiv(((threadIdx.x_1*4) + 224), 49)*49)) + (ry.outer.outer*7)) + (floormod((floordiv((thread [...]
+    for (rc.outer.outer: int32, 0, 16) {
+      for (rx.outer.outer: int32, 0, 3) {
+        let cse_var_2: int32 = (rc.outer.outer*1568)
+        let cse_var_1: int32 = (rc.outer.outer*288)
+         {
+          attr [IterVar(threadIdx.x_1: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          pad_temp.shared_1: Buffer(pad_temp.shared, float32, [2016], [], scope="shared")[threadIdx.x_1] = @tir.if_then_else((((7 <= threadIdx.x_1) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3: Buffer(data_2, float32, [25088], [])[(((cse_var_2 + threadIdx.x_1) + rx.outer.outer) - 8)], 0f32, dtype=float32)
+          attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          pad_temp.shared_1[(threadIdx.x_1 + 49)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 7), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 7), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 49), 63)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 7), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1 [...]
+          attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          pad_temp.shared_1[(threadIdx.x_1 + 98)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 5), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 5), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 98), 63)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 5), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1 [...]
+          attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          pad_temp.shared_1[(threadIdx.x_1 + 147)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 3), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 3), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 147), 63)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 3), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x [...]
+          attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          pad_temp.shared_1[(threadIdx.x_1 + 196)] = @tir.if_then_else(((1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 196), 63)*49)) + ((floordiv(threadIdx.x_1, 7) + 1)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
+          attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          pad_temp.shared_1[(threadIdx.x_1 + 245)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 8), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 8), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 245), 63)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 8), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x [...]
+          attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          pad_temp.shared_1[(threadIdx.x_1 + 294)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 6), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 6), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 294), 63)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 6), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x [...]
+          attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          pad_temp.shared_1[(threadIdx.x_1 + 343)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 4), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 4), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 343), 63)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 4), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x [...]
+          attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          pad_temp.shared_1[(threadIdx.x_1 + 392)] = @tir.if_then_else((((threadIdx.x_1 < 42) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 392), 63)*49)) + ((floordiv(threadIdx.x_1, 7) + 2)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
+          attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          pad_temp.shared_1[(threadIdx.x_1 + 441)] = @tir.if_then_else((((7 <= threadIdx.x_1) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((cse_var_2 + threadIdx.x_1) + rx.outer.outer) + 335)], 0f32, dtype=float32)
+          attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          pad_temp.shared_1[(threadIdx.x_1 + 490)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 7), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 7), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 490), 63)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 7), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x [...]
+          attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          pad_temp.shared_1[(threadIdx.x_1 + 539)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 5), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 5), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 539), 63)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 5), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x [...]
+          attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          pad_temp.shared_1[(threadIdx.x_1 + 588)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 3), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 3), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 588), 63)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 3), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x [...]
+          attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          pad_temp.shared_1[(threadIdx.x_1 + 637)] = @tir.if_then_else(((1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 637), 63)*49)) + ((floordiv(threadIdx.x_1, 7) + 1)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
+          attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          pad_temp.shared_1[(threadIdx.x_1 + 686)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 8), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 8), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 686), 63)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 8), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x [...]
+          attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          pad_temp.shared_1[(threadIdx.x_1 + 735)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 6), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 6), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 735), 63)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 6), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x [...]
+          attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          pad_temp.shared_1[(threadIdx.x_1 + 784)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 4), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 4), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 784), 63)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 4), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x [...]
+          attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          pad_temp.shared_1[(threadIdx.x_1 + 833)] = @tir.if_then_else((((threadIdx.x_1 < 42) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 833), 63)*49)) + ((floordiv(threadIdx.x_1, 7) + 2)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
+          attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          pad_temp.shared_1[(threadIdx.x_1 + 882)] = @tir.if_then_else((((7 <= threadIdx.x_1) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((cse_var_2 + threadIdx.x_1) + rx.outer.outer) + 678)], 0f32, dtype=float32)
+          attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          pad_temp.shared_1[(threadIdx.x_1 + 931)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 7), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 7), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 931), 63)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 7), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x [...]
+          attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          pad_temp.shared_1[(threadIdx.x_1 + 980)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 5), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 5), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 980), 63)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 5), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x [...]
+          attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          pad_temp.shared_1[(threadIdx.x_1 + 1029)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 3), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 3), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 1029), 63)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 3), 9)*7)) + rx.outer.outer) + floormod(threadIdx [...]
+          attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          pad_temp.shared_1[(threadIdx.x_1 + 1078)] = @tir.if_then_else(((1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 1078), 63)*49)) + ((floordiv(threadIdx.x_1, 7) + 1)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
+          attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          pad_temp.shared_1[(threadIdx.x_1 + 1127)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 8), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 8), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 1127), 63)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 8), 9)*7)) + rx.outer.outer) + floormod(threadIdx [...]
+          attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          pad_temp.shared_1[(threadIdx.x_1 + 1176)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 6), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 6), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 1176), 63)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 6), 9)*7)) + rx.outer.outer) + floormod(threadIdx [...]
+          attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          pad_temp.shared_1[(threadIdx.x_1 + 1225)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 4), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 4), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 1225), 63)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 4), 9)*7)) + rx.outer.outer) + floormod(threadIdx [...]
+          attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          pad_temp.shared_1[(threadIdx.x_1 + 1274)] = @tir.if_then_else((((threadIdx.x_1 < 42) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 1274), 63)*49)) + ((floordiv(threadIdx.x_1, 7) + 2)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
+          attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          pad_temp.shared_1[(threadIdx.x_1 + 1323)] = @tir.if_then_else((((7 <= threadIdx.x_1) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((cse_var_2 + threadIdx.x_1) + rx.outer.outer) + 1021)], 0f32, dtype=float32)
+          attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          pad_temp.shared_1[(threadIdx.x_1 + 1372)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 7), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 7), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 1372), 63)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 7), 9)*7)) + rx.outer.outer) + floormod(threadIdx [...]
+          attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          pad_temp.shared_1[(threadIdx.x_1 + 1421)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 5), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 5), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 1421), 63)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 5), 9)*7)) + rx.outer.outer) + floormod(threadIdx [...]
+          attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          pad_temp.shared_1[(threadIdx.x_1 + 1470)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 3), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 3), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 1470), 63)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 3), 9)*7)) + rx.outer.outer) + floormod(threadIdx [...]
+          attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          pad_temp.shared_1[(threadIdx.x_1 + 1519)] = @tir.if_then_else(((1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 1519), 63)*49)) + ((floordiv(threadIdx.x_1, 7) + 1)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
+          attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          pad_temp.shared_1[(threadIdx.x_1 + 1568)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 8), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 8), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 1568), 63)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 8), 9)*7)) + rx.outer.outer) + floormod(threadIdx [...]
+          attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          pad_temp.shared_1[(threadIdx.x_1 + 1617)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 6), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 6), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 1617), 63)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 6), 9)*7)) + rx.outer.outer) + floormod(threadIdx [...]
+          attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          pad_temp.shared_1[(threadIdx.x_1 + 1666)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 4), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 4), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 1666), 63)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 4), 9)*7)) + rx.outer.outer) + floormod(threadIdx [...]
+          attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          pad_temp.shared_1[(threadIdx.x_1 + 1715)] = @tir.if_then_else((((threadIdx.x_1 < 42) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 1715), 63)*49)) + ((floordiv(threadIdx.x_1, 7) + 2)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
+          attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          pad_temp.shared_1[(threadIdx.x_1 + 1764)] = @tir.if_then_else((((7 <= threadIdx.x_1) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((cse_var_2 + threadIdx.x_1) + rx.outer.outer) + 1364)], 0f32, dtype=float32)
+          attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          pad_temp.shared_1[(threadIdx.x_1 + 1813)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 7), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 7), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 1813), 63)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 7), 9)*7)) + rx.outer.outer) + floormod(threadIdx [...]
+          attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          pad_temp.shared_1[(threadIdx.x_1 + 1862)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 5), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 5), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 1862), 63)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 5), 9)*7)) + rx.outer.outer) + floormod(threadIdx [...]
+          attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          pad_temp.shared_1[(threadIdx.x_1 + 1911)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 3), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 3), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 1911), 63)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 3), 9)*7)) + rx.outer.outer) + floormod(threadIdx [...]
+          attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          pad_temp.shared_1[(threadIdx.x_1 + 1960)] = @tir.if_then_else(((1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data_3[(((((cse_var_2 + (floordiv((threadIdx.x_1 + 1960), 63)*49)) + ((floordiv(threadIdx.x_1, 7) + 1)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8)], 0f32, dtype=float32)
+          attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          if @tir.likely((threadIdx.x_1 < 7), dtype=bool) {
+            pad_temp.shared_1[(threadIdx.x_1 + 2009)] = 0f32
           }
-          if @tir.likely((threadIdx.x_1 < 42), dtype=bool) {
-            pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 225), 49)*49) + (floormod((floordiv(((threadIdx.x_1*4) + 1), 7) + 4), 7)*7)) + floormod(((threadIdx.x_1*4) + 1), 7))] = @tir.if_then_else(((1 <= (ry.outer.outer + floormod((floordiv(((threadIdx.x_1*4) + 1), 7) + 4), 7))) && ((ry.outer.outer + floormod((floordiv(((threadIdx.x_1*4) + 1), 7) + 4), 7)) < 8)), data_3[((((((rc.outer.outer*392) + (floordiv(((threadIdx.x_1*4) + 225), 49)*49)) + (ry.outer.outer*7)) + (fl [...]
+          attr [IterVar(threadIdx.x_2: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          kernel.shared_1: Buffer(kernel.shared, float32, [384], [], scope="shared")[threadIdx.x_2] = kernel_3: Buffer(kernel_2, float32, [2359296], [])[((((blockIdx.x*18432) + cse_var_1) + (threadIdx.x_2*3)) + rx.outer.outer)]
+          attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          kernel.shared_1[(threadIdx.x_2 + 49)] = kernel_3[((((((blockIdx.x*18432) + (floordiv((threadIdx.x_2 + 49), 96)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 49), 96), 3)*9)) + (floormod((threadIdx.x_2 + 1), 3)*3)) + rx.outer.outer)]
+          attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          kernel.shared_1[(threadIdx.x_2 + 98)] = kernel_3[(((((blockIdx.x*18432) + (floordiv((threadIdx.x_2 + 98), 96)*4608)) + cse_var_1) + (floormod((threadIdx.x_2 + 2), 96)*3)) + rx.outer.outer)]
+          attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          kernel.shared_1[(threadIdx.x_2 + 147)] = kernel_3[((((((blockIdx.x*18432) + (floordiv((threadIdx.x_2 + 147), 96)*4608)) + cse_var_1) + (floormod((floordiv(threadIdx.x_2, 3) + 17), 32)*9)) + (floormod(threadIdx.x_2, 3)*3)) + rx.outer.outer)]
+          attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          kernel.shared_1[(threadIdx.x_2 + 196)] = kernel_3[((((((blockIdx.x*18432) + (floordiv((threadIdx.x_2 + 196), 96)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 4), 96), 3)*9)) + (floormod((threadIdx.x_2 + 1), 3)*3)) + rx.outer.outer)]
+          attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          kernel.shared_1[(threadIdx.x_2 + 245)] = kernel_3[((((((blockIdx.x*18432) + (floordiv((threadIdx.x_2 + 245), 96)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 53), 96), 3)*9)) + (floormod((threadIdx.x_2 + 2), 3)*3)) + rx.outer.outer)]
+          attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          kernel.shared_1[(threadIdx.x_2 + 294)] = kernel_3[((((((blockIdx.x*18432) + (floordiv((threadIdx.x_2 + 294), 96)*4608)) + cse_var_1) + ((floordiv(threadIdx.x_2, 3) + 2)*9)) + (floormod(threadIdx.x_2, 3)*3)) + rx.outer.outer)]
+          attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          if @tir.likely((threadIdx.x_2 < 41), dtype=bool) {
+            kernel.shared_1[(threadIdx.x_2 + 343)] = kernel_3[((((((blockIdx.x*18432) + (floordiv((threadIdx.x_2 + 343), 96)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 55), 96), 3)*9)) + (floormod((threadIdx.x_2 + 1), 3)*3)) + rx.outer.outer)]
           }
-          if @tir.likely((threadIdx.x_1 < 42), dtype=bool) {
-            pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 226), 49)*49) + (floormod((floordiv(((threadIdx.x_1*4) + 2), 7) + 4), 7)*7)) + floormod(((threadIdx.x_1*4) + 2), 7))] = @tir.if_then_else(((1 <= (ry.outer.outer + floormod((floordiv(((threadIdx.x_1*4) + 2), 7) + 4), 7))) && ((ry.outer.outer + floormod((floordiv(((threadIdx.x_1*4) + 2), 7) + 4), 7)) < 8)), data_3[((((((rc.outer.outer*392) + (floordiv(((threadIdx.x_1*4) + 226), 49)*49)) + (ry.outer.outer*7)) + (fl [...]
+          for (rc.outer.inner: int32, 0, 8) {
+            let cse_var_3: int32 = (rc.outer.inner*12)
+             {
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[((rc.outer.inner*252) + threadIdx.x)]*kernel.shared_1[cse_var_3]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[((rc.outer.inner*252) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 192)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[((rc.outer.inner*252) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 96)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[((rc.outer.inner*252) + threadIdx.x)]*kernel.shared_1[(cse_var_3 + 288)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 3)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 195)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 99)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 63)]*kernel.shared_1[(cse_var_3 + 291)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 6)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 198)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 102)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 126)]*kernel.shared_1[(cse_var_3 + 294)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 9)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 201)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 105)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 189)]*kernel.shared_1[(cse_var_3 + 297)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 1)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 193)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 97)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 7)]*kernel.shared_1[(cse_var_3 + 289)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 4)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 196)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 100)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 70)]*kernel.shared_1[(cse_var_3 + 292)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 7)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 199)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 103)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 133)]*kernel.shared_1[(cse_var_3 + 295)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 10)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 202)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 106)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 196)]*kernel.shared_1[(cse_var_3 + 298)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 2)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 194)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 98)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 14)]*kernel.shared_1[(cse_var_3 + 290)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 5)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 197)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 101)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 77)]*kernel.shared_1[(cse_var_3 + 293)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 8)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 200)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 104)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 140)]*kernel.shared_1[(cse_var_3 + 296)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 11)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 203)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 107)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*252) + threadIdx.x) + 203)]*kernel.shared_1[(cse_var_3 + 299)]))
+            }
           }
-          if @tir.likely((threadIdx.x_1 < 42), dtype=bool) {
-            pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 227), 49)*49) + (floormod((floordiv(((threadIdx.x_1*4) + 3), 7) + 4), 7)*7)) + floormod(((threadIdx.x_1*4) + 3), 7))] = @tir.if_then_else(((1 <= (ry.outer.outer + floormod((floordiv(((threadIdx.x_1*4) + 3), 7) + 4), 7))) && ((ry.outer.outer + floormod((floordiv(((threadIdx.x_1*4) + 3), 7) + 4), 7)) < 8)), data_3[((((((rc.outer.outer*392) + (floordiv(((threadIdx.x_1*4) + 227), 49)*49)) + (ry.outer.outer*7)) + (fl [...]
-          }
-        }
-        attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
-        kernel.shared_1[threadIdx.x_2] = kernel_3[((((((blockIdx.x*36864) + (floordiv(threadIdx.x_2, 8)*4608)) + (rc.outer.outer*72)) + (floormod(threadIdx.x_2, 8)*9)) + (ry.outer.outer*3)) + 1)]
-        attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
-        if @tir.likely((threadIdx.x_2 < 8), dtype=bool) {
-          kernel.shared_1[(threadIdx.x_2 + 56)] = kernel_3[(((((blockIdx.x*36864) + (rc.outer.outer*72)) + (threadIdx.x_2*9)) + (ry.outer.outer*3)) + 32257)]
-        }
-        conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[floormod(threadIdx.x, 7)]*kernel.shared_1[(floordiv(threadIdx.x, 7)*8)]))
-        conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 7)]*kernel.shared_1[(floordiv(threadIdx.x, 7)*8)]))
-        conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 14)]*kernel.shared_1[(floordiv(threadIdx.x, 7)*8)]))
-        conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 21)]*kernel.shared_1[(floordiv(threadIdx.x, 7)*8)]))
-        conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 28)]*kernel.shared_1[(floordiv(threadIdx.x, 7)*8)]))
-        conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 35)]*kernel.shared_1[(floordiv(threadIdx.x, 7)*8)]))
-        conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 42)]*kernel.shared_1[(floordiv(threadIdx.x, 7)*8)]))
-        conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 49)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 1)]))
-        conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 56)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 1)]))
-        conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 63)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 1)]))
-        conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 70)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 1)]))
-        conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 77)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 1)]))
-        conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 84)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 1)]))
-        conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 91)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 1)]))
-        conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 98)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 2)]))
-        conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 105)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 2)]))
-        conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 112)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 2)]))
-        conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 119)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 2)]))
-        conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 126)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 2)]))
-        conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 133)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 2)]))
-        conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 140)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 2)]))
-        conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 147)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 3)]))
-        conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 154)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 3)]))
-        conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 161)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 3)]))
-        conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 168)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 3)]))
-        conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 175)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 3)]))
-        conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 182)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 3)]))
-        conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 189)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 3)]))
-        conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 196)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 4)]))
-        conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 203)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 4)]))
-        conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 210)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 4)]))
-        conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 217)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 4)]))
-        conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 224)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 4)]))
-        conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 231)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 4)]))
-        conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 238)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 4)]))
-        conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 245)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 5)]))
-        conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 252)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 5)]))
-        conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 259)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 5)]))
-        conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 266)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 5)]))
-        conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 273)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 5)]))
-        conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 280)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 5)]))
-        conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 287)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 5)]))
-        conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 294)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 6)]))
-        conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 301)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 6)]))
-        conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 308)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 6)]))
-        conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 315)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 6)]))
-        conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 322)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 6)]))
-        conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 329)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 6)]))
-        conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 336)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 6)]))
-        conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 343)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 7)]))
-        conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 350)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 7)]))
-        conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 357)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 7)]))
-        conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 364)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 7)]))
-        conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 371)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 7)]))
-        conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 378)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 7)]))
-        conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 385)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 7)]))
-        attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56 {
-          pad_temp.shared_1[(threadIdx.x_1*4)] = @tir.if_then_else((((1 <= (floordiv(floormod((threadIdx.x_1*4), 49), 7) + ry.outer.outer)) && ((floordiv(floormod((threadIdx.x_1*4), 49), 7) + ry.outer.outer) < 8)) && (floormod((threadIdx.x_1*4), 7) < 6)), data_3[((((rc.outer.outer*392) + (ry.outer.outer*7)) + (threadIdx.x_1*4)) - 6)], 0f32, dtype=float32)
-          pad_temp.shared_1[((threadIdx.x_1*4) + 1)] = @tir.if_then_else((((1 <= (floordiv(floormod(((threadIdx.x_1*4) + 1), 49), 7) + ry.outer.outer)) && ((floordiv(floormod(((threadIdx.x_1*4) + 1), 49), 7) + ry.outer.outer) < 8)) && (floormod(((threadIdx.x_1*4) + 1), 7) < 6)), data_3[((((rc.outer.outer*392) + (ry.outer.outer*7)) + (threadIdx.x_1*4)) - 5)], 0f32, dtype=float32)
-          pad_temp.shared_1[((threadIdx.x_1*4) + 2)] = @tir.if_then_else((((1 <= (floordiv(floormod(((threadIdx.x_1*4) + 2), 49), 7) + ry.outer.outer)) && ((floordiv(floormod(((threadIdx.x_1*4) + 2), 49), 7) + ry.outer.outer) < 8)) && (floormod(((threadIdx.x_1*4) + 2), 7) < 6)), data_3[((((rc.outer.outer*392) + (ry.outer.outer*7)) + (threadIdx.x_1*4)) - 4)], 0f32, dtype=float32)
-          pad_temp.shared_1[((threadIdx.x_1*4) + 3)] = @tir.if_then_else((((1 <= (floordiv(floormod(((threadIdx.x_1*4) + 3), 49), 7) + ry.outer.outer)) && ((floordiv(floormod(((threadIdx.x_1*4) + 3), 49), 7) + ry.outer.outer) < 8)) && (floormod(((threadIdx.x_1*4) + 3), 7) < 6)), data_3[((((rc.outer.outer*392) + (ry.outer.outer*7)) + (threadIdx.x_1*4)) - 3)], 0f32, dtype=float32)
-        }
-        attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56 {
-          if @tir.likely((threadIdx.x_1 < 42), dtype=bool) {
-            pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 224), 49)*49) + (floormod((floordiv((threadIdx.x_1*4), 7) + 4), 7)*7)) + floormod((threadIdx.x_1*4), 7))] = @tir.if_then_else((((1 <= (ry.outer.outer + floormod((floordiv((threadIdx.x_1*4), 7) + 4), 7))) && ((ry.outer.outer + floormod((floordiv((threadIdx.x_1*4), 7) + 4), 7)) < 8)) && (floormod((threadIdx.x_1*4), 7) < 6)), data_3[((((((rc.outer.outer*392) + (floordiv(((threadIdx.x_1*4) + 224), 49)*49) [...]
-          }
-          if @tir.likely((threadIdx.x_1 < 42), dtype=bool) {
-            pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 225), 49)*49) + (floormod((floordiv(((threadIdx.x_1*4) + 1), 7) + 4), 7)*7)) + floormod(((threadIdx.x_1*4) + 1), 7))] = @tir.if_then_else((((1 <= (ry.outer.outer + floormod((floordiv(((threadIdx.x_1*4) + 1), 7) + 4), 7))) && ((ry.outer.outer + floormod((floordiv(((threadIdx.x_1*4) + 1), 7) + 4), 7)) < 8)) && (floormod(((threadIdx.x_1*4) + 1), 7) < 6)), data_3[((((((rc.outer.outer*392) + (floordiv(((th [...]
-          }
-          if @tir.likely((threadIdx.x_1 < 42), dtype=bool) {
-            pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 226), 49)*49) + (floormod((floordiv(((threadIdx.x_1*4) + 2), 7) + 4), 7)*7)) + floormod(((threadIdx.x_1*4) + 2), 7))] = @tir.if_then_else((((1 <= (ry.outer.outer + floormod((floordiv(((threadIdx.x_1*4) + 2), 7) + 4), 7))) && ((ry.outer.outer + floormod((floordiv(((threadIdx.x_1*4) + 2), 7) + 4), 7)) < 8)) && (floormod(((threadIdx.x_1*4) + 2), 7) < 6)), data_3[((((((rc.outer.outer*392) + (floordiv(((th [...]
-          }
-          if @tir.likely((threadIdx.x_1 < 42), dtype=bool) {
-            pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 227), 49)*49) + (floormod((floordiv(((threadIdx.x_1*4) + 3), 7) + 4), 7)*7)) + floormod(((threadIdx.x_1*4) + 3), 7))] = @tir.if_then_else((((1 <= (ry.outer.outer + floormod((floordiv(((threadIdx.x_1*4) + 3), 7) + 4), 7))) && ((ry.outer.outer + floormod((floordiv(((threadIdx.x_1*4) + 3), 7) + 4), 7)) < 8)) && (floormod(((threadIdx.x_1*4) + 3), 7) < 6)), data_3[((((((rc.outer.outer*392) + (floordiv(((th [...]
-          }
-        }
-        attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
-        kernel.shared_1[threadIdx.x_2] = kernel_3[((((((blockIdx.x*36864) + (floordiv(threadIdx.x_2, 8)*4608)) + (rc.outer.outer*72)) + (floormod(threadIdx.x_2, 8)*9)) + (ry.outer.outer*3)) + 2)]
-        attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
-        if @tir.likely((threadIdx.x_2 < 8), dtype=bool) {
-          kernel.shared_1[(threadIdx.x_2 + 56)] = kernel_3[(((((blockIdx.x*36864) + (rc.outer.outer*72)) + (threadIdx.x_2*9)) + (ry.outer.outer*3)) + 32258)]
         }
-        conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[floormod(threadIdx.x, 7)]*kernel.shared_1[(floordiv(threadIdx.x, 7)*8)]))
-        conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 7)]*kernel.shared_1[(floordiv(threadIdx.x, 7)*8)]))
-        conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 14)]*kernel.shared_1[(floordiv(threadIdx.x, 7)*8)]))
-        conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 21)]*kernel.shared_1[(floordiv(threadIdx.x, 7)*8)]))
-        conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 28)]*kernel.shared_1[(floordiv(threadIdx.x, 7)*8)]))
-        conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 35)]*kernel.shared_1[(floordiv(threadIdx.x, 7)*8)]))
-        conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 42)]*kernel.shared_1[(floordiv(threadIdx.x, 7)*8)]))
-        conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 49)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 1)]))
-        conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 56)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 1)]))
-        conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 63)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 1)]))
-        conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 70)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 1)]))
-        conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 77)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 1)]))
-        conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 84)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 1)]))
-        conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 91)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 1)]))
-        conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 98)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 2)]))
-        conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 105)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 2)]))
-        conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 112)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 2)]))
-        conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 119)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 2)]))
-        conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 126)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 2)]))
-        conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 133)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 2)]))
-        conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 140)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 2)]))
-        conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 147)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 3)]))
-        conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 154)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 3)]))
-        conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 161)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 3)]))
-        conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 168)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 3)]))
-        conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 175)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 3)]))
-        conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 182)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 3)]))
-        conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 189)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 3)]))
-        conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 196)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 4)]))
-        conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 203)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 4)]))
-        conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 210)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 4)]))
-        conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 217)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 4)]))
-        conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 224)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 4)]))
-        conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 231)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 4)]))
-        conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 238)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 4)]))
-        conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 245)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 5)]))
-        conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 252)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 5)]))
-        conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 259)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 5)]))
-        conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 266)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 5)]))
-        conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 273)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 5)]))
-        conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 280)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 5)]))
-        conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 287)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 5)]))
-        conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 294)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 6)]))
-        conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 301)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 6)]))
-        conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 308)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 6)]))
-        conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 315)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 6)]))
-        conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 322)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 6)]))
-        conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 329)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 6)]))
-        conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 336)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 6)]))
-        conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 343)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 7)]))
-        conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 350)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 7)]))
-        conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 357)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 7)]))
-        conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 364)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 7)]))
-        conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 371)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 7)]))
-        conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 378)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 7)]))
-        conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(floormod(threadIdx.x, 7) + 385)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*8) + 7)]))
       }
     }
-    for (i2.inner: int32, 0, 7) {
-      compute_3: Buffer(compute_2, float32, [25088], [])[((((blockIdx.x*392) + (floordiv(threadIdx.x, 7)*49)) + (i2.inner*7)) + floormod(threadIdx.x, 7))] = max((conv2d_nchw_1[i2.inner] + bias_3: Buffer(bias_2, float32, [512], [])[((blockIdx.x*8) + floordiv(threadIdx.x, 7))]), 0f32)
+    for (i1.inner: int32, 0, 2) {
+      compute_3: Buffer(compute_2, float32, [25088], [])[(((blockIdx.x*196) + (i1.inner*49)) + threadIdx.x)] = max((conv2d_nchw_1[i1.inner] + bias_3: Buffer(bias_2, float32, [512], [])[((blockIdx.x*4) + i1.inner)]), 0f32)
+      compute_3[((((blockIdx.x*196) + (i1.inner*49)) + threadIdx.x) + 98)] = max((conv2d_nchw_1[(i1.inner + 2)] + bias_3[(((blockIdx.x*4) + i1.inner) + 2)]), 0f32)
     }
   }
 }
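
For reference, lowered TIR listings like the one above are what the tutorial prints after applying the best tuning record. A minimal sketch of that step, assuming a SearchTask named `task` and a tuning log `log_file` defined as in tune_conv2d_layer_cuda (both are assumptions, not part of this diff):

    # Sketch only; `task` and `log_file` are assumed to exist as in the tutorial.
    import tvm

    sch, args = task.apply_best(log_file)          # best schedule found so far
    print(tvm.lower(sch, args, simple_mode=True))  # emits lowered TIR like the listing above
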
@@ -804,7 +717,7 @@ cooperative fetching, unrolling and operator fusion.</p>
 <span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Execution time of this operator: 0.552 ms
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Execution time of this operator: 0.244 ms
 </pre></div>
 </div>
 </div>
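
The execution time reported above is obtained with TVM's built-in time evaluator. A minimal sketch of that measurement, assuming `func` is the compiled result of tvm.build(sch, args, target) and that the `*_np` arrays match the tutorial's conv2d workload (these names and shapes are assumptions, not taken from this diff):

    # Sketch of the timing step; `func`, data_np, weight_np, bias_np, out_np are
    # assumed to be defined as in the tune_conv2d_layer_cuda tutorial
    # (data (1, 512, 7, 7), weight (512, 512, 3, 3), float32).
    import numpy as np
    import tvm

    dev = tvm.cuda(0)
    data_tvm = tvm.nd.array(data_np, device=dev)
    weight_tvm = tvm.nd.array(weight_np, device=dev)
    bias_tvm = tvm.nd.array(bias_np, device=dev)
    out_tvm = tvm.nd.empty(out_np.shape, device=dev)

    evaluator = func.time_evaluator(func.entry_name, dev, min_repeat_ms=500)
    print(
        "Execution time of this operator: %.3f ms"
        % (np.median(evaluator(data_tvm, weight_tvm, bias_tvm, out_tvm).results) * 1000)
    )
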
@@ -833,33 +746,33 @@ conv2d_nchw_nn_o_i, conv2d_nchw_nn_i = s[conv2d_nchw].split(conv2d_nchw_nn, fact
 conv2d_nchw_nn_o_o_i, conv2d_nchw_nn_o_i = s[conv2d_nchw].split(conv2d_nchw_nn_o_i, factor=1)
 conv2d_nchw_nn_o_o_o_i, conv2d_nchw_nn_o_o_i = s[conv2d_nchw].split(conv2d_nchw_nn_o_o_i, factor=1)
 conv2d_nchw_nn_o_o_o_o, conv2d_nchw_nn_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_nn_o_o_o_i, factor=1)
-conv2d_nchw_ff_o_i, conv2d_nchw_ff_i = s[conv2d_nchw].split(conv2d_nchw_ff, factor=1)
+conv2d_nchw_ff_o_i, conv2d_nchw_ff_i = s[conv2d_nchw].split(conv2d_nchw_ff, factor=2)
 conv2d_nchw_ff_o_o_i, conv2d_nchw_ff_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_i, factor=1)
-conv2d_nchw_ff_o_o_o_i, conv2d_nchw_ff_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_i, factor=8)
-conv2d_nchw_ff_o_o_o_o, conv2d_nchw_ff_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_o_i, factor=1)
-conv2d_nchw_yy_o_i, conv2d_nchw_yy_i = s[conv2d_nchw].split(conv2d_nchw_yy, factor=7)
+conv2d_nchw_ff_o_o_o_i, conv2d_nchw_ff_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_i, factor=1)
+conv2d_nchw_ff_o_o_o_o, conv2d_nchw_ff_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_o_i, factor=2)
+conv2d_nchw_yy_o_i, conv2d_nchw_yy_i = s[conv2d_nchw].split(conv2d_nchw_yy, factor=1)
 conv2d_nchw_yy_o_o_i, conv2d_nchw_yy_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_i, factor=1)
-conv2d_nchw_yy_o_o_o_i, conv2d_nchw_yy_o_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_o_i, factor=1)
+conv2d_nchw_yy_o_o_o_i, conv2d_nchw_yy_o_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_o_i, factor=7)
 conv2d_nchw_yy_o_o_o_o, conv2d_nchw_yy_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_o_o_i, factor=1)
 conv2d_nchw_xx_o_i, conv2d_nchw_xx_i = s[conv2d_nchw].split(conv2d_nchw_xx, factor=1)
 conv2d_nchw_xx_o_o_i, conv2d_nchw_xx_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_i, factor=1)
 conv2d_nchw_xx_o_o_o_i, conv2d_nchw_xx_o_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_o_i, factor=7)
 conv2d_nchw_xx_o_o_o_o, conv2d_nchw_xx_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_o_o_i, factor=1)
 conv2d_nchw_rc_o_i, conv2d_nchw_rc_i = s[conv2d_nchw].split(conv2d_nchw_rc, factor=4)
-conv2d_nchw_rc_o_o, conv2d_nchw_rc_o_i = s[conv2d_nchw].split(conv2d_nchw_rc_o_i, factor=2)
+conv2d_nchw_rc_o_o, conv2d_nchw_rc_o_i = s[conv2d_nchw].split(conv2d_nchw_rc_o_i, factor=8)
 conv2d_nchw_ry_o_i, conv2d_nchw_ry_i = s[conv2d_nchw].split(conv2d_nchw_ry, factor=1)
-conv2d_nchw_ry_o_o, conv2d_nchw_ry_o_i = s[conv2d_nchw].split(conv2d_nchw_ry_o_i, factor=1)
+conv2d_nchw_ry_o_o, conv2d_nchw_ry_o_i = s[conv2d_nchw].split(conv2d_nchw_ry_o_i, factor=3)
 conv2d_nchw_rx_o_i, conv2d_nchw_rx_i = s[conv2d_nchw].split(conv2d_nchw_rx, factor=1)
 conv2d_nchw_rx_o_o, conv2d_nchw_rx_o_i = s[conv2d_nchw].split(conv2d_nchw_rx_o_i, factor=1)
 s[conv2d_nchw].reorder(conv2d_nchw_nn_o_o_o_o, conv2d_nchw_ff_o_o_o_o, conv2d_nchw_yy_o_o_o_o, conv2d_nchw_xx_o_o_o_o, conv2d_nchw_nn_o_o_o_i, conv2d_nchw_ff_o_o_o_i, conv2d_nchw_yy_o_o_o_i, conv2d_nchw_xx_o_o_o_i, conv2d_nchw_nn_o_o_i, conv2d_nchw_ff_o_o_i, conv2d_nchw_yy_o_o_i, conv2d_nchw_xx_o_o_i, conv2d_nchw_rc_o_o, conv2d_nchw_ry_o_o, conv2d_nchw_rx_o_o, conv2d_nchw_rc_o_i, conv2d_nchw_ry_o_i, conv2d_nchw_rx_o_i, conv2d_nchw_nn_o_i, conv2d_nchw_ff_o_i, conv2d_nchw_yy_o_i, conv2d_nc [...]
 compute_i0_o_i, compute_i0_i = s[compute].split(compute_i0, factor=1)
 compute_i0_o_o_i, compute_i0_o_i = s[compute].split(compute_i0_o_i, factor=1)
 compute_i0_o_o_o, compute_i0_o_o_i = s[compute].split(compute_i0_o_o_i, factor=1)
-compute_i1_o_i, compute_i1_i = s[compute].split(compute_i1, factor=1)
-compute_i1_o_o_i, compute_i1_o_i = s[compute].split(compute_i1_o_i, factor=8)
-compute_i1_o_o_o, compute_i1_o_o_i = s[compute].split(compute_i1_o_o_i, factor=1)
-compute_i2_o_i, compute_i2_i = s[compute].split(compute_i2, factor=7)
-compute_i2_o_o_i, compute_i2_o_i = s[compute].split(compute_i2_o_i, factor=1)
+compute_i1_o_i, compute_i1_i = s[compute].split(compute_i1, factor=2)
+compute_i1_o_o_i, compute_i1_o_i = s[compute].split(compute_i1_o_i, factor=1)
+compute_i1_o_o_o, compute_i1_o_o_i = s[compute].split(compute_i1_o_o_i, factor=2)
+compute_i2_o_i, compute_i2_i = s[compute].split(compute_i2, factor=1)
+compute_i2_o_o_i, compute_i2_o_i = s[compute].split(compute_i2_o_i, factor=7)
 compute_i2_o_o_o, compute_i2_o_o_i = s[compute].split(compute_i2_o_o_i, factor=1)
 compute_i3_o_i, compute_i3_i = s[compute].split(compute_i3, factor=1)
 compute_i3_o_o_i, compute_i3_o_i = s[compute].split(compute_i3_o_i, factor=7)
@@ -882,14 +795,14 @@ s[compute].bind(compute_i0_o_i_i1_o_i_fused_i2_o_i_fused_i3_o_i_fused, te.thread
 kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused = s[kernel_shared].fuse(kernel_shared_ax0, kernel_shared_ax1, kernel_shared_ax2, kernel_shared_ax3)
 kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=1)
 s[kernel_shared].vectorize(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i)
-kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=56)
+kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=49)
 s[kernel_shared].bind(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i, te.thread_axis(&quot;threadIdx.x&quot;))
 pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused = s[pad_temp_shared].fuse(pad_temp_shared_ax0, pad_temp_shared_ax1, pad_temp_shared_ax2, pad_temp_shared_ax3)
-pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=4)
+pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=1)
 s[pad_temp_shared].vectorize(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i)
-pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=56)
+pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=49)
 s[pad_temp_shared].bind(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i, te.thread_axis(&quot;threadIdx.x&quot;))
-s[conv2d_nchw].pragma(conv2d_nchw_nn_o_o_o_o, &quot;auto_unroll_max_step&quot;, 512)
+s[conv2d_nchw].pragma(conv2d_nchw_nn_o_o_o_o, &quot;auto_unroll_max_step&quot;, 64)
 s[conv2d_nchw].pragma(conv2d_nchw_nn_o_o_o_o, &quot;unroll_explicit&quot;, True)
 
 CUDA source code:
@@ -907,257 +820,127 @@ CUDA source code:
   #define int64_t long long
   #define uint64_t unsigned long long
 #endif
-extern &quot;C&quot; __global__ void __launch_bounds__(56) default_function_kernel0(float* __restrict__ data, float* __restrict__ kernel, float* __restrict__ compute, float* __restrict__ bias) {
-  float conv2d_nchw[7];
-  __shared__ float pad_temp_shared[392];
-  __shared__ float kernel_shared[64];
+extern &quot;C&quot; __global__ void __launch_bounds__(49) default_function_kernel0(float* __restrict__ data, float* __restrict__ kernel, float* __restrict__ compute, float* __restrict__ bias) {
+  float conv2d_nchw[4];
+  __shared__ float pad_temp_shared[2016];
+  __shared__ float kernel_shared[384];
   conv2d_nchw[0] = 0.000000e+00f;
-  conv2d_nchw[1] = 0.000000e+00f;
   conv2d_nchw[2] = 0.000000e+00f;
+  conv2d_nchw[1] = 0.000000e+00f;
   conv2d_nchw[3] = 0.000000e+00f;
-  conv2d_nchw[4] = 0.000000e+00f;
-  conv2d_nchw[5] = 0.000000e+00f;
-  conv2d_nchw[6] = 0.000000e+00f;
-  for (int rc_outer_outer = 0; rc_outer_outer &lt; 64; ++rc_outer_outer) {
-    for (int ry_outer_outer = 0; ry_outer_outer &lt; 3; ++ry_outer_outer) {
-      __syncthreads();
-      pad_temp_shared[(((int)threadIdx.x) * 4)] = ((((1 &lt;= ((((((int)threadIdx.x) * 4) % 49) / 7) + ry_outer_outer)) &amp;&amp; (((((((int)threadIdx.x) * 4) % 49) / 7) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) * 4) % 7))) ? data[((((rc_outer_outer * 392) + (ry_outer_outer * 7)) + (((int)threadIdx.x) * 4)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[((((int)threadIdx.x) * 4) + 1)] = ((((1 &lt;= (((((((int)threadIdx.x) * 4) + 1) % 49) / 7) + ry_outer_outer)) &amp;&amp; ((((((((int)threadIdx.x) * 4) + 1) % 49) / 7) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= (((((int)threadIdx.x) * 4) + 1) % 7))) ? data[((((rc_outer_outer * 392) + (ry_outer_outer * 7)) + (((int)threadIdx.x) * 4)) - 7)] : 0.000000e+00f);
-      pad_temp_shared[((((int)threadIdx.x) * 4) + 2)] = ((((1 &lt;= (((((((int)threadIdx.x) * 4) + 2) % 49) / 7) + ry_outer_outer)) &amp;&amp; ((((((((int)threadIdx.x) * 4) + 2) % 49) / 7) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= (((((int)threadIdx.x) * 4) + 2) % 7))) ? data[((((rc_outer_outer * 392) + (ry_outer_outer * 7)) + (((int)threadIdx.x) * 4)) - 6)] : 0.000000e+00f);
-      pad_temp_shared[((((int)threadIdx.x) * 4) + 3)] = ((((1 &lt;= (((((((int)threadIdx.x) * 4) + 3) % 49) / 7) + ry_outer_outer)) &amp;&amp; ((((((((int)threadIdx.x) * 4) + 3) % 49) / 7) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= (((((int)threadIdx.x) * 4) + 3) % 7))) ? data[((((rc_outer_outer * 392) + (ry_outer_outer * 7)) + (((int)threadIdx.x) * 4)) - 5)] : 0.000000e+00f);
-      if (((int)threadIdx.x) &lt; 42) {
-        pad_temp_shared[((((((((int)threadIdx.x) * 4) + 224) / 49) * 49) + (((((((int)threadIdx.x) * 4) / 7) + 4) % 7) * 7)) + ((((int)threadIdx.x) * 4) % 7))] = ((((1 &lt;= (ry_outer_outer + ((((((int)threadIdx.x) * 4) / 7) + 4) % 7))) &amp;&amp; ((ry_outer_outer + ((((((int)threadIdx.x) * 4) / 7) + 4) % 7)) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) * 4) % 7))) ? data[((((((rc_outer_outer * 392) + ((((((int)threadIdx.x) * 4) + 224) / 49) * 49)) + (ry_outer_outer * 7)) + (((((((i [...]
-      }
-      if (((int)threadIdx.x) &lt; 42) {
-        pad_temp_shared[((((((((int)threadIdx.x) * 4) + 225) / 49) * 49) + ((((((((int)threadIdx.x) * 4) + 1) / 7) + 4) % 7) * 7)) + (((((int)threadIdx.x) * 4) + 1) % 7))] = ((((1 &lt;= (ry_outer_outer + (((((((int)threadIdx.x) * 4) + 1) / 7) + 4) % 7))) &amp;&amp; ((ry_outer_outer + (((((((int)threadIdx.x) * 4) + 1) / 7) + 4) % 7)) &lt; 8)) &amp;&amp; (1 &lt;= (((((int)threadIdx.x) * 4) + 1) % 7))) ? data[((((((rc_outer_outer * 392) + ((((((int)threadIdx.x) * 4) + 225) / 49) * 49)) + (r [...]
-      }
-      if (((int)threadIdx.x) &lt; 42) {
-        pad_temp_shared[((((((((int)threadIdx.x) * 4) + 226) / 49) * 49) + ((((((((int)threadIdx.x) * 4) + 2) / 7) + 4) % 7) * 7)) + (((((int)threadIdx.x) * 4) + 2) % 7))] = ((((1 &lt;= (ry_outer_outer + (((((((int)threadIdx.x) * 4) + 2) / 7) + 4) % 7))) &amp;&amp; ((ry_outer_outer + (((((((int)threadIdx.x) * 4) + 2) / 7) + 4) % 7)) &lt; 8)) &amp;&amp; (1 &lt;= (((((int)threadIdx.x) * 4) + 2) % 7))) ? data[((((((rc_outer_outer * 392) + ((((((int)threadIdx.x) * 4) + 226) / 49) * 49)) + (r [...]
-      }
-      if (((int)threadIdx.x) &lt; 42) {
-        pad_temp_shared[((((((((int)threadIdx.x) * 4) + 227) / 49) * 49) + ((((((((int)threadIdx.x) * 4) + 3) / 7) + 4) % 7) * 7)) + (((((int)threadIdx.x) * 4) + 3) % 7))] = ((((1 &lt;= (ry_outer_outer + (((((((int)threadIdx.x) * 4) + 3) / 7) + 4) % 7))) &amp;&amp; ((ry_outer_outer + (((((((int)threadIdx.x) * 4) + 3) / 7) + 4) % 7)) &lt; 8)) &amp;&amp; (1 &lt;= (((((int)threadIdx.x) * 4) + 3) % 7))) ? data[((((((rc_outer_outer * 392) + ((((((int)threadIdx.x) * 4) + 227) / 49) * 49)) + (r [...]
-      }
-      kernel_shared[((int)threadIdx.x)] = kernel[(((((((int)blockIdx.x) * 36864) + ((((int)threadIdx.x) &gt;&gt; 3) * 4608)) + (rc_outer_outer * 72)) + ((((int)threadIdx.x) &amp; 7) * 9)) + (ry_outer_outer * 3))];
-      if (((int)threadIdx.x) &lt; 8) {
-        kernel_shared[(((int)threadIdx.x) + 56)] = kernel[(((((((int)blockIdx.x) * 36864) + (rc_outer_outer * 72)) + (((int)threadIdx.x) * 9)) + (ry_outer_outer * 3)) + 32256)];
-      }
-      __syncthreads();
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((int)threadIdx.x) % 7)] * kernel_shared[((((int)threadIdx.x) / 7) * 8)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 7)] * kernel_shared[((((int)threadIdx.x) / 7) * 8)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 14)] * kernel_shared[((((int)threadIdx.x) / 7) * 8)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 21)] * kernel_shared[((((int)threadIdx.x) / 7) * 8)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 28)] * kernel_shared[((((int)threadIdx.x) / 7) * 8)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 35)] * kernel_shared[((((int)threadIdx.x) / 7) * 8)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 42)] * kernel_shared[((((int)threadIdx.x) / 7) * 8)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 49)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 1)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 56)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 1)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 63)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 1)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 70)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 1)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 77)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 1)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 84)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 1)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 91)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 1)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 98)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 2)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 105)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 2)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 112)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 2)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 119)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 2)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 126)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 2)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 133)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 2)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 140)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 2)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 147)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 3)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 154)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 3)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 161)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 3)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 168)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 3)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 175)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 3)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 182)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 3)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 189)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 3)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 196)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 4)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 203)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 4)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 210)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 4)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 217)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 4)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 224)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 4)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 231)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 4)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 238)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 4)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 245)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 5)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 252)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 5)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 259)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 5)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 266)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 5)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 273)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 5)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 280)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 5)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 287)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 5)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 294)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 6)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 301)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 6)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 308)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 6)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 315)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 6)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 322)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 6)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 329)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 6)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 336)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 6)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 343)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 7)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 350)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 7)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 357)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 7)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 364)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 7)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 371)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 7)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 378)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 7)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 385)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 7)]));
+  for (int rc_outer_outer = 0; rc_outer_outer &lt; 16; ++rc_outer_outer) {
+    for (int rx_outer_outer = 0; rx_outer_outer &lt; 3; ++rx_outer_outer) {
       __syncthreads();
-      pad_temp_shared[(((int)threadIdx.x) * 4)] = (((1 &lt;= ((((((int)threadIdx.x) * 4) % 49) / 7) + ry_outer_outer)) &amp;&amp; (((((((int)threadIdx.x) * 4) % 49) / 7) + ry_outer_outer) &lt; 8)) ? data[((((rc_outer_outer * 392) + (ry_outer_outer * 7)) + (((int)threadIdx.x) * 4)) - 7)] : 0.000000e+00f);
-      pad_temp_shared[((((int)threadIdx.x) * 4) + 1)] = (((1 &lt;= (((((((int)threadIdx.x) * 4) + 1) % 49) / 7) + ry_outer_outer)) &amp;&amp; ((((((((int)threadIdx.x) * 4) + 1) % 49) / 7) + ry_outer_outer) &lt; 8)) ? data[((((rc_outer_outer * 392) + (ry_outer_outer * 7)) + (((int)threadIdx.x) * 4)) - 6)] : 0.000000e+00f);
-      pad_temp_shared[((((int)threadIdx.x) * 4) + 2)] = (((1 &lt;= (((((((int)threadIdx.x) * 4) + 2) % 49) / 7) + ry_outer_outer)) &amp;&amp; ((((((((int)threadIdx.x) * 4) + 2) % 49) / 7) + ry_outer_outer) &lt; 8)) ? data[((((rc_outer_outer * 392) + (ry_outer_outer * 7)) + (((int)threadIdx.x) * 4)) - 5)] : 0.000000e+00f);
-      pad_temp_shared[((((int)threadIdx.x) * 4) + 3)] = (((1 &lt;= (((((((int)threadIdx.x) * 4) + 3) % 49) / 7) + ry_outer_outer)) &amp;&amp; ((((((((int)threadIdx.x) * 4) + 3) % 49) / 7) + ry_outer_outer) &lt; 8)) ? data[((((rc_outer_outer * 392) + (ry_outer_outer * 7)) + (((int)threadIdx.x) * 4)) - 4)] : 0.000000e+00f);
-      if (((int)threadIdx.x) &lt; 42) {
-        pad_temp_shared[((((((((int)threadIdx.x) * 4) + 224) / 49) * 49) + (((((((int)threadIdx.x) * 4) / 7) + 4) % 7) * 7)) + ((((int)threadIdx.x) * 4) % 7))] = (((1 &lt;= (ry_outer_outer + ((((((int)threadIdx.x) * 4) / 7) + 4) % 7))) &amp;&amp; ((ry_outer_outer + ((((((int)threadIdx.x) * 4) / 7) + 4) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 392) + ((((((int)threadIdx.x) * 4) + 224) / 49) * 49)) + (ry_outer_outer * 7)) + (((((((int)threadIdx.x) * 4) / 7) + 4) % 7) * 7)) + ((((int)th [...]
+      pad_temp_shared[((int)threadIdx.x)] = ((((7 &lt;= ((int)threadIdx.x)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((rc_outer_outer * 1568) + ((int)threadIdx.x)) + rx_outer_outer) - 8)] : 0.000000e+00f);
+      pad_temp_shared[(((int)threadIdx.x) + 49)] = (((((1 &lt;= (((((int)threadIdx.x) / 7) + 7) % 9)) &amp;&amp; ((((((int)threadIdx.x) / 7) + 7) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 49) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 7) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
+      pad_temp_shared[(((int)threadIdx.x) + 98)] = (((((1 &lt;= (((((int)threadIdx.x) / 7) + 5) % 9)) &amp;&amp; ((((((int)threadIdx.x) / 7) + 5) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 98) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 5) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
+      pad_temp_shared[(((int)threadIdx.x) + 147)] = (((((1 &lt;= (((((int)threadIdx.x) / 7) + 3) % 9)) &amp;&amp; ((((((int)threadIdx.x) / 7) + 3) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 147) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 3) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
+      pad_temp_shared[(((int)threadIdx.x) + 196)] = (((1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[(((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 196) / 63) * 49)) + ((int)threadIdx.x)) + rx_outer_outer) - 1)] : 0.000000e+00f);
+      pad_temp_shared[(((int)threadIdx.x) + 245)] = (((((1 &lt;= (((((int)threadIdx.x) / 7) + 8) % 9)) &amp;&amp; ((((((int)threadIdx.x) / 7) + 8) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 245) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 8) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
+      pad_temp_shared[(((int)threadIdx.x) + 294)] = (((((1 &lt;= (((((int)threadIdx.x) / 7) + 6) % 9)) &amp;&amp; ((((((int)threadIdx.x) / 7) + 6) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 294) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 6) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
+      pad_temp_shared[(((int)threadIdx.x) + 343)] = (((((1 &lt;= (((((int)threadIdx.x) / 7) + 4) % 9)) &amp;&amp; ((((((int)threadIdx.x) / 7) + 4) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 343) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 4) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
+      pad_temp_shared[(((int)threadIdx.x) + 392)] = ((((((int)threadIdx.x) &lt; 42) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[(((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 392) / 63) * 49)) + ((int)threadIdx.x)) + rx_outer_outer) + 6)] : 0.000000e+00f);
+      pad_temp_shared[(((int)threadIdx.x) + 441)] = ((((7 &lt;= ((int)threadIdx.x)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((rc_outer_outer * 1568) + ((int)threadIdx.x)) + rx_outer_outer) + 335)] : 0.000000e+00f);
+      pad_temp_shared[(((int)threadIdx.x) + 490)] = (((((1 &lt;= (((((int)threadIdx.x) / 7) + 7) % 9)) &amp;&amp; ((((((int)threadIdx.x) / 7) + 7) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 490) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 7) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
+      pad_temp_shared[(((int)threadIdx.x) + 539)] = (((((1 &lt;= (((((int)threadIdx.x) / 7) + 5) % 9)) &amp;&amp; ((((((int)threadIdx.x) / 7) + 5) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 539) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 5) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
+      pad_temp_shared[(((int)threadIdx.x) + 588)] = (((((1 &lt;= (((((int)threadIdx.x) / 7) + 3) % 9)) &amp;&amp; ((((((int)threadIdx.x) / 7) + 3) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 588) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 3) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
+      pad_temp_shared[(((int)threadIdx.x) + 637)] = (((1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[(((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 637) / 63) * 49)) + ((int)threadIdx.x)) + rx_outer_outer) - 1)] : 0.000000e+00f);
+      pad_temp_shared[(((int)threadIdx.x) + 686)] = (((((1 &lt;= (((((int)threadIdx.x) / 7) + 8) % 9)) &amp;&amp; ((((((int)threadIdx.x) / 7) + 8) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 686) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 8) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
+      pad_temp_shared[(((int)threadIdx.x) + 735)] = (((((1 &lt;= (((((int)threadIdx.x) / 7) + 6) % 9)) &amp;&amp; ((((((int)threadIdx.x) / 7) + 6) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 735) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 6) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
+      pad_temp_shared[(((int)threadIdx.x) + 784)] = (((((1 &lt;= (((((int)threadIdx.x) / 7) + 4) % 9)) &amp;&amp; ((((((int)threadIdx.x) / 7) + 4) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 784) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 4) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
+      pad_temp_shared[(((int)threadIdx.x) + 833)] = ((((((int)threadIdx.x) &lt; 42) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[(((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 833) / 63) * 49)) + ((int)threadIdx.x)) + rx_outer_outer) + 6)] : 0.000000e+00f);
+      pad_temp_shared[(((int)threadIdx.x) + 882)] = ((((7 &lt;= ((int)threadIdx.x)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((rc_outer_outer * 1568) + ((int)threadIdx.x)) + rx_outer_outer) + 678)] : 0.000000e+00f);
+      pad_temp_shared[(((int)threadIdx.x) + 931)] = (((((1 &lt;= (((((int)threadIdx.x) / 7) + 7) % 9)) &amp;&amp; ((((((int)threadIdx.x) / 7) + 7) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 931) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 7) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
+      pad_temp_shared[(((int)threadIdx.x) + 980)] = (((((1 &lt;= (((((int)threadIdx.x) / 7) + 5) % 9)) &amp;&amp; ((((((int)threadIdx.x) / 7) + 5) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 980) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 5) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
+      pad_temp_shared[(((int)threadIdx.x) + 1029)] = (((((1 &lt;= (((((int)threadIdx.x) / 7) + 3) % 9)) &amp;&amp; ((((((int)threadIdx.x) / 7) + 3) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1029) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 3) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
+      pad_temp_shared[(((int)threadIdx.x) + 1078)] = (((1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[(((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1078) / 63) * 49)) + ((int)threadIdx.x)) + rx_outer_outer) - 1)] : 0.000000e+00f);
+      pad_temp_shared[(((int)threadIdx.x) + 1127)] = (((((1 &lt;= (((((int)threadIdx.x) / 7) + 8) % 9)) &amp;&amp; ((((((int)threadIdx.x) / 7) + 8) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1127) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 8) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
+      pad_temp_shared[(((int)threadIdx.x) + 1176)] = (((((1 &lt;= (((((int)threadIdx.x) / 7) + 6) % 9)) &amp;&amp; ((((((int)threadIdx.x) / 7) + 6) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1176) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 6) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
+      pad_temp_shared[(((int)threadIdx.x) + 1225)] = (((((1 &lt;= (((((int)threadIdx.x) / 7) + 4) % 9)) &amp;&amp; ((((((int)threadIdx.x) / 7) + 4) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1225) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 4) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
+      pad_temp_shared[(((int)threadIdx.x) + 1274)] = ((((((int)threadIdx.x) &lt; 42) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[(((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1274) / 63) * 49)) + ((int)threadIdx.x)) + rx_outer_outer) + 6)] : 0.000000e+00f);
+      pad_temp_shared[(((int)threadIdx.x) + 1323)] = ((((7 &lt;= ((int)threadIdx.x)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((rc_outer_outer * 1568) + ((int)threadIdx.x)) + rx_outer_outer) + 1021)] : 0.000000e+00f);
+      pad_temp_shared[(((int)threadIdx.x) + 1372)] = (((((1 &lt;= (((((int)threadIdx.x) / 7) + 7) % 9)) &amp;&amp; ((((((int)threadIdx.x) / 7) + 7) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1372) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 7) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
+      pad_temp_shared[(((int)threadIdx.x) + 1421)] = (((((1 &lt;= (((((int)threadIdx.x) / 7) + 5) % 9)) &amp;&amp; ((((((int)threadIdx.x) / 7) + 5) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1421) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 5) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
+      pad_temp_shared[(((int)threadIdx.x) + 1470)] = (((((1 &lt;= (((((int)threadIdx.x) / 7) + 3) % 9)) &amp;&amp; ((((((int)threadIdx.x) / 7) + 3) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1470) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 3) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
+      pad_temp_shared[(((int)threadIdx.x) + 1519)] = (((1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[(((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1519) / 63) * 49)) + ((int)threadIdx.x)) + rx_outer_outer) - 1)] : 0.000000e+00f);
+      pad_temp_shared[(((int)threadIdx.x) + 1568)] = (((((1 &lt;= (((((int)threadIdx.x) / 7) + 8) % 9)) &amp;&amp; ((((((int)threadIdx.x) / 7) + 8) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1568) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 8) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
+      pad_temp_shared[(((int)threadIdx.x) + 1617)] = (((((1 &lt;= (((((int)threadIdx.x) / 7) + 6) % 9)) &amp;&amp; ((((((int)threadIdx.x) / 7) + 6) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1617) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 6) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
+      pad_temp_shared[(((int)threadIdx.x) + 1666)] = (((((1 &lt;= (((((int)threadIdx.x) / 7) + 4) % 9)) &amp;&amp; ((((((int)threadIdx.x) / 7) + 4) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1666) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 4) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
+      pad_temp_shared[(((int)threadIdx.x) + 1715)] = ((((((int)threadIdx.x) &lt; 42) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[(((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1715) / 63) * 49)) + ((int)threadIdx.x)) + rx_outer_outer) + 6)] : 0.000000e+00f);
+      pad_temp_shared[(((int)threadIdx.x) + 1764)] = ((((7 &lt;= ((int)threadIdx.x)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((rc_outer_outer * 1568) + ((int)threadIdx.x)) + rx_outer_outer) + 1364)] : 0.000000e+00f);
+      pad_temp_shared[(((int)threadIdx.x) + 1813)] = (((((1 &lt;= (((((int)threadIdx.x) / 7) + 7) % 9)) &amp;&amp; ((((((int)threadIdx.x) / 7) + 7) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1813) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 7) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
+      pad_temp_shared[(((int)threadIdx.x) + 1862)] = (((((1 &lt;= (((((int)threadIdx.x) / 7) + 5) % 9)) &amp;&amp; ((((((int)threadIdx.x) / 7) + 5) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1862) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 5) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
+      pad_temp_shared[(((int)threadIdx.x) + 1911)] = (((((1 &lt;= (((((int)threadIdx.x) / 7) + 3) % 9)) &amp;&amp; ((((((int)threadIdx.x) / 7) + 3) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1911) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 3) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
+      pad_temp_shared[(((int)threadIdx.x) + 1960)] = (((1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[(((((rc_outer_outer * 1568) + (((((int)threadIdx.x) + 1960) / 63) * 49)) + ((int)threadIdx.x)) + rx_outer_outer) - 1)] : 0.000000e+00f);
+      if (((int)threadIdx.x) &lt; 7) {
+        pad_temp_shared[(((int)threadIdx.x) + 2009)] = 0.000000e+00f;
       }
-      if (((int)threadIdx.x) &lt; 42) {
-        pad_temp_shared[((((((((int)threadIdx.x) * 4) + 225) / 49) * 49) + ((((((((int)threadIdx.x) * 4) + 1) / 7) + 4) % 7) * 7)) + (((((int)threadIdx.x) * 4) + 1) % 7))] = (((1 &lt;= (ry_outer_outer + (((((((int)threadIdx.x) * 4) + 1) / 7) + 4) % 7))) &amp;&amp; ((ry_outer_outer + (((((((int)threadIdx.x) * 4) + 1) / 7) + 4) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 392) + ((((((int)threadIdx.x) * 4) + 225) / 49) * 49)) + (ry_outer_outer * 7)) + ((((((((int)threadIdx.x) * 4) + 1) / 7 [...]
-      }
-      if (((int)threadIdx.x) &lt; 42) {
-        pad_temp_shared[((((((((int)threadIdx.x) * 4) + 226) / 49) * 49) + ((((((((int)threadIdx.x) * 4) + 2) / 7) + 4) % 7) * 7)) + (((((int)threadIdx.x) * 4) + 2) % 7))] = (((1 &lt;= (ry_outer_outer + (((((((int)threadIdx.x) * 4) + 2) / 7) + 4) % 7))) &amp;&amp; ((ry_outer_outer + (((((((int)threadIdx.x) * 4) + 2) / 7) + 4) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 392) + ((((((int)threadIdx.x) * 4) + 226) / 49) * 49)) + (ry_outer_outer * 7)) + ((((((((int)threadIdx.x) * 4) + 2) / 7 [...]
-      }
-      if (((int)threadIdx.x) &lt; 42) {
-        pad_temp_shared[((((((((int)threadIdx.x) * 4) + 227) / 49) * 49) + ((((((((int)threadIdx.x) * 4) + 3) / 7) + 4) % 7) * 7)) + (((((int)threadIdx.x) * 4) + 3) % 7))] = (((1 &lt;= (ry_outer_outer + (((((((int)threadIdx.x) * 4) + 3) / 7) + 4) % 7))) &amp;&amp; ((ry_outer_outer + (((((((int)threadIdx.x) * 4) + 3) / 7) + 4) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 392) + ((((((int)threadIdx.x) * 4) + 227) / 49) * 49)) + (ry_outer_outer * 7)) + ((((((((int)threadIdx.x) * 4) + 3) / 7 [...]
-      }
-      kernel_shared[((int)threadIdx.x)] = kernel[((((((((int)blockIdx.x) * 36864) + ((((int)threadIdx.x) &gt;&gt; 3) * 4608)) + (rc_outer_outer * 72)) + ((((int)threadIdx.x) &amp; 7) * 9)) + (ry_outer_outer * 3)) + 1)];
-      if (((int)threadIdx.x) &lt; 8) {
-        kernel_shared[(((int)threadIdx.x) + 56)] = kernel[(((((((int)blockIdx.x) * 36864) + (rc_outer_outer * 72)) + (((int)threadIdx.x) * 9)) + (ry_outer_outer * 3)) + 32257)];
+      kernel_shared[((int)threadIdx.x)] = kernel[((((((int)blockIdx.x) * 18432) + (rc_outer_outer * 288)) + (((int)threadIdx.x) * 3)) + rx_outer_outer)];
+      kernel_shared[(((int)threadIdx.x) + 49)] = kernel[((((((((int)blockIdx.x) * 18432) + (((((int)threadIdx.x) + 49) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((((int)threadIdx.x) + 49) % 96) / 3) * 9)) + (((((int)threadIdx.x) + 1) % 3) * 3)) + rx_outer_outer)];
+      kernel_shared[(((int)threadIdx.x) + 98)] = kernel[(((((((int)blockIdx.x) * 18432) + (((((int)threadIdx.x) + 98) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((int)threadIdx.x) + 2) * 3)) + rx_outer_outer)];
+      kernel_shared[(((int)threadIdx.x) + 147)] = kernel[((((((((int)blockIdx.x) * 18432) + (((((int)threadIdx.x) + 147) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((((int)threadIdx.x) / 3) + 17) &amp; 31) * 9)) + ((((int)threadIdx.x) % 3) * 3)) + rx_outer_outer)];
+      kernel_shared[(((int)threadIdx.x) + 196)] = kernel[((((((((int)blockIdx.x) * 18432) + (((((int)threadIdx.x) + 196) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 4) / 3) * 9)) + (((((int)threadIdx.x) + 1) % 3) * 3)) + rx_outer_outer)];
+      kernel_shared[(((int)threadIdx.x) + 245)] = kernel[((((((((int)blockIdx.x) * 18432) + (((((int)threadIdx.x) + 245) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((((int)threadIdx.x) + 53) % 96) / 3) * 9)) + (((((int)threadIdx.x) + 2) % 3) * 3)) + rx_outer_outer)];
+      kernel_shared[(((int)threadIdx.x) + 294)] = kernel[((((((((int)blockIdx.x) * 18432) + (((((int)threadIdx.x) + 294) / 96) * 4608)) + (rc_outer_outer * 288)) + (((int)threadIdx.x) * 3)) + rx_outer_outer) + 18)];
+      if (((int)threadIdx.x) &lt; 41) {
+        kernel_shared[(((int)threadIdx.x) + 343)] = kernel[((((((((int)blockIdx.x) * 18432) + (((((int)threadIdx.x) + 343) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 55) / 3) * 9)) + (((((int)threadIdx.x) + 1) % 3) * 3)) + rx_outer_outer)];
       }
       __syncthreads();
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((int)threadIdx.x) % 7)] * kernel_shared[((((int)threadIdx.x) / 7) * 8)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 7)] * kernel_shared[((((int)threadIdx.x) / 7) * 8)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 14)] * kernel_shared[((((int)threadIdx.x) / 7) * 8)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 21)] * kernel_shared[((((int)threadIdx.x) / 7) * 8)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 28)] * kernel_shared[((((int)threadIdx.x) / 7) * 8)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 35)] * kernel_shared[((((int)threadIdx.x) / 7) * 8)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 42)] * kernel_shared[((((int)threadIdx.x) / 7) * 8)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 49)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 1)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 56)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 1)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 63)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 1)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 70)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 1)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 77)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 1)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 84)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 1)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 91)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 1)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 98)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 2)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 105)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 2)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 112)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 2)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 119)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 2)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 126)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 2)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 133)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 2)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 140)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 2)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 147)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 3)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 154)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 3)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 161)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 3)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 168)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 3)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 175)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 3)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 182)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 3)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 189)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 3)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 196)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 4)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 203)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 4)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 210)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 4)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 217)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 4)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 224)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 4)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 231)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 4)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 238)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 4)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 245)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 5)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 252)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 5)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 259)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 5)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 266)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 5)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 273)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 5)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 280)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 5)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 287)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 5)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 294)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 6)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 301)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 6)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 308)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 6)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 315)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 6)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 322)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 6)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 329)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 6)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 336)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 6)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 343)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 7)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 350)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 7)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 357)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 7)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 364)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 7)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 371)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 7)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 378)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 7)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 385)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 7)]));
-      __syncthreads();
-      pad_temp_shared[(((int)threadIdx.x) * 4)] = ((((1 &lt;= ((((((int)threadIdx.x) * 4) % 49) / 7) + ry_outer_outer)) &amp;&amp; (((((((int)threadIdx.x) * 4) % 49) / 7) + ry_outer_outer) &lt; 8)) &amp;&amp; (((((int)threadIdx.x) * 4) % 7) &lt; 6)) ? data[((((rc_outer_outer * 392) + (ry_outer_outer * 7)) + (((int)threadIdx.x) * 4)) - 6)] : 0.000000e+00f);
-      pad_temp_shared[((((int)threadIdx.x) * 4) + 1)] = ((((1 &lt;= (((((((int)threadIdx.x) * 4) + 1) % 49) / 7) + ry_outer_outer)) &amp;&amp; ((((((((int)threadIdx.x) * 4) + 1) % 49) / 7) + ry_outer_outer) &lt; 8)) &amp;&amp; ((((((int)threadIdx.x) * 4) + 1) % 7) &lt; 6)) ? data[((((rc_outer_outer * 392) + (ry_outer_outer * 7)) + (((int)threadIdx.x) * 4)) - 5)] : 0.000000e+00f);
-      pad_temp_shared[((((int)threadIdx.x) * 4) + 2)] = ((((1 &lt;= (((((((int)threadIdx.x) * 4) + 2) % 49) / 7) + ry_outer_outer)) &amp;&amp; ((((((((int)threadIdx.x) * 4) + 2) % 49) / 7) + ry_outer_outer) &lt; 8)) &amp;&amp; ((((((int)threadIdx.x) * 4) + 2) % 7) &lt; 6)) ? data[((((rc_outer_outer * 392) + (ry_outer_outer * 7)) + (((int)threadIdx.x) * 4)) - 4)] : 0.000000e+00f);
-      pad_temp_shared[((((int)threadIdx.x) * 4) + 3)] = ((((1 &lt;= (((((((int)threadIdx.x) * 4) + 3) % 49) / 7) + ry_outer_outer)) &amp;&amp; ((((((((int)threadIdx.x) * 4) + 3) % 49) / 7) + ry_outer_outer) &lt; 8)) &amp;&amp; ((((((int)threadIdx.x) * 4) + 3) % 7) &lt; 6)) ? data[((((rc_outer_outer * 392) + (ry_outer_outer * 7)) + (((int)threadIdx.x) * 4)) - 3)] : 0.000000e+00f);
-      if (((int)threadIdx.x) &lt; 42) {
-        pad_temp_shared[((((((((int)threadIdx.x) * 4) + 224) / 49) * 49) + (((((((int)threadIdx.x) * 4) / 7) + 4) % 7) * 7)) + ((((int)threadIdx.x) * 4) % 7))] = ((((1 &lt;= (ry_outer_outer + ((((((int)threadIdx.x) * 4) / 7) + 4) % 7))) &amp;&amp; ((ry_outer_outer + ((((((int)threadIdx.x) * 4) / 7) + 4) % 7)) &lt; 8)) &amp;&amp; (((((int)threadIdx.x) * 4) % 7) &lt; 6)) ? data[((((((rc_outer_outer * 392) + ((((((int)threadIdx.x) * 4) + 224) / 49) * 49)) + (ry_outer_outer * 7)) + (((((((in [...]
-      }
-      if (((int)threadIdx.x) &lt; 42) {
-        pad_temp_shared[((((((((int)threadIdx.x) * 4) + 225) / 49) * 49) + ((((((((int)threadIdx.x) * 4) + 1) / 7) + 4) % 7) * 7)) + (((((int)threadIdx.x) * 4) + 1) % 7))] = ((((1 &lt;= (ry_outer_outer + (((((((int)threadIdx.x) * 4) + 1) / 7) + 4) % 7))) &amp;&amp; ((ry_outer_outer + (((((((int)threadIdx.x) * 4) + 1) / 7) + 4) % 7)) &lt; 8)) &amp;&amp; ((((((int)threadIdx.x) * 4) + 1) % 7) &lt; 6)) ? data[((((((rc_outer_outer * 392) + ((((((int)threadIdx.x) * 4) + 225) / 49) * 49)) + (ry [...]
+      for (int rc_outer_inner = 0; rc_outer_inner &lt; 8; ++rc_outer_inner) {
+        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((rc_outer_inner * 252) + ((int)threadIdx.x))] * kernel_shared[(rc_outer_inner * 12)]));
+        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[((rc_outer_inner * 252) + ((int)threadIdx.x))] * kernel_shared[((rc_outer_inner * 12) + 192)]));
+        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[((rc_outer_inner * 252) + ((int)threadIdx.x))] * kernel_shared[((rc_outer_inner * 12) + 96)]));
+        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[((rc_outer_inner * 252) + ((int)threadIdx.x))] * kernel_shared[((rc_outer_inner * 12) + 288)]));
+        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 12) + 3)]));
+        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 12) + 195)]));
+        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 12) + 99)]));
+        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 63)] * kernel_shared[((rc_outer_inner * 12) + 291)]));
+        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 12) + 6)]));
+        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 12) + 198)]));
+        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 12) + 102)]));
+        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 126)] * kernel_shared[((rc_outer_inner * 12) + 294)]));
+        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 12) + 9)]));
+        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 12) + 201)]));
+        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 12) + 105)]));
+        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 189)] * kernel_shared[((rc_outer_inner * 12) + 297)]));
+        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 12) + 1)]));
+        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 12) + 193)]));
+        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 12) + 97)]));
+        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 7)] * kernel_shared[((rc_outer_inner * 12) + 289)]));
+        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 12) + 4)]));
+        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 12) + 196)]));
+        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 12) + 100)]));
+        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 70)] * kernel_shared[((rc_outer_inner * 12) + 292)]));
+        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 12) + 7)]));
+        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 12) + 199)]));
+        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 12) + 103)]));
+        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 133)] * kernel_shared[((rc_outer_inner * 12) + 295)]));
+        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 12) + 10)]));
+        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 12) + 202)]));
+        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 12) + 106)]));
+        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 196)] * kernel_shared[((rc_outer_inner * 12) + 298)]));
+        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 12) + 2)]));
+        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 12) + 194)]));
+        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 12) + 98)]));
+        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 14)] * kernel_shared[((rc_outer_inner * 12) + 290)]));
+        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 12) + 5)]));
+        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 12) + 197)]));
+        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 12) + 101)]));
+        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 77)] * kernel_shared[((rc_outer_inner * 12) + 293)]));
+        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 12) + 8)]));
+        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 12) + 200)]));
+        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 12) + 104)]));
+        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 140)] * kernel_shared[((rc_outer_inner * 12) + 296)]));
+        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 12) + 11)]));
+        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 12) + 203)]));
+        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 12) + 107)]));
+        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 252) + ((int)threadIdx.x)) + 203)] * kernel_shared[((rc_outer_inner * 12) + 299)]));
       }
-      if (((int)threadIdx.x) &lt; 42) {
-        pad_temp_shared[((((((((int)threadIdx.x) * 4) + 226) / 49) * 49) + ((((((((int)threadIdx.x) * 4) + 2) / 7) + 4) % 7) * 7)) + (((((int)threadIdx.x) * 4) + 2) % 7))] = ((((1 &lt;= (ry_outer_outer + (((((((int)threadIdx.x) * 4) + 2) / 7) + 4) % 7))) &amp;&amp; ((ry_outer_outer + (((((((int)threadIdx.x) * 4) + 2) / 7) + 4) % 7)) &lt; 8)) &amp;&amp; ((((((int)threadIdx.x) * 4) + 2) % 7) &lt; 6)) ? data[((((((rc_outer_outer * 392) + ((((((int)threadIdx.x) * 4) + 226) / 49) * 49)) + (ry [...]
-      }
-      if (((int)threadIdx.x) &lt; 42) {
-        pad_temp_shared[((((((((int)threadIdx.x) * 4) + 227) / 49) * 49) + ((((((((int)threadIdx.x) * 4) + 3) / 7) + 4) % 7) * 7)) + (((((int)threadIdx.x) * 4) + 3) % 7))] = ((((1 &lt;= (ry_outer_outer + (((((((int)threadIdx.x) * 4) + 3) / 7) + 4) % 7))) &amp;&amp; ((ry_outer_outer + (((((((int)threadIdx.x) * 4) + 3) / 7) + 4) % 7)) &lt; 8)) &amp;&amp; ((((((int)threadIdx.x) * 4) + 3) % 7) &lt; 6)) ? data[((((((rc_outer_outer * 392) + ((((((int)threadIdx.x) * 4) + 227) / 49) * 49)) + (ry [...]
-      }
-      kernel_shared[((int)threadIdx.x)] = kernel[((((((((int)blockIdx.x) * 36864) + ((((int)threadIdx.x) &gt;&gt; 3) * 4608)) + (rc_outer_outer * 72)) + ((((int)threadIdx.x) &amp; 7) * 9)) + (ry_outer_outer * 3)) + 2)];
-      if (((int)threadIdx.x) &lt; 8) {
-        kernel_shared[(((int)threadIdx.x) + 56)] = kernel[(((((((int)blockIdx.x) * 36864) + (rc_outer_outer * 72)) + (((int)threadIdx.x) * 9)) + (ry_outer_outer * 3)) + 32258)];
-      }
-      __syncthreads();
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((int)threadIdx.x) % 7)] * kernel_shared[((((int)threadIdx.x) / 7) * 8)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 7)] * kernel_shared[((((int)threadIdx.x) / 7) * 8)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 14)] * kernel_shared[((((int)threadIdx.x) / 7) * 8)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 21)] * kernel_shared[((((int)threadIdx.x) / 7) * 8)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 28)] * kernel_shared[((((int)threadIdx.x) / 7) * 8)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 35)] * kernel_shared[((((int)threadIdx.x) / 7) * 8)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 42)] * kernel_shared[((((int)threadIdx.x) / 7) * 8)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 49)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 1)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 56)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 1)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 63)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 1)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 70)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 1)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 77)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 1)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 84)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 1)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 91)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 1)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 98)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 2)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 105)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 2)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 112)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 2)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 119)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 2)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 126)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 2)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 133)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 2)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 140)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 2)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 147)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 3)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 154)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 3)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 161)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 3)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 168)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 3)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 175)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 3)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 182)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 3)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 189)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 3)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 196)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 4)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 203)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 4)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 210)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 4)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 217)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 4)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 224)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 4)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 231)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 4)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 238)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 4)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 245)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 5)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 252)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 5)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 259)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 5)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 266)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 5)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 273)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 5)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 280)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 5)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 287)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 5)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 294)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 6)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 301)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 6)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 308)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 6)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 315)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 6)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 322)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 6)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 329)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 6)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 336)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 6)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 343)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 7)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 350)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 7)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 357)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 7)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 364)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 7)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 371)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 7)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 378)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 7)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[((((int)threadIdx.x) % 7) + 385)] * kernel_shared[(((((int)threadIdx.x) / 7) * 8) + 7)]));
     }
   }
-  for (int i2_inner = 0; i2_inner &lt; 7; ++i2_inner) {
-    compute[((((((int)blockIdx.x) * 392) + ((((int)threadIdx.x) / 7) * 49)) + (i2_inner * 7)) + (((int)threadIdx.x) % 7))] = max((conv2d_nchw[i2_inner] + bias[((((int)blockIdx.x) * 8) + (((int)threadIdx.x) / 7))]), 0.000000e+00f);
+  for (int i1_inner = 0; i1_inner &lt; 2; ++i1_inner) {
+    compute[(((((int)blockIdx.x) * 196) + (i1_inner * 49)) + ((int)threadIdx.x))] = max((conv2d_nchw[i1_inner] + bias[((((int)blockIdx.x) * 4) + i1_inner)]), 0.000000e+00f);
+    compute[((((((int)blockIdx.x) * 196) + (i1_inner * 49)) + ((int)threadIdx.x)) + 98)] = max((conv2d_nchw[(i1_inner + 2)] + bias[(((((int)blockIdx.x) * 4) + i1_inner) + 2)]), 0.000000e+00f);
   }
 }
 </pre></div>
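As a point of reference, a CUDA listing like the one above can be regenerated from the tuning log. The following is only a sketch and assumes the `task`, `log_file`, and CUDA `target` objects defined earlier in this tutorial:

    import tvm

    # Apply the best record found in the log and build the kernel
    # (sketch; `task` and `log_file` come from earlier in the tutorial).
    sch, args = task.apply_best(log_file)
    func = tvm.build(sch, args, target="cuda")

    # The device code lives in the imported CUDA module of the built host
    # module; printing it yields a listing like the one shown above.
    print(func.imported_modules[0].get_source())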
@@ -1194,7 +977,7 @@ In the example below we resume from the previous status and do 5 more trials.</p>
 Get devices for measurement successfully!
 </pre></div>
 </div>
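The resumed-tuning run summarized above ("Get devices for measurement successfully!") follows the usual pattern of warming the cost model from the existing records and then measuring a few extra trials. A sketch, assuming the `task` and `log_file` from earlier in the tutorial:

    from tvm import auto_scheduler

    # Rebuild the cost model from the existing records so the search
    # continues where it left off instead of starting from scratch.
    cost_model = auto_scheduler.XGBModel()
    cost_model.update_from_file(log_file)
    search_policy = auto_scheduler.SketchPolicy(
        task,
        program_cost_model=cost_model,
        init_search_callbacks=[auto_scheduler.PreloadMeasuredStates(log_file)],
    )

    # Measure 5 more trials and append them to the same log file.
    measure_ctx = auto_scheduler.LocalRPCMeasureContext(min_repeat_ms=300)
    tune_option = auto_scheduler.TuningOptions(
        num_measure_trials=5,
        runner=measure_ctx.runner,
        measure_callbacks=[auto_scheduler.RecordToFile(log_file)],
    )
    task.tune(tune_option, search_policy=search_policy)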
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 5 minutes  48.035 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 5 minutes  42.208 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-tune-with-autoscheduler-tune-conv2d-layer-cuda-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/e3e540f3b477c0c52d8eb73e674e8ffd/tune_conv2d_layer_cuda.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">tune_conv2d_layer_cuda.py</span></code></a></p>
diff --git a/docs/how_to/tune_with_autoscheduler/tune_network_cuda.html b/docs/how_to/tune_with_autoscheduler/tune_network_cuda.html
index e39384f27b..8af7f03e1b 100644
--- a/docs/how_to/tune_with_autoscheduler/tune_network_cuda.html
+++ b/docs/how_to/tune_with_autoscheduler/tune_network_cuda.html
@@ -916,7 +916,7 @@ so we can read the log file and load the best schedules.</p>
 Evaluate inference time cost...
 Execution time summary:
  mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)
-   7.8815       7.8801       7.8879       7.8764       0.0048
+   7.8874       7.8938       7.8943       7.8741       0.0094
 </pre></div>
 </div>
 </div>
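The inference times above are measured on a module compiled with the tuned records. A compact sketch of that compile-and-benchmark step, assuming the `mod`, `params`, `target`, and `log_file` objects from earlier in this tutorial:

    import tvm
    from tvm import relay, auto_scheduler
    from tvm.contrib import graph_executor

    # Compile the network with the best schedules recorded in the log.
    with auto_scheduler.ApplyHistoryBest(log_file):
        with tvm.transform.PassContext(
            opt_level=3, config={"relay.backend.use_auto_scheduler": True}
        ):
            lib = relay.build(mod, target=target, params=params)

    # Run the compiled module and report the timing summary.
    dev = tvm.device(str(target), 0)
    module = graph_executor.GraphModule(lib["default"](dev))
    print(module.benchmark(dev, repeat=3, min_repeat_ms=500))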
@@ -938,7 +938,7 @@ to learn how to use the RPC Tracker and RPC Server.
 To use the RPC Tracker in auto-scheduler, replace the runner in <code class="code docutils literal notranslate"><span class="pre">TuningOptions</span></code>
 with <a class="reference internal" href="../../reference/api/python/auto_scheduler.html#tvm.auto_scheduler.RPCRunner" title="tvm.auto_scheduler.RPCRunner"><code class="xref any py py-class docutils literal notranslate"><span class="pre">auto_scheduler.RPCRunner</span></code></a>.</p></li>
 </ol>
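A sketch of the runner replacement described in the item above; the device key, tracker host, and port are placeholders that must match your own running RPC tracker:

    from tvm import auto_scheduler

    # Placeholder tracker coordinates and device key (assumptions).
    runner = auto_scheduler.RPCRunner(
        key="v100",
        host="127.0.0.1",
        port=9190,
        repeat=3,
        min_repeat_ms=300,
        timeout=30,
    )

    tune_option = auto_scheduler.TuningOptions(
        num_measure_trials=200,
        runner=runner,  # replaces the default local runner
        measure_callbacks=[auto_scheduler.RecordToFile(log_file)],
    )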
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  4.115 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  1.921 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-tune-with-autoscheduler-tune-network-cuda-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/eafe360d52540634c9eea0fa89e804bd/tune_network_cuda.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">tune_network_cuda.py</span></code></a></p>
diff --git a/docs/how_to/tune_with_autoscheduler/tune_network_x86.html b/docs/how_to/tune_with_autoscheduler/tune_network_x86.html
index 1d0730ad9f..620b5a04b8 100644
--- a/docs/how_to/tune_with_autoscheduler/tune_network_x86.html
+++ b/docs/how_to/tune_with_autoscheduler/tune_network_x86.html
@@ -935,7 +935,7 @@ so we can read the log file and load the best schedules.</p>
 Evaluate inference time cost...
 Execution time summary:
  mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)
-  762.8631     763.0774     764.4738     761.0381      1.4108
+  749.6636     749.6433     749.9993     749.3483      0.2661
 </pre></div>
 </div>
 </div>
@@ -957,7 +957,7 @@ to learn how to use the RPC Tracker and RPC Server.
 To use the RPC Tracker in auto-scheduler, replace the runner in <code class="code docutils literal notranslate"><span class="pre">TuningOptions</span></code>
 with <a class="reference internal" href="../../reference/api/python/auto_scheduler.html#tvm.auto_scheduler.RPCRunner" title="tvm.auto_scheduler.RPCRunner"><code class="xref any py py-class docutils literal notranslate"><span class="pre">auto_scheduler.RPCRunner</span></code></a>.</p></li>
 </ol>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  35.298 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  32.179 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-tune-with-autoscheduler-tune-network-x86-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/e416b94ca1090b0897c0f6e0df95b911/tune_network_x86.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">tune_network_x86.py</span></code></a></p>
diff --git a/docs/how_to/tune_with_autoscheduler/tune_sparse_x86.html b/docs/how_to/tune_with_autoscheduler/tune_sparse_x86.html
index ecf145dc90..f80119c3bd 100644
--- a/docs/how_to/tune_with_autoscheduler/tune_sparse_x86.html
+++ b/docs/how_to/tune_with_autoscheduler/tune_sparse_x86.html
@@ -633,102 +633,29 @@ layout transformation, parallelization, vectorization, unrolling, and operator f
              placeholder_4: Buffer(placeholder_14: Pointer(float32), float32, [128, 512], []),
              compute: Buffer(compute_2: Pointer(float32), float32, [128, 512], [])}
   buffer_map = {placeholder_5: placeholder, placeholder_6: placeholder_1, placeholder_7: placeholder_2, placeholder_8: placeholder_3, placeholder_9: placeholder_4, compute_1: compute} {
-  for (i0.outer.i1.outer.fused: int32, 0, 32) &quot;parallel&quot; {
-    allocate(compute_3: Pointer(global float32), float32, [2048]), storage_scope = global {
-      for (i.outer.inner: int32, 0, 2) {
-        for (i.inner.init: int32, 0, 64) {
-          let cse_var_1: int32 = ((i.outer.inner*1024) + (i.inner.init*16))
-           {
-            compute_4: Buffer(compute_3, float32, [2048], [])[cse_var_1] = 0f32
-            compute_4[(cse_var_1 + 1)] = 0f32
-            compute_4[(cse_var_1 + 2)] = 0f32
-            compute_4[(cse_var_1 + 3)] = 0f32
-            compute_4[(cse_var_1 + 4)] = 0f32
-            compute_4[(cse_var_1 + 5)] = 0f32
-            compute_4[(cse_var_1 + 6)] = 0f32
-            compute_4[(cse_var_1 + 7)] = 0f32
-            compute_4[(cse_var_1 + 8)] = 0f32
-            compute_4[(cse_var_1 + 9)] = 0f32
-            compute_4[(cse_var_1 + 10)] = 0f32
-            compute_4[(cse_var_1 + 11)] = 0f32
-            compute_4[(cse_var_1 + 12)] = 0f32
-            compute_4[(cse_var_1 + 13)] = 0f32
-            compute_4[(cse_var_1 + 14)] = 0f32
-            compute_4[(cse_var_1 + 15)] = 0f32
-          }
-        }
-        for (elem_idx: int32, 0, (placeholder_15: Buffer(placeholder_13, int32, [33], [])[(i0.outer.i1.outer.fused + 1)] - placeholder_15[i0.outer.i1.outer.fused])) {
-          for (i.inner: int32, 0, 64) {
-            if @tir.likely((elem_idx &lt; (placeholder_15[(i0.outer.i1.outer.fused + 1)] - placeholder_15[i0.outer.i1.outer.fused])), dtype=bool) {
-              let cse_var_2: int32 = ((i.outer.inner*1024) + (i.inner*16))
-              compute_4[cse_var_2] = (compute_4[cse_var_2] + (placeholder_16: Buffer(placeholder_11, float32, [78656], [])[((placeholder_15[i0.outer.i1.outer.fused]*16) + (elem_idx*16))]*max(placeholder_17: Buffer(placeholder_10, float32, [32768], [])[(((i.outer.inner*16384) + (i.inner*256)) + placeholder_18: Buffer(placeholder_12, int32, [4916], [])[(placeholder_15[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
-            }
-            if @tir.likely((elem_idx &lt; (placeholder_15[(i0.outer.i1.outer.fused + 1)] - placeholder_15[i0.outer.i1.outer.fused])), dtype=bool) {
-              let cse_var_3: int32 = (((i.outer.inner*1024) + (i.inner*16)) + 1)
-              compute_4[cse_var_3] = (compute_4[cse_var_3] + (placeholder_16[(((placeholder_15[i0.outer.i1.outer.fused]*16) + (elem_idx*16)) + 1)]*max(placeholder_17[(((i.outer.inner*16384) + (i.inner*256)) + placeholder_18[(placeholder_15[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
-            }
-            if @tir.likely((elem_idx &lt; (placeholder_15[(i0.outer.i1.outer.fused + 1)] - placeholder_15[i0.outer.i1.outer.fused])), dtype=bool) {
-              let cse_var_4: int32 = (((i.outer.inner*1024) + (i.inner*16)) + 2)
-              compute_4[cse_var_4] = (compute_4[cse_var_4] + (placeholder_16[(((placeholder_15[i0.outer.i1.outer.fused]*16) + (elem_idx*16)) + 2)]*max(placeholder_17[(((i.outer.inner*16384) + (i.inner*256)) + placeholder_18[(placeholder_15[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
-            }
-            if @tir.likely((elem_idx &lt; (placeholder_15[(i0.outer.i1.outer.fused + 1)] - placeholder_15[i0.outer.i1.outer.fused])), dtype=bool) {
-              let cse_var_5: int32 = (((i.outer.inner*1024) + (i.inner*16)) + 3)
-              compute_4[cse_var_5] = (compute_4[cse_var_5] + (placeholder_16[(((placeholder_15[i0.outer.i1.outer.fused]*16) + (elem_idx*16)) + 3)]*max(placeholder_17[(((i.outer.inner*16384) + (i.inner*256)) + placeholder_18[(placeholder_15[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
-            }
-            if @tir.likely((elem_idx &lt; (placeholder_15[(i0.outer.i1.outer.fused + 1)] - placeholder_15[i0.outer.i1.outer.fused])), dtype=bool) {
-              let cse_var_6: int32 = (((i.outer.inner*1024) + (i.inner*16)) + 4)
-              compute_4[cse_var_6] = (compute_4[cse_var_6] + (placeholder_16[(((placeholder_15[i0.outer.i1.outer.fused]*16) + (elem_idx*16)) + 4)]*max(placeholder_17[(((i.outer.inner*16384) + (i.inner*256)) + placeholder_18[(placeholder_15[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
-            }
-            if @tir.likely((elem_idx &lt; (placeholder_15[(i0.outer.i1.outer.fused + 1)] - placeholder_15[i0.outer.i1.outer.fused])), dtype=bool) {
-              let cse_var_7: int32 = (((i.outer.inner*1024) + (i.inner*16)) + 5)
-              compute_4[cse_var_7] = (compute_4[cse_var_7] + (placeholder_16[(((placeholder_15[i0.outer.i1.outer.fused]*16) + (elem_idx*16)) + 5)]*max(placeholder_17[(((i.outer.inner*16384) + (i.inner*256)) + placeholder_18[(placeholder_15[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
-            }
-            if @tir.likely((elem_idx &lt; (placeholder_15[(i0.outer.i1.outer.fused + 1)] - placeholder_15[i0.outer.i1.outer.fused])), dtype=bool) {
-              let cse_var_8: int32 = (((i.outer.inner*1024) + (i.inner*16)) + 6)
-              compute_4[cse_var_8] = (compute_4[cse_var_8] + (placeholder_16[(((placeholder_15[i0.outer.i1.outer.fused]*16) + (elem_idx*16)) + 6)]*max(placeholder_17[(((i.outer.inner*16384) + (i.inner*256)) + placeholder_18[(placeholder_15[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
-            }
-            if @tir.likely((elem_idx &lt; (placeholder_15[(i0.outer.i1.outer.fused + 1)] - placeholder_15[i0.outer.i1.outer.fused])), dtype=bool) {
-              let cse_var_9: int32 = (((i.outer.inner*1024) + (i.inner*16)) + 7)
-              compute_4[cse_var_9] = (compute_4[cse_var_9] + (placeholder_16[(((placeholder_15[i0.outer.i1.outer.fused]*16) + (elem_idx*16)) + 7)]*max(placeholder_17[(((i.outer.inner*16384) + (i.inner*256)) + placeholder_18[(placeholder_15[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
+  for (i0.outer.i1.outer.fused: int32, 0, 64) &quot;parallel&quot; {
+    allocate(compute_3: Pointer(global float32), float32, [1024]), storage_scope = global {
+      for (i.outer.inner: int32, 0, 16) {
+        for (nb_j.inner: int32, 0, 2) {
+          for (i.inner.init: int32, 0, 2) {
+            for (j.init: int32, 0, 16) {
+              compute_4: Buffer(compute_3, float32, [1024], [])[((((i.outer.inner*64) + (i.inner.init*32)) + (nb_j.inner*16)) + j.init)] = 0f32
             }
-            if @tir.likely((elem_idx &lt; (placeholder_15[(i0.outer.i1.outer.fused + 1)] - placeholder_15[i0.outer.i1.outer.fused])), dtype=bool) {
-              let cse_var_10: int32 = (((i.outer.inner*1024) + (i.inner*16)) + 8)
-              compute_4[cse_var_10] = (compute_4[cse_var_10] + (placeholder_16[(((placeholder_15[i0.outer.i1.outer.fused]*16) + (elem_idx*16)) + 8)]*max(placeholder_17[(((i.outer.inner*16384) + (i.inner*256)) + placeholder_18[(placeholder_15[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
-            }
-            if @tir.likely((elem_idx &lt; (placeholder_15[(i0.outer.i1.outer.fused + 1)] - placeholder_15[i0.outer.i1.outer.fused])), dtype=bool) {
-              let cse_var_11: int32 = (((i.outer.inner*1024) + (i.inner*16)) + 9)
-              compute_4[cse_var_11] = (compute_4[cse_var_11] + (placeholder_16[(((placeholder_15[i0.outer.i1.outer.fused]*16) + (elem_idx*16)) + 9)]*max(placeholder_17[(((i.outer.inner*16384) + (i.inner*256)) + placeholder_18[(placeholder_15[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
-            }
-            if @tir.likely((elem_idx &lt; (placeholder_15[(i0.outer.i1.outer.fused + 1)] - placeholder_15[i0.outer.i1.outer.fused])), dtype=bool) {
-              let cse_var_12: int32 = (((i.outer.inner*1024) + (i.inner*16)) + 10)
-              compute_4[cse_var_12] = (compute_4[cse_var_12] + (placeholder_16[(((placeholder_15[i0.outer.i1.outer.fused]*16) + (elem_idx*16)) + 10)]*max(placeholder_17[(((i.outer.inner*16384) + (i.inner*256)) + placeholder_18[(placeholder_15[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
-            }
-            if @tir.likely((elem_idx &lt; (placeholder_15[(i0.outer.i1.outer.fused + 1)] - placeholder_15[i0.outer.i1.outer.fused])), dtype=bool) {
-              let cse_var_13: int32 = (((i.outer.inner*1024) + (i.inner*16)) + 11)
-              compute_4[cse_var_13] = (compute_4[cse_var_13] + (placeholder_16[(((placeholder_15[i0.outer.i1.outer.fused]*16) + (elem_idx*16)) + 11)]*max(placeholder_17[(((i.outer.inner*16384) + (i.inner*256)) + placeholder_18[(placeholder_15[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
-            }
-            if @tir.likely((elem_idx &lt; (placeholder_15[(i0.outer.i1.outer.fused + 1)] - placeholder_15[i0.outer.i1.outer.fused])), dtype=bool) {
-              let cse_var_14: int32 = (((i.outer.inner*1024) + (i.inner*16)) + 12)
-              compute_4[cse_var_14] = (compute_4[cse_var_14] + (placeholder_16[(((placeholder_15[i0.outer.i1.outer.fused]*16) + (elem_idx*16)) + 12)]*max(placeholder_17[(((i.outer.inner*16384) + (i.inner*256)) + placeholder_18[(placeholder_15[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
-            }
-            if @tir.likely((elem_idx &lt; (placeholder_15[(i0.outer.i1.outer.fused + 1)] - placeholder_15[i0.outer.i1.outer.fused])), dtype=bool) {
-              let cse_var_15: int32 = (((i.outer.inner*1024) + (i.inner*16)) + 13)
-              compute_4[cse_var_15] = (compute_4[cse_var_15] + (placeholder_16[(((placeholder_15[i0.outer.i1.outer.fused]*16) + (elem_idx*16)) + 13)]*max(placeholder_17[(((i.outer.inner*16384) + (i.inner*256)) + placeholder_18[(placeholder_15[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
-            }
-            if @tir.likely((elem_idx &lt; (placeholder_15[(i0.outer.i1.outer.fused + 1)] - placeholder_15[i0.outer.i1.outer.fused])), dtype=bool) {
-              let cse_var_16: int32 = (((i.outer.inner*1024) + (i.inner*16)) + 14)
-              compute_4[cse_var_16] = (compute_4[cse_var_16] + (placeholder_16[(((placeholder_15[i0.outer.i1.outer.fused]*16) + (elem_idx*16)) + 14)]*max(placeholder_17[(((i.outer.inner*16384) + (i.inner*256)) + placeholder_18[(placeholder_15[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
-            }
-            if @tir.likely((elem_idx &lt; (placeholder_15[(i0.outer.i1.outer.fused + 1)] - placeholder_15[i0.outer.i1.outer.fused])), dtype=bool) {
-              let cse_var_17: int32 = (((i.outer.inner*1024) + (i.inner*16)) + 15)
-              compute_4[cse_var_17] = (compute_4[cse_var_17] + (placeholder_16[(((placeholder_15[i0.outer.i1.outer.fused]*16) + (elem_idx*16)) + 15)]*max(placeholder_17[(((i.outer.inner*16384) + (i.inner*256)) + placeholder_18[(placeholder_15[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
+          }
+          for (elem_idx: int32, 0, let cse_var_1: int32 = ((floormod(i0.outer.i1.outer.fused, 16)*2) + nb_j.inner) in (placeholder_15: Buffer(placeholder_13, int32, [33], [])[(cse_var_1 + 1)] - placeholder_15[cse_var_1])) {
+            for (i.inner: int32, 0, 2) {
+              for (j: int32, 0, 16) {
+                let cse_var_3: int32 = ((floormod(i0.outer.i1.outer.fused, 16)*2) + nb_j.inner)
+                let cse_var_2: int32 = ((((i.outer.inner*64) + (i.inner*32)) + (nb_j.inner*16)) + j)
+                compute_4[cse_var_2] = (compute_4[cse_var_2] + (placeholder_16: Buffer(placeholder_11, float32, [78656], [])[(((placeholder_15[cse_var_3]*16) + (elem_idx*16)) + j)]*max(placeholder_17: Buffer(placeholder_10, float32, [32768], [])[((((floordiv(i0.outer.i1.outer.fused, 16)*8192) + (i.outer.inner*512)) + (i.inner*256)) + placeholder_18: Buffer(placeholder_12, int32, [4916], [])[(placeholder_15[cse_var_3] + elem_idx)])], 0f32)))
+              }
             }
           }
         }
       }
-      for (i0.inner: int32, 0, 128) {
-        let cse_var_18: int32 = ((i0.inner*512) + (i0.outer.i1.outer.fused*16))
-        compute_5: Buffer(compute_2, float32, [65536], [])[ramp(cse_var_18, 1, 16)] = max((compute_4[ramp((i0.inner*16), 1, 16)] + placeholder_19: Buffer(placeholder_14, float32, [65536], [])[ramp(cse_var_18, 1, 16)]), broadcast(0f32, 16))
+      for (i0.inner: int32, 0, 32) {
+        let cse_var_4: int32 = (((floordiv(i0.outer.i1.outer.fused, 16)*16384) + (i0.inner*512)) + (floormod(i0.outer.i1.outer.fused, 16)*32))
+        compute_5: Buffer(compute_2, float32, [65536], [])[ramp(cse_var_4, 1, 32)] = max((compute_4[ramp((i0.inner*32), 1, 32)] + placeholder_19: Buffer(placeholder_14, float32, [65536], [])[ramp(cse_var_4, 1, 32)]), broadcast(0f32, 32))
       }
     }
   }
@@ -766,7 +693,7 @@ layout transformation, parallelization, vectorization, unrolling, and operator f
 <span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Execution time of this operator: 1.841 ms
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Execution time of this operator: 2.130 ms
 </pre></div>
 </div>
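For context, an operator-level number like the 2.130 ms above is typically obtained with a time evaluator over repeated runs. A sketch, where `func`, `dev`, and `op_args` stand in for the built operator, the device, and the tutorial's tvm.nd argument buffers:

    import numpy as np

    # Median of repeated runs; min_repeat_ms amortizes launch overhead.
    evaluator = func.time_evaluator(func.entry_name, dev, min_repeat_ms=500)
    costs = evaluator(*op_args).results  # `op_args`: the tutorial's input/output buffers
    print("Execution time of this operator: %.3f ms" % (np.median(costs) * 1000))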
 <div class="admonition note">
diff --git a/docs/how_to/tune_with_autotvm/sg_execution_times.html b/docs/how_to/tune_with_autotvm/sg_execution_times.html
index c74325c726..ee8a7ad035 100644
--- a/docs/how_to/tune_with_autotvm/sg_execution_times.html
+++ b/docs/how_to/tune_with_autotvm/sg_execution_times.html
@@ -340,7 +340,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-tune-with-autotvm-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>00:30.708</strong> total execution time for <strong>how_to_tune_with_autotvm</strong> files:</p>
+<p><strong>00:33.178</strong> total execution time for <strong>how_to_tune_with_autotvm</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 84%" />
@@ -349,18 +349,18 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="tune_conv2d_cuda.html#sphx-glr-how-to-tune-with-autotvm-tune-conv2d-cuda-py"><span class="std std-ref">Tuning High Performance Convolution on NVIDIA GPUs</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_conv2d_cuda.py</span></code>)</p></td>
-<td><p>00:30.671</p></td>
+<td><p>00:33.143</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="tune_relay_x86.html#sphx-glr-how-to-tune-with-autotvm-tune-relay-x86-py"><span class="std std-ref">Auto-tuning a Convolutional Network for x86 CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_relay_x86.py</span></code>)</p></td>
-<td><p>00:00.021</p></td>
+<td><p>00:00.020</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="tune_relay_cuda.html#sphx-glr-how-to-tune-with-autotvm-tune-relay-cuda-py"><span class="std std-ref">Auto-tuning a Convolutional Network for NVIDIA GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_relay_cuda.py</span></code>)</p></td>
-<td><p>00:00.006</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="tune_relay_arm.html#sphx-glr-how-to-tune-with-autotvm-tune-relay-arm-py"><span class="std std-ref">Auto-tuning a Convolutional Network for ARM CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_relay_arm.py</span></code>)</p></td>
+<td><p>00:00.005</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="tune_relay_arm.html#sphx-glr-how-to-tune-with-autotvm-tune-relay-arm-py"><span class="std std-ref">Auto-tuning a Convolutional Network for ARM CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_relay_arm.py</span></code>)</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="tune_relay_cuda.html#sphx-glr-how-to-tune-with-autotvm-tune-relay-cuda-py"><span class="std std-ref">Auto-tuning a Convolutional Network for NVIDIA GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_relay_cuda.py</span></code>)</p></td>
 <td><p>00:00.005</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
diff --git a/docs/how_to/tune_with_autotvm/tune_conv2d_cuda.html b/docs/how_to/tune_with_autotvm/tune_conv2d_cuda.html
index c9013e37e8..5c083a8dd7 100644
--- a/docs/how_to/tune_with_autotvm/tune_conv2d_cuda.html
+++ b/docs/how_to/tune_with_autotvm/tune_conv2d_cuda.html
@@ -690,7 +690,7 @@ Traceback (most recent call last):
   File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 56, in tvm._ffi._cy3.core.tvm_callback
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 875, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
-tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 32, 2, 2]), (&#39;tile_y&#39;, [-1, 1, 1, 7]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 8, 2]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 1)],None,6048089
+tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 1, 16, 1]), (&#39;tile_y&#39;, [-1, 1, 7, 1]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 64, 4]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 0)],None,2218954
 No: 2   GFLOPS: 0.00/0.00       result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 592, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
@@ -813,7 +813,7 @@ Traceback (most recent call last):
   File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 56, in tvm._ffi._cy3.core.tvm_callback
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 875, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
-tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 32, 2, 2]), (&#39;tile_y&#39;, [-1, 1, 1, 7]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 8, 2]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 1)],None,7984969
+tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 2, 2, 8]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 256, 1]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 0)],None,4094784
 No: 3   GFLOPS: 0.00/0.00       result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 592, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
@@ -936,10 +936,8 @@ Traceback (most recent call last):
   File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 56, in tvm._ffi._cy3.core.tvm_callback
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 875, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
-tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 1, 4, 8]), (&#39;tile_y&#39;, [-1, 1, 7, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 7]), (&#39;tile_rc&#39;, [-1, 1, 512]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 1)],None,6001309
-No: 4   GFLOPS: 27.51/27.51     result: MeasureResult(costs=(0.008415714000000001,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.996152639389038, timestamp=1673074612.875652) [(&#39;tile_f&#39;, [-1, 8, 16, 4]), (&#39;tile_y&#39;, [-1, 1, 7, 1]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 1, 2]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 1)],None,8167849
-No: 5   GFLOPS: 1158.14/1158.14 result: MeasureResult(costs=(0.0001998908324022346,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.7755820751190186, timestamp=1673074614.818961)       [(&#39;tile_f&#39;, [-1, 1, 8, 1]), (&#39;tile_y&#39;, [-1, 1, 1, 7]), (&#39;tile_x&#39;, [-1, 1, 7, 1]), (&#39;tile_rc&#39;, [-1, 2, 4]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 1)],None,9365647
-No: 6   GFLOPS: 0.00/1158.14    result: Traceback (most recent call last):
+tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 2, 32, 2]), (&#39;tile_y&#39;, [-1, 1, 1, 7]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 16, 2]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 0)],None,2373231
+No: 4   GFLOPS: 0.00/0.00       result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 592, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 544, in _build_func_common
@@ -1061,8 +1059,8 @@ Traceback (most recent call last):
   File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 56, in tvm._ffi._cy3.core.tvm_callback
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 875, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
-tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 2, 1, 256]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 4, 64]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 0)],None,1134537
-No: 7   GFLOPS: 0.00/1158.14    result: Traceback (most recent call last):
+tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 2, 1, 64]), (&#39;tile_y&#39;, [-1, 1, 7, 1]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 1, 16]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 1)],None,6703601
+No: 5   GFLOPS: 0.00/0.00       result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 592, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 544, in _build_func_common
@@ -1184,9 +1182,8 @@ Traceback (most recent call last):
   File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 56, in tvm._ffi._cy3.core.tvm_callback
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 875, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
-tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 4, 2, 64]), (&#39;tile_y&#39;, [-1, 7, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 1, 256]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 1)],None,9476266
-No: 8   GFLOPS: 942.65/1158.14  result: MeasureResult(costs=(0.00024558444140625,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.3535137176513672, timestamp=1673074617.8424668)        [(&#39;tile_f&#39;, [-1, 1, 4, 1]), (&#39;tile_y&#39;, [-1, 7, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 7, 1]), (&#39;tile_rc&#39;, [-1, 2, 2]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 0)],None,621519
-No: 9   GFLOPS: 0.00/1158.14    result: Traceback (most recent call last):
+tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 128, 2, 2]), (&#39;tile_y&#39;, [-1, 1, 1, 7]), (&#39;tile_x&#39;, [-1, 1, 7, 1]), (&#39;tile_rc&#39;, [-1, 4, 4]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 0)],None,3754811
+No: 6   GFLOPS: 0.00/0.00       result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 592, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 544, in _build_func_common
@@ -1308,9 +1305,10 @@ Traceback (most recent call last):
   File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 56, in tvm._ffi._cy3.core.tvm_callback
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 875, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
-tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 1, 8, 16]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 32, 1]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 1)],None,6794659
-No: 10  GFLOPS: 6.28/1158.14    result: MeasureResult(costs=(0.036839053999999996,), error_no=MeasureErrorNo.NO_ERROR, all_cost=5.837235689163208, timestamp=1673074623.8672774)        [(&#39;tile_f&#39;, [-1, 8, 8, 2]), (&#39;tile_y&#39;, [-1, 1, 1, 7]), (&#39;tile_x&#39;, [-1, 1, 1, 7]), (&#39;tile_rc&#39;, [-1, 2, 16]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 0)],None,1868982
-No: 11  GFLOPS: 0.00/1158.14    result: Traceback (most recent call last):
+tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 4, 2, 1]), (&#39;tile_y&#39;, [-1, 1, 7, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 7]), (&#39;tile_rc&#39;, [-1, 8, 64]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 0)],None,2688852
+No: 7   GFLOPS: 81.13/81.13     result: MeasureResult(costs=(0.002853425575,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.593247652053833, timestamp=1673207944.3081067)      [(&#39;tile_f&#39;, [-1, 1, 64, 4]), (&#39;tile_y&#39;, [-1, 7, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 7]), (&#39;tile_rc&#39;, [-1, 4, 1]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 0)],None,3494833
+No: 8   GFLOPS: 565.24/565.24   result: MeasureResult(costs=(0.00040955998465473144,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.5637280941009521, timestamp=1673207945.3138244)     [(&#39;tile_f&#39;, [-1, 4, 2, 2]), (&#39;tile_y&#39;, [-1, 1, 7, 1]), (&#39;tile_x&#39;, [-1, 1, 7, 1]), (&#39;tile_rc&#39;, [-1, 8, 2]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 0)],None,5081626
+No: 9   GFLOPS: 0.00/565.24     result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 592, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 544, in _build_func_common
@@ -1432,8 +1430,8 @@ Traceback (most recent call last):
   File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 56, in tvm._ffi._cy3.core.tvm_callback
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 875, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
-tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 64, 2, 2]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 7]), (&#39;tile_rc&#39;, [-1, 32, 2]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 1)],None,8380310
-No: 12  GFLOPS: 0.00/1158.14    result: Traceback (most recent call last):
+tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 16, 8, 2]), (&#39;tile_y&#39;, [-1, 1, 1, 7]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 8, 32]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 1)],None,8284183
+No: 10  GFLOPS: 0.00/565.24     result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 592, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 544, in _build_func_common
@@ -1555,8 +1553,10 @@ Traceback (most recent call last):
   File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 56, in tvm._ffi._cy3.core.tvm_callback
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 875, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
-tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 2, 8, 2]), (&#39;tile_y&#39;, [-1, 1, 1, 7]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 128, 4]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 1)],None,8224340
-No: 13  GFLOPS: 0.00/1158.14    result: Traceback (most recent call last):
+tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 8, 32, 1]), (&#39;tile_y&#39;, [-1, 7, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 256, 2]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 0)],None,2580423
+No: 11  GFLOPS: 6.14/565.24     result: MeasureResult(costs=(0.03770536025,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.2243459224700928, timestamp=1673207948.3442025)      [(&#39;tile_f&#39;, [-1, 64, 8, 1]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 4, 1]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 0)],None,2331153
+No: 12  GFLOPS: 212.14/565.24   result: MeasureResult(costs=(0.0010912774489795919,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.7804789543151855, timestamp=1673207949.3513148)      [(&#39;tile_f&#39;, [-1, 2, 32, 1]), (&#39;tile_y&#39;, [-1, 7, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 7]), (&#39;tile_rc&#39;, [-1, 16, 4]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 1)],None,7053461
+No: 13  GFLOPS: 0.00/565.24     result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 592, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 544, in _build_func_common
@@ -1678,8 +1678,8 @@ Traceback (most recent call last):
   File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 56, in tvm._ffi._cy3.core.tvm_callback
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 875, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
-tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 4, 4, 32]), (&#39;tile_y&#39;, [-1, 1, 1, 7]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 128, 2]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 1)],None,9547096
-No: 14  GFLOPS: 0.00/1158.14    result: Traceback (most recent call last):
+tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 32, 2, 4]), (&#39;tile_y&#39;, [-1, 1, 1, 7]), (&#39;tile_x&#39;, [-1, 1, 1, 7]), (&#39;tile_rc&#39;, [-1, 2, 128]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 0)],None,3857813
+No: 14  GFLOPS: 0.00/565.24     result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 592, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 544, in _build_func_common
@@ -1801,254 +1801,161 @@ Traceback (most recent call last):
   File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 56, in tvm._ffi._cy3.core.tvm_callback
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 875, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
-tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 4, 16, 8]), (&#39;tile_y&#39;, [-1, 1, 7, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 8, 32]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 1)],None,7121560
-No: 15  GFLOPS: 0.00/1158.14    result: Traceback (most recent call last):
-  File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 592, in __call__
-    func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
-  File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 544, in _build_func_common
-    func = build(s, args, target_host=task.target_host, runtime=runtime)
-  File &quot;/workspace/python/tvm/driver/build_module.py&quot;, line 227, in build
-    input_mod = lower(inputs, args, name=name, binds=binds)
-  File &quot;/workspace/python/tvm/driver/build_module.py&quot;, line 134, in lower
-    return ffi.lower_schedule(inp, args, name, binds, simple_mode)
+tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 1, 1, 16]), (&#39;tile_y&#39;, [-1, 1, 1, 7]), (&#39;tile_x&#39;, [-1, 1, 7, 1]), (&#39;tile_rc&#39;, [-1, 32, 16]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 1)],None,7109464
+No: 15  GFLOPS: 0.00/565.24     result: Traceback (most recent call last):
+  File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 742, in __call__
+    yield remote, remote.load_module(os.path.split(build_result.filename)[1])
+  File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 706, in run_through_rpc
+    costs = time_f(*args).results
+  File &quot;/workspace/python/tvm/runtime/module.py&quot;, line 357, in evaluator
+    blob = feval(*args)
   File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 331, in tvm._ffi._cy3.core.PackedFuncBase.__call__
-  File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 276, in tvm._ffi._cy3.core.FuncCall
+  File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 262, in tvm._ffi._cy3.core.FuncCall
+  File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 251, in tvm._ffi._cy3.core.FuncCall3
   File &quot;tvm/_ffi/_cython/./base.pxi&quot;, line 181, in tvm._ffi._cy3.core.CHECK_CALL
 tvm._ffi.base.TVMError: Traceback (most recent call last):
-  24: TVMFuncCall
+  4: TVMFuncCall
         at ../src/runtime/c_runtime_api.cc:477
-  23: tvm::runtime::PackedFuncObj::CallPacked(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const
-        at ../include/tvm/runtime/packed_func.h:1217
-  22: Call
-        at ../include/tvm/runtime/packed_func.h:1213
-  21: operator()
-        at ../include/tvm/runtime/packed_func.h:1730
-  20: unpack_call&lt;tvm::IRModule, 5, tvm::&lt;lambda(tvm::te::Schedule, const tvm::runtime::Array&lt;tvm::runtime::ObjectRef&gt;&amp;, const tvm::runtime::String&amp;, const tvm::runtime::Map&lt;tvm::te::Tensor, tvm::tir::Buffer&gt;&amp;, bool)&gt; &gt;
-        at ../include/tvm/runtime/packed_func.h:1670
-  19: run&lt;&gt;
-        at ../include/tvm/runtime/packed_func.h:1630
-  18: run&lt;tvm::runtime::TVMMovableArgValueWithContext_&gt;
-        at ../include/tvm/runtime/packed_func.h:1630
-  17: run&lt;tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_&gt;
-        at ../include/tvm/runtime/packed_func.h:1630
-  16: run&lt;tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_&gt;
-        at ../include/tvm/runtime/packed_func.h:1630
-  15: run&lt;tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_&gt;
-        at ../include/tvm/runtime/packed_func.h:1630
-  14: run&lt;tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_&gt;
-        at ../include/tvm/runtime/packed_func.h:1645
-  13: operator()
-        at ../src/driver/driver_api.cc:395
-  12: tvm::LowerSchedule(tvm::te::Schedule, tvm::runtime::Array&lt;tvm::runtime::ObjectRef, void&gt; const&amp;, std::__cxx11::basic_string&lt;char, std::char_traits&lt;char&gt;, std::allocator&lt;char&gt; &gt; const&amp;, std::unordered_map&lt;tvm::te::Tensor, tvm::tir::Buffer, std::hash&lt;tvm::te::Tensor&gt;, std::equal_to&lt;tvm::te::Tensor&gt;, std::allocator&lt;std::pair&lt;tvm::te::Tensor const, tvm::tir::Buffer&gt; &gt; &gt; const&amp;, tvm::GlobalVarSupply, bool)
-        at ../src/driver/driver_api.cc:381
-  11: tvm::LowerWithPassList(tvm::IRModule, tvm::runtime::Array&lt;tvm::transform::Pass, void&gt;)
-        at ../src/driver/driver_api.cc:276
-  10: tvm::transform::Pass::operator()(tvm::IRModule) const
-        at ../src/ir/transform.cc:258
-  9: tvm::transform::Pass::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const
-        at ../src/ir/transform.cc:274
-  8: tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const
-        at ../src/ir/transform.cc:454
-  7: tvm::transform::Pass::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const
-        at ../src/ir/transform.cc:274
-  6: tvm::tir::transform::PrimFuncPassNode::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const
-        at ../src/tir/ir/transform.cc:100
-  5: tvm::runtime::TypedPackedFunc&lt;tvm::tir::PrimFunc (tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext)&gt;::operator()(tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext) const
-        at ../include/tvm/runtime/packed_func.h:1749
-  4: tvm::tir::PrimFunc tvm::runtime::detail::typed_packed_call_dispatcher&lt;tvm::tir::PrimFunc&gt;::run&lt;tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext&gt;(tvm::runtime::PackedFunc const&amp;, tvm::tir::PrimFunc&amp;&amp;, tvm::IRModule&amp;&amp;, tvm::transform::PassContext&amp;&amp;)
-        at ../include/tvm/runtime/packed_func.h:1693
-  3: tvm::runtime::TVMRetValue tvm::runtime::PackedFunc::operator()&lt;tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext&gt;(tvm::tir::PrimFunc&amp;&amp;, tvm::IRModule&amp;&amp;, tvm::transform::PassContext&amp;&amp;) const
-        at ../include/tvm/runtime/packed_func.h:1617
-  2: tvm::runtime::PackedFuncObj::CallPacked(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const
-        at ../include/tvm/runtime/packed_func.h:1217
-  1: Call
-        at ../include/tvm/runtime/packed_func.h:1213
-  0: operator()
-        at ../src/runtime/c_runtime_api.cc:534
-  File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 56, in tvm._ffi._cy3.core.tvm_callback
-  File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 875, in verify_pass
-    raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
-tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel
+  3: tvm::runtime::PackedFuncObj::CallPacked(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const
+        at ../include/tvm/runtime/packed_func.h:1217
+  2: tvm::runtime::RPCWrappedFunc::operator()(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const
+        at ../src/runtime/rpc/rpc_module.cc:129
+  1: tvm::runtime::RPCClientSession::CallFunc(void*, TVMValue const*, int const*, int, std::function&lt;void (tvm::runtime::TVMArgs)&gt; const&amp;)
+        at ../src/runtime/rpc/rpc_endpoint.cc:1012
+  0: tvm::runtime::RPCEndpoint::CallFunc(void*, TVMValue const*, int const*, int, std::function&lt;void (tvm::runtime::TVMArgs)&gt;)
+        at ../src/runtime/rpc/rpc_endpoint.cc:804
+  File &quot;../src/runtime/rpc/rpc_endpoint.cc&quot;, line 804
+TVMError:
+---------------------------------------------------------------
+An error occurred during the execution of TVM.
+For more information, please see: https://tvm.apache.org/docs/errors.html
+---------------------------------------------------------------
+  Check failed: (code == RPCCode::kReturn) is false: code=kShutdown
+
+During handling of the above exception, another exception occurred:
 
 Traceback (most recent call last):
-  24: TVMFuncCall
-        at ../src/runtime/c_runtime_api.cc:477
-  23: tvm::runtime::PackedFuncObj::CallPacked(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const
-        at ../include/tvm/runtime/packed_func.h:1217
-  22: Call
-        at ../include/tvm/runtime/packed_func.h:1213
-  21: operator()
-        at ../include/tvm/runtime/packed_func.h:1730
-  20: unpack_call&lt;tvm::IRModule, 5, tvm::&lt;lambda(tvm::te::Schedule, const tvm::runtime::Array&lt;tvm::runtime::ObjectRef&gt;&amp;, const tvm::runtime::String&amp;, const tvm::runtime::Map&lt;tvm::te::Tensor, tvm::tir::Buffer&gt;&amp;, bool)&gt; &gt;
-        at ../include/tvm/runtime/packed_func.h:1670
-  19: run&lt;&gt;
-        at ../include/tvm/runtime/packed_func.h:1630
-  18: run&lt;tvm::runtime::TVMMovableArgValueWithContext_&gt;
-        at ../include/tvm/runtime/packed_func.h:1630
-  17: run&lt;tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_&gt;
-        at ../include/tvm/runtime/packed_func.h:1630
-  16: run&lt;tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_&gt;
-        at ../include/tvm/runtime/packed_func.h:1630
-  15: run&lt;tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_&gt;
-        at ../include/tvm/runtime/packed_func.h:1630
-  14: run&lt;tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_&gt;
-        at ../include/tvm/runtime/packed_func.h:1645
-  13: operator()
-        at ../src/driver/driver_api.cc:395
-  12: tvm::LowerSchedule(tvm::te::Schedule, tvm::runtime::Array&lt;tvm::runtime::ObjectRef, void&gt; const&amp;, std::__cxx11::basic_string&lt;char, std::char_traits&lt;char&gt;, std::allocator&lt;char&gt; &gt; const&amp;, std::unordered_map&lt;tvm::te::Tensor, tvm::tir::Buffer, std::hash&lt;tvm::te::Tensor&gt;, std::equal_to&lt;tvm::te::Tensor&gt;, std::allocator&lt;std::pair&lt;tvm::te::Tensor const, tvm::tir::Buffer&gt; &gt; &gt; const&amp;, tvm::GlobalVarSupply, bool)
-        at ../src/driver/driver_api.cc:381
-  11: tvm::LowerWithPassList(tvm::IRModule, tvm::runtime::Array&lt;tvm::transform::Pass, void&gt;)
-        at ../src/driver/driver_api.cc:276
-  10: tvm::transform::Pass::operator()(tvm::IRModule) const
-        at ../src/ir/transform.cc:258
-  9: tvm::transform::Pass::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const
-        at ../src/ir/transform.cc:274
-  8: tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const
-        at ../src/ir/transform.cc:454
-  7: tvm::transform::Pass::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const
-        at ../src/ir/transform.cc:274
-  6: tvm::tir::transform::PrimFuncPassNode::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const
-        at ../src/tir/ir/transform.cc:100
-  5: tvm::runtime::TypedPackedFunc&lt;tvm::tir::PrimFunc (tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext)&gt;::operator()(tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext) const
-        at ../include/tvm/runtime/packed_func.h:1749
-  4: tvm::tir::PrimFunc tvm::runtime::detail::typed_packed_call_dispatcher&lt;tvm::tir::PrimFunc&gt;::run&lt;tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext&gt;(tvm::runtime::PackedFunc const&amp;, tvm::tir::PrimFunc&amp;&amp;, tvm::IRModule&amp;&amp;, tvm::transform::PassContext&amp;&amp;)
-        at ../include/tvm/runtime/packed_func.h:1693
-  3: tvm::runtime::TVMRetValue tvm::runtime::PackedFunc::operator()&lt;tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext&gt;(tvm::tir::PrimFunc&amp;&amp;, tvm::IRModule&amp;&amp;, tvm::transform::PassContext&amp;&amp;) const
-        at ../include/tvm/runtime/packed_func.h:1617
-  2: tvm::runtime::PackedFuncObj::CallPacked(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const
-        at ../include/tvm/runtime/packed_func.h:1217
-  1: Call
-        at ../include/tvm/runtime/packed_func.h:1213
-  0: operator()
-        at ../src/runtime/c_runtime_api.cc:534
-  File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 56, in tvm._ffi._cy3.core.tvm_callback
-  File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 875, in verify_pass
-    raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
-tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 2, 64, 4]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 7]), (&#39;tile_rc&#39;, [-1, 4, 2]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 0)],None,2174614
-No: 16  GFLOPS: 0.00/1158.14    result: Traceback (most recent call last):
-  File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 592, in __call__
-    func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
-  File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 544, in _build_func_common
-    func = build(s, args, target_host=task.target_host, runtime=runtime)
-  File &quot;/workspace/python/tvm/driver/build_module.py&quot;, line 227, in build
-    input_mod = lower(inputs, args, name=name, binds=binds)
-  File &quot;/workspace/python/tvm/driver/build_module.py&quot;, line 134, in lower
-    return ffi.lower_schedule(inp, args, name, binds, simple_mode)
-  File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 331, in tvm._ffi._cy3.core.PackedFuncBase.__call__
-  File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 276, in tvm._ffi._cy3.core.FuncCall
-  File &quot;tvm/_ffi/_cython/./base.pxi&quot;, line 181, in tvm._ffi._cy3.core.CHECK_CALL
+  File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 706, in run_through_rpc
+    costs = time_f(*args).results
+  File &quot;/usr/lib/python3.7/contextlib.py&quot;, line 130, in __exit__
+    self.gen.throw(type, value, traceback)
+  File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 746, in __call__
+    remote.remove(build_result.filename)
+  File &quot;/workspace/python/tvm/rpc/client.py&quot;, line 144, in remove
+    self._remote_funcs[&quot;remove&quot;] = self.get_function(&quot;tvm.rpc.server.remove&quot;)
+  File &quot;/workspace/python/tvm/rpc/client.py&quot;, line 72, in get_function
+    return self._sess.get_function(name)
+  File &quot;/workspace/python/tvm/runtime/module.py&quot;, line 171, in get_function
+    self.handle, c_str(name), ctypes.c_int(query_imports), ctypes.byref(ret_handle)
+  File &quot;/workspace/python/tvm/_ffi/base.py&quot;, line 348, in check_call
+    raise get_last_ffi_error()
 tvm._ffi.base.TVMError: Traceback (most recent call last):
-  24: TVMFuncCall
-        at ../src/runtime/c_runtime_api.cc:477
-  23: tvm::runtime::PackedFuncObj::CallPacked(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const
-        at ../include/tvm/runtime/packed_func.h:1217
-  22: Call
-        at ../include/tvm/runtime/packed_func.h:1213
-  21: operator()
-        at ../include/tvm/runtime/packed_func.h:1730
-  20: unpack_call&lt;tvm::IRModule, 5, tvm::&lt;lambda(tvm::te::Schedule, const tvm::runtime::Array&lt;tvm::runtime::ObjectRef&gt;&amp;, const tvm::runtime::String&amp;, const tvm::runtime::Map&lt;tvm::te::Tensor, tvm::tir::Buffer&gt;&amp;, bool)&gt; &gt;
-        at ../include/tvm/runtime/packed_func.h:1670
-  19: run&lt;&gt;
-        at ../include/tvm/runtime/packed_func.h:1630
-  18: run&lt;tvm::runtime::TVMMovableArgValueWithContext_&gt;
-        at ../include/tvm/runtime/packed_func.h:1630
-  17: run&lt;tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_&gt;
-        at ../include/tvm/runtime/packed_func.h:1630
-  16: run&lt;tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_&gt;
-        at ../include/tvm/runtime/packed_func.h:1630
-  15: run&lt;tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_&gt;
-        at ../include/tvm/runtime/packed_func.h:1630
-  14: run&lt;tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_&gt;
-        at ../include/tvm/runtime/packed_func.h:1645
-  13: operator()
-        at ../src/driver/driver_api.cc:395
-  12: tvm::LowerSchedule(tvm::te::Schedule, tvm::runtime::Array&lt;tvm::runtime::ObjectRef, void&gt; const&amp;, std::__cxx11::basic_string&lt;char, std::char_traits&lt;char&gt;, std::allocator&lt;char&gt; &gt; const&amp;, std::unordered_map&lt;tvm::te::Tensor, tvm::tir::Buffer, std::hash&lt;tvm::te::Tensor&gt;, std::equal_to&lt;tvm::te::Tensor&gt;, std::allocator&lt;std::pair&lt;tvm::te::Tensor const, tvm::tir::Buffer&gt; &gt; &gt; const&amp;, tvm::GlobalVarSupply, bool)
-        at ../src/driver/driver_api.cc:381
-  11: tvm::LowerWithPassList(tvm::IRModule, tvm::runtime::Array&lt;tvm::transform::Pass, void&gt;)
-        at ../src/driver/driver_api.cc:276
-  10: tvm::transform::Pass::operator()(tvm::IRModule) const
-        at ../src/ir/transform.cc:258
-  9: tvm::transform::Pass::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const
-        at ../src/ir/transform.cc:274
-  8: tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const
-        at ../src/ir/transform.cc:454
-  7: tvm::transform::Pass::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const
-        at ../src/ir/transform.cc:274
-  6: tvm::tir::transform::PrimFuncPassNode::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const
-        at ../src/tir/ir/transform.cc:100
-  5: tvm::runtime::TypedPackedFunc&lt;tvm::tir::PrimFunc (tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext)&gt;::operator()(tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext) const
-        at ../include/tvm/runtime/packed_func.h:1749
-  4: tvm::tir::PrimFunc tvm::runtime::detail::typed_packed_call_dispatcher&lt;tvm::tir::PrimFunc&gt;::run&lt;tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext&gt;(tvm::runtime::PackedFunc const&amp;, tvm::tir::PrimFunc&amp;&amp;, tvm::IRModule&amp;&amp;, tvm::transform::PassContext&amp;&amp;)
-        at ../include/tvm/runtime/packed_func.h:1693
-  3: tvm::runtime::TVMRetValue tvm::runtime::PackedFunc::operator()&lt;tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext&gt;(tvm::tir::PrimFunc&amp;&amp;, tvm::IRModule&amp;&amp;, tvm::transform::PassContext&amp;&amp;) const
+  52: 0xffffffffffffffff
+  51: _start
+  50: __libc_start_main
+  49: _Py_UnixMain
+  48: 0x0000000000650da0
+  47: 0x0000000000650afa
+  46: _PyFunction_FastCallDict
+  45: _PyEval_EvalCodeWithName
+  44: _PyEval_EvalFrameDefault
+  43: _PyFunction_FastCallKeywords
+  42: _PyEval_EvalCodeWithName
+  41: _PyEval_EvalFrameDefault
+  40: _PyMethodDef_RawFastCallKeywords
+  39: 0x0000000000546369
+  38: _PyEval_EvalCodeWithName
+  37: _PyEval_EvalFrameDefault
+  36: _PyFunction_FastCallKeywords
+  35: _PyEval_EvalCodeWithName
+  34: _PyEval_EvalFrameDefault
+  33: _PyFunction_FastCallDict
+  32: _PyEval_EvalCodeWithName
+  31: _PyEval_EvalFrameDefault
+  30: _PyObject_FastCallDict
+  29: 0x00000000004c06e1
+  28: _PyFunction_FastCallDict
+  27: _PyEval_EvalFrameDefault
+  26: _PyMethodDescr_FastCallKeywords
+  25: 0x00000000005dcb58
+  24: 0x00000000005dc83f
+  23: 0x00000000004ba127
+  22: _PyEval_EvalFrameDefault
+  21: _PyFunction_FastCallKeywords
+  20: _PyEval_EvalFrameDefault
+  19: _PyFunction_FastCallKeywords
+  18: _PyEval_EvalFrameDefault
+  17: _PyFunction_FastCallKeywords
+  16: _PyEval_EvalCodeWithName
+  15: _PyEval_EvalFrameDefault
+  14: 0x0000000000537c30
+  13: _PyObject_FastCallKeywords
+  12: 0x00007f9e4d319fa2
+  11: _ctypes_callproc
+  10: ffi_call
+  9: ffi_call_unix64
+  8: TVMModGetFunction
+        at ../src/runtime/c_runtime_api.cc:408
+  7: tvm::runtime::ModuleNode::GetFunction(std::__cxx11::basic_string&lt;char, std::char_traits&lt;char&gt;, std::allocator&lt;char&gt; &gt; const&amp;, bool)
+        at ../src/runtime/module.cc:66
+  6: tvm::runtime::RPCModuleNode::GetFunction(std::__cxx11::basic_string&lt;char, std::char_traits&lt;char&gt;, std::allocator&lt;char&gt; &gt; const&amp;, tvm::runtime::ObjectPtr&lt;tvm::runtime::Object&gt; const&amp;)
+        at ../src/runtime/rpc/rpc_module.cc:185
+  5: tvm::runtime::RPCClientSession::GetFunction(std::__cxx11::basic_string&lt;char, std::char_traits&lt;char&gt;, std::allocator&lt;char&gt; &gt; const&amp;)
+        at ../src/runtime/rpc/rpc_endpoint.cc:1007
+  4: tvm::runtime::TVMRetValue tvm::runtime::RPCEndpoint::SysCallRemote&lt;std::__cxx11::basic_string&lt;char, std::char_traits&lt;char&gt;, std::allocator&lt;char&gt; &gt; const&amp;&gt;(tvm::runtime::RPCCode, std::__cxx11::basic_string&lt;char, std::char_traits&lt;char&gt;, std::allocator&lt;char&gt; &gt; const&amp;)
+        at ../src/runtime/rpc/rpc_endpoint.h:223
+  3: tvm::runtime::TVMRetValue tvm::runtime::PackedFunc::operator()&lt;int, std::__cxx11::basic_string&lt;char, std::char_traits&lt;char&gt;, std::allocator&lt;char&gt; &gt; const&amp;&gt;(int&amp;&amp;, std::__cxx11::basic_string&lt;char, std::char_traits&lt;char&gt;, std::allocator&lt;char&gt; &gt; const&amp;) const
         at ../include/tvm/runtime/packed_func.h:1617
   2: tvm::runtime::PackedFuncObj::CallPacked(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const
         at ../include/tvm/runtime/packed_func.h:1217
   1: Call
         at ../include/tvm/runtime/packed_func.h:1213
   0: operator()
-        at ../src/runtime/c_runtime_api.cc:534
-  File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 56, in tvm._ffi._cy3.core.tvm_callback
-  File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 875, in verify_pass
-    raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
-tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel
+        at ../src/runtime/rpc/rpc_endpoint.cc:684
+  File &quot;../src/runtime/rpc/rpc_endpoint.cc&quot;, line 684
+TVMError:
+---------------------------------------------------------------
+An error occurred during the execution of TVM.
+For more information, please see: https://tvm.apache.org/docs/errors.html
+---------------------------------------------------------------
+  Check failed: (code == RPCCode::kReturn) is false: code=1
 
 Traceback (most recent call last):
-  24: TVMFuncCall
-        at ../src/runtime/c_runtime_api.cc:477
-  23: tvm::runtime::PackedFuncObj::CallPacked(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const
-        at ../include/tvm/runtime/packed_func.h:1217
-  22: Call
-        at ../include/tvm/runtime/packed_func.h:1213
-  21: operator()
-        at ../include/tvm/runtime/packed_func.h:1730
-  20: unpack_call&lt;tvm::IRModule, 5, tvm::&lt;lambda(tvm::te::Schedule, const tvm::runtime::Array&lt;tvm::runtime::ObjectRef&gt;&amp;, const tvm::runtime::String&amp;, const tvm::runtime::Map&lt;tvm::te::Tensor, tvm::tir::Buffer&gt;&amp;, bool)&gt; &gt;
-        at ../include/tvm/runtime/packed_func.h:1670
-  19: run&lt;&gt;
-        at ../include/tvm/runtime/packed_func.h:1630
-  18: run&lt;tvm::runtime::TVMMovableArgValueWithContext_&gt;
-        at ../include/tvm/runtime/packed_func.h:1630
-  17: run&lt;tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_&gt;
-        at ../include/tvm/runtime/packed_func.h:1630
-  16: run&lt;tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_&gt;
-        at ../include/tvm/runtime/packed_func.h:1630
-  15: run&lt;tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_&gt;
-        at ../include/tvm/runtime/packed_func.h:1630
-  14: run&lt;tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_&gt;
-        at ../include/tvm/runtime/packed_func.h:1645
-  13: operator()
-        at ../src/driver/driver_api.cc:395
-  12: tvm::LowerSchedule(tvm::te::Schedule, tvm::runtime::Array&lt;tvm::runtime::ObjectRef, void&gt; const&amp;, std::__cxx11::basic_string&lt;char, std::char_traits&lt;char&gt;, std::allocator&lt;char&gt; &gt; const&amp;, std::unordered_map&lt;tvm::te::Tensor, tvm::tir::Buffer, std::hash&lt;tvm::te::Tensor&gt;, std::equal_to&lt;tvm::te::Tensor&gt;, std::allocator&lt;std::pair&lt;tvm::te::Tensor const, tvm::tir::Buffer&gt; &gt; &gt; const&amp;, tvm::GlobalVarSupply, bool)
-        at ../src/driver/driver_api.cc:381
-  11: tvm::LowerWithPassList(tvm::IRModule, tvm::runtime::Array&lt;tvm::transform::Pass, void&gt;)
-        at ../src/driver/driver_api.cc:276
-  10: tvm::transform::Pass::operator()(tvm::IRModule) const
-        at ../src/ir/transform.cc:258
-  9: tvm::transform::Pass::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const
-        at ../src/ir/transform.cc:274
-  8: tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const
-        at ../src/ir/transform.cc:454
-  7: tvm::transform::Pass::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const
-        at ../src/ir/transform.cc:274
-  6: tvm::tir::transform::PrimFuncPassNode::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const
-        at ../src/tir/ir/transform.cc:100
-  5: tvm::runtime::TypedPackedFunc&lt;tvm::tir::PrimFunc (tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext)&gt;::operator()(tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext) const
-        at ../include/tvm/runtime/packed_func.h:1749
-  4: tvm::tir::PrimFunc tvm::runtime::detail::typed_packed_call_dispatcher&lt;tvm::tir::PrimFunc&gt;::run&lt;tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext&gt;(tvm::runtime::PackedFunc const&amp;, tvm::tir::PrimFunc&amp;&amp;, tvm::IRModule&amp;&amp;, tvm::transform::PassContext&amp;&amp;)
-        at ../include/tvm/runtime/packed_func.h:1693
-  3: tvm::runtime::TVMRetValue tvm::runtime::PackedFunc::operator()&lt;tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext&gt;(tvm::tir::PrimFunc&amp;&amp;, tvm::IRModule&amp;&amp;, tvm::transform::PassContext&amp;&amp;) const
-        at ../include/tvm/runtime/packed_func.h:1617
-  2: tvm::runtime::PackedFuncObj::CallPacked(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const
-        at ../include/tvm/runtime/packed_func.h:1217
-  1: Call
-        at ../include/tvm/runtime/packed_func.h:1213
-  0: operator()
-        at ../src/runtime/c_runtime_api.cc:534
-  File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 56, in tvm._ffi._cy3.core.tvm_callback
-  File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 875, in verify_pass
-    raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
-tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 1, 4, 32]), (&#39;tile_y&#39;, [-1, 7, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 7]), (&#39;tile_rc&#39;, [-1, 2, 16]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 1)],None,6321454
-No: 17  GFLOPS: 0.00/1158.14    result: Traceback (most recent call last):
+  52: 0xffffffffffffffff
+  51: _start
+  50: __libc_start_main
+  49: _Py_UnixMain
+  48: 0x0000000000650da0
+  47: 0x0000000000650afa
+  46: _PyFunction_FastCallDict
+  45: _PyEval_EvalCodeWithName
+  44: _PyEval_EvalFrameDefault
+  43: _PyFunction_FastCallKeywords
+  42: _PyEval_EvalCodeWithName
+  41: _PyEval_EvalFrameDefault
+  40: _PyMethodDef_RawFastCallKeywords
+  39: 0x0000000000546369
+  38: _PyEval_EvalCodeWithName
+  37: _PyEval_EvalFrameDefault
+  36: _PyFunction_FastCallKeywords
+  35: _PyEval_EvalCodeWithName
+  34: _PyEval_EvalFrameDefault
+  33: _PyFunction_FastCallDict
+  32: _PyEval_EvalCodeWithName
+  31: _PyEval_EvalFrameDefault
+  30: _PyObject_FastCallDict
+  29: 0x00000000004c06e1
+  28: _PyFunction_FastCallDict
+  27: _PyEval_EvalFrameDefault
+  26: _PyMethodDescr_FastCallKeywords
+  25: 0x00000000005dcb58
+  24: 0x00000000005dc83f
+  23: 0x00000000004ba127
+  22: _PyEval_EvalFrameDefault
+  21: _PyFunction_FastCallKeywords
+  20: _PyEval_EvalFrameDefault
+  19: _PyFunction_FastCall      [(&#39;tile_f&#39;, [-1, 32, 1, 16]), (&#39;tile_y&#39;, [-1, 7, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 1, 2]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 1)],None,5456389
+No: 16  GFLOPS: 0.00/565.24     result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 592, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 544, in _build_func_common
@@ -2170,8 +2077,8 @@ Traceback (most recent call last):
   File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 56, in tvm._ffi._cy3.core.tvm_callback
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 875, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
-tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 1, 4, 64]), (&#39;tile_y&#39;, [-1, 1, 7, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 2, 32]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 0)],None,2080967
-No: 18  GFLOPS: 0.00/1158.14    result: Traceback (most recent call last):
+tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 2, 8, 4]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 1, 32]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 1)],None,6723322
+No: 17  GFLOPS: 0.00/565.24     result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 592, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 544, in _build_func_common
@@ -2293,8 +2200,8 @@ Traceback (most recent call last):
   File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 56, in tvm._ffi._cy3.core.tvm_callback
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 875, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
-tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 4, 128, 1]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 2, 128]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 0)],None,176931
-No: 19  GFLOPS: 0.00/1158.14    result: Traceback (most recent call last):
+tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 1, 1, 4]), (&#39;tile_y&#39;, [-1, 1, 7, 1]), (&#39;tile_x&#39;, [-1, 1, 7, 1]), (&#39;tile_rc&#39;, [-1, 4, 32]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 0)],None,3441340
+No: 18  GFLOPS: 0.00/565.24     result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 592, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 544, in _build_func_common
@@ -2416,8 +2323,8 @@ Traceback (most recent call last):
   File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 56, in tvm._ffi._cy3.core.tvm_callback
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 875, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
-tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 32, 1, 16]), (&#39;tile_y&#39;, [-1, 1, 1, 7]), (&#39;tile_x&#39;, [-1, 1, 7, 1]), (&#39;tile_rc&#39;, [-1, 2, 16]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 1)],None,9612189
-No: 20  GFLOPS: 0.00/1158.14    result: Traceback (most recent call last):
+tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 8, 64, 1]), (&#39;tile_y&#39;, [-1, 1, 1, 7]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 16, 1]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 0)],None,982788
+No: 19  GFLOPS: 0.00/565.24     result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 592, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 544, in _build_func_common
@@ -2539,7 +2446,8 @@ Traceback (most recent call last):
   File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 56, in tvm._ffi._cy3.core.tvm_callback
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 875, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
-tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 8, 1, 16]), (&#39;tile_y&#39;, [-1, 7, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 7, 1]), (&#39;tile_rc&#39;, [-1, 8, 32]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 0)],None,3831907
+tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 1, 1, 32]), (&#39;tile_y&#39;, [-1, 1, 7, 1]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 128, 1]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 0)],None,2930145
+No: 20  GFLOPS: 5.25/565.24     result: MeasureResult(costs=(0.044077056749999996,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.035950183868408, timestamp=1673207957.5529802)        [(&#39;tile_f&#39;, [-1, 1, 2, 32]), (&#39;tile_y&#39;, [-1, 1, 1, 7]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 2, 8]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 1)],None,5326610
 </pre></div>
 </div>
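<p>Each "No: N  GFLOPS: ..." line above is one measurement trial: configurations that cannot be instantiated as a valid GPU kernel are skipped with an InstantiationError and score 0.00, some trials fail at runtime over RPC (the kShutdown traceback above), and the valid ones report their measured throughput next to the best seen so far. As a hedged sketch only, the kind of autotvm loop that emits records of this shape looks roughly like the following; the task name, tensor shapes, and log-file name are illustrative assumptions, not values taken from this output.</p>

    import tvm
    from tvm import autotvm

    # Assumed conv2d workload (illustrative only): N=1, H=W=7, CI=CO=512,
    # 3x3 kernel, stride 1, padding 1, targeting CUDA.
    task = autotvm.task.create(
        "conv2d_nchw.cuda",
        args=(("TENSOR", (1, 512, 7, 7), "float32"),
              ("TENSOR", (512, 512, 3, 3), "float32"),
              (1, 1), (1, 1), (1, 1), "float32"),
        target="cuda",
    )

    measure_option = autotvm.measure_option(
        builder=autotvm.LocalBuilder(),
        runner=autotvm.LocalRunner(repeat=3, min_repeat_ms=100, timeout=4),
    )

    # Every trial becomes one "No: N  GFLOPS: ..." record in the log file.
    tuner = autotvm.tuner.XGBTuner(task)
    tuner.tune(
        n_trial=20,
        measure_option=measure_option,
        callbacks=[autotvm.callback.log_to_file("conv2d_log.json")],
    )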
 <p>Finally we can inspect the best config from log file, check correctness,
@@ -2578,9 +2486,9 @@ and measure running time.</p>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Finish loading 20 records
 
 Best config:
-[(&#39;tile_f&#39;, [-1, 1, 8, 1]), (&#39;tile_y&#39;, [-1, 1, 1, 7]), (&#39;tile_x&#39;, [-1, 1, 7, 1]), (&#39;tile_rc&#39;, [-1, 2, 4]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 1)],None,9365647
+[(&#39;tile_f&#39;, [-1, 4, 2, 2]), (&#39;tile_y&#39;, [-1, 1, 7, 1]), (&#39;tile_x&#39;, [-1, 1, 7, 1]), (&#39;tile_rc&#39;, [-1, 8, 2]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 0)],None,5081626
 Finish loading 20 records
-Time cost of this operator: 0.000514
+Time cost of this operator: 0.000721
 </pre></div>
 </div>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-tune-with-autotvm-tune-conv2d-cuda-py">
diff --git a/docs/how_to/work_with_microtvm/micro_autotune.html b/docs/how_to/work_with_microtvm/micro_autotune.html
index aac98b8ecb..a5a4b3e92c 100644
--- a/docs/how_to/work_with_microtvm/micro_autotune.html
+++ b/docs/how_to/work_with_microtvm/micro_autotune.html
@@ -663,10 +663,10 @@ the tuned operator.</p>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>########## Build without Autotuning ##########
 Node Name                                     Ops                                           Time(us)  Time(%)  Shape              Inputs  Outputs  Measurements(us)
 ---------                                     ---                                           --------  -------  -----              ------  -------  ----------------
-tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  309.7     98.724   (1, 2, 10, 10, 3)  2       1        [309.7]
-tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       3.027     0.965    (1, 6, 10, 10)     1       1        [3.027]
-tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.976     0.311    (1, 1, 10, 10, 3)  1       1        [0.976]
-Total_time                                    -                                             313.703   -        -                  -       -        -
+tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  310.6     98.715   (1, 2, 10, 10, 3)  2       1        [310.6]
+tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       3.076     0.978    (1, 6, 10, 10)     1       1        [3.076]
+tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.969     0.308    (1, 1, 10, 10, 3)  1       1        [0.969]
+Total_time                                    -                                             314.644   -        -                  -       -        -
 </pre></div>
 </div>
 </div>
@@ -718,10 +718,10 @@ Total_time                                    -
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>########## Build with Autotuning ##########
 Node Name                                     Ops                                           Time(us)  Time(%)  Shape              Inputs  Outputs  Measurements(us)
 ---------                                     ---                                           --------  -------  -----              ------  -------  ----------------
-tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  103.0     97.385   (1, 6, 10, 10, 1)  2       1        [103.0]
-tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       1.807     1.708    (1, 6, 10, 10)     1       1        [1.807]
-tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.959     0.906    (1, 1, 10, 10, 3)  1       1        [0.959]
-Total_time                                    -                                             105.765   -        -                  -       -        -
+tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  105.1     97.451   (1, 6, 10, 10, 1)  2       1        [105.1]
+tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       1.787     1.657    (1, 6, 10, 10)     1       1        [1.787]
+tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.963     0.893    (1, 1, 10, 10, 3)  1       1        [0.963]
+Total_time                                    -                                             107.849   -        -                  -       -        -
 </pre></div>
 </div>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-work-with-microtvm-micro-autotune-py">
diff --git a/docs/how_to/work_with_microtvm/micro_pytorch.html b/docs/how_to/work_with_microtvm/micro_pytorch.html
index 84950bd1d6..c1355196c5 100644
--- a/docs/how_to/work_with_microtvm/micro_pytorch.html
+++ b/docs/how_to/work_with_microtvm/micro_pytorch.html
@@ -453,7 +453,7 @@ download a cat image and preprocess it to use as the model input.</p>
 Downloading: &quot;https://download.pytorch.org/models/quantized/mobilenet_v2_qnnpack_37f702c5.pth&quot; to /workspace/.cache/torch/hub/checkpoints/mobilenet_v2_qnnpack_37f702c5.pth
 
   0%|          | 0.00/3.42M [00:00&lt;?, ?B/s]
-100%|##########| 3.42M/3.42M [00:00&lt;00:00, 59.4MB/s]
+100%|##########| 3.42M/3.42M [00:00&lt;00:00, 114MB/s]
 /workspace/python/tvm/relay/frontend/pytorch_utils.py:47: DeprecationWarning: distutils Version classes are deprecated. Use packaging.version instead.
   return LooseVersion(torch_ver) &gt; ver
 /venv/apache-tvm-py3.7/lib/python3.7/site-packages/setuptools/_distutils/version.py:346: DeprecationWarning: distutils Version classes are deprecated. Use packaging.version instead.
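<p>The hunk above covers the download step of the PyTorch micro tutorial. A hedged sketch of the "download a cat image and preprocess it to use as the model input" part mentioned in the surrounding text; the URL, 224x224 input size, and scaling are common-case assumptions rather than values read from this page.</p>

    import numpy as np
    from PIL import Image
    from tvm.contrib.download import download_testdata

    img_url = "https://s3.amazonaws.com/model-server/inputs/kitten.jpg"  # assumed example URL
    img_path = download_testdata(img_url, "kitten.jpg", module="data")

    img = Image.open(img_path).resize((224, 224))          # assumed model input size
    img = np.asarray(img).astype("float32") / 255.0        # scale to [0, 1]
    img = np.transpose(img, (2, 0, 1))[np.newaxis, :]      # HWC -> NCHW with batch dim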
@@ -577,7 +577,7 @@ via the host <cite>main.cc`</cite> or if a Zephyr emulated board is selected as
 Torch top-1 id: 282, class name: tiger cat
 </pre></div>
 </div>
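<p>The result above ("Torch top-1 id: 282, class name: tiger cat") comes from comparing the TVM output against the reference PyTorch model. A hedged sketch of that comparison; `tvm_output`, `torch_model`, `img`, and `class_names` are assumed to exist from earlier steps of the tutorial.</p>

    import numpy as np
    import torch

    tvm_top1 = int(np.argmax(tvm_output))
    with torch.no_grad():
        torch_top1 = int(torch.argmax(torch_model(torch.from_numpy(img))))

    print("TVM   top-1 id:", tvm_top1, "class name:", class_names[tvm_top1])
    print("Torch top-1 id:", torch_top1, "class name:", class_names[torch_top1])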
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  7.728 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  4.488 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-work-with-microtvm-micro-pytorch-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/12b9ecc04c41abaa12022061771821d1/micro_pytorch.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">micro_pytorch.py</span></code></a></p>
diff --git a/docs/how_to/work_with_microtvm/micro_train.html b/docs/how_to/work_with_microtvm/micro_train.html
index 2cf1bda6aa..de730ff120 100644
--- a/docs/how_to/work_with_microtvm/micro_train.html
+++ b/docs/how_to/work_with_microtvm/micro_train.html
@@ -523,7 +523,7 @@ take about <strong>2 minutes</strong> to download the Stanford Cars, while COCO
 <a href="https://docs.python.org/3/library/shutil.html#shutil.move" title="shutil.move" class="sphx-glr-backref-module-shutil sphx-glr-backref-type-py-function"><span class="n">shutil</span><span class="o">.</span><span class="n">move</span></a><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;</span><span class="si">{</span><a href="https://docs.python.org/3/library/stdtypes.html#str" title="builtins.str" class="sphx-glr-backref-module-builtins sphx-glr-backref-typ [...]
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>&#39;/tmp/tmpehl9pho3/images/random&#39;
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>&#39;/tmp/tmpd4y05yt_/images/random&#39;
 </pre></div>
 </div>
 </div>
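<p>The path printed above is the return value of the `shutil.move` call shown in the hunk. A hedged sketch of that dataset-layout step: after the archives are downloaded and extracted, images are sorted into `target` and `random` class folders so Keras can read them. The folder and archive names here are illustrative assumptions.</p>

    import os
    import shutil

    folder = "/tmp/images_example"                 # assumed working directory
    os.makedirs(f"{folder}/images", exist_ok=True)

    # Move each extracted image set into its class folder; shutil.move returns the
    # destination path, which is what the output line above shows.
    shutil.move(f"{folder}/car_ims", f"{folder}/images/target")        # Stanford Cars images
    shutil.move(f"{folder}/random_images", f"{folder}/images/random")  # COCO-derived images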
@@ -583,8 +583,8 @@ objects to other stuff? We can display some examples from our datasets using <co
     <span class="n">plt</span><span class="o">.</span><span class="n">axis</span><span class="p">(</span><span class="s2">&quot;off&quot;</span><span class="p">)</span>
 </pre></div>
 </div>
-<img src="../../_images/sphx_glr_micro_train_001.png" srcset="../../_images/sphx_glr_micro_train_001.png" alt="[0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0]" class = "sphx-glr-single-img"/><div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>/tmp/tmpehl9pho3/images/target contains 8144 images
-/tmp/tmpehl9pho3/images/random contains 5000 images
+<img src="../../_images/sphx_glr_micro_train_001.png" srcset="../../_images/sphx_glr_micro_train_001.png" alt="[0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0]" class = "sphx-glr-single-img"/><div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>/tmp/tmpd4y05yt_/images/target contains 8144 images
+/tmp/tmpd4y05yt_/images/random contains 5000 images
 </pre></div>
 </div>
 </div>
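<p>The figure above shows ten sample images with their one-hot labels (values such as [0.0, 1.0] and [1.0, 0.0]). A hedged sketch of how such a preview grid is typically drawn; `train_dataset` is assumed to be the tf.data dataset of (image, label) batches built earlier.</p>

    import matplotlib.pyplot as plt

    images, labels = next(iter(train_dataset))
    plt.figure(figsize=(10, 4))
    for i in range(10):
        plt.subplot(2, 5, i + 1)
        plt.imshow(images[i].numpy().astype("uint8"))
        plt.title(str(labels[i].numpy()))   # one-hot label, e.g. "[0.0, 1.0]"
        plt.axis("off")
    plt.show()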
@@ -696,13 +696,13 @@ the time on our validation set).</p>
 </pre></div>
 </div>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Epoch 1/3
-328/328 - 49s - loss: 0.2185 - accuracy: 0.9234 - val_loss: 0.1424 - val_accuracy: 0.9543 - 49s/epoch - 149ms/step
+328/328 - 47s - loss: 0.1971 - accuracy: 0.9311 - val_loss: 0.2416 - val_accuracy: 0.9139 - 47s/epoch - 143ms/step
 Epoch 2/3
-328/328 - 45s - loss: 0.0920 - accuracy: 0.9654 - val_loss: 0.1235 - val_accuracy: 0.9581 - 45s/epoch - 136ms/step
+328/328 - 44s - loss: 0.0919 - accuracy: 0.9670 - val_loss: 0.1066 - val_accuracy: 0.9698 - 44s/epoch - 133ms/step
 Epoch 3/3
-328/328 - 44s - loss: 0.0635 - accuracy: 0.9770 - val_loss: 0.0954 - val_accuracy: 0.9687 - 44s/epoch - 136ms/step
+328/328 - 43s - loss: 0.0586 - accuracy: 0.9781 - val_loss: 0.1025 - val_accuracy: 0.9690 - 43s/epoch - 133ms/step
 
-&lt;keras.callbacks.History object at 0x7faf4b537f90&gt;
+&lt;keras.callbacks.History object at 0x7f2f09e55910&gt;
 </pre></div>
 </div>
 </div>
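<p>The per-epoch lines above are standard Keras `fit` output with `verbose=2` (one summary line per epoch), and the trailing `&lt;keras.callbacks.History ...&gt;` is the value `fit` returns. A hedged sketch of the training call; the model, datasets, and hyperparameters are assumptions standing in for the tutorial's own.</p>

    import tensorflow as tf

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),   # assumed optimizer/LR
        loss="categorical_crossentropy",
        metrics=["accuracy"],
    )
    history = model.fit(
        train_dataset,
        validation_data=validation_dataset,
        epochs=3,      # matches the three epochs logged above
        verbose=2,     # one summary line per epoch, as shown above
    )
    print(history)     # e.g. <keras.callbacks.History object at 0x...>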
@@ -962,7 +962,7 @@ as intended.</p>
 <p>From here, we could modify the model to read live images from the camera - we have another
 Arduino tutorial for how to do that <a class="reference external" href="https://github.com/guberti/tvm-arduino-demos/tree/master/examples/person_detection">on GitHub</a>. Alternatively, we could also
 <a class="reference external" href="https://tvm.apache.org/docs/how_to/work_with_microtvm/micro_autotune.html">use TVM’s autotuning capabilities</a> to dramatically improve the model’s performance.</p>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 5 minutes  44.695 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 4 minutes  17.521 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-work-with-microtvm-micro-train-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/b52cec46baf4f78d6bcd94cbe269c8a6/micro_train.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">micro_train.py</span></code></a></p>
diff --git a/docs/how_to/work_with_microtvm/sg_execution_times.html b/docs/how_to/work_with_microtvm/sg_execution_times.html
index d568ba8c8a..c17391829a 100644
--- a/docs/how_to/work_with_microtvm/sg_execution_times.html
+++ b/docs/how_to/work_with_microtvm/sg_execution_times.html
@@ -340,7 +340,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-work-with-microtvm-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>07:59.299</strong> total execution time for <strong>how_to_work_with_microtvm</strong> files:</p>
+<p><strong>06:25.983</strong> total execution time for <strong>how_to_work_with_microtvm</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 83%" />
@@ -349,27 +349,27 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="micro_train.html#sphx-glr-how-to-work-with-microtvm-micro-train-py"><span class="std std-ref">Training Vision Models for microTVM on Arduino</span></a> (<code class="docutils literal notranslate"><span class="pre">micro_train.py</span></code>)</p></td>
-<td><p>05:44.695</p></td>
+<td><p>04:17.521</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="micro_pytorch.html#sphx-glr-how-to-work-with-microtvm-micro-pytorch-py"><span class="std std-ref">microTVM PyTorch Tutorial</span></a> (<code class="docutils literal notranslate"><span class="pre">micro_pytorch.py</span></code>)</p></td>
-<td><p>01:07.728</p></td>
+<td><p>01:04.488</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="micro_autotune.html#sphx-glr-how-to-work-with-microtvm-micro-autotune-py"><span class="std std-ref">Autotuning with microTVM</span></a> (<code class="docutils literal notranslate"><span class="pre">micro_autotune.py</span></code>)</p></td>
-<td><p>00:54.499</p></td>
+<td><p>00:52.163</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="micro_aot.html#sphx-glr-how-to-work-with-microtvm-micro-aot-py"><span class="std std-ref">microTVM Host-Driven AoT</span></a> (<code class="docutils literal notranslate"><span class="pre">micro_aot.py</span></code>)</p></td>
-<td><p>00:08.319</p></td>
+<td><p>00:07.974</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="micro_tflite.html#sphx-glr-how-to-work-with-microtvm-micro-tflite-py"><span class="std std-ref">microTVM with TFLite Models</span></a> (<code class="docutils literal notranslate"><span class="pre">micro_tflite.py</span></code>)</p></td>
-<td><p>00:04.056</p></td>
+<td><p>00:03.834</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="micro_reference_vm.html#sphx-glr-how-to-work-with-microtvm-micro-reference-vm-py"><span class="std std-ref">microTVM Reference Virtual Machines</span></a> (<code class="docutils literal notranslate"><span class="pre">micro_reference_vm.py</span></code>)</p></td>
-<td><p>00:00.002</p></td>
+<td><p>00:00.001</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="micro_ethosu.html#sphx-glr-how-to-work-with-microtvm-micro-ethosu-py"><span class="std std-ref">Running TVM on bare metal Arm(R) Cortex(R)-M55 CPU and Ethos(TM)-U55 NPU with CMSIS-NN</span></a> (<code class="docutils literal notranslate"><span class="pre">micro_ethosu.py</span></code>)</p></td>
diff --git a/docs/how_to/work_with_relay/sg_execution_times.html b/docs/how_to/work_with_relay/sg_execution_times.html
index 0974e579c3..50a3178b47 100644
--- a/docs/how_to/work_with_relay/sg_execution_times.html
+++ b/docs/how_to/work_with_relay/sg_execution_times.html
@@ -340,7 +340,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-work-with-relay-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>00:46.494</strong> total execution time for <strong>how_to_work_with_relay</strong> files:</p>
+<p><strong>00:44.747</strong> total execution time for <strong>how_to_work_with_relay</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 84%" />
@@ -349,15 +349,15 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="using_pipeline_executor.html#sphx-glr-how-to-work-with-relay-using-pipeline-executor-py"><span class="std std-ref">Using Pipeline Executor in Relay</span></a> (<code class="docutils literal notranslate"><span class="pre">using_pipeline_executor.py</span></code>)</p></td>
-<td><p>00:34.269</p></td>
+<td><p>00:32.865</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="using_external_lib.html#sphx-glr-how-to-work-with-relay-using-external-lib-py"><span class="std std-ref">Using External Libraries in Relay</span></a> (<code class="docutils literal notranslate"><span class="pre">using_external_lib.py</span></code>)</p></td>
-<td><p>00:10.355</p></td>
+<td><p>00:10.206</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="build_gcn.html#sphx-glr-how-to-work-with-relay-build-gcn-py"><span class="std std-ref">Building a Graph Convolutional Network</span></a> (<code class="docutils literal notranslate"><span class="pre">build_gcn.py</span></code>)</p></td>
-<td><p>00:01.863</p></td>
+<td><p>00:01.669</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="using_relay_viz.html#sphx-glr-how-to-work-with-relay-using-relay-viz-py"><span class="std std-ref">Use Relay Visualizer to Visualize Relay</span></a> (<code class="docutils literal notranslate"><span class="pre">using_relay_viz.py</span></code>)</p></td>
diff --git a/docs/how_to/work_with_schedules/intrin_math.html b/docs/how_to/work_with_schedules/intrin_math.html
index 5358f3eab9..a5c2b7ac24 100644
--- a/docs/how_to/work_with_schedules/intrin_math.html
+++ b/docs/how_to/work_with_schedules/intrin_math.html
@@ -536,7 +536,7 @@ The following example customizes CUDA lowering rule for <code class="code docuti
 <a href="../../reference/api/python/ir.html#tvm.ir.register_intrin_lowering" title="tvm.ir.register_intrin_lowering" class="sphx-glr-backref-module-tvm-ir sphx-glr-backref-type-py-function"><span class="n">register_intrin_lowering</span></a><span class="p">(</span><span class="s2">&quot;tir.exp&quot;</span><span class="p">,</span> <span class="n">target</span><span class="o">=</span><span class="s2">&quot;cuda&quot;</span><span class="p">,</span> <span class="n">f</span><span class="o">= [...]
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>&lt;function my_cuda_math_rule at 0x7faeea543320&gt;
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>&lt;function my_cuda_math_rule at 0x7f2f0a77d8c0&gt;
 </pre></div>
 </div>
 <p>Register the rule to TVM with override option to override existing rule.
diff --git a/docs/how_to/work_with_schedules/sg_execution_times.html b/docs/how_to/work_with_schedules/sg_execution_times.html
index 096b693757..29f9da9687 100644
--- a/docs/how_to/work_with_schedules/sg_execution_times.html
+++ b/docs/how_to/work_with_schedules/sg_execution_times.html
@@ -340,7 +340,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-work-with-schedules-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>00:07.390</strong> total execution time for <strong>how_to_work_with_schedules</strong> files:</p>
+<p><strong>00:07.699</strong> total execution time for <strong>how_to_work_with_schedules</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 83%" />
@@ -349,35 +349,35 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="intrin_math.html#sphx-glr-how-to-work-with-schedules-intrin-math-py"><span class="std std-ref">Intrinsics and Math Functions</span></a> (<code class="docutils literal notranslate"><span class="pre">intrin_math.py</span></code>)</p></td>
-<td><p>00:04.827</p></td>
+<td><p>00:05.211</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="tensorize.html#sphx-glr-how-to-work-with-schedules-tensorize-py"><span class="std std-ref">Use Tensorize to Leverage Hardware Intrinsics</span></a> (<code class="docutils literal notranslate"><span class="pre">tensorize.py</span></code>)</p></td>
-<td><p>00:01.153</p></td>
+<td><p>00:01.120</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="reduction.html#sphx-glr-how-to-work-with-schedules-reduction-py"><span class="std std-ref">Reduction</span></a> (<code class="docutils literal notranslate"><span class="pre">reduction.py</span></code>)</p></td>
-<td><p>00:00.602</p></td>
+<td><p>00:00.581</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="scan.html#sphx-glr-how-to-work-with-schedules-scan-py"><span class="std std-ref">Scan and Recurrent Kernel</span></a> (<code class="docutils literal notranslate"><span class="pre">scan.py</span></code>)</p></td>
-<td><p>00:00.581</p></td>
+<td><p>00:00.569</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="extern_op.html#sphx-glr-how-to-work-with-schedules-extern-op-py"><span class="std std-ref">External Tensor Functions</span></a> (<code class="docutils literal notranslate"><span class="pre">extern_op.py</span></code>)</p></td>
-<td><p>00:00.118</p></td>
+<td><p>00:00.116</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="schedule_primitives.html#sphx-glr-how-to-work-with-schedules-schedule-primitives-py"><span class="std std-ref">Schedule Primitives in TVM</span></a> (<code class="docutils literal notranslate"><span class="pre">schedule_primitives.py</span></code>)</p></td>
-<td><p>00:00.055</p></td>
+<td><p>00:00.050</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="tedd.html#sphx-glr-how-to-work-with-schedules-tedd-py"><span class="std std-ref">Use Tensor Expression Debug Display (TEDD) for Visualization</span></a> (<code class="docutils literal notranslate"><span class="pre">tedd.py</span></code>)</p></td>
-<td><p>00:00.030</p></td>
+<td><p>00:00.029</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="tuple_inputs.html#sphx-glr-how-to-work-with-schedules-tuple-inputs-py"><span class="std std-ref">Compute and Reduce with Tuple Inputs</span></a> (<code class="docutils literal notranslate"><span class="pre">tuple_inputs.py</span></code>)</p></td>
-<td><p>00:00.025</p></td>
+<td><p>00:00.024</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 </tbody>
diff --git a/docs/how_to/work_with_schedules/tensorize.html b/docs/how_to/work_with_schedules/tensorize.html
index 29df72d893..ac240b84d6 100644
--- a/docs/how_to/work_with_schedules/tensorize.html
+++ b/docs/how_to/work_with_schedules/tensorize.html
@@ -587,7 +587,7 @@ The importing needs to happen before the tensorized GEMV being executed.</p>
              B: Buffer(B_2: Pointer(float32), float32, [512, 64], []),
              C: Buffer(C_2: Pointer(float32), float32, [1024, 512], [])}
   buffer_map = {A_1: A, B_1: B, C_1: C} {
-  attr [IterVar(i: int32, (nullptr), &quot;DataPar&quot;, &quot;&quot;)] &quot;pragma_import_llvm&quot; = &quot;; ModuleID = &#39;/tmp/tmpw4tmhn7f/input0.cc&#39;\nsource_filename = \&quot;/tmp/tmpw4tmhn7f/input0.cc\&quot;\ntarget datalayout = \&quot;e-m:e-i64:64-f80:128-n8:16:32:64-S128\&quot;\ntarget triple = \&quot;x86_64-pc-linux-gnu\&quot;\n\n; Function Attrs: noinline nounwind optnone uwtable\ndefine dso_local i32 @gemv_update(float*, float*, float*, i32, i32, i32) #0 {\n  %7 = allo [...]
+  attr [IterVar(i: int32, (nullptr), &quot;DataPar&quot;, &quot;&quot;)] &quot;pragma_import_llvm&quot; = &quot;; ModuleID = &#39;/tmp/tmp1re09rxh/input0.cc&#39;\nsource_filename = \&quot;/tmp/tmp1re09rxh/input0.cc\&quot;\ntarget datalayout = \&quot;e-m:e-i64:64-f80:128-n8:16:32:64-S128\&quot;\ntarget triple = \&quot;x86_64-pc-linux-gnu\&quot;\n\n; Function Attrs: noinline nounwind optnone uwtable\ndefine dso_local i32 @gemv_update(float*, float*, float*, i32, i32, i32) #0 {\n  %7 = allo [...]
   for (i, 0, 1024) {
     for (j.outer: int32, 0, 32) {
       @tir.call_extern(&quot;gemv_update&quot;, @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), C_2, ((i*512) + (j.outer*16)), 16, 2, dtype=handle), @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), A_2, (i*64), 64, 1, dtype=handle), @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), B_2, (j.outer*1024), 1024, 1, dtype=handle), 16, 64, 64, dtype=int32)
diff --git a/docs/install/nnpack.html b/docs/install/nnpack.html
index 23d2181e9d..1ef28de467 100644
--- a/docs/install/nnpack.html
+++ b/docs/install/nnpack.html
@@ -229,17 +229,7 @@
               <p class="caption" role="heading"><span class="caption-text">Getting Started</span></p>
 <ul class="current">
 <li class="toctree-l1 current"><a class="reference internal" href="index.html">Installing TVM</a><ul class="current">
-<li class="toctree-l2 current"><a class="reference internal" href="from_source.html">Install from Source</a><ul class="current">
-<li class="toctree-l3"><a class="reference internal" href="from_source.html#developers-get-source-from-github">Developers: Get Source from Github</a></li>
-<li class="toctree-l3"><a class="reference internal" href="from_source.html#build-the-shared-library">Build the Shared Library</a></li>
-<li class="toctree-l3"><a class="reference internal" href="from_source.html#python-package-installation">Python Package Installation</a></li>
-<li class="toctree-l3 current"><a class="reference internal" href="from_source.html#install-contrib-libraries">Install Contrib Libraries</a><ul class="current">
-<li class="toctree-l4 current"><a class="current reference internal" href="#">NNPACK Contrib Installation</a></li>
-</ul>
-</li>
-<li class="toctree-l3"><a class="reference internal" href="from_source.html#enable-c-tests">Enable C++ Tests</a></li>
-</ul>
-</li>
+<li class="toctree-l2"><a class="reference internal" href="from_source.html">Install from Source</a></li>
 <li class="toctree-l2"><a class="reference internal" href="docker.html">Docker Images</a></li>
 <li class="toctree-l2 current"><a class="current reference internal" href="#">NNPACK Contrib Installation</a><ul>
 <li class="toctree-l3"><a class="reference internal" href="#conditions">Conditions</a></li>
diff --git a/docs/reference/api/doxygen/annotated.html b/docs/reference/api/doxygen/annotated.html
index 8b45ca4fd9..5237a146b4 100644
--- a/docs/reference/api/doxygen/annotated.html
+++ b/docs/reference/api/doxygen/annotated.html
@@ -217,21 +217,15 @@ $(function() {
 <tr id="row_1_2_23_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1detail_1_1SelectSHashReduce_3_01T_00_01TraitName_00_01false_01_4.html" target="_self">SelectSHashReduce&lt; T, TraitName, false &gt;</a></td><td class="desc"></td></tr>
 <tr id="row_1_2_24_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1detail_1_1SelectVisitAttrs.html" target="_self">SelectVisitAttrs</a></td><td class="desc"></td></tr>
 <tr id="row_1_2_25_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1detail_1_1SelectVisitAttrs_3_01T_00_01TraitName_00_01false_01_4.html" target="_self">SelectVisitAttrs&lt; T, TraitName, false &gt;</a></td><td class="desc"></td></tr>
-<tr id="row_1_2_26_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1detail_1_1TracedObjectWrapperSelector.html" target="_self">TracedObjectWrapperSelector</a></td><td class="desc"></td></tr>
-<tr id="row_1_2_27_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1detail_1_1TracedObjectWrapperSelector_3_01Array_3_01T_01_4_00_01true_01_4.html" target="_self">TracedObjectWrapperSelector&lt; Array&lt; T &gt;, true &gt;</a></td><td class="desc"></td></tr>
-<tr id="row_1_2_28_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1detail_1_1TracedObjectWrapperSelector_3_01Map_3_01K_00_01V_01_4_00_01true_01_4.html" target="_self">TracedObjectWrapperSelector&lt; Map&lt; K, V &gt;, true &gt;</a></td><td class="desc"></td></tr>
-<tr id="row_1_2_29_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1detail_1_1TracedObjectWrapperSelector_3_01Optional_3_01T_01_4_00_01true_01_4.html" target="_self">TracedObjectWrapperSelector&lt; Optional&lt; T &gt;, true &gt;</a></td><td class="desc"></td></tr>
-<tr id="row_1_2_30_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1detail_1_1TracedObjectWrapperSelector_3_01T_00_01false_01_4.html" target="_self">TracedObjectWrapperSelector&lt; T, false &gt;</a></td><td class="desc"></td></tr>
-<tr id="row_1_2_31_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1detail_1_1TracedObjectWrapperSelector_3_01T_00_01true_01_4.html" target="_self">TracedObjectWrapperSelector&lt; T, true &gt;</a></td><td class="desc"></td></tr>
-<tr id="row_1_2_32_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1detail_1_1TypeName.html" target="_self">TypeName</a></td><td class="desc">Helper struct to get the type name known to tvm </td></tr>
-<tr id="row_1_2_33_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1detail_1_1TypeName_3_01bool_01_4.html" target="_self">TypeName&lt; bool &gt;</a></td><td class="desc"></td></tr>
-<tr id="row_1_2_34_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1detail_1_1TypeName_3_01DataType_01_4.html" target="_self">TypeName&lt; DataType &gt;</a></td><td class="desc"></td></tr>
-<tr id="row_1_2_35_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1detail_1_1TypeName_3_01double_01_4.html" target="_self">TypeName&lt; double &gt;</a></td><td class="desc"></td></tr>
-<tr id="row_1_2_36_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1detail_1_1TypeName_3_01int_01_4.html" target="_self">TypeName&lt; int &gt;</a></td><td class="desc"></td></tr>
-<tr id="row_1_2_37_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1detail_1_1TypeName_3_01int64__t_01_4.html" target="_self">TypeName&lt; int64_t &gt;</a></td><td class="desc"></td></tr>
-<tr id="row_1_2_38_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1detail_1_1TypeName_3_01uint64__t_01_4.html" target="_self">TypeName&lt; uint64_t &gt;</a></td><td class="desc"></td></tr>
-<tr id="row_1_2_39_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1detail_1_1TypeName_3_01void_01_5_01_4.html" target="_self">TypeName&lt; void * &gt;</a></td><td class="desc"></td></tr>
-<tr id="row_1_2_40_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1detail_1_1ValueTypeInfoMaker.html" target="_self">ValueTypeInfoMaker</a></td><td class="desc"></td></tr>
+<tr id="row_1_2_26_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1detail_1_1TypeName.html" target="_self">TypeName</a></td><td class="desc">Helper struct to get the type name known to tvm </td></tr>
+<tr id="row_1_2_27_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1detail_1_1TypeName_3_01bool_01_4.html" target="_self">TypeName&lt; bool &gt;</a></td><td class="desc"></td></tr>
+<tr id="row_1_2_28_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1detail_1_1TypeName_3_01DataType_01_4.html" target="_self">TypeName&lt; DataType &gt;</a></td><td class="desc"></td></tr>
+<tr id="row_1_2_29_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1detail_1_1TypeName_3_01double_01_4.html" target="_self">TypeName&lt; double &gt;</a></td><td class="desc"></td></tr>
+<tr id="row_1_2_30_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1detail_1_1TypeName_3_01int_01_4.html" target="_self">TypeName&lt; int &gt;</a></td><td class="desc"></td></tr>
+<tr id="row_1_2_31_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1detail_1_1TypeName_3_01int64__t_01_4.html" target="_self">TypeName&lt; int64_t &gt;</a></td><td class="desc"></td></tr>
+<tr id="row_1_2_32_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1detail_1_1TypeName_3_01uint64__t_01_4.html" target="_self">TypeName&lt; uint64_t &gt;</a></td><td class="desc"></td></tr>
+<tr id="row_1_2_33_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1detail_1_1TypeName_3_01void_01_5_01_4.html" target="_self">TypeName&lt; void * &gt;</a></td><td class="desc"></td></tr>
+<tr id="row_1_2_34_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1detail_1_1ValueTypeInfoMaker.html" target="_self">ValueTypeInfoMaker</a></td><td class="desc"></td></tr>
 <tr id="row_1_3_" class="even" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_1_3_" class="arrow" onclick="toggleFolder('1_3_')">&#9658;</span><span class="icona"><span class="icon">N</span></span><a class="el" href="namespacetvm_1_1instrument.html" target="_self">instrument</a></td><td class="desc"></td></tr>
 <tr id="row_1_3_0_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1instrument_1_1PassInstrument.html" target="_self">PassInstrument</a></td><td class="desc">Managed reference class for <a class="el" href="classtvm_1_1instrument_1_1PassInstrumentNode.html" title="PassInstrumentNode forms an instrument implementation. It provides API for us [...]
 <tr id="row_1_3_1_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1instrument_1_1PassInstrumentNode.html" target="_self">PassInstrumentNode</a></td><td class="desc"><a class="el" href="classtvm_1_1instrument_1_1PassInstrumentNode.html" title="PassInstrumentNode forms an instrument implementation. It provides API for users to register call [...]
@@ -740,59 +734,53 @@ $(function() {
 <tr id="row_1_8_1_7_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1CallDocNode.html" target="_self">CallDocNode</a></td><td class="desc"><a class="el" href="classtvm_1_1script_1_1printer_1_1Doc.html" title="Reference type of DocNode. ">Doc</a> that represents function call </td></tr>
 <tr id="row_1_8_1_8_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1ClassDoc.html" target="_self">ClassDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1ClassDocNode.html" title="Doc that represents class definition. ">ClassDocNode</a> </td></tr>
 <tr id="row_1_8_1_9_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1ClassDocNode.html" target="_self">ClassDocNode</a></td><td class="desc"><a class="el" href="classtvm_1_1script_1_1printer_1_1Doc.html" title="Reference type of DocNode. ">Doc</a> that represents class definition </td></tr>
-<tr id="row_1_8_1_10_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1DictDoc.html" target="_self">DictDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1DictDocNode.html" title="Doc that represents dictionary literal. ">DictDocNode</a> </td></tr>
-<tr id="row_1_8_1_11_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1DictDocNode.html" target="_self">DictDocNode</a></td><td class="desc"><a class="el" href="classtvm_1_1script_1_1printer_1_1Doc.html" title="Reference type of DocNode. ">Doc</a> that represents dictionary literal </td></tr>
-<tr id="row_1_8_1_12_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1Doc.html" target="_self">Doc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1DocNode.html" title="The base class of all Doc. ">DocNode</a> </td></tr>
-<tr id="row_1_8_1_13_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1DocNode.html" target="_self">DocNode</a></td><td class="desc">The base class of all <a class="el" href="classtvm_1_1script_1_1printer_1_1Doc.html" title="Reference type of DocNode. ">Doc</a> </td></tr>
-<tr id="row_1_8_1_14_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1ExprDoc.html" target="_self">ExprDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1ExprDocNode.html" title="The base class of expression doc. ">ExprDocNode</a> </td></tr>
-<tr id="row_1_8_1_15_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1ExprDocNode.html" target="_self">ExprDocNode</a></td><td class="desc">The base class of expression doc </td></tr>
-<tr id="row_1_8_1_16_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1ExprStmtDoc.html" target="_self">ExprStmtDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1ExprStmtDocNode.html" title="Doc that represents an expression as statement. ">ExprStmtDocNode</a> </td></tr>
-<tr id="row_1_8_1_17_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1ExprStmtDocNode.html" target="_self">ExprStmtDocNode</a></td><td class="desc"><a class="el" href="classtvm_1_1script_1_1printer_1_1Doc.html" title="Reference type of DocNode. ">Doc</a> that represents an expression as statement </td></tr>
-<tr id="row_1_8_1_18_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1ForDoc.html" target="_self">ForDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1ForDocNode.html" title="Doc that represents for statement. ">ForDocNode</a> </td></tr>
-<tr id="row_1_8_1_19_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1ForDocNode.html" target="_self">ForDocNode</a></td><td class="desc"><a class="el" href="classtvm_1_1script_1_1printer_1_1Doc.html" title="Reference type of DocNode. ">Doc</a> that represents for statement </td></tr>
-<tr id="row_1_8_1_20_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1Frame.html" target="_self">Frame</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1FrameNode.html">FrameNode</a> </td></tr>
-<tr id="row_1_8_1_21_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1FrameNode.html" target="_self">FrameNode</a></td><td class="desc"></td></tr>
-<tr id="row_1_8_1_22_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1FunctionDoc.html" target="_self">FunctionDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1FunctionDocNode.html" title="Doc that represents function definition. ">FunctionDocNode</a> </td></tr>
-<tr id="row_1_8_1_23_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1FunctionDocNode.html" target="_self">FunctionDocNode</a></td><td class="desc"><a class="el" href="classtvm_1_1script_1_1printer_1_1Doc.html" title="Reference type of DocNode. ">Doc</a> that represents function definition </td></tr>
-<tr id="row_1_8_1_24_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1IdDoc.html" target="_self">IdDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1IdDocNode.html" title="Doc that represents identifier. ">IdDocNode</a> </td></tr>
-<tr id="row_1_8_1_25_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1IdDocNode.html" target="_self">IdDocNode</a></td><td class="desc"><a class="el" href="classtvm_1_1script_1_1printer_1_1Doc.html" title="Reference type of DocNode. ">Doc</a> that represents identifier </td></tr>
-<tr id="row_1_8_1_26_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1IfDoc.html" target="_self">IfDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1IfDocNode.html" title="Doc that represent if-then-else statement. ">IfDocNode</a> </td></tr>
-<tr id="row_1_8_1_27_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1IfDocNode.html" target="_self">IfDocNode</a></td><td class="desc"><a class="el" href="classtvm_1_1script_1_1printer_1_1Doc.html" title="Reference type of DocNode. ">Doc</a> that represent if-then-else statement </td></tr>
-<tr id="row_1_8_1_28_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1IndexDoc.html" target="_self">IndexDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1IndexDocNode.html" title="Doc that represents index access on another expression. ">IndexDocNode</a> </td></tr>
-<tr id="row_1_8_1_29_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1IndexDocNode.html" target="_self">IndexDocNode</a></td><td class="desc"><a class="el" href="classtvm_1_1script_1_1printer_1_1Doc.html" title="Reference type of DocNode. ">Doc</a> that represents index access on another expression </td></tr>
-<tr id="row_1_8_1_30_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1IRDocsifier.html" target="_self">IRDocsifier</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1IRDocsifierNode.html" title="IRDocsifier is the top-level interface in the IR-&gt;Doc process. ">IRDocsifier [...]
-<tr id="row_1_8_1_31_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1IRDocsifierNode.html" target="_self">IRDocsifierNode</a></td><td class="desc"><a class="el" href="classtvm_1_1script_1_1printer_1_1IRDocsifier.html" title="Reference type of IRDocsifierNode. ">IRDocsifier</a> is the top-level interface in the IR-&gt [...]
-<tr id="row_1_8_1_32_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1LambdaDoc.html" target="_self">LambdaDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1LambdaDocNode.html" title="Doc that represents anonymous function. ">LambdaDocNode</a> </td></tr>
-<tr id="row_1_8_1_33_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1LambdaDocNode.html" target="_self">LambdaDocNode</a></td><td class="desc"><a class="el" href="classtvm_1_1script_1_1printer_1_1Doc.html" title="Reference type of DocNode. ">Doc</a> that represents anonymous function </td></tr>
-<tr id="row_1_8_1_34_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1ListDoc.html" target="_self">ListDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1ListDocNode.html" title="Doc that represents list literal. ">ListDocNode</a> </td></tr>
-<tr id="row_1_8_1_35_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1ListDocNode.html" target="_self">ListDocNode</a></td><td class="desc"><a class="el" href="classtvm_1_1script_1_1printer_1_1Doc.html" title="Reference type of DocNode. ">Doc</a> that represents list literal </td></tr>
-<tr id="row_1_8_1_36_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1LiteralDoc.html" target="_self">LiteralDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1LiteralDocNode.html" title="Doc that represents literal value. ">LiteralDocNode</a> </td></tr>
-<tr id="row_1_8_1_37_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1LiteralDocNode.html" target="_self">LiteralDocNode</a></td><td class="desc"><a class="el" href="classtvm_1_1script_1_1printer_1_1Doc.html" title="Reference type of DocNode. ">Doc</a> that represents literal value </td></tr>
-<tr id="row_1_8_1_38_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1MetadataFrame.html" target="_self">MetadataFrame</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1MetadataFrameNode.html" title="MetadataFrame contains information like contant parameter array. ">Metada [...]
-<tr id="row_1_8_1_39_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1MetadataFrameNode.html" target="_self">MetadataFrameNode</a></td><td class="desc"><a class="el" href="classtvm_1_1script_1_1printer_1_1MetadataFrame.html" title="Reference type of MetadataFrameNode. ">MetadataFrame</a> contains information like cont [...]
+<tr id="row_1_8_1_10_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1script_1_1printer_1_1Default.html" target="_self">Default</a></td><td class="desc"><a class="el" href="structtvm_1_1script_1_1printer_1_1Default.html" title="Default values in the TVMScript printer. ">Default</a> values in the TVMScript printer </td></tr>
+<tr id="row_1_8_1_11_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1DictDoc.html" target="_self">DictDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1DictDocNode.html" title="Doc that represents dictionary literal. ">DictDocNode</a> </td></tr>
+<tr id="row_1_8_1_12_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1DictDocNode.html" target="_self">DictDocNode</a></td><td class="desc"><a class="el" href="classtvm_1_1script_1_1printer_1_1Doc.html" title="Reference type of DocNode. ">Doc</a> that represents dictionary literal </td></tr>
+<tr id="row_1_8_1_13_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1Doc.html" target="_self">Doc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1DocNode.html" title="The base class of all Doc. ">DocNode</a> </td></tr>
+<tr id="row_1_8_1_14_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1DocNode.html" target="_self">DocNode</a></td><td class="desc">The base class of all <a class="el" href="classtvm_1_1script_1_1printer_1_1Doc.html" title="Reference type of DocNode. ">Doc</a> </td></tr>
+<tr id="row_1_8_1_15_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1ExprDoc.html" target="_self">ExprDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1ExprDocNode.html" title="The base class of expression doc. ">ExprDocNode</a> </td></tr>
+<tr id="row_1_8_1_16_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1ExprDocNode.html" target="_self">ExprDocNode</a></td><td class="desc">The base class of expression doc </td></tr>
+<tr id="row_1_8_1_17_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1ExprStmtDoc.html" target="_self">ExprStmtDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1ExprStmtDocNode.html" title="Doc that represents an expression as statement. ">ExprStmtDocNode</a> </td></tr>
+<tr id="row_1_8_1_18_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1ExprStmtDocNode.html" target="_self">ExprStmtDocNode</a></td><td class="desc"><a class="el" href="classtvm_1_1script_1_1printer_1_1Doc.html" title="Reference type of DocNode. ">Doc</a> that represents an expression as statement </td></tr>
+<tr id="row_1_8_1_19_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1ForDoc.html" target="_self">ForDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1ForDocNode.html" title="Doc that represents for statement. ">ForDocNode</a> </td></tr>
+<tr id="row_1_8_1_20_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1ForDocNode.html" target="_self">ForDocNode</a></td><td class="desc"><a class="el" href="classtvm_1_1script_1_1printer_1_1Doc.html" title="Reference type of DocNode. ">Doc</a> that represents for statement </td></tr>
+<tr id="row_1_8_1_21_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1Frame.html" target="_self">Frame</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1FrameNode.html">FrameNode</a> </td></tr>
+<tr id="row_1_8_1_22_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1FrameNode.html" target="_self">FrameNode</a></td><td class="desc"></td></tr>
+<tr id="row_1_8_1_23_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1FunctionDoc.html" target="_self">FunctionDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1FunctionDocNode.html" title="Doc that represents function definition. ">FunctionDocNode</a> </td></tr>
+<tr id="row_1_8_1_24_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1FunctionDocNode.html" target="_self">FunctionDocNode</a></td><td class="desc"><a class="el" href="classtvm_1_1script_1_1printer_1_1Doc.html" title="Reference type of DocNode. ">Doc</a> that represents function definition </td></tr>
+<tr id="row_1_8_1_25_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1IdDoc.html" target="_self">IdDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1IdDocNode.html" title="Doc that represents identifier. ">IdDocNode</a> </td></tr>
+<tr id="row_1_8_1_26_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1IdDocNode.html" target="_self">IdDocNode</a></td><td class="desc"><a class="el" href="classtvm_1_1script_1_1printer_1_1Doc.html" title="Reference type of DocNode. ">Doc</a> that represents identifier </td></tr>
+<tr id="row_1_8_1_27_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1IfDoc.html" target="_self">IfDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1IfDocNode.html" title="Doc that represent if-then-else statement. ">IfDocNode</a> </td></tr>
+<tr id="row_1_8_1_28_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1IfDocNode.html" target="_self">IfDocNode</a></td><td class="desc"><a class="el" href="classtvm_1_1script_1_1printer_1_1Doc.html" title="Reference type of DocNode. ">Doc</a> that represent if-then-else statement </td></tr>
+<tr id="row_1_8_1_29_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1IndexDoc.html" target="_self">IndexDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1IndexDocNode.html" title="Doc that represents index access on another expression. ">IndexDocNode</a> </td></tr>
+<tr id="row_1_8_1_30_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1IndexDocNode.html" target="_self">IndexDocNode</a></td><td class="desc"><a class="el" href="classtvm_1_1script_1_1printer_1_1Doc.html" title="Reference type of DocNode. ">Doc</a> that represents index access on another expression </td></tr>
+<tr id="row_1_8_1_31_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1IRDocsifier.html" target="_self">IRDocsifier</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1IRDocsifierNode.html" title="IRDocsifier is the top-level interface in the IR-&gt;Doc process. ">IRDocsifier [...]
+<tr id="row_1_8_1_32_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1IRDocsifierFunctor.html" target="_self">IRDocsifierFunctor</a></td><td class="desc">Dynamic dispatch functor based on <a class="el" href="classtvm_1_1ObjectPath.html">ObjectPath</a> </td></tr>
+<tr id="row_1_8_1_33_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span id="arr_1_8_1_33_" class="arrow" onclick="toggleFolder('1_8_1_33_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1IRDocsifierNode.html" target="_self">IRDocsifierNode</a></td><td class="desc"><a class="el" href="classtvm_1_1script_1_1printer_1_1IRDocsifier.html" title="Refe [...]
+<tr id="row_1_8_1_33_0_" class="even" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1script_1_1printer_1_1IRDocsifierNode_1_1VariableInfo.html" target="_self">VariableInfo</a></td><td class="desc">Information about a variable, including its optional name and its doc creator </td></tr>
+<tr id="row_1_8_1_34_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1LambdaDoc.html" target="_self">LambdaDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1LambdaDocNode.html" title="Doc that represents anonymous function. ">LambdaDocNode</a> </td></tr>
+<tr id="row_1_8_1_35_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1LambdaDocNode.html" target="_self">LambdaDocNode</a></td><td class="desc"><a class="el" href="classtvm_1_1script_1_1printer_1_1Doc.html" title="Reference type of DocNode. ">Doc</a> that represents anonymous function </td></tr>
+<tr id="row_1_8_1_36_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1ListDoc.html" target="_self">ListDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1ListDocNode.html" title="Doc that represents list literal. ">ListDocNode</a> </td></tr>
+<tr id="row_1_8_1_37_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1ListDocNode.html" target="_self">ListDocNode</a></td><td class="desc"><a class="el" href="classtvm_1_1script_1_1printer_1_1Doc.html" title="Reference type of DocNode. ">Doc</a> that represents list literal </td></tr>
+<tr id="row_1_8_1_38_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1LiteralDoc.html" target="_self">LiteralDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1LiteralDocNode.html" title="Doc that represents literal value. ">LiteralDocNode</a> </td></tr>
+<tr id="row_1_8_1_39_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1LiteralDocNode.html" target="_self">LiteralDocNode</a></td><td class="desc"><a class="el" href="classtvm_1_1script_1_1printer_1_1Doc.html" title="Reference type of DocNode. ">Doc</a> that represents literal value </td></tr>
 <tr id="row_1_8_1_40_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1OperationDoc.html" target="_self">OperationDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1OperationDocNode.html" title="Doc that represents operation. ">OperationDocNode</a> </td></tr>
 <tr id="row_1_8_1_41_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1OperationDocNode.html" target="_self">OperationDocNode</a></td><td class="desc"><a class="el" href="classtvm_1_1script_1_1printer_1_1Doc.html" title="Reference type of DocNode. ">Doc</a> that represents operation </td></tr>
 <tr id="row_1_8_1_42_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1ReturnDoc.html" target="_self">ReturnDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1ReturnDocNode.html" title="Doc that represents return statement. ">ReturnDocNode</a> </td></tr>
 <tr id="row_1_8_1_43_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1ReturnDocNode.html" target="_self">ReturnDocNode</a></td><td class="desc"><a class="el" href="classtvm_1_1script_1_1printer_1_1Doc.html" title="Reference type of DocNode. ">Doc</a> that represents return statement </td></tr>
-<tr id="row_1_8_1_44_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1RootNodeContainer.html" target="_self">RootNodeContainer</a></td><td class="desc"></td></tr>
-<tr id="row_1_8_1_45_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1RootNodeContainerNode.html" target="_self">RootNodeContainerNode</a></td><td class="desc">A wrapper object to provide injection point for printer of each IR </td></tr>
-<tr id="row_1_8_1_46_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1ScopeDoc.html" target="_self">ScopeDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1ScopeDocNode.html" title="Doc that represents special scopes. ">ScopeDocNode</a> </td></tr>
-<tr id="row_1_8_1_47_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1ScopeDocNode.html" target="_self">ScopeDocNode</a></td><td class="desc"><a class="el" href="classtvm_1_1script_1_1printer_1_1Doc.html" title="Reference type of DocNode. ">Doc</a> that represents special scopes </td></tr>
-<tr id="row_1_8_1_48_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1SliceDoc.html" target="_self">SliceDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1SliceDocNode.html" title="Doc that represents slice in Index expression. ">SliceDocNode</a> </td></tr>
-<tr id="row_1_8_1_49_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1SliceDocNode.html" target="_self">SliceDocNode</a></td><td class="desc"><a class="el" href="classtvm_1_1script_1_1printer_1_1Doc.html" title="Reference type of DocNode. ">Doc</a> that represents slice in Index expression </td></tr>
-<tr id="row_1_8_1_50_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1StmtBlockDoc.html" target="_self">StmtBlockDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1StmtBlockDocNode.html" title="The container doc that holds a list of StmtDoc. ">StmtBlockDocNode</a> </td></tr>
-<tr id="row_1_8_1_51_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1StmtBlockDocNode.html" target="_self">StmtBlockDocNode</a></td><td class="desc">The container doc that holds a list of <a class="el" href="classtvm_1_1script_1_1printer_1_1StmtDoc.html" title="Reference type of StmtDocNode. ">StmtDoc</a> </td></tr>
-<tr id="row_1_8_1_52_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1StmtDoc.html" target="_self">StmtDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1StmtDocNode.html" title="The base class of statement doc. ">StmtDocNode</a> </td></tr>
-<tr id="row_1_8_1_53_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1StmtDocNode.html" target="_self">StmtDocNode</a></td><td class="desc">The base class of statement doc </td></tr>
-<tr id="row_1_8_1_54_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1TracedObjectFunctor.html" target="_self">TracedObjectFunctor</a></td><td class="desc">Dynamic dispatch functor based on <a class="el" href="classtvm_1_1TracedObject.html" title="Traced wrapper for regular (non-container) TVM objects. ">TracedObject< [...]
-<tr id="row_1_8_1_55_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1TupleDoc.html" target="_self">TupleDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1TupleDocNode.html" title="Doc that represents tuple literal. ">TupleDocNode</a> </td></tr>
-<tr id="row_1_8_1_56_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1TupleDocNode.html" target="_self">TupleDocNode</a></td><td class="desc"><a class="el" href="classtvm_1_1script_1_1printer_1_1Doc.html" title="Reference type of DocNode. ">Doc</a> that represents tuple literal </td></tr>
-<tr id="row_1_8_1_57_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1VarDefFrame.html" target="_self">VarDefFrame</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1VarDefFrameNode.html" title="VarDefFrame contains information about the free variables that needs to be defi [...]
-<tr id="row_1_8_1_58_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1VarDefFrameNode.html" target="_self">VarDefFrameNode</a></td><td class="desc"><a class="el" href="classtvm_1_1script_1_1printer_1_1VarDefFrame.html" title="Reference type of VarDefFrameNode. ">VarDefFrame</a> contains information about the free vari [...]
-<tr id="row_1_8_1_59_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1VarTable.html" target="_self">VarTable</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1VarTableNode.html" title="Variable Table manages mapping from variable object to ExprDoc during the process of pri [...]
-<tr id="row_1_8_1_60_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1VarTableNode.html" target="_self">VarTableNode</a></td><td class="desc">Variable Table manages mapping from variable object to <a class="el" href="classtvm_1_1script_1_1printer_1_1ExprDoc.html" title="Reference type of ExprDocNode. ">ExprDoc</a> dur [...]
-<tr id="row_1_8_1_61_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1WhileDoc.html" target="_self">WhileDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1WhileDocNode.html" title="Doc that represents while statement. ">WhileDocNode</a> </td></tr>
-<tr id="row_1_8_1_62_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1WhileDocNode.html" target="_self">WhileDocNode</a></td><td class="desc"><a class="el" href="classtvm_1_1script_1_1printer_1_1Doc.html" title="Reference type of DocNode. ">Doc</a> that represents while statement </td></tr>
+<tr id="row_1_8_1_44_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1ScopeDoc.html" target="_self">ScopeDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1ScopeDocNode.html" title="Doc that represents special scopes. ">ScopeDocNode</a> </td></tr>
+<tr id="row_1_8_1_45_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1ScopeDocNode.html" target="_self">ScopeDocNode</a></td><td class="desc"><a class="el" href="classtvm_1_1script_1_1printer_1_1Doc.html" title="Reference type of DocNode. ">Doc</a> that represents special scopes </td></tr>
+<tr id="row_1_8_1_46_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1SliceDoc.html" target="_self">SliceDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1SliceDocNode.html" title="Doc that represents slice in Index expression. ">SliceDocNode</a> </td></tr>
+<tr id="row_1_8_1_47_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1SliceDocNode.html" target="_self">SliceDocNode</a></td><td class="desc"><a class="el" href="classtvm_1_1script_1_1printer_1_1Doc.html" title="Reference type of DocNode. ">Doc</a> that represents slice in Index expression </td></tr>
+<tr id="row_1_8_1_48_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1StmtBlockDoc.html" target="_self">StmtBlockDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1StmtBlockDocNode.html" title="The container doc that holds a list of StmtDoc. ">StmtBlockDocNode</a> </td></tr>
+<tr id="row_1_8_1_49_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1StmtBlockDocNode.html" target="_self">StmtBlockDocNode</a></td><td class="desc">The container doc that holds a list of <a class="el" href="classtvm_1_1script_1_1printer_1_1StmtDoc.html" title="Reference type of StmtDocNode. ">StmtDoc</a> </td></tr>
+<tr id="row_1_8_1_50_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1StmtDoc.html" target="_self">StmtDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1StmtDocNode.html" title="The base class of statement doc. ">StmtDocNode</a> </td></tr>
+<tr id="row_1_8_1_51_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1StmtDocNode.html" target="_self">StmtDocNode</a></td><td class="desc">The base class of statement doc </td></tr>
+<tr id="row_1_8_1_52_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1TupleDoc.html" target="_self">TupleDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1TupleDocNode.html" title="Doc that represents tuple literal. ">TupleDocNode</a> </td></tr>
+<tr id="row_1_8_1_53_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1TupleDocNode.html" target="_self">TupleDocNode</a></td><td class="desc"><a class="el" href="classtvm_1_1script_1_1printer_1_1Doc.html" title="Reference type of DocNode. ">Doc</a> that represents tuple literal </td></tr>
+<tr id="row_1_8_1_54_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1WhileDoc.html" target="_self">WhileDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1WhileDocNode.html" title="Doc that represents while statement. ">WhileDocNode</a> </td></tr>
+<tr id="row_1_8_1_55_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1WhileDocNode.html" target="_self">WhileDocNode</a></td><td class="desc"><a class="el" href="classtvm_1_1script_1_1printer_1_1Doc.html" title="Reference type of DocNode. ">Doc</a> that represents while statement </td></tr>
 <tr id="row_1_9_" class="even" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_1_9_" class="arrow" onclick="toggleFolder('1_9_')">&#9658;</span><span class="icona"><span class="icon">N</span></span><a class="el" href="namespacetvm_1_1support.html" target="_self">support</a></td><td class="desc"></td></tr>
 <tr id="row_1_9_0_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1support_1_1LinearCongruentialEngine.html" target="_self">LinearCongruentialEngine</a></td><td class="desc">This linear congruential engine is a drop-in replacement for std::minstd_rand. It strictly corresponds to std::minstd_rand and is designed to be platform-independent  [...]
 <tr id="row_1_9_1_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span id="arr_1_9_1_" class="arrow" onclick="toggleFolder('1_9_1_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1support_1_1Span.html" target="_self">Span</a></td><td class="desc">A partial implementation of the C++20 std::span </td></tr>
@@ -1077,147 +1065,139 @@ $(function() {
 <tr id="row_1_47_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1ConstantPoolInfoNode.html" target="_self">ConstantPoolInfoNode</a></td><td class="desc"></td></tr>
 <tr id="row_1_48_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1Constructor.html" target="_self">Constructor</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1ConstructorNode.html" title="ADT constructor. Constructors compare by pointer equality. ">ConstructorNode</a> </td></tr>
 <tr id="row_1_49_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1ConstructorNode.html" target="_self">ConstructorNode</a></td><td class="desc">ADT constructor. Constructors compare by pointer equality </td></tr>
-<tr id="row_1_50_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1ContextManager.html" target="_self">ContextManager</a></td><td class="desc">A context type that delegates EnterWithScope and ExitWithScope to user-provided functions </td></tr>
-<tr id="row_1_51_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1Diagnostic.html" target="_self">Diagnostic</a></td><td class="desc"></td></tr>
-<tr id="row_1_52_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1DiagnosticBuilder.html" target="_self">DiagnosticBuilder</a></td><td class="desc">A wrapper around std::stringstream to build a diagnostic </td></tr>
-<tr id="row_1_53_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1DiagnosticContext.html" target="_self">DiagnosticContext</a></td><td class="desc"></td></tr>
-<tr id="row_1_54_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1DiagnosticContextNode.html" target="_self">DiagnosticContextNode</a></td><td class="desc"></td></tr>
-<tr id="row_1_55_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1DiagnosticNode.html" target="_self">DiagnosticNode</a></td><td class="desc">A compiler diagnostic message </td></tr>
-<tr id="row_1_56_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1DiagnosticRenderer.html" target="_self">DiagnosticRenderer</a></td><td class="desc"></td></tr>
-<tr id="row_1_57_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1DiagnosticRendererNode.html" target="_self">DiagnosticRendererNode</a></td><td class="desc">Display diagnostics in a given display format </td></tr>
-<tr id="row_1_58_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1DictAttrs.html" target="_self">DictAttrs</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1DictAttrsNode.html" title="Specialized attribute type that is backed by a map. The DictAttrsNode implements the Attrs behavior...">DictAttrsNode</a> </td></tr>
-<tr id="row_1_59_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1DictAttrsNode.html" target="_self">DictAttrsNode</a></td><td class="desc">Specialized attribute type that is backed by a map. The <a class="el" href="classtvm_1_1DictAttrsNode.html" title="Specialized attribute type that is backed by a map. The DictAttrsNode implements the  [...]
-<tr id="row_1_60_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1EnvFunc.html" target="_self">EnvFunc</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1EnvFuncNode.html" title="A serializable function backed by TVM&#39;s global environment. ">EnvFuncNode</a> </td></tr>
-<tr id="row_1_61_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1EnvFuncNode.html" target="_self">EnvFuncNode</a></td><td class="desc">A serializable function backed by TVM's global environment </td></tr>
-<tr id="row_1_62_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1ErrorBuilder.html" target="_self">ErrorBuilder</a></td><td class="desc">A wrapper around std::stringstream to build error </td></tr>
-<tr id="row_1_63_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1ErrorReporter.html" target="_self">ErrorReporter</a></td><td class="desc">An abstraction around how errors are stored and reported. Designed to be opaque to users, so we can support a robust and simpler error reporting mode, as well as a more complex mode </td></tr>
-<tr id="row_1_64_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1FloatImm.html" target="_self">FloatImm</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1FloatImmNode.html" title="Constant floating point literals in the program. ">FloatImmNode</a> </td></tr>
-<tr id="row_1_65_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1FloatImmNode.html" target="_self">FloatImmNode</a></td><td class="desc">Constant floating point literals in the program </td></tr>
-<tr id="row_1_66_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1FuncType.html" target="_self">FuncType</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1FuncTypeNode.html" title="Function type. ">FuncTypeNode</a> </td></tr>
-<tr id="row_1_67_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1FuncTypeNode.html" target="_self">FuncTypeNode</a></td><td class="desc">Function type </td></tr>
-<tr id="row_1_68_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1GenericFunc.html" target="_self">GenericFunc</a></td><td class="desc">Generic function that can be specialized on a per-target basis </td></tr>
-<tr id="row_1_69_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1GenericFuncNode.html" target="_self">GenericFuncNode</a></td><td class="desc">Represents a generic function that can be specialized on a per-target basis </td></tr>
-<tr id="row_1_70_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1GlobalTypeVar.html" target="_self">GlobalTypeVar</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1GlobalTypeVarNode.html" title="A global type variable that is used for defining new types or type aliases. ">GlobalTypeVarNode</a> </td></tr>
-<tr id="row_1_71_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1GlobalTypeVarNode.html" target="_self">GlobalTypeVarNode</a></td><td class="desc">A global type variable that is used for defining new types or type aliases </td></tr>
-<tr id="row_1_72_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1GlobalVar.html" target="_self">GlobalVar</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1GlobalVarNode.html" title="Global variable that lives in the top-level module. ">GlobalVarNode</a> </td></tr>
-<tr id="row_1_73_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1GlobalVarNode.html" target="_self">GlobalVarNode</a></td><td class="desc">Global variable that lives in the top-level module </td></tr>
-<tr id="row_1_74_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1GlobalVarSupply.html" target="_self">GlobalVarSupply</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1GlobalVarSupplyNode.html" title="GlobalVarSupply can be used to generate unique GlobalVars. ">GlobalVarSupplyNode</a> </td></tr>
-<tr id="row_1_75_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1GlobalVarSupplyNode.html" target="_self">GlobalVarSupplyNode</a></td><td class="desc"><a class="el" href="classtvm_1_1GlobalVarSupply.html" title="Managed reference class to GlobalVarSupplyNode. ">GlobalVarSupply</a> can be used to generate unique GlobalVars </td></tr>
-<tr id="row_1_76_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1IncompleteType.html" target="_self">IncompleteType</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1IncompleteTypeNode.html" title="Intermediate values that is used to indicate incomplete type during type inference. ">IncompleteTypeNode</a> </td></tr>
-<tr id="row_1_77_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1IncompleteTypeNode.html" target="_self">IncompleteTypeNode</a></td><td class="desc">Intermediate values that is used to indicate incomplete type during type inference </td></tr>
-<tr id="row_1_78_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1Integer.html" target="_self">Integer</a></td><td class="desc">Container of constant int that adds more constructors </td></tr>
-<tr id="row_1_79_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1IntImm.html" target="_self">IntImm</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1IntImmNode.html" title="Constant integer literals in the program. ">IntImmNode</a> </td></tr>
-<tr id="row_1_80_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1IntImmNode.html" target="_self">IntImmNode</a></td><td class="desc">Constant integer literals in the program </td></tr>
-<tr id="row_1_81_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1IRModule.html" target="_self">IRModule</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1IRModuleNode.html" title="IRModule that holds functions and type definitions. ">IRModuleNode</a> </td></tr>
-<tr id="row_1_82_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1IRModuleNode.html" target="_self">IRModuleNode</a></td><td class="desc"><a class="el" href="classtvm_1_1IRModule.html" title="Managed reference class to IRModuleNode. ">IRModule</a> that holds functions and type definitions </td></tr>
-<tr id="row_1_83_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1MapValuePath.html" target="_self">MapValuePath</a></td><td class="desc"></td></tr>
-<tr id="row_1_84_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1MapValuePathNode.html" target="_self">MapValuePathNode</a></td><td class="desc"></td></tr>
-<tr id="row_1_85_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1MemoryInfo.html" target="_self">MemoryInfo</a></td><td class="desc">Defines memory info </td></tr>
-<tr id="row_1_86_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1MemoryInfoNode.html" target="_self">MemoryInfoNode</a></td><td class="desc">Memory information of special memory region. Use <a class="el" href="classtvm_1_1MemoryInfo.html" title="Defines memory info. ">MemoryInfo</a> as its container type </td></tr>
-<tr id="row_1_87_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1MissingArrayElementPath.html" target="_self">MissingArrayElementPath</a></td><td class="desc"></td></tr>
-<tr id="row_1_88_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1MissingArrayElementPathNode.html" target="_self">MissingArrayElementPathNode</a></td><td class="desc"></td></tr>
-<tr id="row_1_89_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1MissingMapEntryPath.html" target="_self">MissingMapEntryPath</a></td><td class="desc"></td></tr>
-<tr id="row_1_90_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1MissingMapEntryPathNode.html" target="_self">MissingMapEntryPathNode</a></td><td class="desc"></td></tr>
-<tr id="row_1_91_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1NameSupply.html" target="_self">NameSupply</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1NameSupplyNode.html" title="NameSupply can be used to generate unique names. ">NameSupplyNode</a> </td></tr>
-<tr id="row_1_92_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1NameSupplyNode.html" target="_self">NameSupplyNode</a></td><td class="desc"><a class="el" href="classtvm_1_1NameSupply.html" title="Managed reference class to NameSupplyNode. ">NameSupply</a> can be used to generate unique names </td></tr>
-<tr id="row_1_93_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1NDArrayContainerTrait.html" target="_self">NDArrayContainerTrait</a></td><td class="desc"></td></tr>
-<tr id="row_1_94_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1NodeFunctor.html" target="_self">NodeFunctor</a></td><td class="desc">A dynamically dispatched functor on the type of the first argument </td></tr>
-<tr id="row_1_95_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1NodeFunctor_3_01R_07const_01ObjectRef_01_6n_00_01Args_8_8_8_08_4.html" target="_self">NodeFunctor&lt; R(const ObjectRef &amp;n, Args...)&gt;</a></td><td class="desc"></td></tr>
-<tr id="row_1_96_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1ObjectPath.html" target="_self">ObjectPath</a></td><td class="desc"></td></tr>
-<tr id="row_1_97_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1ObjectPathNode.html" target="_self">ObjectPathNode</a></td><td class="desc">Path to an object from some root object </td></tr>
-<tr id="row_1_98_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1ObjectPathPair.html" target="_self">ObjectPathPair</a></td><td class="desc"></td></tr>
-<tr id="row_1_99_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1ObjectPathPairNode.html" target="_self">ObjectPathPairNode</a></td><td class="desc">Pair of <code><a class="el" href="classtvm_1_1ObjectPath.html">ObjectPath</a></code>s, one for each object being tested for structural equality </td></tr>
-<tr id="row_1_100_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1Op.html" target="_self">Op</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1OpNode.html" title="Primitive Op(builtin intrinsics) ">OpNode</a> </td></tr>
-<tr id="row_1_101_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1OpAttrMap.html" target="_self">OpAttrMap</a></td><td class="desc">Map&lt;Op,ValueType&gt; used to store meta-information about <a class="el" href="classtvm_1_1Op.html" title="Managed reference class to OpNode. ">Op</a> </td></tr>
-<tr id="row_1_102_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1OpNode.html" target="_self">OpNode</a></td><td class="desc">Primitive Op(builtin intrinsics) </td></tr>
-<tr id="row_1_103_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1OpRegEntry.html" target="_self">OpRegEntry</a></td><td class="desc">Helper structure to register operators </td></tr>
-<tr id="row_1_104_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1PointerType.html" target="_self">PointerType</a></td><td class="desc"></td></tr>
-<tr id="row_1_105_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1PointerTypeNode.html" target="_self">PointerTypeNode</a></td><td class="desc">Low-level raw pointer type </td></tr>
-<tr id="row_1_106_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1PoolInfo.html" target="_self">PoolInfo</a></td><td class="desc">Base class for <a class="el" href="classtvm_1_1WorkspacePoolInfo.html">WorkspacePoolInfo</a> and <a class="el" href="classtvm_1_1ConstantPoolInfo.html">ConstantPoolInfo</a> </td></tr>
-<tr id="row_1_107_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1PoolInfoNode.html" target="_self">PoolInfoNode</a></td><td class="desc">Describes a pool of memory accessible by one or more targets </td></tr>
-<tr id="row_1_108_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1PoolInfoProperties.html" target="_self">PoolInfoProperties</a></td><td class="desc"></td></tr>
-<tr id="row_1_109_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1PoolInfoPropertiesNode.html" target="_self">PoolInfoPropertiesNode</a></td><td class="desc">Describes a pool of memory properties </td></tr>
-<tr id="row_1_110_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1PrimExpr.html" target="_self">PrimExpr</a></td><td class="desc">Reference to <a class="el" href="classtvm_1_1PrimExprNode.html" title="Base node of all primitive expressions. ">PrimExprNode</a> </td></tr>
-<tr id="row_1_111_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1PrimExprNode.html" target="_self">PrimExprNode</a></td><td class="desc">Base node of all primitive expressions </td></tr>
-<tr id="row_1_112_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1PrimType.html" target="_self">PrimType</a></td><td class="desc"></td></tr>
-<tr id="row_1_113_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1PrimTypeNode.html" target="_self">PrimTypeNode</a></td><td class="desc">Primitive data types used in the low-level IR </td></tr>
-<tr id="row_1_114_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1Range.html" target="_self">Range</a></td><td class="desc"><a class="el" href="classtvm_1_1Range.html" title="Range constainer. ">Range</a> constainer </td></tr>
-<tr id="row_1_115_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1RangeNode.html" target="_self">RangeNode</a></td><td class="desc"><a class="el" href="classtvm_1_1Range.html" title="Range constainer. ">Range</a> over one dimension </td></tr>
-<tr id="row_1_116_" class="even" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_1_116_" class="arrow" onclick="toggleFolder('1_116_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1ReflectionVTable.html" target="_self">ReflectionVTable</a></td><td class="desc">Virtual function table to support IR/AST node reflection </td></tr>
-<tr id="row_1_116_0_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1ReflectionVTable_1_1Registry.html" target="_self">Registry</a></td><td class="desc"><a class="el" href="classtvm_1_1ReflectionVTable_1_1Registry.html" title="Registry of a reflection table. ">Registry</a> of a reflection table </td></tr>
-<tr id="row_1_117_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1RelayExpr.html" target="_self">RelayExpr</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1RelayExprNode.html" title="Base node of all non-primitive expressions. ">RelayExprNode</a> </td></tr>
-<tr id="row_1_118_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1RelayExprNode.html" target="_self">RelayExprNode</a></td><td class="desc">Base node of all non-primitive expressions </td></tr>
-<tr id="row_1_119_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1RelayRefType.html" target="_self">RelayRefType</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1RelayRefTypeNode.html" title="Reference Type High-level Relay IR. ">RelayRefTypeNode</a> </td></tr>
-<tr id="row_1_120_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1RelayRefTypeNode.html" target="_self">RelayRefTypeNode</a></td><td class="desc">Reference <a class="el" href="classtvm_1_1Type.html" title="Managed reference to TypeNode. ">Type</a> High-level Relay IR </td></tr>
-<tr id="row_1_121_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1ReprPrinter.html" target="_self">ReprPrinter</a></td><td class="desc">A printer class to print the AST/IR nodes </td></tr>
-<tr id="row_1_122_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1RootPath.html" target="_self">RootPath</a></td><td class="desc"></td></tr>
-<tr id="row_1_123_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1RootPathNode.html" target="_self">RootPathNode</a></td><td class="desc"></td></tr>
-<tr id="row_1_124_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1SEqualHandlerDefault.html" target="_self">SEqualHandlerDefault</a></td><td class="desc">The default handler for equality testing </td></tr>
-<tr id="row_1_125_" class="even" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_1_125_" class="arrow" onclick="toggleFolder('1_125_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1SEqualReducer.html" target="_self">SEqualReducer</a></td><td class="desc">A Reducer class to reduce the structural equality result of two objects </td></tr>
-<tr id="row_1_125_0_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1SEqualReducer_1_1Handler.html" target="_self">Handler</a></td><td class="desc">Internal handler that defines custom behaviors. </td></tr>
-<tr id="row_1_126_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1SHashHandlerDefault.html" target="_self">SHashHandlerDefault</a></td><td class="desc">The default handler for hash key computation </td></tr>
-<tr id="row_1_127_" class="even" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_1_127_" class="arrow" onclick="toggleFolder('1_127_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1SHashReducer.html" target="_self">SHashReducer</a></td><td class="desc">A Reducer class to reduce the structural hash value </td></tr>
-<tr id="row_1_127_0_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1SHashReducer_1_1Handler.html" target="_self">Handler</a></td><td class="desc">Internal handler that defines custom behaviors </td></tr>
-<tr id="row_1_128_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1SourceName.html" target="_self">SourceName</a></td><td class="desc">The source name of a file span </td></tr>
-<tr id="row_1_129_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1SourceNameNode.html" target="_self">SourceNameNode</a></td><td class="desc">The name of a source fragment </td></tr>
-<tr id="row_1_130_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1Span.html" target="_self">Span</a></td><td class="desc"></td></tr>
-<tr id="row_1_131_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1SpanNode.html" target="_self">SpanNode</a></td><td class="desc">Stores locations in frontend source that generated a node </td></tr>
-<tr id="row_1_132_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1StructuralEqual.html" target="_self">StructuralEqual</a></td><td class="desc">Content-aware structural equality comparator for objects </td></tr>
-<tr id="row_1_133_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1StructuralHash.html" target="_self">StructuralHash</a></td><td class="desc">Content-aware structural hasing </td></tr>
-<tr id="row_1_134_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1Target.html" target="_self">Target</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1TargetNode.html" title="Compilation target. ">TargetNode</a> </td></tr>
-<tr id="row_1_135_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TargetKind.html" target="_self">TargetKind</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1TargetKindNode.html" title="Target kind, specifies the kind of the target. ">TargetKindNode</a> </td></tr>
-<tr id="row_1_136_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TargetKindAttrMap.html" target="_self">TargetKindAttrMap</a></td><td class="desc">Map&lt;TargetKind, ValueType&gt; used to store meta-information about <a class="el" href="classtvm_1_1TargetKind.html" title="Managed reference class to TargetKindNode. ">TargetKind</a> </td></tr>
-<tr id="row_1_137_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TargetKindNode.html" target="_self">TargetKindNode</a></td><td class="desc"><a class="el" href="classtvm_1_1Target.html" title="Managed reference class to TargetNode. ">Target</a> kind, specifies the kind of the target </td></tr>
-<tr id="row_1_138_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TargetKindRegEntry.html" target="_self">TargetKindRegEntry</a></td><td class="desc">Helper structure to register <a class="el" href="classtvm_1_1TargetKind.html" title="Managed reference class to TargetKindNode. ">TargetKind</a> </td></tr>
-<tr id="row_1_139_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TargetNode.html" target="_self">TargetNode</a></td><td class="desc">Compilation target </td></tr>
-<tr id="row_1_140_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TargetTag.html" target="_self">TargetTag</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1TargetTagNode.html" title="A target tag. ">TargetTagNode</a> </td></tr>
-<tr id="row_1_141_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TargetTagNode.html" target="_self">TargetTagNode</a></td><td class="desc">A target tag </td></tr>
-<tr id="row_1_142_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TargetTagRegEntry.html" target="_self">TargetTagRegEntry</a></td><td class="desc"></td></tr>
-<tr id="row_1_143_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TensorAffineType.html" target="_self">TensorAffineType</a></td><td class="desc">Managed reference to AffineTypes </td></tr>
-<tr id="row_1_144_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TensorAffineTypeNode.html" target="_self">TensorAffineTypeNode</a></td><td class="desc"><a class="el" href="classtvm_1_1TensorAffineType.html" title="Managed reference to AffineTypes. ">TensorAffineType</a> representation </td></tr>
-<tr id="row_1_145_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TensorType.html" target="_self">TensorType</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1TensorTypeNode.html" title="This is the most commonly used type in relay. TensorType have a fixed dimension, data type...">TensorTypeNode</a> </td></tr>
-<tr id="row_1_146_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TensorTypeNode.html" target="_self">TensorTypeNode</a></td><td class="desc">This is the most commonly used type in relay. <a class="el" href="classtvm_1_1TensorType.html" title="Managed reference to TensorTypeNode. ">TensorType</a> have a fixed dimension, data type </td></tr>
-<tr id="row_1_147_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TracedArray.html" target="_self">TracedArray</a></td><td class="desc">Traced wrapper for Array objects </td></tr>
-<tr id="row_1_148_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TracedArrayIterator.html" target="_self">TracedArrayIterator</a></td><td class="desc">Iterator class for TracedArray&lt;T&gt; </td></tr>
-<tr id="row_1_149_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TracedBasicValue.html" target="_self">TracedBasicValue</a></td><td class="desc">Traced wrapper for basic values (i.e. non-TVM objects) </td></tr>
-<tr id="row_1_150_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TracedMap.html" target="_self">TracedMap</a></td><td class="desc">Traced wrapper for Map objects </td></tr>
-<tr id="row_1_151_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TracedMapIterator.html" target="_self">TracedMapIterator</a></td><td class="desc">Iterator class for TracedMap&lt;K, V&gt; </td></tr>
-<tr id="row_1_152_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TracedObject.html" target="_self">TracedObject</a></td><td class="desc">Traced wrapper for regular (non-container) TVM objects </td></tr>
-<tr id="row_1_153_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TracedOptional.html" target="_self">TracedOptional</a></td><td class="desc">Traced wrapper for Optional objects </td></tr>
-<tr id="row_1_154_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TupleAffineType.html" target="_self">TupleAffineType</a></td><td class="desc">Managed reference to TupleAffineTypes </td></tr>
-<tr id="row_1_155_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TupleAffineTypeNode.html" target="_self">TupleAffineTypeNode</a></td><td class="desc"><a class="el" href="classtvm_1_1TupleAffineType.html" title="Managed reference to TupleAffineTypes. ">TupleAffineType</a> representation </td></tr>
-<tr id="row_1_156_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TupleType.html" target="_self">TupleType</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1TupleTypeNode.html" title="The type of tuple values. ">TupleTypeNode</a> </td></tr>
-<tr id="row_1_157_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TupleTypeNode.html" target="_self">TupleTypeNode</a></td><td class="desc">The type of tuple values </td></tr>
-<tr id="row_1_158_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1Type.html" target="_self">Type</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1TypeNode.html" title="Type is the base type of all types. ">TypeNode</a> </td></tr>
-<tr id="row_1_159_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeCall.html" target="_self">TypeCall</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1TypeCallNode.html" title="Type function application. ">TypeCallNode</a> </td></tr>
-<tr id="row_1_160_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeCallNode.html" target="_self">TypeCallNode</a></td><td class="desc"><a class="el" href="classtvm_1_1Type.html" title="Managed reference to TypeNode. ">Type</a> function application </td></tr>
-<tr id="row_1_161_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeConstraint.html" target="_self">TypeConstraint</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1TypeConstraintNode.html" title="Potential Constraints in a function. ">TypeConstraintNode</a> </td></tr>
-<tr id="row_1_162_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeConstraintNode.html" target="_self">TypeConstraintNode</a></td><td class="desc">Potential Constraints in a function </td></tr>
-<tr id="row_1_163_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeData.html" target="_self">TypeData</a></td><td class="desc">Stores all data for an Algebraic Data <a class="el" href="classtvm_1_1Type.html" title="Managed reference to TypeNode. ">Type</a> (ADT) </td></tr>
-<tr id="row_1_164_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeDataNode.html" target="_self">TypeDataNode</a></td><td class="desc"><a class="el" href="classtvm_1_1TypeData.html" title="Stores all data for an Algebraic Data Type (ADT). ">TypeData</a> container node </td></tr>
-<tr id="row_1_165_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypedEnvFunc.html" target="_self">TypedEnvFunc</a></td><td class="desc">Please refer to <a class="el" href="classtvm_1_1TypedEnvFunc_3_01R_07Args_8_8_8_08_4.html#TypedEnvFuncAnchor">TypedEnvFunc&lt;R(Args..)&gt;</a> </td></tr>
-<tr id="row_1_166_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypedEnvFunc_3_01R_07Args_8_8_8_08_4.html" target="_self">TypedEnvFunc&lt; R(Args...)&gt;</a></td><td class="desc">A typed version of <a class="el" href="classtvm_1_1EnvFunc.html" title="Managed reference to EnvFuncNode. ">EnvFunc</a>. It is backed by a GlobalFuncNode inte [...]
-<tr id="row_1_167_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeFunctor.html" target="_self">TypeFunctor</a></td><td class="desc"></td></tr>
-<tr id="row_1_168_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeFunctor_3_01R_07const_01Type_01_6n_00_01Args_8_8_8_08_4.html" target="_self">TypeFunctor&lt; R(const Type &amp;n, Args...)&gt;</a></td><td class="desc"></td></tr>
-<tr id="row_1_169_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeMutator.html" target="_self">TypeMutator</a></td><td class="desc"><a class="el" href="classtvm_1_1TypeMutator.html" title="TypeMutator that mutates expressions. ">TypeMutator</a> that mutates expressions </td></tr>
-<tr id="row_1_170_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeNode.html" target="_self">TypeNode</a></td><td class="desc"><a class="el" href="classtvm_1_1Type.html" title="Managed reference to TypeNode. ">Type</a> is the base type of all types </td></tr>
-<tr id="row_1_171_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeRelation.html" target="_self">TypeRelation</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1TypeRelationNode.html" title="User defined type relation, it is an input-output relation on types. ">TypeRelationNode</a> </td></tr>
-<tr id="row_1_172_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeRelationNode.html" target="_self">TypeRelationNode</a></td><td class="desc">User defined type relation, it is an input-output relation on types </td></tr>
-<tr id="row_1_173_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeReporter.html" target="_self">TypeReporter</a></td><td class="desc">Container class of <a class="el" href="classtvm_1_1TypeReporter.html" title="Container class of TypeReporter. ">TypeReporter</a> </td></tr>
-<tr id="row_1_174_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeReporterNode.html" target="_self">TypeReporterNode</a></td><td class="desc">Reporter that reports back to the type resolution information </td></tr>
-<tr id="row_1_175_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeVar.html" target="_self">TypeVar</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1TypeVarNode.html" title="Type parameter in functions. ">TypeVarNode</a> </td></tr>
-<tr id="row_1_176_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeVarNode.html" target="_self">TypeVarNode</a></td><td class="desc"><a class="el" href="classtvm_1_1Type.html" title="Managed reference to TypeNode. ">Type</a> parameter in functions </td></tr>
-<tr id="row_1_177_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeVisitor.html" target="_self">TypeVisitor</a></td><td class="desc">A type visitor that recursively visit types </td></tr>
-<tr id="row_1_178_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1UnknownAttributeAccessPath.html" target="_self">UnknownAttributeAccessPath</a></td><td class="desc"></td></tr>
-<tr id="row_1_179_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1UnknownAttributeAccessPathNode.html" target="_self">UnknownAttributeAccessPathNode</a></td><td class="desc"></td></tr>
-<tr id="row_1_180_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1VirtualDevice.html" target="_self">VirtualDevice</a></td><td class="desc">Managed reference class to <code><a class="el" href="classtvm_1_1VirtualDeviceNode.html" title="Describes at compile time the constraints on where data is to be stored at runtime down to the (virtu.. [...]
-<tr id="row_1_181_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1VirtualDeviceCache.html" target="_self">VirtualDeviceCache</a></td><td class="desc">A cache of <code>VirtualDevices</code>. This can be used: </td></tr>
-<tr id="row_1_182_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1VirtualDeviceNode.html" target="_self">VirtualDeviceNode</a></td><td class="desc">Describes at compile time the constraints on where data is to be stored at runtime down to the (virtual) device and memory scope level, and how to compile code to compute that data. Used by t [...]
-<tr id="row_1_183_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1With.html" target="_self">With</a></td><td class="desc">RAII wrapper function to enter and exit a context object similar to python's with syntax </td></tr>
-<tr id="row_1_184_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1WorkspaceMemoryPools.html" target="_self">WorkspaceMemoryPools</a></td><td class="desc"></td></tr>
-<tr id="row_1_185_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1WorkspaceMemoryPoolsNode.html" target="_self">WorkspaceMemoryPoolsNode</a></td><td class="desc"></td></tr>
-<tr id="row_1_186_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1WorkspacePoolInfo.html" target="_self">WorkspacePoolInfo</a></td><td class="desc"></td></tr>
-<tr id="row_1_187_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1WorkspacePoolInfoNode.html" target="_self">WorkspacePoolInfoNode</a></td><td class="desc"></td></tr>
+<tr id="row_1_50_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1Diagnostic.html" target="_self">Diagnostic</a></td><td class="desc"></td></tr>
+<tr id="row_1_51_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1DiagnosticBuilder.html" target="_self">DiagnosticBuilder</a></td><td class="desc">A wrapper around std::stringstream to build a diagnostic </td></tr>
+<tr id="row_1_52_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1DiagnosticContext.html" target="_self">DiagnosticContext</a></td><td class="desc"></td></tr>
+<tr id="row_1_53_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1DiagnosticContextNode.html" target="_self">DiagnosticContextNode</a></td><td class="desc"></td></tr>
+<tr id="row_1_54_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1DiagnosticNode.html" target="_self">DiagnosticNode</a></td><td class="desc">A compiler diagnostic message </td></tr>
+<tr id="row_1_55_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1DiagnosticRenderer.html" target="_self">DiagnosticRenderer</a></td><td class="desc"></td></tr>
+<tr id="row_1_56_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1DiagnosticRendererNode.html" target="_self">DiagnosticRendererNode</a></td><td class="desc">Display diagnostics in a given display format </td></tr>
+<tr id="row_1_57_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1DictAttrs.html" target="_self">DictAttrs</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1DictAttrsNode.html" title="Specialized attribute type that is backed by a map. The DictAttrsNode implements the Attrs behavior...">DictAttrsNode</a> </td></tr>
+<tr id="row_1_58_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1DictAttrsNode.html" target="_self">DictAttrsNode</a></td><td class="desc">Specialized attribute type that is backed by a map. The <a class="el" href="classtvm_1_1DictAttrsNode.html" title="Specialized attribute type that is backed by a map. The DictAttrsNode implements the  [...]
+<tr id="row_1_59_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1EnvFunc.html" target="_self">EnvFunc</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1EnvFuncNode.html" title="A serializable function backed by TVM&#39;s global environment. ">EnvFuncNode</a> </td></tr>
+<tr id="row_1_60_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1EnvFuncNode.html" target="_self">EnvFuncNode</a></td><td class="desc">A serializable function backed by TVM's global environment </td></tr>
+<tr id="row_1_61_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1ErrorBuilder.html" target="_self">ErrorBuilder</a></td><td class="desc">A wrapper around std::stringstream to build error </td></tr>
+<tr id="row_1_62_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1ErrorReporter.html" target="_self">ErrorReporter</a></td><td class="desc">An abstraction around how errors are stored and reported. Designed to be opaque to users, so we can support a robust and simpler error reporting mode, as well as a more complex mode </td></tr>
+<tr id="row_1_63_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1FloatImm.html" target="_self">FloatImm</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1FloatImmNode.html" title="Constant floating point literals in the program. ">FloatImmNode</a> </td></tr>
+<tr id="row_1_64_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1FloatImmNode.html" target="_self">FloatImmNode</a></td><td class="desc">Constant floating point literals in the program </td></tr>
+<tr id="row_1_65_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1FuncType.html" target="_self">FuncType</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1FuncTypeNode.html" title="Function type. ">FuncTypeNode</a> </td></tr>
+<tr id="row_1_66_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1FuncTypeNode.html" target="_self">FuncTypeNode</a></td><td class="desc">Function type </td></tr>
+<tr id="row_1_67_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1GenericFunc.html" target="_self">GenericFunc</a></td><td class="desc">Generic function that can be specialized on a per-target basis </td></tr>
+<tr id="row_1_68_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1GenericFuncNode.html" target="_self">GenericFuncNode</a></td><td class="desc">Represents a generic function that can be specialized on a per-target basis </td></tr>
+<tr id="row_1_69_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1GlobalTypeVar.html" target="_self">GlobalTypeVar</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1GlobalTypeVarNode.html" title="A global type variable that is used for defining new types or type aliases. ">GlobalTypeVarNode</a> </td></tr>
+<tr id="row_1_70_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1GlobalTypeVarNode.html" target="_self">GlobalTypeVarNode</a></td><td class="desc">A global type variable that is used for defining new types or type aliases </td></tr>
+<tr id="row_1_71_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1GlobalVar.html" target="_self">GlobalVar</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1GlobalVarNode.html" title="Global variable that lives in the top-level module. ">GlobalVarNode</a> </td></tr>
+<tr id="row_1_72_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1GlobalVarNode.html" target="_self">GlobalVarNode</a></td><td class="desc">Global variable that lives in the top-level module </td></tr>
+<tr id="row_1_73_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1GlobalVarSupply.html" target="_self">GlobalVarSupply</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1GlobalVarSupplyNode.html" title="GlobalVarSupply can be used to generate unique GlobalVars. ">GlobalVarSupplyNode</a> </td></tr>
+<tr id="row_1_74_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1GlobalVarSupplyNode.html" target="_self">GlobalVarSupplyNode</a></td><td class="desc"><a class="el" href="classtvm_1_1GlobalVarSupply.html" title="Managed reference class to GlobalVarSupplyNode. ">GlobalVarSupply</a> can be used to generate unique GlobalVars </td></tr>
+<tr id="row_1_75_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1IncompleteType.html" target="_self">IncompleteType</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1IncompleteTypeNode.html" title="Intermediate values that is used to indicate incomplete type during type inference. ">IncompleteTypeNode</a> </td></tr>
+<tr id="row_1_76_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1IncompleteTypeNode.html" target="_self">IncompleteTypeNode</a></td><td class="desc">Intermediate values that is used to indicate incomplete type during type inference </td></tr>
+<tr id="row_1_77_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1Integer.html" target="_self">Integer</a></td><td class="desc">Container of constant int that adds more constructors </td></tr>
+<tr id="row_1_78_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1IntImm.html" target="_self">IntImm</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1IntImmNode.html" title="Constant integer literals in the program. ">IntImmNode</a> </td></tr>
+<tr id="row_1_79_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1IntImmNode.html" target="_self">IntImmNode</a></td><td class="desc">Constant integer literals in the program </td></tr>
+<tr id="row_1_80_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1IRModule.html" target="_self">IRModule</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1IRModuleNode.html" title="IRModule that holds functions and type definitions. ">IRModuleNode</a> </td></tr>
+<tr id="row_1_81_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1IRModuleNode.html" target="_self">IRModuleNode</a></td><td class="desc"><a class="el" href="classtvm_1_1IRModule.html" title="Managed reference class to IRModuleNode. ">IRModule</a> that holds functions and type definitions </td></tr>
+<tr id="row_1_82_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1MapValuePath.html" target="_self">MapValuePath</a></td><td class="desc"></td></tr>
+<tr id="row_1_83_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1MapValuePathNode.html" target="_self">MapValuePathNode</a></td><td class="desc"></td></tr>
+<tr id="row_1_84_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1MemoryInfo.html" target="_self">MemoryInfo</a></td><td class="desc">Defines memory info </td></tr>
+<tr id="row_1_85_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1MemoryInfoNode.html" target="_self">MemoryInfoNode</a></td><td class="desc">Memory information of special memory region. Use <a class="el" href="classtvm_1_1MemoryInfo.html" title="Defines memory info. ">MemoryInfo</a> as its container type </td></tr>
+<tr id="row_1_86_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1MissingArrayElementPath.html" target="_self">MissingArrayElementPath</a></td><td class="desc"></td></tr>
+<tr id="row_1_87_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1MissingArrayElementPathNode.html" target="_self">MissingArrayElementPathNode</a></td><td class="desc"></td></tr>
+<tr id="row_1_88_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1MissingMapEntryPath.html" target="_self">MissingMapEntryPath</a></td><td class="desc"></td></tr>
+<tr id="row_1_89_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1MissingMapEntryPathNode.html" target="_self">MissingMapEntryPathNode</a></td><td class="desc"></td></tr>
+<tr id="row_1_90_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1NameSupply.html" target="_self">NameSupply</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1NameSupplyNode.html" title="NameSupply can be used to generate unique names. ">NameSupplyNode</a> </td></tr>
+<tr id="row_1_91_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1NameSupplyNode.html" target="_self">NameSupplyNode</a></td><td class="desc"><a class="el" href="classtvm_1_1NameSupply.html" title="Managed reference class to NameSupplyNode. ">NameSupply</a> can be used to generate unique names </td></tr>
+<tr id="row_1_92_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1NDArrayContainerTrait.html" target="_self">NDArrayContainerTrait</a></td><td class="desc"></td></tr>
+<tr id="row_1_93_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1NodeFunctor.html" target="_self">NodeFunctor</a></td><td class="desc">A dynamically dispatched functor on the type of the first argument </td></tr>
+<tr id="row_1_94_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1NodeFunctor_3_01R_07const_01ObjectRef_01_6n_00_01Args_8_8_8_08_4.html" target="_self">NodeFunctor&lt; R(const ObjectRef &amp;n, Args...)&gt;</a></td><td class="desc"></td></tr>
+<tr id="row_1_95_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1ObjectPath.html" target="_self">ObjectPath</a></td><td class="desc"></td></tr>
+<tr id="row_1_96_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1ObjectPathNode.html" target="_self">ObjectPathNode</a></td><td class="desc">Path to an object from some root object </td></tr>
+<tr id="row_1_97_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1ObjectPathPair.html" target="_self">ObjectPathPair</a></td><td class="desc"></td></tr>
+<tr id="row_1_98_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1ObjectPathPairNode.html" target="_self">ObjectPathPairNode</a></td><td class="desc">Pair of <code><a class="el" href="classtvm_1_1ObjectPath.html">ObjectPath</a></code>s, one for each object being tested for structural equality </td></tr>
+<tr id="row_1_99_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1Op.html" target="_self">Op</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1OpNode.html" title="Primitive Op(builtin intrinsics) ">OpNode</a> </td></tr>
+<tr id="row_1_100_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1OpAttrMap.html" target="_self">OpAttrMap</a></td><td class="desc">Map&lt;Op,ValueType&gt; used to store meta-information about <a class="el" href="classtvm_1_1Op.html" title="Managed reference class to OpNode. ">Op</a> </td></tr>
+<tr id="row_1_101_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1OpNode.html" target="_self">OpNode</a></td><td class="desc">Primitive Op(builtin intrinsics) </td></tr>
+<tr id="row_1_102_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1OpRegEntry.html" target="_self">OpRegEntry</a></td><td class="desc">Helper structure to register operators </td></tr>
+<tr id="row_1_103_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1PointerType.html" target="_self">PointerType</a></td><td class="desc"></td></tr>
+<tr id="row_1_104_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1PointerTypeNode.html" target="_self">PointerTypeNode</a></td><td class="desc">Low-level raw pointer type </td></tr>
+<tr id="row_1_105_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1PoolInfo.html" target="_self">PoolInfo</a></td><td class="desc">Base class for <a class="el" href="classtvm_1_1WorkspacePoolInfo.html">WorkspacePoolInfo</a> and <a class="el" href="classtvm_1_1ConstantPoolInfo.html">ConstantPoolInfo</a> </td></tr>
+<tr id="row_1_106_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1PoolInfoNode.html" target="_self">PoolInfoNode</a></td><td class="desc">Describes a pool of memory accessible by one or more targets </td></tr>
+<tr id="row_1_107_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1PoolInfoProperties.html" target="_self">PoolInfoProperties</a></td><td class="desc"></td></tr>
+<tr id="row_1_108_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1PoolInfoPropertiesNode.html" target="_self">PoolInfoPropertiesNode</a></td><td class="desc">Describes a pool of memory properties </td></tr>
+<tr id="row_1_109_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1PrimExpr.html" target="_self">PrimExpr</a></td><td class="desc">Reference to <a class="el" href="classtvm_1_1PrimExprNode.html" title="Base node of all primitive expressions. ">PrimExprNode</a> </td></tr>
+<tr id="row_1_110_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1PrimExprNode.html" target="_self">PrimExprNode</a></td><td class="desc">Base node of all primitive expressions </td></tr>
+<tr id="row_1_111_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1PrimType.html" target="_self">PrimType</a></td><td class="desc"></td></tr>
+<tr id="row_1_112_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1PrimTypeNode.html" target="_self">PrimTypeNode</a></td><td class="desc">Primitive data types used in the low-level IR </td></tr>
+<tr id="row_1_113_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1Range.html" target="_self">Range</a></td><td class="desc"><a class="el" href="classtvm_1_1Range.html" title="Range constainer. ">Range</a> constainer </td></tr>
+<tr id="row_1_114_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1RangeNode.html" target="_self">RangeNode</a></td><td class="desc"><a class="el" href="classtvm_1_1Range.html" title="Range constainer. ">Range</a> over one dimension </td></tr>
+<tr id="row_1_115_" class="even" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_1_115_" class="arrow" onclick="toggleFolder('1_115_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1ReflectionVTable.html" target="_self">ReflectionVTable</a></td><td class="desc">Virtual function table to support IR/AST node reflection </td></tr>
+<tr id="row_1_115_0_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1ReflectionVTable_1_1Registry.html" target="_self">Registry</a></td><td class="desc"><a class="el" href="classtvm_1_1ReflectionVTable_1_1Registry.html" title="Registry of a reflection table. ">Registry</a> of a reflection table </td></tr>
+<tr id="row_1_116_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1RelayExpr.html" target="_self">RelayExpr</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1RelayExprNode.html" title="Base node of all non-primitive expressions. ">RelayExprNode</a> </td></tr>
+<tr id="row_1_117_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1RelayExprNode.html" target="_self">RelayExprNode</a></td><td class="desc">Base node of all non-primitive expressions </td></tr>
+<tr id="row_1_118_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1RelayRefType.html" target="_self">RelayRefType</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1RelayRefTypeNode.html" title="Reference Type High-level Relay IR. ">RelayRefTypeNode</a> </td></tr>
+<tr id="row_1_119_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1RelayRefTypeNode.html" target="_self">RelayRefTypeNode</a></td><td class="desc">Reference <a class="el" href="classtvm_1_1Type.html" title="Managed reference to TypeNode. ">Type</a> High-level Relay IR </td></tr>
+<tr id="row_1_120_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1ReprPrinter.html" target="_self">ReprPrinter</a></td><td class="desc">A printer class to print the AST/IR nodes </td></tr>
+<tr id="row_1_121_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1RootPath.html" target="_self">RootPath</a></td><td class="desc"></td></tr>
+<tr id="row_1_122_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1RootPathNode.html" target="_self">RootPathNode</a></td><td class="desc"></td></tr>
+<tr id="row_1_123_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1SEqualHandlerDefault.html" target="_self">SEqualHandlerDefault</a></td><td class="desc">The default handler for equality testing </td></tr>
+<tr id="row_1_124_" class="even" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_1_124_" class="arrow" onclick="toggleFolder('1_124_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1SEqualReducer.html" target="_self">SEqualReducer</a></td><td class="desc">A Reducer class to reduce the structural equality result of two objects </td></tr>
+<tr id="row_1_124_0_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1SEqualReducer_1_1Handler.html" target="_self">Handler</a></td><td class="desc">Internal handler that defines custom behaviors. </td></tr>
+<tr id="row_1_125_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1SHashHandlerDefault.html" target="_self">SHashHandlerDefault</a></td><td class="desc">The default handler for hash key computation </td></tr>
+<tr id="row_1_126_" class="even" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_1_126_" class="arrow" onclick="toggleFolder('1_126_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1SHashReducer.html" target="_self">SHashReducer</a></td><td class="desc">A Reducer class to reduce the structural hash value </td></tr>
+<tr id="row_1_126_0_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1SHashReducer_1_1Handler.html" target="_self">Handler</a></td><td class="desc">Internal handler that defines custom behaviors </td></tr>
+<tr id="row_1_127_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1SourceName.html" target="_self">SourceName</a></td><td class="desc">The source name of a file span </td></tr>
+<tr id="row_1_128_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1SourceNameNode.html" target="_self">SourceNameNode</a></td><td class="desc">The name of a source fragment </td></tr>
+<tr id="row_1_129_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1Span.html" target="_self">Span</a></td><td class="desc"></td></tr>
+<tr id="row_1_130_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1SpanNode.html" target="_self">SpanNode</a></td><td class="desc">Stores locations in frontend source that generated a node </td></tr>
+<tr id="row_1_131_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1StructuralEqual.html" target="_self">StructuralEqual</a></td><td class="desc">Content-aware structural equality comparator for objects </td></tr>
+<tr id="row_1_132_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1StructuralHash.html" target="_self">StructuralHash</a></td><td class="desc">Content-aware structural hasing </td></tr>
+<tr id="row_1_133_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1Target.html" target="_self">Target</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1TargetNode.html" title="Compilation target. ">TargetNode</a> </td></tr>
+<tr id="row_1_134_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TargetKind.html" target="_self">TargetKind</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1TargetKindNode.html" title="Target kind, specifies the kind of the target. ">TargetKindNode</a> </td></tr>
+<tr id="row_1_135_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TargetKindAttrMap.html" target="_self">TargetKindAttrMap</a></td><td class="desc">Map&lt;TargetKind, ValueType&gt; used to store meta-information about <a class="el" href="classtvm_1_1TargetKind.html" title="Managed reference class to TargetKindNode. ">TargetKind</a> </td></tr>
+<tr id="row_1_136_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TargetKindNode.html" target="_self">TargetKindNode</a></td><td class="desc"><a class="el" href="classtvm_1_1Target.html" title="Managed reference class to TargetNode. ">Target</a> kind, specifies the kind of the target </td></tr>
+<tr id="row_1_137_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TargetKindRegEntry.html" target="_self">TargetKindRegEntry</a></td><td class="desc">Helper structure to register <a class="el" href="classtvm_1_1TargetKind.html" title="Managed reference class to TargetKindNode. ">TargetKind</a> </td></tr>
+<tr id="row_1_138_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TargetNode.html" target="_self">TargetNode</a></td><td class="desc">Compilation target </td></tr>
+<tr id="row_1_139_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TargetTag.html" target="_self">TargetTag</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1TargetTagNode.html" title="A target tag. ">TargetTagNode</a> </td></tr>
+<tr id="row_1_140_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TargetTagNode.html" target="_self">TargetTagNode</a></td><td class="desc">A target tag </td></tr>
+<tr id="row_1_141_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TargetTagRegEntry.html" target="_self">TargetTagRegEntry</a></td><td class="desc"></td></tr>
+<tr id="row_1_142_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TensorAffineType.html" target="_self">TensorAffineType</a></td><td class="desc">Managed reference to AffineTypes </td></tr>
+<tr id="row_1_143_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TensorAffineTypeNode.html" target="_self">TensorAffineTypeNode</a></td><td class="desc"><a class="el" href="classtvm_1_1TensorAffineType.html" title="Managed reference to AffineTypes. ">TensorAffineType</a> representation </td></tr>
+<tr id="row_1_144_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TensorType.html" target="_self">TensorType</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1TensorTypeNode.html" title="This is the most commonly used type in relay. TensorType have a fixed dimension, data type...">TensorTypeNode</a> </td></tr>
+<tr id="row_1_145_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TensorTypeNode.html" target="_self">TensorTypeNode</a></td><td class="desc">This is the most commonly used type in relay. <a class="el" href="classtvm_1_1TensorType.html" title="Managed reference to TensorTypeNode. ">TensorType</a> have a fixed dimension, data type </td></tr>
+<tr id="row_1_146_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TupleAffineType.html" target="_self">TupleAffineType</a></td><td class="desc">Managed reference to TupleAffineTypes </td></tr>
+<tr id="row_1_147_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TupleAffineTypeNode.html" target="_self">TupleAffineTypeNode</a></td><td class="desc"><a class="el" href="classtvm_1_1TupleAffineType.html" title="Managed reference to TupleAffineTypes. ">TupleAffineType</a> representation </td></tr>
+<tr id="row_1_148_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TupleType.html" target="_self">TupleType</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1TupleTypeNode.html" title="The type of tuple values. ">TupleTypeNode</a> </td></tr>
+<tr id="row_1_149_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TupleTypeNode.html" target="_self">TupleTypeNode</a></td><td class="desc">The type of tuple values </td></tr>
+<tr id="row_1_150_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1Type.html" target="_self">Type</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1TypeNode.html" title="Type is the base type of all types. ">TypeNode</a> </td></tr>
+<tr id="row_1_151_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeCall.html" target="_self">TypeCall</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1TypeCallNode.html" title="Type function application. ">TypeCallNode</a> </td></tr>
+<tr id="row_1_152_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeCallNode.html" target="_self">TypeCallNode</a></td><td class="desc"><a class="el" href="classtvm_1_1Type.html" title="Managed reference to TypeNode. ">Type</a> function application </td></tr>
+<tr id="row_1_153_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeConstraint.html" target="_self">TypeConstraint</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1TypeConstraintNode.html" title="Potential Constraints in a function. ">TypeConstraintNode</a> </td></tr>
+<tr id="row_1_154_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeConstraintNode.html" target="_self">TypeConstraintNode</a></td><td class="desc">Potential Constraints in a function </td></tr>
+<tr id="row_1_155_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeData.html" target="_self">TypeData</a></td><td class="desc">Stores all data for an Algebraic Data <a class="el" href="classtvm_1_1Type.html" title="Managed reference to TypeNode. ">Type</a> (ADT) </td></tr>
+<tr id="row_1_156_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeDataNode.html" target="_self">TypeDataNode</a></td><td class="desc"><a class="el" href="classtvm_1_1TypeData.html" title="Stores all data for an Algebraic Data Type (ADT). ">TypeData</a> container node </td></tr>
+<tr id="row_1_157_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypedEnvFunc.html" target="_self">TypedEnvFunc</a></td><td class="desc">Please refer to <a class="el" href="classtvm_1_1TypedEnvFunc_3_01R_07Args_8_8_8_08_4.html#TypedEnvFuncAnchor">TypedEnvFunc&lt;R(Args..)&gt;</a> </td></tr>
+<tr id="row_1_158_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypedEnvFunc_3_01R_07Args_8_8_8_08_4.html" target="_self">TypedEnvFunc&lt; R(Args...)&gt;</a></td><td class="desc">A typed version of <a class="el" href="classtvm_1_1EnvFunc.html" title="Managed reference to EnvFuncNode. ">EnvFunc</a>. It is backed by a GlobalFuncNode inte [...]
+<tr id="row_1_159_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeFunctor.html" target="_self">TypeFunctor</a></td><td class="desc"></td></tr>
+<tr id="row_1_160_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeFunctor_3_01R_07const_01Type_01_6n_00_01Args_8_8_8_08_4.html" target="_self">TypeFunctor&lt; R(const Type &amp;n, Args...)&gt;</a></td><td class="desc"></td></tr>
+<tr id="row_1_161_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeMutator.html" target="_self">TypeMutator</a></td><td class="desc"><a class="el" href="classtvm_1_1TypeMutator.html" title="TypeMutator that mutates expressions. ">TypeMutator</a> that mutates expressions </td></tr>
+<tr id="row_1_162_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeNode.html" target="_self">TypeNode</a></td><td class="desc"><a class="el" href="classtvm_1_1Type.html" title="Managed reference to TypeNode. ">Type</a> is the base type of all types </td></tr>
+<tr id="row_1_163_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeRelation.html" target="_self">TypeRelation</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1TypeRelationNode.html" title="User defined type relation, it is an input-output relation on types. ">TypeRelationNode</a> </td></tr>
+<tr id="row_1_164_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeRelationNode.html" target="_self">TypeRelationNode</a></td><td class="desc">User defined type relation, it is an input-output relation on types </td></tr>
+<tr id="row_1_165_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeReporter.html" target="_self">TypeReporter</a></td><td class="desc">Container class of <a class="el" href="classtvm_1_1TypeReporter.html" title="Container class of TypeReporter. ">TypeReporter</a> </td></tr>
+<tr id="row_1_166_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeReporterNode.html" target="_self">TypeReporterNode</a></td><td class="desc">Reporter that reports back to the type resolution information </td></tr>
+<tr id="row_1_167_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeVar.html" target="_self">TypeVar</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1TypeVarNode.html" title="Type parameter in functions. ">TypeVarNode</a> </td></tr>
+<tr id="row_1_168_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeVarNode.html" target="_self">TypeVarNode</a></td><td class="desc"><a class="el" href="classtvm_1_1Type.html" title="Managed reference to TypeNode. ">Type</a> parameter in functions </td></tr>
+<tr id="row_1_169_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeVisitor.html" target="_self">TypeVisitor</a></td><td class="desc">A type visitor that recursively visit types </td></tr>
+<tr id="row_1_170_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1UnknownAttributeAccessPath.html" target="_self">UnknownAttributeAccessPath</a></td><td class="desc"></td></tr>
+<tr id="row_1_171_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1UnknownAttributeAccessPathNode.html" target="_self">UnknownAttributeAccessPathNode</a></td><td class="desc"></td></tr>
+<tr id="row_1_172_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1VirtualDevice.html" target="_self">VirtualDevice</a></td><td class="desc">Managed reference class to <code><a class="el" href="classtvm_1_1VirtualDeviceNode.html" title="Describes at compile time the constraints on where data is to be stored at runtime down to the (virtu.. [...]
+<tr id="row_1_173_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1VirtualDeviceCache.html" target="_self">VirtualDeviceCache</a></td><td class="desc">A cache of <code>VirtualDevices</code>. This can be used: </td></tr>
+<tr id="row_1_174_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1VirtualDeviceNode.html" target="_self">VirtualDeviceNode</a></td><td class="desc">Describes at compile time the constraints on where data is to be stored at runtime down to the (virtual) device and memory scope level, and how to compile code to compute that data. Used by t [...]
+<tr id="row_1_175_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1With.html" target="_self">With</a></td><td class="desc">RAII wrapper function to enter and exit a context object similar to python's with syntax </td></tr>
+<tr id="row_1_176_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1WorkspaceMemoryPools.html" target="_self">WorkspaceMemoryPools</a></td><td class="desc"></td></tr>
+<tr id="row_1_177_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1WorkspaceMemoryPoolsNode.html" target="_self">WorkspaceMemoryPoolsNode</a></td><td class="desc"></td></tr>
+<tr id="row_1_178_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1WorkspacePoolInfo.html" target="_self">WorkspacePoolInfo</a></td><td class="desc"></td></tr>
+<tr id="row_1_179_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1WorkspacePoolInfoNode.html" target="_self">WorkspacePoolInfoNode</a></td><td class="desc"></td></tr>
 <tr id="row_2_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classGlobalVar.html" target="_self">GlobalVar</a></td><td class="desc"></td></tr>
 <tr id="row_3_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structMemoryManagerInterface.html" target="_self">MemoryManagerInterface</a></td><td class="desc"></td></tr>
 <tr id="row_4_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm__workspace__t.html" target="_self">tvm_workspace_t</a></td><td class="desc"></td></tr>
diff --git a/docs/reference/api/doxygen/array_8h.html b/docs/reference/api/doxygen/array_8h.html
index 5138c6c3d0..fcb91a94ba 100644
--- a/docs/reference/api/doxygen/array_8h.html
+++ b/docs/reference/api/doxygen/array_8h.html
@@ -90,7 +90,7 @@ Include dependency graph for array.h:</div>
 </div><div class="textblock"><div class="dynheader">
 This graph shows which files directly or indirectly include this file:</div>
 <div class="dyncontent">
-<div class="center"><iframe scrolling="no" frameborder="0" src="array_8h__dep__incl.svg" width="4026" height="1110"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<div class="center"><iframe scrolling="no" frameborder="0" src="array_8h__dep__incl.svg" width="4808" height="1110"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </div>
 </div>
 </div>
diff --git a/docs/reference/api/doxygen/array_8h__dep__incl.svg b/docs/reference/api/doxygen/array_8h__dep__incl.svg
index 320a2bdf13..d7020a7f48 100644
--- a/docs/reference/api/doxygen/array_8h__dep__incl.svg
+++ b/docs/reference/api/doxygen/array_8h__dep__incl.svg
@@ -4,1353 +4,1335 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: include/tvm/runtime/container/array.h Pages: 1 -->
-<svg width="3019pt" height="832pt"
- viewBox="0.00 0.00 3019.00 832.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<svg width="3606pt" height="832pt"
+ viewBox="0.00 0.00 3606.00 832.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 828)">
 <title>include/tvm/runtime/container/array.h</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-828 3015,-828 3015,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-828 3602,-828 3602,4 -4,4"/>
 <!-- Node20 -->
 <g id="node1" class="node">
 <title>Node20</title>
-<polygon fill="#bfbfbf" stroke="#000000" points="1250,-793.5 1250,-823.5 1366,-823.5 1366,-793.5 1250,-793.5"/>
-<text text-anchor="start" x="1258" y="-811.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
-<text text-anchor="middle" x="1308" y="-800.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/container/array.h</text>
+<polygon fill="#bfbfbf" stroke="#000000" points="2205,-793.5 2205,-823.5 2321,-823.5 2321,-793.5 2205,-793.5"/>
+<text text-anchor="start" x="2213" y="-811.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
+<text text-anchor="middle" x="2263" y="-800.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/container/array.h</text>
 </g>
 <!-- Node21 -->
 <g id="node2" class="node">
 <title>Node21</title>
 <g id="a_node2"><a xlink:href="ir_2adt_8h.html" target="_top" xlink:title="Algebraic data type definitions. ">
-<polygon fill="#ffffff" stroke="#000000" points="1920,-670.5 1920,-689.5 2032,-689.5 2032,-670.5 1920,-670.5"/>
-<text text-anchor="middle" x="1976" y="-677.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/ir/adt.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="861,-670.5 861,-689.5 973,-689.5 973,-670.5 861,-670.5"/>
+<text text-anchor="middle" x="917" y="-677.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/ir/adt.h</text>
 </a>
 </g>
 </g>
 <!-- Node20&#45;&gt;Node21 -->
 <g id="edge1" class="edge">
 <title>Node20&#45;&gt;Node21</title>
-<path fill="none" stroke="#191970" d="M1376.1529,-806.627C1565.221,-801.102 2084.0791,-783.5746 2109,-757 2139.5703,-724.4011 2073.1986,-701.4969 2023.6553,-689.5567"/>
-<polygon fill="#191970" stroke="#191970" points="1375.9514,-803.1313 1366.0571,-806.9198 1376.1543,-810.1284 1375.9514,-803.1313"/>
+<path fill="none" stroke="#191970" d="M2194.5072,-807.3851C1955.5434,-803.2458 1167.3288,-787.4081 1058,-757 1006.6755,-742.7249 953.4483,-706.8884 929.7277,-689.6105"/>
+<polygon fill="#191970" stroke="#191970" points="2194.7377,-810.8895 2204.7964,-807.562 2194.8581,-803.8905 2194.7377,-810.8895"/>
 </g>
 <!-- Node22 -->
 <g id="node3" class="node">
 <title>Node22</title>
 <g id="a_node3"><a xlink:href="ir_2module_8h.html" target="_top" xlink:title="IRModule that holds the functions and type definitions. ">
-<polygon fill="#ffffff" stroke="#000000" points="1722.5,-609 1722.5,-628 1855.5,-628 1855.5,-609 1722.5,-609"/>
-<text text-anchor="middle" x="1789" y="-616" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/ir/module.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="1354.5,-609 1354.5,-628 1487.5,-628 1487.5,-609 1354.5,-609"/>
+<text text-anchor="middle" x="1421" y="-616" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/ir/module.h</text>
 </a>
 </g>
 </g>
 <!-- Node20&#45;&gt;Node22 -->
-<g id="edge98" class="edge">
+<g id="edge95" class="edge">
 <title>Node20&#45;&gt;Node22</title>
-<path fill="none" stroke="#191970" d="M1371.823,-790.5442C1402.9277,-781.3269 1440.7147,-769.435 1474,-757 1588.1633,-714.3498 1720.4069,-651.7245 1769.4205,-628.0427"/>
-<polygon fill="#191970" stroke="#191970" points="1370.5798,-787.2615 1361.9731,-793.4401 1372.5543,-793.9772 1370.5798,-787.2615"/>
+<path fill="none" stroke="#191970" d="M2194.7056,-807.0914C2054.1257,-803.511 1737.0399,-791.7361 1635,-757 1547.8604,-727.3362 1461.0492,-654.4444 1431.5585,-628.1529"/>
+<polygon fill="#191970" stroke="#191970" points="2194.8381,-810.5957 2204.922,-807.345 2195.0119,-803.5978 2194.8381,-810.5957"/>
 </g>
 <!-- Node26 -->
 <g id="node6" class="node">
 <title>Node26</title>
 <g id="a_node6"><a xlink:href="ir_2transform_8h.html" target="_top" xlink:title="include/tvm/ir/transform.h">
-<polygon fill="#ffffff" stroke="#ff0000" points="1844,-475 1844,-494 1988,-494 1988,-475 1844,-475"/>
-<text text-anchor="middle" x="1916" y="-482" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/ir/transform.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="1261,-475 1261,-494 1405,-494 1405,-475 1261,-475"/>
+<text text-anchor="middle" x="1333" y="-482" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/ir/transform.h</text>
 </a>
 </g>
 </g>
 <!-- Node20&#45;&gt;Node26 -->
-<g id="edge99" class="edge">
+<g id="edge96" class="edge">
 <title>Node20&#45;&gt;Node26</title>
-<path fill="none" stroke="#191970" d="M1375.609,-793.6809C1397.3936,-785.9455 1420.011,-774.3288 1436,-757 1460.3081,-730.6551 1460,-715.846 1460,-680 1460,-680 1460,-680 1460,-618.5 1460,-576.6757 1746.0267,-517.0487 1865.0183,-494.0526"/>
-<polygon fill="#191970" stroke="#191970" points="1374.4042,-790.3929 1366.0167,-796.8659 1376.6101,-797.0363 1374.4042,-790.3929"/>
+<path fill="none" stroke="#191970" d="M2194.7685,-806.2991C2074.1203,-801.6443 1828.3707,-788.5196 1749,-757 1696.867,-736.297 1649,-736.0934 1649,-680 1649,-680 1649,-680 1649,-618.5 1649,-560.6234 1467.0596,-513.4271 1379.2875,-494.026"/>
+<polygon fill="#191970" stroke="#191970" points="2194.8667,-809.8052 2204.9916,-806.6848 2195.1307,-802.8102 2194.8667,-809.8052"/>
 </g>
 <!-- Node45 -->
 <g id="node11" class="node">
 <title>Node45</title>
 <g id="a_node11"><a xlink:href="builder_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/builder.h">
-<polygon fill="#ffffff" stroke="#000000" points="1430,-268.5 1430,-298.5 1582,-298.5 1582,-268.5 1430,-268.5"/>
-<text text-anchor="start" x="1438" y="-286.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
-<text text-anchor="middle" x="1506" y="-275.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/builder.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="1630,-268.5 1630,-298.5 1782,-298.5 1782,-268.5 1630,-268.5"/>
+<text text-anchor="start" x="1638" y="-286.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
+<text text-anchor="middle" x="1706" y="-275.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/builder.h</text>
 </a>
 </g>
 </g>
 <!-- Node20&#45;&gt;Node45 -->
-<g id="edge104" class="edge">
+<g id="edge101" class="edge">
 <title>Node20&#45;&gt;Node45</title>
-<path fill="none" stroke="#191970" d="M1331.705,-785.7055C1354.0073,-761.7063 1384,-721.6828 1384,-680 1384,-680 1384,-680 1384,-618.5 1384,-519.4365 1399.9166,-493.8608 1437,-402 1452.8628,-362.7056 1480.0251,-320.765 1495.1045,-298.8554"/>
-<polygon fill="#191970" stroke="#191970" points="1328.8983,-783.5747 1324.4909,-793.2093 1333.9446,-788.4261 1328.8983,-783.5747"/>
+<path fill="none" stroke="#191970" d="M2194.2824,-805.1576C2057.2398,-796.6085 1763,-768.0059 1763,-680 1763,-680 1763,-680 1763,-618.5 1763,-493.4293 1723.9099,-345.767 1710.4727,-298.7027"/>
+<polygon fill="#191970" stroke="#191970" points="2194.472,-808.6753 2204.6651,-805.7856 2194.8947,-801.6881 2194.472,-808.6753"/>
 </g>
 <!-- Node46 -->
 <g id="node12" class="node">
 <title>Node46</title>
 <g id="a_node12"><a xlink:href="measure__callback_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/measure_callback.h">
-<polygon fill="#ffffff" stroke="#000000" points="616,-67.5 616,-97.5 768,-97.5 768,-67.5 616,-67.5"/>
-<text text-anchor="start" x="624" y="-85.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
-<text text-anchor="middle" x="692" y="-74.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/measure_callback.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="2631,-67.5 2631,-97.5 2783,-97.5 2783,-67.5 2631,-67.5"/>
+<text text-anchor="start" x="2639" y="-85.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
+<text text-anchor="middle" x="2707" y="-74.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/measure_callback.h</text>
 </a>
 </g>
 </g>
 <!-- Node20&#45;&gt;Node46 -->
-<g id="edge109" class="edge">
+<g id="edge106" class="edge">
 <title>Node20&#45;&gt;Node46</title>
-<path fill="none" stroke="#191970" d="M1239.5803,-807.5216C983.8949,-803.6321 96.4776,-787.9506 45,-757 11.0295,-736.5754 0,-719.6379 0,-680 0,-680 0,-680 0,-216.5 0,-154.3204 436.2016,-106.4084 615.9644,-89.3035"/>
-<polygon fill="#191970" stroke="#191970" points="1239.793,-811.0251 1249.8447,-807.6766 1239.8987,-804.0259 1239.793,-811.0251"/>
+<path fill="none" stroke="#191970" d="M2331.1242,-804.9882C2469.8386,-797.4885 2780.7524,-778.8024 2885,-757 2988.4554,-735.3633 3110,-785.6937 3110,-680 3110,-680 3110,-680 3110,-350.5 3110,-195.2663 2900.1089,-124.5104 2783.275,-97.1496"/>
+<polygon fill="#191970" stroke="#191970" points="2330.8374,-801.4985 2321.0397,-805.5301 2331.2131,-808.4884 2330.8374,-801.4985"/>
 </g>
 <!-- Node47 -->
 <g id="node13" class="node">
 <title>Node47</title>
 <g id="a_node13"><a xlink:href="task__scheduler_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/task_scheduler.h">
-<polygon fill="#ffffff" stroke="#000000" points="758,-.5 758,-30.5 910,-30.5 910,-.5 758,-.5"/>
-<text text-anchor="start" x="766" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
-<text text-anchor="middle" x="834" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/task_scheduler.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="2806,-.5 2806,-30.5 2958,-30.5 2958,-.5 2806,-.5"/>
+<text text-anchor="start" x="2814" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
+<text text-anchor="middle" x="2882" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/task_scheduler.h</text>
 </a>
 </g>
 </g>
 <!-- Node20&#45;&gt;Node47 -->
-<g id="edge117" class="edge">
+<g id="edge114" class="edge">
 <title>Node20&#45;&gt;Node47</title>
-<path fill="none" stroke="#191970" d="M1284.295,-785.7055C1261.9927,-761.7063 1232,-721.6828 1232,-680 1232,-680 1232,-680 1232,-551.5 1232,-422.6163 1233.8747,-380.5068 1171,-268 1101.5426,-143.7143 942.732,-62.3984 870.7147,-30.6021"/>
-<polygon fill="#191970" stroke="#191970" points="1282.0554,-788.4261 1291.5091,-793.2093 1287.1017,-783.5747 1282.0554,-788.4261"/>
+<path fill="none" stroke="#191970" d="M2331.1451,-807.4035C2489.5901,-804.2683 2878.795,-793.0322 3004,-757 3073.7445,-736.9285 3148,-752.5752 3148,-680 3148,-680 3148,-680 3148,-149.5 3148,-63.9211 3036.8274,-32.9888 2958.2249,-21.8138"/>
+<polygon fill="#191970" stroke="#191970" points="2330.9781,-803.9059 2321.0476,-807.5984 2331.1133,-810.9046 2330.9781,-803.9059"/>
 </g>
 <!-- Node48 -->
 <g id="node14" class="node">
 <title>Node48</title>
 <g id="a_node14"><a xlink:href="tune__context_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/tune_context.h">
-<polygon fill="#ffffff" stroke="#000000" points="616,-134.5 616,-164.5 768,-164.5 768,-134.5 616,-134.5"/>
-<text text-anchor="start" x="624" y="-152.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
-<text text-anchor="middle" x="692" y="-141.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/tune_context.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="1986,-134.5 1986,-164.5 2138,-164.5 2138,-134.5 1986,-134.5"/>
+<text text-anchor="start" x="1994" y="-152.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
+<text text-anchor="middle" x="2062" y="-141.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/tune_context.h</text>
 </a>
 </g>
 </g>
 <!-- Node20&#45;&gt;Node48 -->
-<g id="edge118" class="edge">
+<g id="edge115" class="edge">
 <title>Node20&#45;&gt;Node48</title>
-<path fill="none" stroke="#191970" d="M1239.7237,-807.3796C989.7069,-803.0501 136.0704,-786.1854 86,-757 51.1596,-736.6919 38,-720.3271 38,-680 38,-680 38,-680 38,-283.5 38,-225.1151 443.1799,-175.6223 615.6979,-157.1953"/>
-<polygon fill="#191970" stroke="#191970" points="1239.71,-810.8798 1249.7687,-807.5524 1239.8305,-803.8808 1239.71,-810.8798"/>
+<path fill="none" stroke="#191970" d="M2194.6859,-794.5323C2103.4041,-773.8698 1953,-731.9433 1953,-680 1953,-680 1953,-680 1953,-551.5 1953,-423.7802 1959.4401,-389.1084 2000,-268 2013.1986,-228.59 2038.252,-186.357 2052.1291,-164.5165"/>
+<polygon fill="#191970" stroke="#191970" points="2194.169,-798.003 2204.6914,-796.7658 2195.6941,-791.1712 2194.169,-798.003"/>
 </g>
 <!-- Node49 -->
 <g id="node15" class="node">
 <title>Node49</title>
 <g id="a_node15"><a xlink:href="database_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/database.h">
-<polygon fill="#ffffff" stroke="#000000" points="1260,-268.5 1260,-298.5 1412,-298.5 1412,-268.5 1260,-268.5"/>
-<text text-anchor="start" x="1268" y="-286.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
-<text text-anchor="middle" x="1336" y="-275.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/database.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="2210,-268.5 2210,-298.5 2362,-298.5 2362,-268.5 2210,-268.5"/>
+<text text-anchor="start" x="2218" y="-286.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
+<text text-anchor="middle" x="2286" y="-275.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/database.h</text>
 </a>
 </g>
 </g>
 <!-- Node20&#45;&gt;Node49 -->
-<g id="edge106" class="edge">
+<g id="edge103" class="edge">
 <title>Node20&#45;&gt;Node49</title>
-<path fill="none" stroke="#191970" d="M1308,-783.3849C1308,-757.4823 1308,-715.9175 1308,-680 1308,-680 1308,-680 1308,-417.5 1308,-373.5323 1322.6225,-323.1326 1330.7334,-298.5584"/>
-<polygon fill="#191970" stroke="#191970" points="1304.5001,-783.4649 1308,-793.4649 1311.5001,-783.465 1304.5001,-783.4649"/>
+<path fill="none" stroke="#191970" d="M2331.2708,-802.1909C2404.6827,-791.2118 2511,-761.2804 2511,-680 2511,-680 2511,-680 2511,-618.5 2511,-548.151 2498.9688,-526.892 2459,-469 2444.8391,-448.4889 2433.7885,-450.4586 2416,-433 2391.5136,-408.9678 2326.2421,-331.6002 2298.6236,-298.6202"/>
+<polygon fill="#191970" stroke="#191970" points="2330.5588,-798.7566 2321.145,-803.6177 2331.5355,-805.6881 2330.5588,-798.7566"/>
 </g>
 <!-- Node50 -->
 <g id="node16" class="node">
 <title>Node50</title>
 <g id="a_node16"><a xlink:href="search__strategy_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/search_strategy.h">
-<polygon fill="#ffffff" stroke="#000000" points="848,-201.5 848,-231.5 1000,-231.5 1000,-201.5 848,-201.5"/>
-<text text-anchor="start" x="856" y="-219.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
-<text text-anchor="middle" x="924" y="-208.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/search_strategy.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="2456,-201.5 2456,-231.5 2608,-231.5 2608,-201.5 2456,-201.5"/>
+<text text-anchor="start" x="2464" y="-219.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
+<text text-anchor="middle" x="2532" y="-208.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/search_strategy.h</text>
 </a>
 </g>
 </g>
 <!-- Node20&#45;&gt;Node50 -->
-<g id="edge115" class="edge">
+<g id="edge112" class="edge">
 <title>Node20&#45;&gt;Node50</title>
-<path fill="none" stroke="#191970" d="M1239.4646,-807.1555C995.3092,-802.1515 179.8771,-783.5203 131,-757 94.035,-736.9431 76,-722.0558 76,-680 76,-680 76,-680 76,-350.5 76,-309.9428 81.4442,-290.7793 115,-268 174.7121,-227.4646 658.1378,-218.8241 847.9224,-216.9907"/>
-<polygon fill="#191970" stroke="#191970" points="1239.5586,-810.658 1249.6279,-807.3627 1239.7013,-803.6595 1239.5586,-810.658"/>
+<path fill="none" stroke="#191970" d="M2331.3919,-804.1167C2518.3319,-791.2274 3024,-749.9016 3024,-680 3024,-680 3024,-680 3024,-417.5 3024,-316.8844 2948.6222,-302.2074 2854,-268 2773.0738,-238.7439 2674.7298,-226.0737 2608.1814,-220.6075"/>
+<polygon fill="#191970" stroke="#191970" points="2330.8749,-800.6437 2321.1367,-804.8172 2331.352,-807.6274 2330.8749,-800.6437"/>
 </g>
 <!-- Node51 -->
 <g id="node17" class="node">
 <title>Node51</title>
 <g id="a_node17"><a xlink:href="extracted__task_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/extracted_task.h">
-<polygon fill="#ffffff" stroke="#000000" points="1600,-268.5 1600,-298.5 1752,-298.5 1752,-268.5 1600,-268.5"/>
-<text text-anchor="start" x="1608" y="-286.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
-<text text-anchor="middle" x="1676" y="-275.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/extracted_task.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="0,-268.5 0,-298.5 152,-298.5 152,-268.5 0,-268.5"/>
+<text text-anchor="start" x="8" y="-286.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
+<text text-anchor="middle" x="76" y="-275.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/extracted_task.h</text>
 </a>
 </g>
 </g>
 <!-- Node20&#45;&gt;Node51 -->
-<g id="edge107" class="edge">
+<g id="edge104" class="edge">
 <title>Node20&#45;&gt;Node51</title>
-<path fill="none" stroke="#191970" d="M1344.2416,-787.8593C1377.6208,-765.8519 1422,-727.635 1422,-680 1422,-680 1422,-680 1422,-618.5 1422,-482.6946 1438.5493,-429.5858 1536,-335 1553.7474,-317.7743 1577.7366,-306.2337 1600.8407,-298.5337"/>
-<polygon fill="#191970" stroke="#191970" points="1342.1503,-785.0417 1335.5962,-793.366 1345.9109,-790.9458 1342.1503,-785.0417"/>
+<path fill="none" stroke="#191970" d="M2194.7874,-808.2138C1821.0047,-806.2094 38,-790.8388 38,-680 38,-680 38,-680 38,-417.5 38,-373.0065 57.6309,-323.2478 68.6732,-298.778"/>
+<polygon fill="#191970" stroke="#191970" points="2194.8965,-811.7144 2204.9146,-808.2667 2194.9331,-804.7145 2194.8965,-811.7144"/>
 </g>
 <!-- Node52 -->
 <g id="node18" class="node">
 <title>Node52</title>
 <g id="a_node18"><a xlink:href="profiler_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/profiler.h">
-<polygon fill="#ffffff" stroke="#000000" points="768,-268.5 768,-298.5 920,-298.5 920,-268.5 768,-268.5"/>
-<text text-anchor="start" x="776" y="-286.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
-<text text-anchor="middle" x="844" y="-275.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/profiler.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="1800,-268.5 1800,-298.5 1952,-298.5 1952,-268.5 1800,-268.5"/>
+<text text-anchor="start" x="1808" y="-286.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
+<text text-anchor="middle" x="1876" y="-275.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/profiler.h</text>
 </a>
 </g>
 </g>
 <!-- Node20&#45;&gt;Node52 -->
-<g id="edge111" class="edge">
+<g id="edge108" class="edge">
 <title>Node20&#45;&gt;Node52</title>
-<path fill="none" stroke="#191970" d="M1239.5352,-804.2065C1108.2514,-794.0874 834,-763.0139 834,-680 834,-680 834,-680 834,-417.5 834,-374.3421 839.2223,-323.5818 842.1191,-298.7453"/>
-<polygon fill="#191970" stroke="#191970" points="1239.6443,-807.7245 1249.8783,-804.9831 1240.1685,-800.7441 1239.6443,-807.7245"/>
+<path fill="none" stroke="#191970" d="M2194.6633,-804.5943C2112.0965,-798.7005 1979.2336,-785.0939 1939,-757 1906.8626,-734.5594 1896,-719.1969 1896,-680 1896,-680 1896,-680 1896,-618.5 1896,-521.7312 1808.3263,-526.1189 1782,-433 1766.4859,-378.125 1823.1377,-324.0702 1855.2791,-298.6119"/>
+<polygon fill="#191970" stroke="#191970" points="2194.5035,-808.0916 2204.7218,-805.2921 2194.988,-801.1084 2194.5035,-808.0916"/>
 </g>
 <!-- Node53 -->
 <g id="node19" class="node">
 <title>Node53</title>
 <g id="a_node19"><a xlink:href="space__generator_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/space_generator.h">
-<polygon fill="#ffffff" stroke="#000000" points="124,-268.5 124,-298.5 276,-298.5 276,-268.5 124,-268.5"/>
-<text text-anchor="start" x="132" y="-286.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
-<text text-anchor="middle" x="200" y="-275.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/space_generator.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="1044,-268.5 1044,-298.5 1196,-298.5 1196,-268.5 1044,-268.5"/>
+<text text-anchor="start" x="1052" y="-286.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
+<text text-anchor="middle" x="1120" y="-275.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/space_generator.h</text>
 </a>
 </g>
 </g>
 <!-- Node20&#45;&gt;Node53 -->
-<g id="edge116" class="edge">
+<g id="edge113" class="edge">
 <title>Node20&#45;&gt;Node53</title>
-<path fill="none" stroke="#191970" d="M1239.6612,-806.8176C1022.4649,-801.2366 356.7252,-782.2454 263,-757 191.0231,-737.6126 114,-754.5422 114,-680 114,-680 114,-680 114,-417.5 114,-367.7428 156.5171,-321.7091 181.7616,-298.7619"/>
-<polygon fill="#191970" stroke="#191970" points="1239.8808,-810.3243 1249.967,-807.081 1240.0597,-803.3266 1239.8808,-810.3243"/>
+<path fill="none" stroke="#191970" d="M2194.5474,-807.5793C1929.6997,-803.802 980.4156,-788.1979 849,-757 769.0854,-738.0284 681,-762.1357 681,-680 681,-680 681,-680 681,-618.5 681,-552.0957 734.6342,-554.8938 772,-500 820.1225,-429.3035 801.256,-384.4929 871,-335 898.4054,-315.5521 981.2846,-301.0211 1043.9608,-292.4372"/>
+<polygon fill="#191970" stroke="#191970" points="2194.7436,-811.0823 2204.7922,-807.7243 2194.8428,-804.083 2194.7436,-811.0823"/>
 </g>
 <!-- Node147 -->
-<g id="node33" class="node">
+<g id="node27" class="node">
 <title>Node147</title>
-<g id="a_node33"><a xlink:href="meta__schedule_2cost__model_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/cost_model.h">
-<polygon fill="#ffffff" stroke="#000000" points="976,-268.5 976,-298.5 1128,-298.5 1128,-268.5 976,-268.5"/>
-<text text-anchor="start" x="984" y="-286.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
-<text text-anchor="middle" x="1052" y="-275.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/cost_model.h</text>
+<g id="a_node27"><a xlink:href="meta__schedule_2cost__model_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/cost_model.h">
+<polygon fill="#ffffff" stroke="#000000" points="2664,-268.5 2664,-298.5 2816,-298.5 2816,-268.5 2664,-268.5"/>
+<text text-anchor="start" x="2672" y="-286.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
+<text text-anchor="middle" x="2740" y="-275.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/cost_model.h</text>
 </a>
 </g>
 </g>
 <!-- Node20&#45;&gt;Node147 -->
-<g id="edge105" class="edge">
+<g id="edge102" class="edge">
 <title>Node20&#45;&gt;Node147</title>
-<path fill="none" stroke="#191970" d="M1239.801,-802.3981C1191.6915,-795.9972 1126.8451,-783.0351 1075,-757 1028.2578,-733.5274 986,-732.3049 986,-680 986,-680 986,-680 986,-417.5 986,-379.8735 987.5231,-368.3214 1005,-335 1012.3091,-321.0645 1024.315,-308.0017 1034.3626,-298.5189"/>
-<polygon fill="#191970" stroke="#191970" points="1239.4413,-805.8805 1249.8023,-803.6668 1240.3223,-798.9361 1239.4413,-805.8805"/>
+<path fill="none" stroke="#191970" d="M2331.2686,-805.6984C2471.3038,-798.0865 2778,-771.0249 2778,-680 2778,-680 2778,-680 2778,-417.5 2778,-373.0065 2758.3691,-323.2478 2747.3268,-298.778"/>
+<polygon fill="#191970" stroke="#191970" points="2330.8785,-802.214 2321.076,-806.2341 2331.2459,-809.2044 2330.8785,-802.214"/>
 </g>
 <!-- Node148 -->
-<g id="node34" class="node">
+<g id="node28" class="node">
 <title>Node148</title>
-<g id="a_node34"><a xlink:href="measure__candidate_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/measure_candidate.h">
-<polygon fill="#ffffff" stroke="#000000" points="1014,-335.5 1014,-365.5 1166,-365.5 1166,-335.5 1014,-335.5"/>
-<text text-anchor="start" x="1022" y="-353.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
-<text text-anchor="middle" x="1090" y="-342.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/measure_candidate.h</text>
+<g id="a_node28"><a xlink:href="measure__candidate_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/measure_candidate.h">
+<polygon fill="#ffffff" stroke="#000000" points="2522,-335.5 2522,-365.5 2674,-365.5 2674,-335.5 2522,-335.5"/>
+<text text-anchor="start" x="2530" y="-353.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
+<text text-anchor="middle" x="2598" y="-342.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/measure_candidate.h</text>
 </a>
 </g>
 </g>
 <!-- Node20&#45;&gt;Node148 -->
-<g id="edge110" class="edge">
+<g id="edge107" class="edge">
 <title>Node20&#45;&gt;Node148</title>
-<path fill="none" stroke="#191970" d="M1239.6565,-801.9106C1166.8598,-790.6834 1062,-760.534 1062,-680 1062,-680 1062,-680 1062,-484.5 1062,-440.5323 1076.6225,-390.1326 1084.7334,-365.5584"/>
-<polygon fill="#191970" stroke="#191970" points="1239.3016,-805.3957 1249.7014,-803.3724 1240.3098,-798.4687 1239.3016,-805.3957"/>
+<path fill="none" stroke="#191970" d="M2331.0802,-802.5645C2428.2695,-793.5072 2597.4887,-775.3464 2619,-757 2646.2738,-733.7389 2643,-715.846 2643,-680 2643,-680 2643,-680 2643,-484.5 2643,-439.3452 2619.7529,-389.8758 2606.6765,-365.6211"/>
+<polygon fill="#191970" stroke="#191970" points="2330.6779,-799.0867 2321.0421,-803.4915 2331.3217,-806.057 2330.6779,-799.0867"/>
 </g>
 <!-- Node149 -->
-<g id="node35" class="node">
+<g id="node29" class="node">
 <title>Node149</title>
-<g id="a_node35"><a xlink:href="feature__extractor_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/feature_extractor.h">
-<polygon fill="#ffffff" stroke="#000000" points="332,-268.5 332,-298.5 484,-298.5 484,-268.5 332,-268.5"/>
-<text text-anchor="start" x="340" y="-286.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
-<text text-anchor="middle" x="408" y="-275.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/feature_extractor.h</text>
+<g id="a_node29"><a xlink:href="feature__extractor_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/feature_extractor.h">
+<polygon fill="#ffffff" stroke="#000000" points="2418,-268.5 2418,-298.5 2570,-298.5 2570,-268.5 2418,-268.5"/>
+<text text-anchor="start" x="2426" y="-286.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
+<text text-anchor="middle" x="2494" y="-275.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/feature_extractor.h</text>
 </a>
 </g>
 </g>
 <!-- Node20&#45;&gt;Node149 -->
-<g id="edge108" class="edge">
+<g id="edge105" class="edge">
 <title>Node20&#45;&gt;Node149</title>
-<path fill="none" stroke="#191970" d="M1239.7501,-807.8635C997.0195,-804.7097 190,-786.2357 190,-680 190,-680 190,-680 190,-417.5 190,-379.8735 182.7541,-361.9611 209,-335 226.2695,-317.2599 283.2765,-303.6214 331.8731,-294.8497"/>
-<polygon fill="#191970" stroke="#191970" points="1239.8131,-811.3645 1249.8562,-807.9901 1239.9009,-804.365 1239.8131,-811.3645"/>
+<path fill="none" stroke="#191970" d="M2331.3814,-800.5078C2378.7938,-793.2632 2442.5795,-780.0698 2495,-757 2549.621,-732.9618 2605,-739.6765 2605,-680 2605,-680 2605,-680 2605,-484.5 2605,-446.8735 2609.3508,-431.5041 2586,-402 2563.5499,-373.6339 2535.4501,-394.3661 2513,-366 2497.4936,-346.4074 2494.2031,-316.4555 2493.7422,-298.5426"/>
+<polygon fill="#191970" stroke="#191970" points="2330.6017,-797.0849 2321.2201,-802.008 2331.6241,-804.0098 2330.6017,-797.0849"/>
 </g>
 <!-- Node150 -->
-<g id="node36" class="node">
+<g id="node30" class="node">
 <title>Node150</title>
-<g id="a_node36"><a xlink:href="runner_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/runner.h">
-<polygon fill="#ffffff" stroke="#000000" points="616,-335.5 616,-365.5 768,-365.5 768,-335.5 616,-335.5"/>
-<text text-anchor="start" x="624" y="-353.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
-<text text-anchor="middle" x="692" y="-342.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/runner.h</text>
+<g id="a_node30"><a xlink:href="runner_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/runner.h">
+<polygon fill="#ffffff" stroke="#000000" points="2806,-335.5 2806,-365.5 2958,-365.5 2958,-335.5 2806,-335.5"/>
+<text text-anchor="start" x="2814" y="-353.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
+<text text-anchor="middle" x="2882" y="-342.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/runner.h</text>
 </a>
 </g>
 </g>
 <!-- Node20&#45;&gt;Node150 -->
-<g id="edge112" class="edge">
+<g id="edge109" class="edge">
 <title>Node20&#45;&gt;Node150</title>
-<path fill="none" stroke="#191970" d="M1239.5578,-805.0933C1113.1871,-798.2509 853.0508,-781.3786 820,-757 790.0366,-734.8987 787,-717.2327 787,-680 787,-680 787,-680 787,-484.5 787,-446.4351 785.8338,-433.1805 764,-402 753.3335,-386.7674 736.6651,-374.429 722.0514,-365.652"/>
-<polygon fill="#191970" stroke="#191970" points="1239.7176,-808.6067 1249.8904,-805.6465 1240.0919,-801.6168 1239.7176,-808.6067"/>
+<path fill="none" stroke="#191970" d="M2331.2541,-806.3149C2426.5156,-802.0932 2593.2614,-790.0869 2643,-757 2793.6405,-656.7918 2862.3066,-426.6533 2878.2667,-365.598"/>
+<polygon fill="#191970" stroke="#191970" points="2330.9767,-802.8235 2321.1348,-806.746 2331.2747,-809.8171 2330.9767,-802.8235"/>
 </g>
-<!-- Node158 -->
+<!-- Node160 -->
 <g id="node38" class="node">
-<title>Node158</title>
+<title>Node160</title>
 <g id="a_node38"><a xlink:href="ir_2function_8h.html" target="_top" xlink:title="Function nodes. ">
-<polygon fill="#ffffff" stroke="#000000" points="2736,-670.5 2736,-689.5 2872,-689.5 2872,-670.5 2736,-670.5"/>
-<text text-anchor="middle" x="2804" y="-677.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/ir/function.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="2291,-670.5 2291,-689.5 2427,-689.5 2427,-670.5 2291,-670.5"/>
+<text text-anchor="middle" x="2359" y="-677.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/ir/function.h</text>
 </a>
 </g>
 </g>
-<!-- Node20&#45;&gt;Node158 -->
-<g id="edge85" class="edge">
-<title>Node20&#45;&gt;Node158</title>
-<path fill="none" stroke="#191970" d="M1376.3966,-806.6566C1568.7228,-801.1655 2105.1598,-783.6265 2180,-757 2202.1968,-749.1029 2200.8834,-734.1189 2223,-726 2315.4331,-692.0683 2603.1308,-683.1427 2735.7667,-680.8132"/>
-<polygon fill="#191970" stroke="#191970" points="1376.0321,-803.1655 1366.1353,-806.9475 1376.2305,-810.1626 1376.0321,-803.1655"/>
+<!-- Node20&#45;&gt;Node160 -->
+<g id="edge82" class="edge">
+<title>Node20&#45;&gt;Node160</title>
+<path fill="none" stroke="#191970" d="M2280.5165,-785.0535C2301.788,-756.5807 2336.6846,-709.8701 2351.7266,-689.7358"/>
+<polygon fill="#191970" stroke="#191970" points="2277.5706,-783.1488 2274.3894,-793.2548 2283.1784,-787.3383 2277.5706,-783.1488"/>
 </g>
-<!-- Node166 -->
+<!-- Node168 -->
 <g id="node43" class="node">
-<title>Node166</title>
+<title>Node168</title>
 <g id="a_node43"><a xlink:href="ir_2type_8h.html" target="_top" xlink:title="IR/AST nodes for the unified type system in TVM. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="1640,-732 1640,-751 1758,-751 1758,-732 1640,-732"/>
-<text text-anchor="middle" x="1699" y="-739" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/ir/type.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="1067,-732 1067,-751 1185,-751 1185,-732 1067,-732"/>
+<text text-anchor="middle" x="1126" y="-739" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/ir/type.h</text>
 </a>
 </g>
 </g>
-<!-- Node20&#45;&gt;Node166 -->
-<g id="edge100" class="edge">
-<title>Node20&#45;&gt;Node166</title>
-<path fill="none" stroke="#191970" d="M1376.2208,-798.4341C1440.7279,-788.6839 1540.1593,-773.0543 1626,-757 1635.6789,-755.1898 1646.0447,-753.0883 1655.8367,-751.0281"/>
-<polygon fill="#191970" stroke="#191970" points="1375.542,-794.9968 1366.1751,-799.9476 1376.5849,-801.9186 1375.542,-794.9968"/>
+<!-- Node20&#45;&gt;Node168 -->
+<g id="edge97" class="edge">
+<title>Node20&#45;&gt;Node168</title>
+<path fill="none" stroke="#191970" d="M2194.5898,-804.4688C1988.8542,-792.3454 1378.101,-756.3556 1185.1304,-744.9844"/>
+<polygon fill="#191970" stroke="#191970" points="2194.4705,-807.9677 2204.6591,-805.0621 2194.8823,-800.9799 2194.4705,-807.9677"/>
 </g>
-<!-- Node157 -->
+<!-- Node159 -->
 <g id="node44" class="node">
-<title>Node157</title>
+<title>Node159</title>
 <g id="a_node44"><a xlink:href="schedule__rule_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/schedule_rule.h">
-<polygon fill="#ffffff" stroke="#000000" points="218,-335.5 218,-365.5 370,-365.5 370,-335.5 218,-335.5"/>
-<text text-anchor="start" x="226" y="-353.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
-<text text-anchor="middle" x="294" y="-342.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/schedule_rule.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="929,-335.5 929,-365.5 1081,-365.5 1081,-335.5 929,-335.5"/>
+<text text-anchor="start" x="937" y="-353.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
+<text text-anchor="middle" x="1005" y="-342.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/schedule_rule.h</text>
 </a>
 </g>
 </g>
-<!-- Node20&#45;&gt;Node157 -->
-<g id="edge113" class="edge">
-<title>Node20&#45;&gt;Node157</title>
-<path fill="none" stroke="#191970" d="M1239.7711,-806.1775C1026.2553,-798.0125 385,-766.3063 385,-680 385,-680 385,-680 385,-484.5 385,-433.8818 340.011,-388.1979 313.2988,-365.5343"/>
-<polygon fill="#191970" stroke="#191970" points="1239.7878,-809.6805 1249.9126,-806.5598 1240.0516,-802.6855 1239.7878,-809.6805"/>
+<!-- Node20&#45;&gt;Node159 -->
+<g id="edge110" class="edge">
+<title>Node20&#45;&gt;Node159</title>
+<path fill="none" stroke="#191970" d="M2194.662,-807.4252C1946.746,-803.2953 1103.1035,-787.1482 986,-757 912.278,-738.0203 833,-756.126 833,-680 833,-680 833,-680 833,-618.5 833,-506.2698 941.1535,-403.5114 985.9358,-365.7735"/>
+<polygon fill="#191970" stroke="#191970" points="2194.9156,-810.9298 2204.9722,-807.5957 2195.0315,-803.9307 2194.9156,-810.9298"/>
 </g>
-<!-- Node205 -->
+<!-- Node203 -->
 <g id="node45" class="node">
-<title>Node205</title>
+<title>Node203</title>
 <g id="a_node45"><a xlink:href="structural__equal_8h.html" target="_top" xlink:title="Structural equality comparison. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="1814.5,-726.5 1814.5,-756.5 1965.5,-756.5 1965.5,-726.5 1814.5,-726.5"/>
-<text text-anchor="start" x="1822.5" y="-744.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/node/structural</text>
-<text text-anchor="middle" x="1890" y="-733.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_equal.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="3176.5,-726.5 3176.5,-756.5 3327.5,-756.5 3327.5,-726.5 3176.5,-726.5"/>
+<text text-anchor="start" x="3184.5" y="-744.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/node/structural</text>
+<text text-anchor="middle" x="3252" y="-733.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_equal.h</text>
 </a>
 </g>
 </g>
-<!-- Node20&#45;&gt;Node205 -->
-<g id="edge119" class="edge">
-<title>Node20&#45;&gt;Node205</title>
-<path fill="none" stroke="#191970" d="M1376.1555,-802.8039C1471.2847,-794.5263 1649.1729,-777.8584 1800,-757 1804.7117,-756.3484 1809.555,-755.6367 1814.4344,-754.8878"/>
-<polygon fill="#191970" stroke="#191970" points="1375.7526,-799.3256 1366.0917,-803.6751 1376.3563,-806.2995 1375.7526,-799.3256"/>
+<!-- Node20&#45;&gt;Node203 -->
+<g id="edge116" class="edge">
+<title>Node20&#45;&gt;Node203</title>
+<path fill="none" stroke="#191970" d="M2331.0499,-807.2319C2481.392,-803.8548 2853.0241,-792.341 3162,-757 3166.7257,-756.4595 3171.5798,-755.8328 3176.4672,-755.1475"/>
+<polygon fill="#191970" stroke="#191970" points="2330.9612,-803.7329 2321.0405,-807.452 2331.1151,-810.7313 2330.9612,-803.7329"/>
 </g>
-<!-- Node217 -->
+<!-- Node213 -->
 <g id="node46" class="node">
-<title>Node217</title>
+<title>Node213</title>
 <g id="a_node46"><a xlink:href="papi_8h.html" target="_top" xlink:title="include/tvm/runtime\l/contrib/papi.h">
-<polygon fill="#ffffff" stroke="#000000" points="1984,-726.5 1984,-756.5 2100,-756.5 2100,-726.5 1984,-726.5"/>
-<text text-anchor="start" x="1992" y="-744.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
-<text text-anchor="middle" x="2042" y="-733.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/contrib/papi.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="3346,-726.5 3346,-756.5 3462,-756.5 3462,-726.5 3346,-726.5"/>
+<text text-anchor="start" x="3354" y="-744.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
+<text text-anchor="middle" x="3404" y="-733.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/contrib/papi.h</text>
 </a>
 </g>
 </g>
-<!-- Node20&#45;&gt;Node217 -->
-<g id="edge120" class="edge">
-<title>Node20&#45;&gt;Node217</title>
-<path fill="none" stroke="#191970" d="M1376.5776,-806.6021C1497.8965,-802.49 1758.0363,-790.2795 1975,-757 1977.9503,-756.5475 1980.9633,-756.0355 1983.9987,-755.48"/>
-<polygon fill="#191970" stroke="#191970" points="1376.2068,-803.1124 1366.3281,-806.9413 1376.4384,-810.1085 1376.2068,-803.1124"/>
+<!-- Node20&#45;&gt;Node213 -->
+<g id="edge117" class="edge">
+<title>Node20&#45;&gt;Node213</title>
+<path fill="none" stroke="#191970" d="M2331.1936,-806.8316C2535.6432,-801.5763 3141.4109,-784.0901 3337,-757 3339.8383,-756.6069 3342.7333,-756.1512 3345.6494,-755.6483"/>
+<polygon fill="#191970" stroke="#191970" points="2331.0935,-803.333 2321.1862,-807.0873 2331.2723,-810.3307 2331.0935,-803.333"/>
 </g>
-<!-- Node218 -->
+<!-- Node214 -->
 <g id="node47" class="node">
-<title>Node218</title>
+<title>Node214</title>
 <g id="a_node47"><a xlink:href="packed__func_8h.html" target="_top" xlink:title="Type&#45;erased function used across TVM API. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="639,-402.5 639,-432.5 755,-432.5 755,-402.5 639,-402.5"/>
-<text text-anchor="start" x="647" y="-420.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
-<text text-anchor="middle" x="697" y="-409.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/packed_func.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="2215,-402.5 2215,-432.5 2331,-432.5 2331,-402.5 2215,-402.5"/>
+<text text-anchor="start" x="2223" y="-420.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
+<text text-anchor="middle" x="2273" y="-409.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/packed_func.h</text>
 </a>
 </g>
 </g>
-<!-- Node20&#45;&gt;Node218 -->
-<g id="edge121" class="edge">
-<title>Node20&#45;&gt;Node218</title>
-<path fill="none" stroke="#191970" d="M1239.3326,-805.2111C1103.8933,-798.2447 812.0193,-780.6779 773,-757 738.1161,-735.8316 723,-720.8043 723,-680 723,-680 723,-680 723,-551.5 723,-507.6591 709.422,-457.2029 701.8904,-432.5877"/>
-<polygon fill="#191970" stroke="#191970" points="1239.4271,-808.7203 1249.5923,-805.7339 1239.7834,-801.7294 1239.4271,-808.7203"/>
+<!-- Node20&#45;&gt;Node214 -->
+<g id="edge118" class="edge">
+<title>Node20&#45;&gt;Node214</title>
+<path fill="none" stroke="#191970" d="M2263,-783.3849C2263,-757.4823 2263,-715.9175 2263,-680 2263,-680 2263,-680 2263,-551.5 2263,-508.3421 2268.2223,-457.5818 2271.1191,-432.7453"/>
+<polygon fill="#191970" stroke="#191970" points="2259.5001,-783.4649 2263,-793.4649 2266.5001,-783.465 2259.5001,-783.4649"/>
 </g>
-<!-- Node195 -->
+<!-- Node193 -->
 <g id="node48" class="node">
-<title>Node195</title>
+<title>Node193</title>
 <g id="a_node48"><a xlink:href="buffer_8h.html" target="_top" xlink:title="Symbolic n&#45;dimensional array, to represent a memory buffer. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="2443,-609 2443,-628 2571,-628 2571,-609 2443,-609"/>
-<text text-anchor="middle" x="2507" y="-616" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/tir/buffer.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="1993,-609 1993,-628 2121,-628 2121,-609 1993,-609"/>
+<text text-anchor="middle" x="2057" y="-616" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/tir/buffer.h</text>
 </a>
 </g>
 </g>
-<!-- Node20&#45;&gt;Node195 -->
-<g id="edge137" class="edge">
-<title>Node20&#45;&gt;Node195</title>
-<path fill="none" stroke="#191970" d="M1376.1645,-805.9882C1559.6875,-798.9747 2055.158,-778.2951 2126,-757 2152.5021,-749.0335 2154.7245,-737.2685 2180,-726 2281.5211,-680.7393 2408.1404,-644.4995 2469.7948,-628.065"/>
-<polygon fill="#191970" stroke="#191970" points="1375.9549,-802.4935 1366.0953,-806.3712 1376.2211,-809.4885 1375.9549,-802.4935"/>
+<!-- Node20&#45;&gt;Node193 -->
+<g id="edge134" class="edge">
+<title>Node20&#45;&gt;Node193</title>
+<path fill="none" stroke="#191970" d="M2195.3528,-790.9178C2174.3229,-783.0599 2152.1751,-772.0701 2135,-757 2092.4766,-719.6884 2068.0395,-653.3204 2059.9906,-628.3578"/>
+<polygon fill="#191970" stroke="#191970" points="2194.3199,-794.2651 2204.9146,-794.3112 2196.6611,-787.6682 2194.3199,-794.2651"/>
 </g>
-<!-- Node196 -->
+<!-- Node194 -->
 <g id="node49" class="node">
-<title>Node196</title>
+<title>Node194</title>
 <g id="a_node49"><a xlink:href="tir_2expr_8h.html" target="_top" xlink:title="TIR expressions. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="2446.5,-542 2446.5,-561 2567.5,-561 2567.5,-542 2446.5,-542"/>
-<text text-anchor="middle" x="2507" y="-549" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/tir/expr.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="2037.5,-542 2037.5,-561 2158.5,-561 2158.5,-542 2037.5,-542"/>
+<text text-anchor="middle" x="2098" y="-549" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/tir/expr.h</text>
 </a>
 </g>
 </g>
-<!-- Node20&#45;&gt;Node196 -->
-<g id="edge143" class="edge">
-<title>Node20&#45;&gt;Node196</title>
-<path fill="none" stroke="#191970" d="M1376.3938,-805.4566C1486.5769,-799.7834 1698.0034,-785.4961 1767,-757 1787.4653,-748.5477 1787.4885,-738.1489 1806,-726 1850.2169,-696.981 1861.1939,-687.7928 1911,-670 1962.2886,-651.6776 2318.6331,-585.8101 2454.5206,-561.0229"/>
-<polygon fill="#191970" stroke="#191970" points="1376.1885,-801.9625 1366.378,-805.963 1376.542,-808.9535 1376.1885,-801.9625"/>
+<!-- Node20&#45;&gt;Node194 -->
+<g id="edge140" class="edge">
+<title>Node20&#45;&gt;Node194</title>
+<path fill="none" stroke="#191970" d="M2247.6006,-784.5142C2212.5006,-729.8434 2127.7798,-597.8843 2104.1888,-561.1395"/>
+<polygon fill="#191970" stroke="#191970" points="2244.8247,-786.6691 2253.1726,-793.1931 2250.7152,-782.8872 2244.8247,-786.6691"/>
 </g>
-<!-- Node201 -->
+<!-- Node199 -->
 <g id="node50" class="node">
-<title>Node201</title>
+<title>Node199</title>
 <g id="a_node50"><a xlink:href="index__map_8h.html" target="_top" xlink:title="Defines a remapping of buffer indices. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="2232,-726.5 2232,-756.5 2350,-756.5 2350,-726.5 2232,-726.5"/>
-<text text-anchor="start" x="2240" y="-744.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/tir/index</text>
-<text text-anchor="middle" x="2291" y="-733.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_map.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="3480,-726.5 3480,-756.5 3598,-756.5 3598,-726.5 3480,-726.5"/>
+<text text-anchor="start" x="3488" y="-744.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/tir/index</text>
+<text text-anchor="middle" x="3539" y="-733.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_map.h</text>
 </a>
 </g>
 </g>
-<!-- Node20&#45;&gt;Node201 -->
-<g id="edge144" class="edge">
-<title>Node20&#45;&gt;Node201</title>
-<path fill="none" stroke="#191970" d="M1376.2127,-805.9924C1558.7098,-799.0671 2055.3112,-778.7458 2218,-757 2222.4947,-756.3992 2227.1249,-755.673 2231.7695,-754.8668"/>
-<polygon fill="#191970" stroke="#191970" points="1376.0639,-802.4954 1366.2033,-806.3707 1376.3284,-809.4904 1376.0639,-802.4954"/>
+<!-- Node20&#45;&gt;Node199 -->
+<g id="edge141" class="edge">
+<title>Node20&#45;&gt;Node199</title>
+<path fill="none" stroke="#191970" d="M2331.2034,-807.2217C2552.3567,-802.8205 3248.1541,-786.896 3471,-757 3473.8802,-756.6136 3476.818,-756.1634 3479.7773,-755.6648"/>
+<polygon fill="#191970" stroke="#191970" points="2330.9637,-803.7256 2321.0348,-807.4226 2331.102,-810.7243 2330.9637,-803.7256"/>
 </g>
 <!-- Node21&#45;&gt;Node22 -->
 <g id="edge2" class="edge">
 <title>Node21&#45;&gt;Node22</title>
-<path fill="none" stroke="#191970" d="M1937.234,-667.2507C1901.7528,-655.5818 1850.0417,-638.5752 1818.0136,-628.0419"/>
-<polygon fill="#191970" stroke="#191970" points="1936.1883,-670.5912 1946.7813,-670.3906 1938.3753,-663.9416 1936.1883,-670.5912"/>
+<path fill="none" stroke="#191970" d="M983.5622,-671.8778C1080.0316,-660.1063 1257.6928,-638.4274 1354.2612,-626.6437"/>
+<polygon fill="#191970" stroke="#191970" points="982.8506,-668.4386 973.3482,-673.1242 983.6985,-675.3871 982.8506,-668.4386"/>
 </g>
 <!-- Node82 -->
-<g id="node24" class="node">
+<g id="node33" class="node">
 <title>Node82</title>
-<g id="a_node24"><a xlink:href="relay_2adt_8h.html" target="_top" xlink:title="Algebraic data types for Relay. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="2654,-140 2654,-159 2784,-159 2784,-140 2654,-140"/>
-<text text-anchor="middle" x="2719" y="-147" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/adt.h</text>
+<g id="a_node33"><a xlink:href="relay_2adt_8h.html" target="_top" xlink:title="Algebraic data types for Relay. ">
+<polygon fill="#ffffff" stroke="#ff0000" points="180,-475 180,-494 310,-494 310,-475 180,-475"/>
+<text text-anchor="middle" x="245" y="-482" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/adt.h</text>
 </a>
 </g>
 </g>
 <!-- Node21&#45;&gt;Node82 -->
-<g id="edge84" class="edge">
+<g id="edge81" class="edge">
 <title>Node21&#45;&gt;Node82</title>
-<path fill="none" stroke="#191970" d="M2042.217,-677.7998C2177.1704,-672.8509 2479.74,-659.2531 2580,-634 2667.6945,-611.9118 2766,-641.9335 2766,-551.5 2766,-551.5 2766,-551.5 2766,-484.5 2766,-354.4145 2730.8144,-198.6467 2721.3307,-159.025"/>
-<polygon fill="#191970" stroke="#191970" points="2041.8841,-674.3095 2032.0173,-678.1691 2042.1374,-681.3049 2041.8841,-674.3095"/>
+<path fill="none" stroke="#191970" d="M850.9772,-671.6409C718.8685,-654.1876 426.9613,-611.7634 337,-567 300.3142,-548.7457 267.1582,-511.8061 252.6862,-494.2246"/>
+<polygon fill="#191970" stroke="#191970" points="850.5998,-675.1213 860.9706,-672.9537 851.5117,-668.1809 850.5998,-675.1213"/>
 </g>
 <!-- Node23 -->
 <g id="node4" class="node">
 <title>Node23</title>
 <g id="a_node4"><a xlink:href="driver__api_8h.html" target="_top" xlink:title="Compiler driver APIs to drive the compilation. ">
-<polygon fill="#ffffff" stroke="#000000" points="2086,-268.5 2086,-298.5 2192,-298.5 2192,-268.5 2086,-268.5"/>
-<text text-anchor="start" x="2094" y="-286.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/driver</text>
-<text text-anchor="middle" x="2139" y="-275.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/driver_api.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="1415,-268.5 1415,-298.5 1521,-298.5 1521,-268.5 1415,-268.5"/>
+<text text-anchor="start" x="1423" y="-286.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/driver</text>
+<text text-anchor="middle" x="1468" y="-275.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/driver_api.h</text>
 </a>
 </g>
 </g>
 <!-- Node22&#45;&gt;Node23 -->
 <g id="edge3" class="edge">
 <title>Node22&#45;&gt;Node23</title>
-<path fill="none" stroke="#191970" d="M1865.9169,-611.0603C1940.3172,-602.7984 2046.9487,-587.8259 2083,-567 2124.3907,-543.0897 2152,-532.3005 2152,-484.5 2152,-484.5 2152,-484.5 2152,-417.5 2152,-374.2597 2145.211,-323.5361 2141.4452,-298.7263"/>
-<polygon fill="#191970" stroke="#191970" points="1865.2046,-607.6171 1855.6433,-612.1814 1865.9641,-614.5758 1865.2046,-607.6171"/>
+<path fill="none" stroke="#191970" d="M1430.4187,-599.6573C1434.7362,-590.1136 1439.4514,-578.201 1442,-567 1464.9185,-466.2758 1441.1152,-437.2206 1456,-335 1457.7991,-322.6448 1461.0734,-308.8824 1463.7477,-298.6948"/>
+<polygon fill="#191970" stroke="#191970" points="1427.1397,-598.4032 1426.0111,-608.9378 1433.4628,-601.4063 1427.1397,-598.4032"/>
 </g>
 <!-- Node24 -->
 <g id="node5" class="node">
 <title>Node24</title>
 <g id="a_node5"><a xlink:href="diagnostic_8h.html" target="_top" xlink:title="A new diagnostic interface for TVM error reporting. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="1716,-542 1716,-561 1862,-561 1862,-542 1716,-542"/>
-<text text-anchor="middle" x="1789" y="-549" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/ir/diagnostic.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="1109,-542 1109,-561 1255,-561 1255,-542 1109,-542"/>
+<text text-anchor="middle" x="1182" y="-549" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/ir/diagnostic.h</text>
 </a>
 </g>
 </g>
 <!-- Node22&#45;&gt;Node24 -->
 <g id="edge4" class="edge">
 <title>Node22&#45;&gt;Node24</title>
-<path fill="none" stroke="#191970" d="M1789,-598.6079C1789,-586.214 1789,-570.8263 1789,-561.0817"/>
-<polygon fill="#191970" stroke="#191970" points="1785.5001,-598.9005 1789,-608.9005 1792.5001,-598.9006 1785.5001,-598.9005"/>
+<path fill="none" stroke="#191970" d="M1377.2458,-606.2342C1330.8857,-593.2378 1258.6485,-572.9872 1216.2588,-561.1039"/>
+<polygon fill="#191970" stroke="#191970" points="1376.4374,-609.6424 1387.0109,-608.9717 1378.3269,-602.9022 1376.4374,-609.6424"/>
 </g>
 <!-- Node22&#45;&gt;Node26 -->
-<g id="edge50" class="edge">
+<g id="edge37" class="edge">
 <title>Node22&#45;&gt;Node26</title>
-<path fill="none" stroke="#191970" d="M1848.56,-607.0883C1924.2596,-592.5168 2045.4083,-568.9408 2047,-567 2076.0597,-531.5663 2008.3024,-506.7363 1959.8012,-494.1031"/>
-<polygon fill="#191970" stroke="#191970" points="1847.8124,-603.6679 1838.6535,-608.9938 1849.1346,-610.5419 1847.8124,-603.6679"/>
+<path fill="none" stroke="#191970" d="M1419.9351,-598.7301C1418.1603,-581.0245 1413.4527,-554.9977 1401,-536 1388.8222,-517.4217 1367.5693,-502.9337 1352.0571,-494.1211"/>
+<polygon fill="#191970" stroke="#191970" points="1416.4514,-599.0759 1420.7172,-608.774 1423.4303,-598.5325 1416.4514,-599.0759"/>
 </g>
 <!-- Node27 -->
 <g id="node7" class="node">
 <title>Node27</title>
 <g id="a_node7"><a xlink:href="parser_8h.html" target="_top" xlink:title="A parser for TVM IR. ">
-<polygon fill="#ffffff" stroke="#000000" points="1564,-335.5 1564,-365.5 1672,-365.5 1672,-335.5 1564,-335.5"/>
-<text text-anchor="start" x="1572" y="-353.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/parser</text>
-<text text-anchor="middle" x="1618" y="-342.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/parser.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="1099,-335.5 1099,-365.5 1207,-365.5 1207,-335.5 1099,-335.5"/>
+<text text-anchor="start" x="1107" y="-353.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/parser</text>
+<text text-anchor="middle" x="1153" y="-342.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/parser.h</text>
 </a>
 </g>
 </g>
 <!-- Node22&#45;&gt;Node27 -->
-<g id="edge75" class="edge">
+<g id="edge62" class="edge">
 <title>Node22&#45;&gt;Node27</title>
-<path fill="none" stroke="#191970" d="M1728.5759,-606.8349C1685.8496,-597.4123 1633.5182,-583.104 1618,-567 1565.9826,-513.0191 1560.957,-473.6509 1583,-402 1587.1323,-388.5678 1596.0933,-375.4972 1603.9133,-365.8848"/>
-<polygon fill="#191970" stroke="#191970" points="1727.8503,-610.2588 1738.3635,-608.9464 1729.3265,-603.4162 1727.8503,-610.2588"/>
+<path fill="none" stroke="#191970" d="M1344.2825,-614.3036C1219.9767,-606.8742 985.3172,-589.9944 955,-567 919.004,-539.6985 904.6552,-507.6795 928,-469 948.8167,-434.5092 1054.3013,-388.9715 1113.1565,-365.666"/>
+<polygon fill="#191970" stroke="#191970" points="1344.3083,-617.8112 1354.4975,-614.9077 1344.7216,-610.8234 1344.3083,-617.8112"/>
 </g>
 <!-- Node31 -->
 <g id="node10" class="node">
 <title>Node31</title>
 <g id="a_node10"><a xlink:href="target_8h.html" target="_top" xlink:title="Compilation target object. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="1842.5,-335.5 1842.5,-365.5 1949.5,-365.5 1949.5,-335.5 1842.5,-335.5"/>
-<text text-anchor="start" x="1850.5" y="-353.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/target</text>
-<text text-anchor="middle" x="1896" y="-342.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/target.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="1263.5,-335.5 1263.5,-365.5 1370.5,-365.5 1370.5,-335.5 1263.5,-335.5"/>
+<text text-anchor="start" x="1271.5" y="-353.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/target</text>
+<text text-anchor="middle" x="1317" y="-342.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/target.h</text>
 </a>
 </g>
 </g>
 <!-- Node22&#45;&gt;Node31 -->
-<g id="edge81" class="edge">
+<g id="edge78" class="edge">
 <title>Node22&#45;&gt;Node31</title>
-<path fill="none" stroke="#191970" d="M1753.0502,-604.8639C1734.5742,-596.3388 1712.878,-583.7652 1698,-567 1667.933,-533.119 1644.3952,-506.3677 1670,-469 1695.0529,-432.4377 1723.3299,-452.7669 1763,-433 1804.1006,-412.5203 1848.8978,-383.1262 1874.599,-365.5126"/>
-<polygon fill="#191970" stroke="#191970" points="1751.8018,-608.1383 1762.3652,-608.9538 1754.6159,-601.7288 1751.8018,-608.1383"/>
+<path fill="none" stroke="#191970" d="M1424.2937,-598.6545C1428.2922,-569.0747 1432.4158,-512.2676 1414,-469 1395.779,-426.19 1357.1414,-386.5342 1334.5152,-365.7254"/>
+<polygon fill="#191970" stroke="#191970" points="1420.7792,-598.4916 1422.761,-608.8994 1427.7021,-599.5274 1420.7792,-598.4916"/>
 </g>
 <!-- Node22&#45;&gt;Node45 -->
-<g id="edge69" class="edge">
+<g id="edge56" class="edge">
 <title>Node22&#45;&gt;Node45</title>
-<path fill="none" stroke="#191970" d="M1827.2002,-604.4933C1843.7847,-596.2815 1861.5495,-584.0952 1871,-567 1877.6658,-554.9421 1876.2725,-548.729 1871,-536 1862.3409,-515.095 1849.1204,-517.6809 1835,-500 1780.9053,-432.265 1801.7167,-385.1341 1731,-335 1718.6592,-326.2511 1642.5957,-309.9055 1582.2759,-297.9688"/>
-<polygon fill="#191970" stroke="#191970" points="1825.2743,-601.526 1817.6436,-608.876 1828.1923,-607.8887 1825.2743,-601.526"/>
+<path fill="none" stroke="#191970" d="M1478.6165,-606.7204C1520.2036,-597.1243 1571.6319,-582.643 1587,-567 1640.5233,-512.5193 1604.4555,-471.9995 1635,-402 1652.1691,-362.6532 1680.0407,-320.395 1695.2539,-298.5337"/>
+<polygon fill="#191970" stroke="#191970" points="1477.8106,-603.3141 1468.8279,-608.9322 1479.3534,-610.142 1477.8106,-603.3141"/>
 </g>
 <!-- Node22&#45;&gt;Node48 -->
-<g id="edge74" class="edge">
+<g id="edge61" class="edge">
 <title>Node22&#45;&gt;Node48</title>
-<path fill="none" stroke="#191970" d="M1712.2577,-611.8693C1613.4596,-602.7718 1447.5344,-585.4366 1389,-567 1205.2651,-509.129 1152.7434,-489.61 1005,-366 962.7261,-330.6314 973.0173,-301.1739 929,-268 894.5954,-242.0708 877.8678,-250.5821 839,-232 794.6429,-210.7935 745.1969,-181.8907 716.4441,-164.5259"/>
-<polygon fill="#191970" stroke="#191970" points="1712.2099,-615.3794 1722.4869,-612.8039 1712.8469,-608.4085 1712.2099,-615.3794"/>
+<path fill="none" stroke="#191970" d="M1469.7139,-605.5918C1491.1744,-597.6557 1515.0774,-585.3835 1531,-567 1592.1207,-496.4329 1565.1991,-455.4269 1592,-366 1605.04,-322.4894 1588.8701,-300.1073 1621,-268 1672.031,-217.0049 1873.3978,-178.6247 1985.5491,-160.6972"/>
+<polygon fill="#191970" stroke="#191970" points="1468.3637,-602.3555 1460.0629,-608.9394 1470.6577,-608.969 1468.3637,-602.3555"/>
 </g>
 <!-- Node22&#45;&gt;Node49 -->
-<g id="edge70" class="edge">
+<g id="edge57" class="edge">
 <title>Node22&#45;&gt;Node49</title>
-<path fill="none" stroke="#191970" d="M1712.1481,-611.1815C1585.3058,-596.953 1346,-560.0922 1346,-484.5 1346,-484.5 1346,-484.5 1346,-417.5 1346,-374.3421 1340.7777,-323.5818 1337.8809,-298.7453"/>
-<polygon fill="#191970" stroke="#191970" points="1711.8758,-614.6726 1722.1988,-612.288 1712.6419,-607.7147 1711.8758,-614.6726"/>
+<path fill="none" stroke="#191970" d="M1497.653,-615.7553C1675.6635,-609.0185 2106.5871,-590.4912 2168,-567 2179.3137,-562.6724 2334.6824,-443.8836 2340,-433 2346.0483,-420.6208 2342.9232,-415.4641 2340,-402 2335.4431,-381.0111 2307.9049,-325.9028 2294.0084,-298.8826"/>
+<polygon fill="#191970" stroke="#191970" points="1497.4378,-612.2608 1487.5765,-616.1342 1497.701,-619.2559 1497.4378,-612.2608"/>
 </g>
 <!-- Node22&#45;&gt;Node51 -->
-<g id="edge71" class="edge">
+<g id="edge58" class="edge">
 <title>Node22&#45;&gt;Node51</title>
-<path fill="none" stroke="#191970" d="M1716.7905,-607.1899C1667.7649,-598.1665 1608.5925,-584.2057 1590,-567 1566.8927,-545.6163 1573.5629,-530.7918 1567,-500 1551.6728,-428.0882 1514.4475,-396.3328 1555,-335 1566.4585,-317.6698 1585.0601,-306.1529 1604.2785,-298.5061"/>
-<polygon fill="#191970" stroke="#191970" points="1716.3003,-610.6579 1726.7621,-608.9841 1717.5399,-603.7685 1716.3003,-610.6579"/>
+<path fill="none" stroke="#191970" d="M1344.0524,-617.2971C1074.6385,-612.842 187.2494,-596.0223 134,-567 94.6446,-545.5503 76,-529.3212 76,-484.5 76,-484.5 76,-484.5 76,-417.5 76,-374.4618 76,-323.6482 76,-298.7729"/>
+<polygon fill="#191970" stroke="#191970" points="1344.0794,-620.7979 1354.1356,-617.4628 1344.1945,-613.7988 1344.0794,-620.7979"/>
 </g>
 <!-- Node22&#45;&gt;Node52 -->
-<g id="edge72" class="edge">
+<g id="edge59" class="edge">
 <title>Node22&#45;&gt;Node52</title>
-<path fill="none" stroke="#191970" d="M1712.1182,-613.7098C1585.9475,-605.4056 1340.5231,-587.2703 1256,-567 1097.6527,-529.0253 1034.044,-544.1057 915,-433 874.2489,-394.9663 854.4873,-328.7226 847.3056,-298.8667"/>
-<polygon fill="#191970" stroke="#191970" points="1712.263,-617.2267 1722.4701,-614.3867 1712.7198,-610.2416 1712.263,-617.2267"/>
+<path fill="none" stroke="#191970" d="M1488.0496,-607.0161C1535.1361,-597.7369 1592.8245,-583.5303 1611,-567 1668.9554,-514.2908 1622.5655,-461.9454 1673,-402 1716.0863,-350.7885 1787.2795,-316.5593 1832.8851,-298.571"/>
+<polygon fill="#191970" stroke="#191970" points="1487.3365,-603.589 1478.1808,-608.9204 1488.6629,-610.4622 1487.3365,-603.589"/>
 </g>
 <!-- Node22&#45;&gt;Node53 -->
-<g id="edge73" class="edge">
+<g id="edge60" class="edge">
 <title>Node22&#45;&gt;Node53</title>
-<path fill="none" stroke="#191970" d="M1712.1606,-616.4292C1584.5904,-612.169 1321.3041,-599.8846 1101,-567 699.0683,-507.0041 479.6759,-669.1224 209,-366 192.59,-347.6229 194.0871,-316.9864 196.8904,-298.6679"/>
-<polygon fill="#191970" stroke="#191970" points="1712.1137,-619.9294 1722.2227,-616.7579 1712.3424,-612.9332 1712.1137,-619.9294"/>
+<path fill="none" stroke="#191970" d="M1344.5044,-614.5968C1213.2955,-607.3173 956.0862,-590.2727 922,-567 856.3542,-522.1797 822.5815,-475.4366 853,-402 869.1156,-363.0935 883.4273,-355.8776 920,-335 957.8333,-313.4028 1004.8053,-300.7055 1043.7193,-293.325"/>
+<polygon fill="#191970" stroke="#191970" points="1344.3195,-618.0918 1354.4965,-615.1458 1344.7036,-611.1024 1344.3195,-618.0918"/>
 </g>
 <!-- Node54 -->
 <g id="node20" class="node">
 <title>Node54</title>
 <g id="a_node20"><a xlink:href="interpreter_8h.html" target="_top" xlink:title="An interpreter for Relay. ">
-<polygon fill="#ffffff" stroke="#000000" points="2055.5,-140 2055.5,-159 2220.5,-159 2220.5,-140 2055.5,-140"/>
-<text text-anchor="middle" x="2138" y="-147" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/interpreter.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="293.5,-274 293.5,-293 458.5,-293 458.5,-274 293.5,-274"/>
+<text text-anchor="middle" x="376" y="-281" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/interpreter.h</text>
 </a>
 </g>
 </g>
 <!-- Node22&#45;&gt;Node54 -->
-<g id="edge79" class="edge">
+<g id="edge75" class="edge">
 <title>Node22&#45;&gt;Node54</title>
-<path fill="none" stroke="#191970" d="M1865.8638,-614.7611C1957.0185,-609.0859 2102.3644,-595.7518 2148,-567 2178.2973,-547.9118 2316.6084,-333.268 2327,-299 2330.9983,-285.8151 2334.397,-279.6238 2327,-268 2306.718,-236.1283 2282.4777,-251.2966 2250,-232 2211.175,-208.9322 2169.3094,-175.5443 2149.5618,-159.2219"/>
-<polygon fill="#191970" stroke="#191970" points="1865.6192,-611.2694 1855.849,-615.3675 1866.0423,-618.2566 1865.6192,-611.2694"/>
+<path fill="none" stroke="#191970" d="M1344.3984,-616.7318C1085.2701,-610.5587 254.6956,-589.1358 202,-567 152.573,-546.2372 114,-538.1109 114,-484.5 114,-484.5 114,-484.5 114,-417.5 114,-335.6282 216.6638,-303.7486 293.4612,-291.3545"/>
+<polygon fill="#191970" stroke="#191970" points="1344.3945,-620.2326 1354.4748,-616.971 1344.5607,-613.2345 1344.3945,-620.2326"/>
 </g>
 <!-- Node57 -->
 <g id="node21" class="node">
 <title>Node57</title>
 <g id="a_node21"><a xlink:href="codegen_8h.html" target="_top" xlink:title="Translates IRModule to runtime::Module. ">
-<polygon fill="#ffffff" stroke="#000000" points="2210.5,-268.5 2210.5,-298.5 2317.5,-298.5 2317.5,-268.5 2210.5,-268.5"/>
-<text text-anchor="start" x="2218.5" y="-286.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/target</text>
-<text text-anchor="middle" x="2264" y="-275.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/codegen.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="2008.5,-268.5 2008.5,-298.5 2115.5,-298.5 2115.5,-268.5 2008.5,-268.5"/>
+<text text-anchor="start" x="2016.5" y="-286.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/target</text>
+<text text-anchor="middle" x="2062" y="-275.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/codegen.h</text>
 </a>
 </g>
 </g>
 <!-- Node22&#45;&gt;Node57 -->
-<g id="edge80" class="edge">
-<title>Node22&#45;&gt;Node57</title>
-<path fill="none" stroke="#191970" d="M1865.5931,-613.7901C1950.1831,-607.3552 2079.8758,-593.4641 2121,-567 2150.9579,-547.7216 2147.7649,-531.1783 2165,-500 2205.3904,-426.9338 2243.6791,-334.3781 2258.0349,-298.5821"/>
-<polygon fill="#191970" stroke="#191970" points="1865.2234,-610.3078 1855.5098,-614.5382 1865.7414,-617.2886 1865.2234,-610.3078"/>
-</g>
-<!-- Node80 -->
-<g id="node23" class="node">
-<title>Node80</title>
-<g id="a_node23"><a xlink:href="relay_2expr_8h.html" target="_top" xlink:title="Relay expression language. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="2258.5,-207 2258.5,-226 2393.5,-226 2393.5,-207 2258.5,-207"/>
-<text text-anchor="middle" x="2326" y="-214" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/expr.h</text>
-</a>
-</g>
-</g>
-<!-- Node22&#45;&gt;Node80 -->
 <g id="edge77" class="edge">
-<title>Node22&#45;&gt;Node80</title>
-<path fill="none" stroke="#191970" d="M1865.735,-614.3778C1987.4255,-607.164 2213.8569,-590.7263 2242,-567 2300.3914,-517.7726 2241.4185,-460.9298 2290,-402 2312.2537,-375.006 2341.777,-395.8634 2360,-366 2382.6876,-328.8199 2372.528,-309.7149 2360,-268 2355.131,-251.7877 2343.3209,-235.97 2334.9923,-226.2355"/>
-<polygon fill="#191970" stroke="#191970" points="1865.5051,-610.8852 1855.727,-614.9643 1865.9147,-617.8732 1865.5051,-610.8852"/>
-</g>
-<!-- Node83 -->
-<g id="node25" class="node">
-<title>Node83</title>
-<g id="a_node25"><a xlink:href="relay_2analysis_8h.html" target="_top" xlink:title="The set of Relay analysis passes written in C++. ">
-<polygon fill="#ffffff" stroke="#000000" points="2561.5,-73 2561.5,-92 2716.5,-92 2716.5,-73 2561.5,-73"/>
-<text text-anchor="middle" x="2639" y="-80" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/analysis.h</text>
-</a>
-</g>
-</g>
-<!-- Node22&#45;&gt;Node83 -->
-<g id="edge76" class="edge">
-<title>Node22&#45;&gt;Node83</title>
-<path fill="none" stroke="#191970" d="M1865.9557,-614.9559C1998.6434,-608.2147 2259.545,-591.9533 2293,-567 2356.5791,-519.5779 2305.5899,-458.7528 2361,-402 2513.9885,-245.304 2728.1094,-374.161 2793,-165 2797.0825,-151.841 2801.0506,-145.181 2793,-134 2780.2748,-116.3266 2726.8177,-101.2277 2686.4283,-92.0329"/>
-<polygon fill="#191970" stroke="#191970" points="1865.6645,-611.4661 1855.8527,-615.4634 1866.0157,-618.4572 1865.6645,-611.4661"/>
-</g>
-<!-- Node91 -->
-<g id="node26" class="node">
-<title>Node91</title>
-<g id="a_node26"><a xlink:href="relay_2feature_8h.html" target="_top" xlink:title="Detect features used in Expr/Module. ">
-<polygon fill="#ffffff" stroke="#000000" points="2314.5,-140 2314.5,-159 2463.5,-159 2463.5,-140 2314.5,-140"/>
-<text text-anchor="middle" x="2389" y="-147" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/feature.h</text>
-</a>
-</g>
-</g>
-<!-- Node22&#45;&gt;Node91 -->
-<g id="edge78" class="edge">
-<title>Node22&#45;&gt;Node91</title>
-<path fill="none" stroke="#191970" d="M1865.8919,-614.6236C1993.0032,-607.5694 2236.0721,-591.1028 2267,-567 2328.6688,-518.9401 2280.0835,-463.7802 2328,-402 2345.2271,-379.7885 2364.8878,-389.7012 2380,-366 2412.4868,-315.0494 2397.8704,-292.2083 2403,-232 2404.1696,-218.272 2404.8232,-214.6566 2403,-201 2401.0138,-186.122 2395.935,-169.4507 2392.4731,-159.2137"/>
-<polygon fill="#191970" stroke="#191970" points="1865.6197,-611.1332 1855.8266,-615.176 1866.0034,-618.1227 1865.6197,-611.1332"/>
+<title>Node22&#45;&gt;Node57</title>
+<path fill="none" stroke="#191970" d="M1498.0962,-611.5374C1541.1105,-604.9302 1594.2649,-592.0604 1636,-567 1719.9486,-516.5919 1702.4981,-459.1646 1782,-402 1852.9278,-351.0005 1949.172,-316.4156 2008.3555,-298.3366"/>
+<polygon fill="#191970" stroke="#191970" points="1497.3512,-608.1088 1487.9576,-613.0088 1498.3566,-615.0362 1497.3512,-608.1088"/>
 </g>
 <!-- Node96 -->
-<g id="node28" class="node">
+<g id="node22" class="node">
 <title>Node96</title>
-<g id="a_node28"><a xlink:href="tir_2analysis_8h.html" target="_top" xlink:title="Analysis utilities and passes for TIR. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="2370,-408 2370,-427 2510,-427 2510,-408 2370,-408"/>
-<text text-anchor="middle" x="2440" y="-415" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/tir/analysis.h</text>
+<g id="a_node22"><a xlink:href="tir_2analysis_8h.html" target="_top" xlink:title="Analysis utilities and passes for TIR. ">
+<polygon fill="#ffffff" stroke="#ff0000" points="1791,-408 1791,-427 1931,-427 1931,-408 1791,-408"/>
+<text text-anchor="middle" x="1861" y="-415" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/tir/analysis.h</text>
 </a>
 </g>
 </g>
 <!-- Node22&#45;&gt;Node96 -->
-<g id="edge82" class="edge">
+<g id="edge79" class="edge">
 <title>Node22&#45;&gt;Node96</title>
-<path fill="none" stroke="#191970" d="M1843.9112,-607.1093C1852.6126,-605.5597 1861.5324,-604.12 1870,-603 1977.3608,-588.7995 2266.2932,-627.6687 2356,-567 2396.1745,-539.83 2379.3664,-510.7778 2404,-469 2412.9761,-453.7769 2425.0081,-437.1973 2432.6338,-427.0741"/>
-<polygon fill="#191970" stroke="#191970" points="1843.0368,-603.7115 1833.837,-608.9664 1844.3058,-610.5955 1843.0368,-603.7115"/>
+<path fill="none" stroke="#191970" d="M1497.9104,-612.3396C1578.5656,-604.7816 1699.0511,-590.0565 1739,-567 1799.6406,-532.0012 1842.5818,-454.5043 1856.3053,-427.2281"/>
+<polygon fill="#191970" stroke="#191970" points="1497.4271,-608.8692 1487.7895,-613.2701 1498.068,-615.8398 1497.4271,-608.8692"/>
 </g>
 <!-- Node112 -->
-<g id="node29" class="node">
+<g id="node23" class="node">
 <title>Node112</title>
-<g id="a_node29"><a xlink:href="type__relation_8h.html" target="_top" xlink:title="Type relation and function for type inference(checking). ">
-<polygon fill="#ffffff" stroke="#ff0000" points="1678.5,-469.5 1678.5,-499.5 1787.5,-499.5 1787.5,-469.5 1678.5,-469.5"/>
-<text text-anchor="start" x="1686.5" y="-487.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/ir/type</text>
-<text text-anchor="middle" x="1733" y="-476.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_relation.h</text>
+<g id="a_node23"><a xlink:href="type__relation_8h.html" target="_top" xlink:title="Type relation and function for type inference(checking). ">
+<polygon fill="#ffffff" stroke="#ff0000" points="937.5,-469.5 937.5,-499.5 1046.5,-499.5 1046.5,-469.5 937.5,-469.5"/>
+<text text-anchor="start" x="945.5" y="-487.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/ir/type</text>
+<text text-anchor="middle" x="992" y="-476.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_relation.h</text>
 </a>
 </g>
 </g>
 <!-- Node22&#45;&gt;Node112 -->
-<g id="edge51" class="edge">
+<g id="edge38" class="edge">
 <title>Node22&#45;&gt;Node112</title>
-<path fill="none" stroke="#191970" d="M1750.7998,-604.4933C1734.2153,-596.2815 1716.4505,-584.0952 1707,-567 1694.8665,-545.0516 1709.6278,-516.6309 1721.358,-499.5225"/>
-<polygon fill="#191970" stroke="#191970" points="1749.8077,-607.8887 1760.3564,-608.876 1752.7257,-601.526 1749.8077,-607.8887"/>
+<path fill="none" stroke="#191970" d="M1344.0761,-615.2868C1220.9081,-609.3438 990.6925,-594.645 966,-567 948.9933,-547.9598 966.0391,-517.8266 979.411,-499.7296"/>
+<polygon fill="#191970" stroke="#191970" points="1344.0483,-618.7893 1354.203,-615.7673 1344.3802,-611.7972 1344.0483,-618.7893"/>
 </g>
 <!-- Node144 -->
-<g id="node30" class="node">
+<g id="node24" class="node">
 <title>Node144</title>
-<g id="a_node30"><a xlink:href="error_8h.html" target="_top" xlink:title="Utilities for error tracking and reporting. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="1918.5,-542 1918.5,-561 2037.5,-561 2037.5,-542 1918.5,-542"/>
-<text text-anchor="middle" x="1978" y="-549" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/ir/error.h</text>
+<g id="a_node24"><a xlink:href="error_8h.html" target="_top" xlink:title="Utilities for error tracking and reporting. ">
+<polygon fill="#ffffff" stroke="#ff0000" points="1273.5,-542 1273.5,-561 1392.5,-561 1392.5,-542 1273.5,-542"/>
+<text text-anchor="middle" x="1333" y="-549" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/ir/error.h</text>
 </a>
 </g>
 </g>
 <!-- Node22&#45;&gt;Node144 -->
-<g id="edge46" class="edge">
+<g id="edge33" class="edge">
 <title>Node22&#45;&gt;Node144</title>
-<path fill="none" stroke="#191970" d="M1825.9087,-605.416C1862.6458,-592.3928 1918.153,-572.7156 1950.9711,-561.0817"/>
-<polygon fill="#191970" stroke="#191970" points="1824.3349,-602.2604 1816.0791,-608.9005 1826.6738,-608.8581 1824.3349,-602.2604"/>
+<path fill="none" stroke="#191970" d="M1400.1899,-602.6559C1383.225,-589.7395 1359.8204,-571.9201 1345.5849,-561.0817"/>
+<polygon fill="#191970" stroke="#191970" points="1398.3151,-605.6275 1408.3917,-608.9005 1402.5555,-600.058 1398.3151,-605.6275"/>
 </g>
 <!-- Node145 -->
-<g id="node31" class="node">
+<g id="node25" class="node">
 <title>Node145</title>
-<g id="a_node31"><a xlink:href="global__var__supply_8h.html" target="_top" xlink:title="GlobalVarSupply that can be used to generate unique. ">
-<polygon fill="#ffffff" stroke="#000000" points="2006.5,-469.5 2006.5,-499.5 2123.5,-499.5 2123.5,-469.5 2006.5,-469.5"/>
-<text text-anchor="start" x="2014.5" y="-487.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/ir/global</text>
-<text text-anchor="middle" x="2065" y="-476.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_var_supply.h</text>
+<g id="a_node25"><a xlink:href="global__var__supply_8h.html" target="_top" xlink:title="GlobalVarSupply that can be used to generate unique. ">
+<polygon fill="#ffffff" stroke="#000000" points="1465.5,-335.5 1465.5,-365.5 1582.5,-365.5 1582.5,-335.5 1465.5,-335.5"/>
+<text text-anchor="start" x="1473.5" y="-353.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/ir/global</text>
+<text text-anchor="middle" x="1524" y="-342.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_var_supply.h</text>
 </a>
 </g>
 </g>
 <!-- Node22&#45;&gt;Node145 -->
-<g id="edge48" class="edge">
+<g id="edge35" class="edge">
 <title>Node22&#45;&gt;Node145</title>
-<path fill="none" stroke="#191970" d="M1848.9849,-606.9825C1856.0673,-605.6341 1863.1937,-604.2827 1870,-603 1913.0886,-594.8795 2035.7624,-600.5441 2064,-567 2079.7742,-548.2615 2074.9212,-517.7464 2069.9595,-499.5483"/>
-<polygon fill="#191970" stroke="#191970" points="1847.8495,-603.6359 1838.6823,-608.9474 1849.161,-610.5119 1847.8495,-603.6359"/>
+<path fill="none" stroke="#191970" d="M1462.516,-605.0192C1480.5385,-596.9404 1500.0047,-584.7138 1511,-567 1551.5107,-501.7355 1535.7126,-403.0067 1527.6232,-365.5909"/>
+<polygon fill="#191970" stroke="#191970" points="1461.1578,-601.7934 1453.2755,-608.8729 1463.8523,-608.254 1461.1578,-601.7934"/>
 </g>
 <!-- Node146 -->
-<g id="node32" class="node">
+<g id="node26" class="node">
 <title>Node146</title>
-<g id="a_node32"><a xlink:href="arg__info_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/arg_info.h">
-<polygon fill="#ffffff" stroke="#000000" points="1602,-402.5 1602,-432.5 1754,-432.5 1754,-402.5 1602,-402.5"/>
-<text text-anchor="start" x="1610" y="-420.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
-<text text-anchor="middle" x="1678" y="-409.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/arg_info.h</text>
+<g id="a_node26"><a xlink:href="arg__info_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/arg_info.h">
+<polygon fill="#ffffff" stroke="#000000" points="2425,-402.5 2425,-432.5 2577,-432.5 2577,-402.5 2425,-402.5"/>
+<text text-anchor="start" x="2433" y="-420.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
+<text text-anchor="middle" x="2501" y="-409.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/arg_info.h</text>
 </a>
 </g>
 </g>
 <!-- Node22&#45;&gt;Node146 -->
-<g id="edge52" class="edge">
+<g id="edge39" class="edge">
 <title>Node22&#45;&gt;Node146</title>
-<path fill="none" stroke="#191970" d="M1731.7671,-606.5372C1693.3042,-597.2035 1647.5973,-583.1403 1636,-567 1610.5846,-531.6285 1621.3898,-510.032 1636,-469 1640.9537,-455.0879 1651.4537,-442.2129 1660.7153,-432.8159"/>
-<polygon fill="#191970" stroke="#191970" points="1731.3387,-610.0323 1741.8754,-608.9243 1732.9475,-603.2196 1731.3387,-610.0323"/>
+<path fill="none" stroke="#191970" d="M1497.9479,-616.7458C1688.6561,-611.9611 2172.5822,-596.9905 2239,-567 2295.139,-541.6508 2284.3216,-500.9243 2337,-469 2364.4363,-452.373 2397.7493,-440.6288 2427.1654,-432.5986"/>
+<polygon fill="#191970" stroke="#191970" points="1497.6337,-613.2524 1487.7237,-616.9996 1497.8075,-620.2503 1497.6337,-613.2524"/>
+</g>
+<!-- Node83 -->
+<g id="node31" class="node">
+<title>Node83</title>
+<g id="a_node31"><a xlink:href="relay_2analysis_8h.html" target="_top" xlink:title="The set of Relay analysis passes written in C++. ">
+<polygon fill="#ffffff" stroke="#000000" points="179.5,-408 179.5,-427 334.5,-427 334.5,-408 179.5,-408"/>
+<text text-anchor="middle" x="257" y="-415" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/analysis.h</text>
+</a>
+</g>
+</g>
+<!-- Node22&#45;&gt;Node83 -->
+<g id="edge63" class="edge">
+<title>Node22&#45;&gt;Node83</title>
+<path fill="none" stroke="#191970" d="M1344.156,-616.3392C1113.0873,-609.6334 434.4845,-588.344 337,-567 259.2805,-549.9835 215.5014,-565.9508 171,-500 163.2935,-488.5791 164.2066,-480.9865 171,-469 182.7558,-448.2577 206.559,-434.9669 226.1483,-427.0927"/>
+<polygon fill="#191970" stroke="#191970" points="1344.3923,-619.8474 1354.4893,-616.638 1344.5947,-612.8503 1344.3923,-619.8474"/>
+</g>
+<!-- Node80 -->
+<g id="node32" class="node">
+<title>Node80</title>
+<g id="a_node32"><a xlink:href="relay_2expr_8h.html" target="_top" xlink:title="Relay expression language. ">
+<polygon fill="#ffffff" stroke="#ff0000" points="346.5,-542 346.5,-561 481.5,-561 481.5,-542 346.5,-542"/>
+<text text-anchor="middle" x="414" y="-549" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/expr.h</text>
+</a>
+</g>
+</g>
+<!-- Node22&#45;&gt;Node80 -->
+<g id="edge64" class="edge">
+<title>Node22&#45;&gt;Node80</title>
+<path fill="none" stroke="#191970" d="M1343.9369,-613.3727C1153.3435,-600.6917 662.265,-568.0181 481.8196,-556.0123"/>
+<polygon fill="#191970" stroke="#191970" points="1343.9377,-616.8804 1354.1481,-614.0521 1344.4025,-609.8958 1343.9377,-616.8804"/>
+</g>
+<!-- Node91 -->
+<g id="node34" class="node">
+<title>Node91</title>
+<g id="a_node34"><a xlink:href="relay_2feature_8h.html" target="_top" xlink:title="Detect features used in Expr/Module. ">
+<polygon fill="#ffffff" stroke="#000000" points="614.5,-475 614.5,-494 763.5,-494 763.5,-475 614.5,-475"/>
+<text text-anchor="middle" x="689" y="-482" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/feature.h</text>
+</a>
+</g>
+</g>
+<!-- Node22&#45;&gt;Node91 -->
+<g id="edge74" class="edge">
+<title>Node22&#45;&gt;Node91</title>
+<path fill="none" stroke="#191970" d="M1344.3216,-615.0819C1185.6068,-607.6091 831.6128,-588.7368 781,-567 742.9548,-550.6606 709.9842,-512.067 696.0605,-494.0613"/>
+<polygon fill="#191970" stroke="#191970" points="1344.3355,-618.5863 1354.4882,-615.5575 1344.6627,-611.5939 1344.3355,-618.5863"/>
 </g>
 <!-- Node151 -->
-<g id="node37" class="node">
+<g id="node36" class="node">
 <title>Node151</title>
-<g id="a_node37"><a xlink:href="state_8h.html" target="_top" xlink:title="This file defines ScheduleState, the core data structure of TensorIR scheduling. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="2528,-402.5 2528,-432.5 2662,-432.5 2662,-402.5 2528,-402.5"/>
-<text text-anchor="start" x="2536" y="-420.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/tir/schedule</text>
-<text text-anchor="middle" x="2595" y="-409.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/state.h</text>
+<g id="a_node36"><a xlink:href="ir__docsifier_8h.html" target="_top" xlink:title="include/tvm/script\l/printer/ir_docsifier.h">
+<polygon fill="#ffffff" stroke="#ff0000" points="975,-536.5 975,-566.5 1091,-566.5 1091,-536.5 975,-536.5"/>
+<text text-anchor="start" x="983" y="-554.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/script</text>
+<text text-anchor="middle" x="1033" y="-543.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/printer/ir_docsifier.h</text>
 </a>
 </g>
 </g>
 <!-- Node22&#45;&gt;Node151 -->
-<g id="edge83" class="edge">
+<g id="edge76" class="edge">
 <title>Node22&#45;&gt;Node151</title>
-<path fill="none" stroke="#191970" d="M1843.9075,-607.0814C1852.6094,-605.5354 1861.5303,-604.1042 1870,-603 1983.1016,-588.2554 2274.8887,-606.1989 2382,-567 2416.6445,-554.3214 2485.6478,-489.9698 2516,-469 2534.1846,-456.4365 2555.1594,-442.8019 2570.9246,-432.7196"/>
-<polygon fill="#191970" stroke="#191970" points="1843.034,-603.6833 1833.833,-608.9361 1844.3015,-610.5676 1843.034,-603.6833"/>
+<path fill="none" stroke="#191970" d="M1344.1763,-607.6801C1278.9834,-598.1387 1183.0744,-583.2537 1100,-567 1097.0708,-566.4269 1094.0747,-565.8179 1091.0527,-565.1855"/>
+<polygon fill="#191970" stroke="#191970" points="1343.9689,-611.1868 1354.369,-609.165 1344.978,-604.2599 1343.9689,-611.1868"/>
+</g>
+<!-- Node153 -->
+<g id="node37" class="node">
+<title>Node153</title>
+<g id="a_node37"><a xlink:href="state_8h.html" target="_top" xlink:title="This file defines ScheduleState, the core data structure of TensorIR scheduling. ">
+<polygon fill="#ffffff" stroke="#ff0000" points="1987,-402.5 1987,-432.5 2121,-432.5 2121,-402.5 1987,-402.5"/>
+<text text-anchor="start" x="1995" y="-420.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/tir/schedule</text>
+<text text-anchor="middle" x="2054" y="-409.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/state.h</text>
+</a>
+</g>
+</g>
+<!-- Node22&#45;&gt;Node153 -->
+<g id="edge80" class="edge">
+<title>Node22&#45;&gt;Node153</title>
+<path fill="none" stroke="#191970" d="M1497.933,-614.7642C1570.2228,-609.504 1680.6508,-596.9255 1772,-567 1875.2119,-533.1884 1985.6838,-463.5166 2031.7979,-432.7153"/>
+<polygon fill="#191970" stroke="#191970" points="1497.283,-611.301 1487.5508,-615.4884 1497.7702,-618.284 1497.283,-611.301"/>
 </g>
 <!-- Node24&#45;&gt;Node26 -->
 <g id="edge5" class="edge">
 <title>Node24&#45;&gt;Node26</title>
-<path fill="none" stroke="#191970" d="M1816.0496,-537.2298C1840.7047,-524.2227 1876.4451,-505.3676 1897.8378,-494.0817"/>
-<polygon fill="#191970" stroke="#191970" points="1814.4075,-534.1388 1807.196,-541.9005 1817.6738,-540.33 1814.4075,-534.1388"/>
+<path fill="none" stroke="#191970" d="M1213.006,-537.7424C1242.3495,-524.7224 1285.634,-505.5167 1311.4055,-494.0817"/>
+<polygon fill="#191970" stroke="#191970" points="1211.3557,-534.6455 1203.6346,-541.9005 1214.1948,-541.0439 1211.3557,-534.6455"/>
 </g>
 <!-- Node24&#45;&gt;Node112 -->
-<g id="edge45" class="edge">
+<g id="edge32" class="edge">
 <title>Node24&#45;&gt;Node112</title>
-<path fill="none" stroke="#191970" d="M1774.4085,-534.0423C1765.4549,-523.33 1754.1068,-509.7528 1745.5555,-499.5218"/>
-<polygon fill="#191970" stroke="#191970" points="1771.8779,-536.4723 1780.9766,-541.9005 1777.2489,-531.9831 1771.8779,-536.4723"/>
+<path fill="none" stroke="#191970" d="M1145.0224,-538.4605C1113.3658,-527.2974 1067.7776,-511.2216 1034.599,-499.5218"/>
+<polygon fill="#191970" stroke="#191970" points="1144.1828,-541.8757 1154.7776,-541.9005 1146.5108,-535.2741 1144.1828,-541.8757"/>
 </g>
 <!-- Node26&#45;&gt;Node23 -->
 <g id="edge6" class="edge">
 <title>Node26&#45;&gt;Node23</title>
-<path fill="none" stroke="#191970" d="M1951.4625,-471.0186C1972.1441,-462.1207 1998.009,-449.1318 2018,-433 2043.9855,-412.0309 2103.2212,-332.5785 2127.8934,-298.8214"/>
-<polygon fill="#191970" stroke="#191970" points="1949.9533,-467.8558 1942.0704,-474.9346 1952.6472,-474.3167 1949.9533,-467.8558"/>
+<path fill="none" stroke="#191970" d="M1342.9452,-465.646C1358.5618,-436.6208 1390.429,-379.6703 1423,-335 1432.3737,-322.1442 1444.2637,-308.6365 1453.4675,-298.6817"/>
+<polygon fill="#191970" stroke="#191970" points="1339.7435,-464.2116 1338.1246,-474.6821 1345.9196,-467.5065 1339.7435,-464.2116"/>
 </g>
 <!-- Node26&#45;&gt;Node27 -->
 <g id="edge7" class="edge">
 <title>Node26&#45;&gt;Node27</title>
-<path fill="none" stroke="#191970" d="M1839.295,-473.5156C1745.0479,-459.8405 1597.0117,-437.702 1593,-433 1576.4865,-413.645 1592.9295,-383.6151 1605.8407,-365.623"/>
-<polygon fill="#191970" stroke="#191970" points="1838.9751,-477.0057 1849.3736,-474.9755 1839.9786,-470.078 1838.9751,-477.0057"/>
+<path fill="none" stroke="#191970" d="M1297.9088,-470.9646C1276.8373,-461.9314 1250.1129,-448.8211 1229,-433 1202.6629,-413.2641 1177.9777,-383.6318 1164.2413,-365.7704"/>
+<polygon fill="#191970" stroke="#191970" points="1296.5755,-474.2007 1307.152,-474.8233 1299.2722,-467.741 1296.5755,-474.2007"/>
 </g>
 <!-- Node28 -->
 <g id="node8" class="node">
 <title>Node28</title>
 <g id="a_node8"><a xlink:href="relay_2transform_8h.html" target="_top" xlink:title="Relay specific transformation passes. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="1933,-73 1933,-92 2095,-92 2095,-73 1933,-73"/>
-<text text-anchor="middle" x="2014" y="-80" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/transform.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="648,-274 648,-293 810,-293 810,-274 648,-274"/>
+<text text-anchor="middle" x="729" y="-281" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/transform.h</text>
 </a>
 </g>
 </g>
 <!-- Node26&#45;&gt;Node28 -->
 <g id="edge8" class="edge">
 <title>Node26&#45;&gt;Node28</title>
-<path fill="none" stroke="#191970" d="M1937.7518,-468.324C1948.1115,-459.2344 1959.4338,-446.9254 1965,-433 1970.1138,-420.2064 1965.6151,-415.764 1965,-402 1963.6653,-372.1329 1963.1921,-364.6016 1959,-335 1956.7145,-318.8614 1953.5628,-315.2246 1952,-299 1950.679,-285.2857 1949.7875,-281.599 1952,-268 1963.3416,-198.288 1997.1828,-119.399 2009.4745,-92.2665"/>
-<polygon fill="#191970" stroke="#191970" points="1935.2709,-465.833 1929.7782,-474.8928 1939.7219,-471.2357 1935.2709,-465.833"/>
+<path fill="none" stroke="#191970" d="M1283.4131,-472.4685C1205.2158,-453.0063 1048.8886,-412.1118 920,-366 856.0616,-343.1251 783.0252,-309.3549 748.7358,-293.0285"/>
+<polygon fill="#191970" stroke="#191970" points="1282.9154,-475.951 1293.4637,-474.9589 1284.5991,-469.1565 1282.9154,-475.951"/>
 </g>
 <!-- Node30 -->
 <g id="node9" class="node">
 <title>Node30</title>
 <g id="a_node9"><a xlink:href="target__kind_8h.html" target="_top" xlink:title="Target kind registry. ">
-<polygon fill="#ffffff" stroke="#000000" points="1848.5,-402.5 1848.5,-432.5 1955.5,-432.5 1955.5,-402.5 1848.5,-402.5"/>
-<text text-anchor="start" x="1856.5" y="-420.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/target</text>
-<text text-anchor="middle" x="1902" y="-409.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/target_kind.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="1238.5,-402.5 1238.5,-432.5 1345.5,-432.5 1345.5,-402.5 1238.5,-402.5"/>
+<text text-anchor="start" x="1246.5" y="-420.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/target</text>
+<text text-anchor="middle" x="1292" y="-409.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/target_kind.h</text>
 </a>
 </g>
 </g>
 <!-- Node26&#45;&gt;Node30 -->
 <g id="edge9" class="edge">
 <title>Node26&#45;&gt;Node30</title>
-<path fill="none" stroke="#191970" d="M1911.9471,-465.1042C1909.7791,-454.7287 1907.1497,-442.145 1905.1389,-432.5218"/>
-<polygon fill="#191970" stroke="#191970" points="1908.5227,-465.8279 1913.9941,-474.9005 1915.3747,-464.396 1908.5227,-465.8279"/>
-</g>
-<!-- Node76 -->
-<g id="node22" class="node">
-<title>Node76</title>
-<g id="a_node22"><a xlink:href="virtual__device_8h.html" target="_top" xlink:title="A compile time representation for where data is to be stored at runtime, and how to compile code to c...">
-<polygon fill="#ffffff" stroke="#ff0000" points="1960.5,-268.5 1960.5,-298.5 2067.5,-298.5 2067.5,-268.5 1960.5,-268.5"/>
-<text text-anchor="start" x="1968.5" y="-286.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/target</text>
-<text text-anchor="middle" x="2014" y="-275.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/virtual_device.h</text>
-</a>
-</g>
-</g>
-<!-- Node26&#45;&gt;Node76 -->
-<g id="edge43" class="edge">
-<title>Node26&#45;&gt;Node76</title>
-<path fill="none" stroke="#191970" d="M1952.6899,-470.3776C1969.2893,-462.0534 1987.5031,-449.8057 1998,-433 2024.5503,-390.4925 2020.2751,-327.6525 2016.4782,-298.8892"/>
-<polygon fill="#191970" stroke="#191970" points="1950.7478,-467.4221 1943.1735,-474.8302 1953.7143,-473.7624 1950.7478,-467.4221"/>
+<path fill="none" stroke="#191970" d="M1321.7283,-466.0804C1315.2699,-455.5264 1307.2672,-442.4489 1301.1924,-432.5218"/>
+<polygon fill="#191970" stroke="#191970" points="1318.9206,-468.1977 1327.1257,-474.9005 1324.8914,-464.544 1318.9206,-468.1977"/>
 </g>
 <!-- Node26&#45;&gt;Node96 -->
-<g id="edge44" class="edge">
+<g id="edge31" class="edge">
 <title>Node26&#45;&gt;Node96</title>
-<path fill="none" stroke="#191970" d="M1973.1863,-473.1544C1981.4946,-471.6622 1989.9559,-470.2273 1998,-469 2128.7861,-449.0454 2282.3843,-432.7423 2369.7493,-424.1348"/>
-<polygon fill="#191970" stroke="#191970" points="1972.3702,-469.7456 1963.1661,-474.993 1973.6336,-476.6307 1972.3702,-469.7456"/>
+<path fill="none" stroke="#191970" d="M1415.0925,-474.083C1518.2425,-460.9938 1693.503,-438.7544 1790.7437,-426.4151"/>
+<polygon fill="#191970" stroke="#191970" points="1414.5131,-470.6283 1405.0333,-475.3594 1415.3944,-477.5726 1414.5131,-470.6283"/>
 </g>
 <!-- Node30&#45;&gt;Node31 -->
 <g id="edge10" class="edge">
 <title>Node30&#45;&gt;Node31</title>
-<path fill="none" stroke="#191970" d="M1899.7449,-392.3179C1898.941,-383.3414 1898.061,-373.5143 1897.3561,-365.6432"/>
-<polygon fill="#191970" stroke="#191970" points="1896.2604,-392.6487 1900.6385,-402.2967 1903.2325,-392.0243 1896.2604,-392.6487"/>
+<path fill="none" stroke="#191970" d="M1301.1783,-392.9021C1304.586,-383.7696 1308.3487,-373.6854 1311.3496,-365.6432"/>
+<polygon fill="#191970" stroke="#191970" points="1297.8897,-391.704 1297.6729,-402.2967 1304.448,-394.1512 1297.8897,-391.704"/>
 </g>
 <!-- Node31&#45;&gt;Node23 -->
 <g id="edge11" class="edge">
 <title>Node31&#45;&gt;Node23</title>
-<path fill="none" stroke="#191970" d="M1959.3855,-333.0233C1998.721,-322.1778 2048.687,-308.4011 2085.9272,-298.1332"/>
-<polygon fill="#191970" stroke="#191970" points="1958.3382,-329.6814 1949.6282,-335.7136 1960.1988,-336.4296 1958.3382,-329.6814"/>
+<path fill="none" stroke="#191970" d="M1360.2418,-331.3132C1383.6503,-320.9267 1412.1539,-308.2794 1433.9649,-298.6017"/>
+<polygon fill="#191970" stroke="#191970" points="1358.6084,-328.2089 1350.8873,-335.4639 1361.4475,-334.6073 1358.6084,-328.2089"/>
 </g>
 <!-- Node31&#45;&gt;Node28 -->
 <g id="edge29" class="edge">
 <title>Node31&#45;&gt;Node28</title>
-<path fill="none" stroke="#191970" d="M1885.0863,-326.0587C1867.7037,-283.045 1839.5964,-192.8131 1880,-134 1891.2193,-117.6688 1940.1936,-101.7575 1975.6962,-92.0444"/>
-<polygon fill="#191970" stroke="#191970" points="1881.9289,-327.5803 1889.0193,-335.4529 1888.3858,-324.877 1881.9289,-327.5803"/>
+<path fill="none" stroke="#191970" d="M1253.1426,-340.2363C1240.8319,-338.3889 1228.0265,-336.5578 1216,-335 1073.5066,-316.5421 906.9114,-300.0104 810.1358,-290.9057"/>
+<polygon fill="#191970" stroke="#191970" points="1252.927,-343.7436 1263.3394,-341.7862 1253.979,-336.8231 1252.927,-343.7436"/>
 </g>
 <!-- Node31&#45;&gt;Node45 -->
 <g id="edge12" class="edge">
 <title>Node31&#45;&gt;Node45</title>
-<path fill="none" stroke="#191970" d="M1832.2645,-339.9876C1770.531,-329.7465 1674.2954,-313.6263 1591,-299 1588.1929,-298.5071 1585.3342,-298.002 1582.4472,-297.4891"/>
-<polygon fill="#191970" stroke="#191970" points="1831.8079,-343.4596 1842.2457,-341.6421 1832.9527,-336.5538 1831.8079,-343.4596"/>
+<path fill="none" stroke="#191970" d="M1380.8623,-339.5006C1449.0904,-327.7492 1557.2032,-309.1282 1629.8225,-296.6205"/>
+<polygon fill="#191970" stroke="#191970" points="1379.9412,-336.1076 1370.6804,-341.2543 1381.1294,-343.006 1379.9412,-336.1076"/>
 </g>
 <!-- Node31&#45;&gt;Node48 -->
 <g id="edge27" class="edge">
 <title>Node31&#45;&gt;Node48</title>
-<path fill="none" stroke="#191970" d="M1872.7852,-328.4535C1850.7622,-308.9749 1815.7967,-281.611 1780,-268 1593.9631,-197.2632 983.7695,-162.9418 768.2357,-152.8073"/>
-<polygon fill="#191970" stroke="#191970" points="1870.524,-331.1274 1880.291,-335.233 1875.216,-325.9327 1870.524,-331.1274"/>
+<path fill="none" stroke="#191970" d="M1336.8639,-327.65C1353.7259,-309.4261 1379.367,-284.3335 1406,-268 1479.4784,-222.9373 1504.0834,-220.691 1588,-201 1726.0054,-168.617 1891.4839,-156.5133 1985.9238,-152.0484"/>
+<polygon fill="#191970" stroke="#191970" points="1333.8856,-325.7241 1329.7622,-335.4836 1339.0718,-330.4256 1333.8856,-325.7241"/>
 </g>
 <!-- Node31&#45;&gt;Node49 -->
 <g id="edge19" class="edge">
 <title>Node31&#45;&gt;Node49</title>
-<path fill="none" stroke="#191970" d="M1832.107,-343.6025C1703.6924,-329.7358 1426.0452,-299.7325 1421,-299 1418.1795,-298.5905 1415.3095,-298.1564 1412.4128,-297.7036"/>
-<polygon fill="#191970" stroke="#191970" points="1831.8852,-347.0989 1842.2031,-344.6927 1832.6367,-340.1393 1831.8852,-347.0989"/>
+<path fill="none" stroke="#191970" d="M1380.948,-342.5929C1404.5113,-339.8843 1431.4421,-337.0283 1456,-335 1752.7531,-310.4903 1828.0706,-321.2723 2125,-299 2152.7219,-296.9206 2183.0805,-294.1315 2209.8905,-291.4925"/>
+<polygon fill="#191970" stroke="#191970" points="1380.2553,-339.1499 1370.727,-343.7826 1381.0647,-346.1029 1380.2553,-339.1499"/>
 </g>
 <!-- Node31&#45;&gt;Node51 -->
 <g id="edge23" class="edge">
 <title>Node31&#45;&gt;Node51</title>
-<path fill="none" stroke="#191970" d="M1837.0545,-332.5484C1802.2315,-321.9432 1758.6363,-308.6665 1725.5876,-298.6017"/>
-<polygon fill="#191970" stroke="#191970" points="1836.0418,-335.8987 1846.6277,-335.4639 1838.0812,-329.2023 1836.0418,-335.8987"/>
+<path fill="none" stroke="#191970" d="M1253.2156,-339.588C1240.8983,-337.7994 1228.0718,-336.1557 1216,-335 803.3555,-295.4962 697.8698,-322.4063 284,-299 240.2543,-296.526 191.3939,-292.9279 152.0268,-289.8211"/>
+<polygon fill="#191970" stroke="#191970" points="1253.0063,-343.0957 1263.4147,-341.1171 1254.0442,-336.1731 1253.0063,-343.0957"/>
 </g>
 <!-- Node31&#45;&gt;Node52 -->
 <g id="edge24" class="edge">
 <title>Node31&#45;&gt;Node52</title>
-<path fill="none" stroke="#191970" d="M1832.1363,-345.5331C1789.3801,-342.2971 1731.834,-338.1136 1681,-335 1363.8579,-315.5752 1283.5107,-326.8825 967,-299 951.7776,-297.659 935.5377,-295.8696 920.0157,-293.9836"/>
-<polygon fill="#191970" stroke="#191970" points="1831.8867,-349.0241 1842.1232,-346.292 1832.4172,-342.0442 1831.8867,-349.0241"/>
+<path fill="none" stroke="#191970" d="M1381.0166,-343.3073C1404.5871,-340.6732 1431.5049,-337.6819 1456,-335 1604.8566,-318.7022 1642.7976,-320.4462 1791,-299 1793.8207,-298.5918 1796.6909,-298.1589 1799.5877,-297.707"/>
+<polygon fill="#191970" stroke="#191970" points="1380.3402,-339.861 1370.7912,-344.451 1381.1183,-346.8176 1380.3402,-339.861"/>
 </g>
 <!-- Node31&#45;&gt;Node53 -->
 <g id="edge25" class="edge">
 <title>Node31&#45;&gt;Node53</title>
-<path fill="none" stroke="#191970" d="M1832.1584,-345.1335C1789.4115,-341.7284 1731.8675,-337.5065 1681,-335 1077.9641,-305.2849 925.028,-344.7997 323,-299 307.87,-297.849 291.745,-296.1564 276.3204,-294.3078"/>
-<polygon fill="#191970" stroke="#191970" points="1831.8948,-348.6235 1842.1429,-345.9354 1832.4552,-341.646 1831.8948,-348.6235"/>
+<path fill="none" stroke="#191970" d="M1263.0138,-332.1392C1232.0352,-321.6033 1193.6176,-308.5375 1164.4035,-298.6017"/>
+<polygon fill="#191970" stroke="#191970" points="1262.1949,-335.5575 1272.7894,-335.4639 1264.4489,-328.9303 1262.1949,-335.5575"/>
 </g>
 <!-- Node31&#45;&gt;Node54 -->
 <g id="edge28" class="edge">
 <title>Node31&#45;&gt;Node54</title>
-<path fill="none" stroke="#191970" d="M1908.8079,-326.3994C1918.9379,-308.8339 1934.2935,-285.1986 1952,-268 2004.0461,-217.4467 2080.9577,-176.8043 2117.4037,-159.1005"/>
-<polygon fill="#191970" stroke="#191970" points="1905.5694,-325.019 1903.7359,-335.454 1911.6765,-328.44 1905.5694,-325.019"/>
+<path fill="none" stroke="#191970" d="M1253.1933,-339.808C1240.878,-337.9994 1228.0579,-336.2922 1216,-335 1178.7959,-331.0131 663.1463,-300.4234 458.6725,-288.3645"/>
+<polygon fill="#191970" stroke="#191970" points="1252.9819,-343.3156 1263.3917,-341.3442 1254.0246,-336.3937 1252.9819,-343.3156"/>
 </g>
 <!-- Node31&#45;&gt;Node57 -->
 <g id="edge30" class="edge">
 <title>Node31&#45;&gt;Node57</title>
-<path fill="none" stroke="#191970" d="M1959.6714,-341.1532C2021.5514,-331.7389 2118.0637,-316.1799 2201,-299 2204.0883,-298.3603 2207.2546,-297.6751 2210.4451,-296.962"/>
-<polygon fill="#191970" stroke="#191970" points="1959.0306,-337.7102 1949.6671,-342.6676 1960.0783,-344.6314 1959.0306,-337.7102"/>
-</g>
-<!-- Node31&#45;&gt;Node76 -->
-<g id="edge31" class="edge">
-<title>Node31&#45;&gt;Node76</title>
-<path fill="none" stroke="#191970" d="M1931.4773,-330.3561C1949.365,-320.1995 1970.7023,-308.0843 1987.1986,-298.7177"/>
-<polygon fill="#191970" stroke="#191970" points="1929.4494,-327.4827 1922.4815,-335.4639 1932.9057,-333.5699 1929.4494,-327.4827"/>
+<path fill="none" stroke="#191970" d="M1380.9683,-342.8265C1404.5338,-340.1422 1431.4607,-337.242 1456,-335 1680.0808,-314.5275 1737.3888,-324.0868 1961,-299 1976.4257,-297.2694 1993.1059,-294.911 2008.3808,-292.5562"/>
+<polygon fill="#191970" stroke="#191970" points="1380.2811,-339.3823 1370.7461,-344.0011 1381.0803,-346.3365 1380.2811,-339.3823"/>
 </g>
 <!-- Node45&#45;&gt;Node46 -->
 <g id="edge13" class="edge">
 <title>Node45&#45;&gt;Node46</title>
-<path fill="none" stroke="#191970" d="M1462.2892,-264.3079C1441.2105,-254.8679 1415.6453,-243.146 1393,-232 1309.5182,-190.9103 1297.0723,-160.9019 1208,-134 1127.7417,-109.7601 891.1399,-93.5315 768.1684,-86.4877"/>
-<polygon fill="#191970" stroke="#191970" points="1461.0732,-267.5979 1471.6317,-268.4745 1463.9244,-261.2048 1461.0732,-267.5979"/>
+<path fill="none" stroke="#191970" d="M1790.8876,-266.4546C1983.2542,-227.8276 2449.797,-134.1462 2632.1725,-97.5253"/>
+<polygon fill="#191970" stroke="#191970" points="1789.9435,-263.0743 1780.8283,-268.4745 1791.3217,-269.9373 1789.9435,-263.0743"/>
 </g>
 <!-- Node45&#45;&gt;Node47 -->
 <g id="edge15" class="edge">
 <title>Node45&#45;&gt;Node47</title>
-<path fill="none" stroke="#191970" d="M1490.111,-259.819C1457.6181,-213.5895 1378.599,-111.9292 1284,-67 1219.6284,-36.4271 1020.8342,-23.3302 910.2298,-18.2885"/>
-<polygon fill="#191970" stroke="#191970" points="1487.4089,-262.0648 1495.98,-268.2926 1493.1633,-258.079 1487.4089,-262.0648"/>
+<path fill="none" stroke="#191970" d="M1735.2034,-262.5705C1783.1354,-229.3657 1882.6215,-165.1892 1977,-134 2130.9357,-83.129 2616.9953,-37.8716 2805.8841,-21.7555"/>
+<polygon fill="#191970" stroke="#191970" points="1733.0593,-259.7989 1726.8662,-268.3952 1737.0683,-265.5372 1733.0593,-259.7989"/>
 </g>
 <!-- Node45&#45;&gt;Node48 -->
 <g id="edge16" class="edge">
 <title>Node45&#45;&gt;Node48</title>
-<path fill="none" stroke="#191970" d="M1419.3299,-267.7946C1295.6857,-249.0465 1261.7938,-253.6329 1142,-232 1082.2705,-221.2138 1068.545,-212.7622 1009,-201 927.0878,-184.8195 832.5356,-169.9467 768.1599,-160.3883"/>
-<polygon fill="#191970" stroke="#191970" points="1419.1603,-271.3096 1429.5761,-269.3701 1420.2242,-264.3909 1419.1603,-271.3096"/>
+<path fill="none" stroke="#191970" d="M1755.7973,-264.7561C1827.0326,-237.9428 1956.6221,-189.1647 2022.001,-164.5558"/>
+<polygon fill="#191970" stroke="#191970" points="1754.2717,-261.5905 1746.1457,-268.389 1756.7377,-268.1418 1754.2717,-261.5905"/>
 </g>
 <!-- Node46&#45;&gt;Node47 -->
 <g id="edge14" class="edge">
 <title>Node46&#45;&gt;Node47</title>
-<path fill="none" stroke="#191970" d="M732.9253,-63.1902C754.8011,-52.8685 781.3496,-40.3421 801.7475,-30.7177"/>
-<polygon fill="#191970" stroke="#191970" points="731.4179,-60.0313 723.8676,-67.4639 734.405,-66.362 731.4179,-60.0313"/>
+<path fill="none" stroke="#191970" d="M2755.6735,-63.865C2783.0667,-53.3773 2816.8283,-40.4515 2842.5553,-30.6017"/>
+<polygon fill="#191970" stroke="#191970" points="2754.3609,-60.6197 2746.2734,-67.4639 2756.8638,-67.157 2754.3609,-60.6197"/>
 </g>
 <!-- Node48&#45;&gt;Node46 -->
 <g id="edge17" class="edge">
 <title>Node48&#45;&gt;Node46</title>
-<path fill="none" stroke="#191970" d="M692,-124.0249C692,-115.128 692,-105.4287 692,-97.6432"/>
-<polygon fill="#191970" stroke="#191970" points="688.5001,-124.2966 692,-134.2967 695.5001,-124.2967 688.5001,-124.2966"/>
+<path fill="none" stroke="#191970" d="M2148.2734,-140.5383C2275.192,-127.3545 2510.2116,-102.9416 2630.9898,-90.3956"/>
+<polygon fill="#191970" stroke="#191970" points="2147.7892,-137.0696 2138.2043,-141.5842 2148.5125,-144.0322 2147.7892,-137.0696"/>
 </g>
 <!-- Node48&#45;&gt;Node47 -->
 <g id="edge18" class="edge">
 <title>Node48&#45;&gt;Node47</title>
-<path fill="none" stroke="#191970" d="M732.7069,-129.4859C747.6132,-121.0088 763.9982,-110.2459 777,-98 798.4172,-77.828 816.398,-48.3353 826.1457,-30.6193"/>
-<polygon fill="#191970" stroke="#191970" points="730.7357,-126.5755 723.6602,-134.4615 734.1091,-132.7091 730.7357,-126.5755"/>
+<path fill="none" stroke="#191970" d="M2148.3819,-135.3839C2309.4581,-109.0617 2653.1545,-52.8967 2805.9989,-27.9197"/>
+<polygon fill="#191970" stroke="#191970" points="2147.3448,-132.0069 2138.0402,-137.0739 2148.4738,-138.9153 2147.3448,-132.0069"/>
 </g>
 <!-- Node49&#45;&gt;Node50 -->
 <g id="edge20" class="edge">
 <title>Node49&#45;&gt;Node50</title>
-<path fill="none" stroke="#191970" d="M1249.6586,-269.9776C1184.7952,-259.7494 1093.7835,-245.25 1014,-232 1009.4642,-231.2467 1004.801,-230.4652 1000.0977,-229.6715"/>
-<polygon fill="#191970" stroke="#191970" points="1249.4156,-273.4824 1259.8385,-271.5817 1250.5052,-266.5677 1249.4156,-273.4824"/>
+<path fill="none" stroke="#191970" d="M2350.9155,-265.8198C2390.016,-255.1704 2439.2817,-241.7525 2476.552,-231.6017"/>
+<polygon fill="#191970" stroke="#191970" points="2349.936,-262.459 2341.2072,-268.4639 2351.7755,-269.213 2349.936,-262.459"/>
 </g>
 <!-- Node50&#45;&gt;Node46 -->
 <g id="edge21" class="edge">
 <title>Node50&#45;&gt;Node46</title>
-<path fill="none" stroke="#191970" d="M888.9465,-196.2536C842.0304,-169.1555 759.8891,-121.7118 718.0668,-97.5558"/>
-<polygon fill="#191970" stroke="#191970" points="887.4277,-199.4182 897.8376,-201.389 890.9288,-193.3566 887.4277,-199.4182"/>
+<path fill="none" stroke="#191970" d="M2554.2838,-193.7535C2571.8652,-176.3961 2597.4397,-152.4141 2622,-134 2640.0846,-120.4411 2661.9185,-107.2733 2678.929,-97.6468"/>
+<polygon fill="#191970" stroke="#191970" points="2551.4084,-191.6781 2546.8007,-201.2186 2556.3522,-196.6339 2551.4084,-191.6781"/>
 </g>
 <!-- Node50&#45;&gt;Node48 -->
 <g id="edge22" class="edge">
 <title>Node50&#45;&gt;Node48</title>
-<path fill="none" stroke="#191970" d="M862.3096,-198.6842C825.51,-188.0568 779.2927,-174.7095 744.2924,-164.6017"/>
-<polygon fill="#191970" stroke="#191970" points="861.3562,-202.0519 871.9347,-201.4639 863.2984,-195.3267 861.3562,-202.0519"/>
+<path fill="none" stroke="#191970" d="M2446.0086,-204.2416C2358.026,-191.6994 2222.6104,-172.3955 2138.3166,-160.3792"/>
+<polygon fill="#191970" stroke="#191970" points="2445.5701,-207.7144 2455.964,-205.6608 2446.5581,-200.7845 2445.5701,-207.7144"/>
 </g>
 <!-- Node53&#45;&gt;Node48 -->
 <g id="edge26" class="edge">
 <title>Node53&#45;&gt;Node48</title>
-<path fill="none" stroke="#191970" d="M265.2338,-265.7331C362.8614,-239.1434 545.2568,-189.4667 636.7205,-164.5558"/>
-<polygon fill="#191970" stroke="#191970" points="264.211,-262.3841 255.4823,-268.389 266.0506,-269.138 264.211,-262.3841"/>
-</g>
-<!-- Node76&#45;&gt;Node28 -->
-<g id="edge42" class="edge">
-<title>Node76&#45;&gt;Node28</title>
-<path fill="none" stroke="#191970" d="M2014,-258.3473C2014,-213.6419 2014,-122.2292 2014,-92.2562"/>
-<polygon fill="#191970" stroke="#191970" points="2010.5001,-258.3923 2014,-268.3923 2017.5001,-258.3924 2010.5001,-258.3923"/>
-</g>
-<!-- Node76&#45;&gt;Node80 -->
-<g id="edge32" class="edge">
-<title>Node76&#45;&gt;Node80</title>
-<path fill="none" stroke="#191970" d="M2077.7413,-267.8788C2153.1903,-250.9189 2229.0932,-235.5579 2277.2174,-226.0306"/>
-<polygon fill="#191970" stroke="#191970" points="2076.5675,-264.5557 2067.5841,-270.1725 2078.1094,-271.3838 2076.5675,-264.5557"/>
-</g>
-<!-- Node80&#45;&gt;Node28 -->
-<g id="edge41" class="edge">
-<title>Node80&#45;&gt;Node28</title>
-<path fill="none" stroke="#191970" d="M2308.8664,-200.0227C2286.2386,-178.5797 2247.2459,-142.8034 2230,-134 2187.2513,-112.1783 2134.9952,-99.4011 2092.5898,-92.0324"/>
-<polygon fill="#191970" stroke="#191970" points="2306.4718,-202.5756 2316.1251,-206.9419 2311.3017,-197.5087 2306.4718,-202.5756"/>
-</g>
-<!-- Node80&#45;&gt;Node54 -->
-<g id="edge40" class="edge">
-<title>Node80&#45;&gt;Node54</title>
-<path fill="none" stroke="#191970" d="M2289.2866,-203.416C2252.7439,-190.3928 2197.5303,-170.7156 2164.8858,-159.0817"/>
-<polygon fill="#191970" stroke="#191970" points="2288.4695,-206.8403 2299.0642,-206.9005 2290.8195,-200.2466 2288.4695,-206.8403"/>
-</g>
-<!-- Node80&#45;&gt;Node82 -->
-<g id="edge33" class="edge">
-<title>Node80&#45;&gt;Node82</title>
-<path fill="none" stroke="#191970" d="M2400.7271,-205.4387C2465.4829,-195.6142 2561.6191,-180.4544 2645,-165 2654.8087,-163.182 2665.3155,-161.0773 2675.2413,-159.0164"/>
-<polygon fill="#191970" stroke="#191970" points="2399.9745,-202.0127 2390.6104,-206.969 2401.0214,-208.934 2399.9745,-202.0127"/>
-</g>
-<!-- Node80&#45;&gt;Node83 -->
-<g id="edge35" class="edge">
-<title>Node80&#45;&gt;Node83</title>
-<path fill="none" stroke="#191970" d="M2313.3904,-198.2725C2302.4409,-179.9819 2290.5624,-152.0086 2306,-134 2322.443,-114.8185 2468.3297,-98.0735 2561.4742,-89.2111"/>
-<polygon fill="#191970" stroke="#191970" points="2310.5426,-200.3157 2318.882,-206.8506 2316.438,-196.5415 2310.5426,-200.3157"/>
-</g>
-<!-- Node80&#45;&gt;Node91 -->
-<g id="edge36" class="edge">
-<title>Node80&#45;&gt;Node91</title>
-<path fill="none" stroke="#191970" d="M2341.9112,-199.5785C2353.9608,-186.7639 2370.0795,-169.6218 2379.9904,-159.0817"/>
-<polygon fill="#191970" stroke="#191970" points="2339.3268,-197.2177 2335.0264,-206.9005 2344.4265,-202.0129 2339.3268,-197.2177"/>
-</g>
-<!-- Node92 -->
-<g id="node27" class="node">
-<title>Node92</title>
-<g id="a_node27"><a xlink:href="relay_2function_8h.html" target="_top" xlink:title="Relay Function. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="2482,-140 2482,-159 2636,-159 2636,-140 2482,-140"/>
-<text text-anchor="middle" x="2559" y="-147" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/function.h</text>
-</a>
-</g>
-</g>
-<!-- Node80&#45;&gt;Node92 -->
-<g id="edge37" class="edge">
-<title>Node80&#45;&gt;Node92</title>
-<path fill="none" stroke="#191970" d="M2369.1991,-204.0779C2414.4432,-191.0678 2484.5288,-170.9145 2525.6787,-159.0817"/>
-<polygon fill="#191970" stroke="#191970" points="2368.0265,-200.7732 2359.3832,-206.9005 2369.961,-207.5006 2368.0265,-200.7732"/>
-</g>
-<!-- Node82&#45;&gt;Node83 -->
-<g id="edge34" class="edge">
-<title>Node82&#45;&gt;Node83</title>
-<path fill="none" stroke="#191970" d="M2699.6558,-133.2992C2684.2694,-120.4131 2663.2635,-102.8207 2650.4408,-92.0817"/>
-<polygon fill="#191970" stroke="#191970" points="2697.6242,-136.1631 2707.538,-139.9005 2702.1187,-130.7965 2697.6242,-136.1631"/>
-</g>
-<!-- Node92&#45;&gt;Node28 -->
-<g id="edge39" class="edge">
-<title>Node92&#45;&gt;Node28</title>
-<path fill="none" stroke="#191970" d="M2498.9674,-138.1538C2490.2703,-136.6633 2481.416,-135.2292 2473,-134 2341.0877,-114.7341 2186.9288,-98.7717 2095.0503,-89.9474"/>
-<polygon fill="#191970" stroke="#191970" points="2498.6156,-141.6453 2509.069,-139.9203 2499.8215,-134.7499 2498.6156,-141.6453"/>
-</g>
-<!-- Node92&#45;&gt;Node83 -->
-<g id="edge38" class="edge">
-<title>Node92&#45;&gt;Node83</title>
-<path fill="none" stroke="#191970" d="M2578.3442,-133.2992C2593.7306,-120.4131 2614.7365,-102.8207 2627.5592,-92.0817"/>
-<polygon fill="#191970" stroke="#191970" points="2575.8813,-130.7965 2570.462,-139.9005 2580.3758,-136.1631 2575.8813,-130.7965"/>
+<path fill="none" stroke="#191970" d="M1164.8917,-264.5719C1213.3259,-245.0604 1293.1797,-215.505 1365,-201 1481.9284,-177.3849 1830.4177,-159.6645 1985.7805,-152.715"/>
+<polygon fill="#191970" stroke="#191970" points="1163.2879,-261.4457 1155.3416,-268.4534 1165.9235,-267.9306 1163.2879,-261.4457"/>
 </g>
 <!-- Node144&#45;&gt;Node26 -->
-<g id="edge47" class="edge">
+<g id="edge34" class="edge">
 <title>Node144&#45;&gt;Node26</title>
-<path fill="none" stroke="#191970" d="M1962.0047,-534.2147C1950.1842,-521.441 1934.5282,-504.5224 1924.8666,-494.0817"/>
-<polygon fill="#191970" stroke="#191970" points="1959.7561,-536.9381 1969.1169,-541.9005 1964.8939,-532.1837 1959.7561,-536.9381"/>
+<path fill="none" stroke="#191970" d="M1333,-531.6079C1333,-519.214 1333,-503.8263 1333,-494.0817"/>
+<polygon fill="#191970" stroke="#191970" points="1329.5001,-531.9005 1333,-541.9005 1336.5001,-531.9006 1329.5001,-531.9005"/>
 </g>
 <!-- Node145&#45;&gt;Node23 -->
-<g id="edge49" class="edge">
+<g id="edge36" class="edge">
 <title>Node145&#45;&gt;Node23</title>
-<path fill="none" stroke="#191970" d="M2074.0622,-459.8851C2089.5848,-417.7224 2120.8096,-332.9092 2133.4744,-298.5088"/>
-<polygon fill="#191970" stroke="#191970" points="2070.7325,-458.7989 2070.562,-469.3923 2077.3015,-461.2173 2070.7325,-458.7989"/>
+<path fill="none" stroke="#191970" d="M1504.6515,-327.3509C1496.7089,-317.8482 1487.7364,-307.1132 1480.657,-298.6432"/>
+<polygon fill="#191970" stroke="#191970" points="1502.1941,-329.8684 1511.2927,-335.2967 1507.5651,-325.3792 1502.1941,-329.8684"/>
 </g>
 <!-- Node146&#45;&gt;Node49 -->
-<g id="edge56" class="edge">
+<g id="edge43" class="edge">
 <title>Node146&#45;&gt;Node49</title>
-<path fill="none" stroke="#191970" d="M1700.4342,-394.5621C1715.1768,-376.7049 1729.1749,-352.1535 1714,-335 1670.5336,-285.8662 1485.8512,-308.8884 1421,-299 1418.1825,-298.5704 1415.315,-298.1192 1412.4205,-297.6519"/>
-<polygon fill="#191970" stroke="#191970" points="1697.6623,-392.4149 1693.7104,-402.2451 1702.93,-397.0249 1697.6623,-392.4149"/>
+<path fill="none" stroke="#191970" d="M2468.2429,-397.0839C2424.7197,-369.9579 2348.8337,-322.6615 2310.1567,-298.5558"/>
+<polygon fill="#191970" stroke="#191970" points="2466.4168,-400.0699 2476.7547,-402.389 2470.1193,-394.1293 2466.4168,-400.0699"/>
 </g>
 <!-- Node146&#45;&gt;Node50 -->
-<g id="edge68" class="edge">
+<g id="edge55" class="edge">
 <title>Node146&#45;&gt;Node50</title>
-<path fill="none" stroke="#191970" d="M1705.6453,-395.775C1742.0933,-364.6193 1797.5554,-306.7432 1761,-268 1713.7199,-217.8903 1210.7506,-236.4431 1142,-232 1094.8596,-228.9535 1042.0103,-225.2042 1000.1544,-222.1562"/>
-<polygon fill="#191970" stroke="#191970" points="1703.1839,-393.2705 1697.7498,-402.3656 1707.6697,-398.6443 1703.1839,-393.2705"/>
+<path fill="none" stroke="#191970" d="M2471.3607,-396.2277C2434.312,-366.9638 2379.1779,-312.8171 2409,-268 2420.4783,-250.7503 2438.9646,-239.2672 2458.1559,-231.627"/>
+<polygon fill="#191970" stroke="#191970" points="2469.3527,-399.0989 2479.4118,-402.4256 2473.6228,-393.5521 2469.3527,-399.0989"/>
 </g>
 <!-- Node146&#45;&gt;Node147 -->
-<g id="edge53" class="edge">
+<g id="edge40" class="edge">
 <title>Node146&#45;&gt;Node147</title>
-<path fill="none" stroke="#191970" d="M1687.2915,-392.5119C1692.3026,-374.2539 1694.948,-350.1367 1681,-335 1678.6769,-332.4789 1295.1167,-302.3902 1128.1684,-289.4047"/>
-<polygon fill="#191970" stroke="#191970" points="1683.9015,-391.6265 1684.2654,-402.2151 1690.584,-393.7107 1683.9015,-391.6265"/>
+<path fill="none" stroke="#191970" d="M2587.3888,-405.7952C2638.6858,-397.1728 2697.2785,-383.8439 2716,-366 2734.5721,-348.2985 2739.0219,-317.1416 2739.9425,-298.6007"/>
+<polygon fill="#191970" stroke="#191970" points="2586.5437,-402.3869 2577.2407,-407.4569 2587.6749,-409.2949 2586.5437,-402.3869"/>
 </g>
 <!-- Node146&#45;&gt;Node148 -->
-<g id="edge57" class="edge">
+<g id="edge44" class="edge">
 <title>Node146&#45;&gt;Node148</title>
-<path fill="none" stroke="#191970" d="M1591.5726,-407.652C1476.708,-394.5637 1275.5509,-371.6427 1166.1328,-359.175"/>
-<polygon fill="#191970" stroke="#191970" points="1591.4001,-411.1549 1601.7321,-408.8096 1592.1926,-404.1999 1591.4001,-411.1549"/>
+<path fill="none" stroke="#191970" d="M2531.6246,-396.3469C2546.0578,-386.3776 2562.9467,-374.7121 2576.0763,-365.6432"/>
+<polygon fill="#191970" stroke="#191970" points="2529.2497,-393.7335 2523.0108,-402.2967 2533.228,-399.4932 2529.2497,-393.7335"/>
 </g>
 <!-- Node146&#45;&gt;Node150 -->
-<g id="edge62" class="edge">
+<g id="edge49" class="edge">
 <title>Node146&#45;&gt;Node150</title>
-<path fill="none" stroke="#191970" d="M1591.6283,-411.6309C1401.3794,-398.7033 948.7647,-367.9475 768.2456,-355.681"/>
-<polygon fill="#191970" stroke="#191970" points="1591.6452,-415.1401 1601.8595,-412.3262 1592.1199,-408.1562 1591.6452,-415.1401"/>
+<path fill="none" stroke="#191970" d="M2587.1231,-402.355C2652.9309,-390.7825 2742.7778,-374.9826 2805.9467,-363.8742"/>
+<polygon fill="#191970" stroke="#191970" points="2586.428,-398.9234 2577.1853,-404.1026 2587.6404,-405.8177 2586.428,-398.9234"/>
 </g>
 <!-- Node147&#45;&gt;Node47 -->
-<g id="edge55" class="edge">
+<g id="edge42" class="edge">
 <title>Node147&#45;&gt;Node47</title>
-<path fill="none" stroke="#191970" d="M1041.522,-259.2376C1033.6736,-242.2405 1022.0714,-219.3726 1009,-201 959.2072,-131.0133 884.3826,-60.5713 851.0601,-30.556"/>
-<polygon fill="#191970" stroke="#191970" points="1038.3916,-260.8101 1045.6919,-268.4884 1044.7732,-257.9335 1038.3916,-260.8101"/>
+<path fill="none" stroke="#191970" d="M2752.762,-259.414C2781.4956,-205.1844 2850.5828,-74.7944 2874.0468,-30.5103"/>
+<polygon fill="#191970" stroke="#191970" points="2749.6307,-257.8483 2748.0414,-268.3233 2755.8161,-261.1257 2749.6307,-257.8483"/>
 </g>
 <!-- Node147&#45;&gt;Node50 -->
-<g id="edge54" class="edge">
+<g id="edge41" class="edge">
 <title>Node147&#45;&gt;Node50</title>
-<path fill="none" stroke="#191970" d="M1014.3151,-263.7743C994.7505,-253.5335 971.2132,-241.2132 953.0727,-231.7177"/>
-<polygon fill="#191970" stroke="#191970" points="1012.7915,-266.9272 1023.2743,-268.4639 1016.0378,-260.7255 1012.7915,-266.9272"/>
+<path fill="none" stroke="#191970" d="M2683.4237,-265.2759C2650.6423,-254.7165 2609.8618,-241.5805 2578.8829,-231.6017"/>
+<polygon fill="#191970" stroke="#191970" points="2682.7292,-268.7292 2693.3207,-268.4639 2684.8755,-262.0664 2682.7292,-268.7292"/>
 </g>
 <!-- Node148&#45;&gt;Node46 -->
-<g id="edge60" class="edge">
+<g id="edge47" class="edge">
 <title>Node148&#45;&gt;Node46</title>
-<path fill="none" stroke="#191970" d="M1126.9467,-329.6709C1137.6902,-321.597 1148.1098,-311.3002 1154,-299 1159.9507,-286.5735 1161.423,-279.6071 1154,-268 1108.8882,-197.4604 867.427,-127.4401 752.665,-97.5582"/>
-<polygon fill="#191970" stroke="#191970" points="1124.9294,-326.8107 1118.7377,-335.4081 1128.9394,-332.5483 1124.9294,-326.8107"/>
+<path fill="none" stroke="#191970" d="M2608.0717,-325.7364C2630.2908,-271.106 2682.9586,-141.6111 2700.8951,-97.5103"/>
+<polygon fill="#191970" stroke="#191970" points="2604.6981,-324.7415 2604.1726,-335.3233 2611.1823,-327.3788 2604.6981,-324.7415"/>
 </g>
 <!-- Node148&#45;&gt;Node50 -->
-<g id="edge61" class="edge">
+<g id="edge48" class="edge">
 <title>Node148&#45;&gt;Node50</title>
-<path fill="none" stroke="#191970" d="M1116.7746,-328.4135C1134.4745,-311.1276 1151.8971,-286.9283 1137,-268 1128.2501,-256.8824 1057.7545,-241.297 1000.2227,-230.1894"/>
-<polygon fill="#191970" stroke="#191970" points="1114.2275,-326.0019 1109.2654,-335.3629 1118.9821,-331.1394 1114.2275,-326.0019"/>
+<path fill="none" stroke="#191970" d="M2595.5628,-325.15C2593.1361,-308.2031 2588.3523,-285.8311 2579,-268 2571.6909,-254.0645 2559.685,-241.0017 2549.6374,-231.5189"/>
+<polygon fill="#191970" stroke="#191970" points="2592.1305,-325.911 2596.8414,-335.4009 2599.0767,-325.0445 2592.1305,-325.911"/>
 </g>
 <!-- Node148&#45;&gt;Node147 -->
-<g id="edge58" class="edge">
+<g id="edge45" class="edge">
 <title>Node148&#45;&gt;Node147</title>
-<path fill="none" stroke="#191970" d="M1076.3788,-326.4837C1071.1132,-317.1996 1065.247,-306.8565 1060.5887,-298.6432"/>
-<polygon fill="#191970" stroke="#191970" points="1073.3993,-328.325 1081.3772,-335.2967 1079.4882,-324.8716 1073.3993,-328.325"/>
+<path fill="none" stroke="#191970" d="M2638.9253,-331.1902C2660.8011,-320.8685 2687.3496,-308.3421 2707.7475,-298.7177"/>
+<polygon fill="#191970" stroke="#191970" points="2637.4179,-328.0313 2629.8676,-335.4639 2640.405,-334.362 2637.4179,-328.0313"/>
 </g>
 <!-- Node148&#45;&gt;Node149 -->
-<g id="edge59" class="edge">
+<g id="edge46" class="edge">
 <title>Node148&#45;&gt;Node149</title>
-<path fill="none" stroke="#191970" d="M1003.4911,-342.0013C868.9639,-328.7853 611.8312,-303.5245 484.0186,-290.9681"/>
-<polygon fill="#191970" stroke="#191970" points="1003.4568,-345.5147 1013.7511,-343.0093 1004.1412,-338.5483 1003.4568,-345.5147"/>
+<path fill="none" stroke="#191970" d="M2565.861,-329.7951C2550.2777,-319.7558 2531.8939,-307.9124 2517.6216,-298.7177"/>
+<polygon fill="#191970" stroke="#191970" points="2564.3583,-332.9904 2574.6604,-335.4639 2568.1494,-327.1058 2564.3583,-332.9904"/>
 </g>
 <!-- Node150&#45;&gt;Node46 -->
-<g id="edge64" class="edge">
+<g id="edge51" class="edge">
 <title>Node150&#45;&gt;Node46</title>
-<path fill="none" stroke="#191970" d="M665.482,-328.2769C656.905,-319.8858 648.0555,-309.7486 642,-299 611.7871,-245.3715 614.8417,-226.052 607,-165 605.2448,-151.3345 600.2376,-146.004 607,-134 616.3156,-117.4638 633.3193,-105.6299 649.6841,-97.5103"/>
-<polygon fill="#191970" stroke="#191970" points="663.1729,-330.9094 672.872,-335.1729 667.9486,-325.7915 663.1729,-330.9094"/>
+<path fill="none" stroke="#191970" d="M2884.7544,-325.0184C2885.5578,-308.1823 2884.6303,-285.9882 2877,-268 2844.5036,-191.3908 2766.2343,-126.0779 2728.3098,-97.6712"/>
+<polygon fill="#191970" stroke="#191970" points="2881.2479,-324.9872 2884.0295,-335.2103 2888.2302,-325.4838 2881.2479,-324.9872"/>
 </g>
 <!-- Node150&#45;&gt;Node47 -->
-<g id="edge66" class="edge">
+<g id="edge53" class="edge">
 <title>Node150&#45;&gt;Node47</title>
-<path fill="none" stroke="#191970" d="M700.7674,-325.9059C706.8549,-309.3559 715.3902,-287.1447 724,-268 745.1158,-221.0469 755.578,-211.8142 777,-165 790.3588,-135.8065 792.5125,-127.9792 804,-98 812.9433,-74.6604 822.74,-47.3666 828.6264,-30.7635"/>
-<polygon fill="#191970" stroke="#191970" points="697.4428,-324.807 697.3146,-335.4011 704.0214,-327.1992 697.4428,-324.807"/>
+<path fill="none" stroke="#191970" d="M2889.0473,-325.4573C2891.0694,-317.113 2892.9973,-307.7214 2894,-299 2895.5736,-285.3124 2894.3271,-281.7739 2894,-268 2891.8588,-177.8391 2885.4226,-69.7221 2882.9667,-30.5751"/>
+<polygon fill="#191970" stroke="#191970" points="2885.6075,-324.7799 2886.4982,-335.3372 2892.3856,-326.5287 2885.6075,-324.7799"/>
 </g>
 <!-- Node150&#45;&gt;Node48 -->
-<g id="edge67" class="edge">
+<g id="edge54" class="edge">
 <title>Node150&#45;&gt;Node48</title>
-<path fill="none" stroke="#191970" d="M667.8646,-327.7793C660.6931,-319.49 653.754,-309.5428 650,-299 632.8177,-250.7444 664.2758,-191.7673 681.6384,-164.5646"/>
-<polygon fill="#191970" stroke="#191970" points="665.355,-330.2206 674.7153,-335.1841 670.4933,-325.4668 665.355,-330.2206"/>
+<path fill="none" stroke="#191970" d="M2879.6668,-325.4727C2876.6581,-307.1532 2869.77,-282.9753 2854,-268 2774.6249,-192.6248 2724.6134,-221.0296 2617,-201 2447.661,-169.4818 2245.489,-156.8954 2138.011,-152.1516"/>
+<polygon fill="#191970" stroke="#191970" points="2876.2105,-326.0317 2881.0278,-335.4681 2883.1465,-325.0873 2876.2105,-326.0317"/>
 </g>
 <!-- Node150&#45;&gt;Node50 -->
-<g id="edge65" class="edge">
+<g id="edge52" class="edge">
 <title>Node150&#45;&gt;Node50</title>
-<path fill="none" stroke="#191970" d="M705.8504,-326.6961C717.7526,-308.2083 736.5259,-283.2314 759,-268 785.4559,-250.07 818.456,-238.1453 847.9767,-230.3324"/>
-<polygon fill="#191970" stroke="#191970" points="702.7267,-325.0917 700.4214,-335.4327 708.6723,-328.7863 702.7267,-325.0917"/>
+<path fill="none" stroke="#191970" d="M2871.5774,-326.0025C2862.2084,-307.0738 2846.5909,-281.842 2825,-268 2789.9445,-245.5258 2682.8022,-230.9311 2608.294,-223.2062"/>
+<polygon fill="#191970" stroke="#191970" points="2868.4911,-327.6648 2875.9057,-335.2327 2874.8289,-324.6928 2868.4911,-327.6648"/>
 </g>
 <!-- Node150&#45;&gt;Node147 -->
-<g id="edge63" class="edge">
+<g id="edge50" class="edge">
 <title>Node150&#45;&gt;Node147</title>
-<path fill="none" stroke="#191970" d="M778.2064,-334.456C838.3648,-323.2599 917.9193,-308.4539 975.7633,-297.6885"/>
-<polygon fill="#191970" stroke="#191970" points="777.5375,-331.0203 768.3467,-336.291 778.8183,-337.9022 777.5375,-331.0203"/>
+<path fill="none" stroke="#191970" d="M2841.0747,-331.1902C2819.1989,-320.8685 2792.6504,-308.3421 2772.2525,-298.7177"/>
+<polygon fill="#191970" stroke="#191970" points="2839.595,-334.362 2850.1324,-335.4639 2842.5821,-328.0313 2839.595,-334.362"/>
 </g>
-<!-- Node158&#45;&gt;Node22 -->
-<g id="edge86" class="edge">
-<title>Node158&#45;&gt;Node22</title>
-<path fill="none" stroke="#191970" d="M2725.7538,-675.259C2532.5964,-663.5553 2035.7581,-633.4513 1855.5511,-622.5324"/>
-<polygon fill="#191970" stroke="#191970" points="2725.6335,-678.758 2735.8269,-675.8693 2726.0569,-671.7708 2725.6335,-678.758"/>
+<!-- Node80&#45;&gt;Node28 -->
+<g id="edge73" class="edge">
+<title>Node80&#45;&gt;Node28</title>
+<path fill="none" stroke="#191970" d="M414.4056,-531.7835C415.6717,-513.9146 419.8674,-487.6071 433,-469 501.7168,-371.6374 639.3387,-314.4494 699.5993,-293.1255"/>
+<polygon fill="#191970" stroke="#191970" points="410.9031,-531.757 413.9369,-541.9081 417.8956,-532.0807 410.9031,-531.757"/>
 </g>
-<!-- Node158&#45;&gt;Node92 -->
-<g id="edge87" class="edge">
-<title>Node158&#45;&gt;Node92</title>
-<path fill="none" stroke="#191970" d="M2804,-660.2188C2804,-634.9855 2804,-589.9988 2804,-551.5 2804,-551.5 2804,-551.5 2804,-350.5 2804,-241.5654 2657.4396,-180.8997 2591.0295,-159.0022"/>
-<polygon fill="#191970" stroke="#191970" points="2800.5001,-660.3281 2804,-670.3281 2807.5001,-660.3282 2800.5001,-660.3281"/>
+<!-- Node80&#45;&gt;Node54 -->
+<g id="edge72" class="edge">
+<title>Node80&#45;&gt;Node54</title>
+<path fill="none" stroke="#191970" d="M407.9409,-532.0296C405.1608,-522.4625 402.0373,-510.7137 400,-500 384.9522,-420.8677 378.3302,-323.5388 376.5302,-293.0749"/>
+<polygon fill="#191970" stroke="#191970" points="404.627,-533.1637 410.8577,-541.7328 411.3307,-531.1486 404.627,-533.1637"/>
 </g>
-<!-- Node159 -->
+<!-- Node80&#45;&gt;Node83 -->
+<g id="edge67" class="edge">
+<title>Node80&#45;&gt;Node83</title>
+<path fill="none" stroke="#191970" d="M395.2802,-535.03C376.1955,-518.2893 345.6855,-491.6656 319,-469 301.7225,-454.3252 281.4838,-437.5999 268.9505,-427.2959"/>
+<polygon fill="#191970" stroke="#191970" points="393.1293,-537.7991 402.9528,-541.7678 397.7483,-532.5393 393.1293,-537.7991"/>
+</g>
+<!-- Node80&#45;&gt;Node82 -->
+<g id="edge65" class="edge">
+<title>Node80&#45;&gt;Node82</title>
+<path fill="none" stroke="#191970" d="M380.1511,-538.0806C347.3005,-525.057 298.2631,-505.6162 269.1687,-494.0817"/>
+<polygon fill="#191970" stroke="#191970" points="379.2004,-541.4687 389.7864,-541.9005 381.7802,-534.9614 379.2004,-541.4687"/>
+</g>
+<!-- Node80&#45;&gt;Node91 -->
+<g id="edge68" class="edge">
+<title>Node80&#45;&gt;Node91</title>
+<path fill="none" stroke="#191970" d="M463.0091,-539.5596C516.2916,-526.578 600.3971,-506.0869 649.5809,-494.1039"/>
+<polygon fill="#191970" stroke="#191970" points="461.996,-536.204 453.1087,-541.9717 463.6531,-543.005 461.996,-536.204"/>
+</g>
+<!-- Node92 -->
+<g id="node35" class="node">
+<title>Node92</title>
+<g id="a_node35"><a xlink:href="relay_2function_8h.html" target="_top" xlink:title="Relay Function. ">
+<polygon fill="#ffffff" stroke="#ff0000" points="442,-475 442,-494 596,-494 596,-475 442,-475"/>
+<text text-anchor="middle" x="519" y="-482" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/function.h</text>
+</a>
+</g>
+</g>
+<!-- Node80&#45;&gt;Node92 -->
+<g id="edge69" class="edge">
+<title>Node80&#45;&gt;Node92</title>
+<path fill="none" stroke="#191970" d="M437.7239,-536.3619C458.044,-523.3957 486.6867,-505.119 503.984,-494.0817"/>
+<polygon fill="#191970" stroke="#191970" points="435.5912,-533.5708 429.0439,-541.9005 439.3567,-539.4719 435.5912,-533.5708"/>
+</g>
+<!-- Node82&#45;&gt;Node83 -->
+<g id="edge66" class="edge">
+<title>Node82&#45;&gt;Node83</title>
+<path fill="none" stroke="#191970" d="M248.495,-464.9863C250.7262,-452.5286 253.5208,-436.9258 255.2839,-427.0817"/>
+<polygon fill="#191970" stroke="#191970" points="245.0372,-464.4401 246.7193,-474.9005 251.9276,-465.6742 245.0372,-464.4401"/>
+</g>
+<!-- Node92&#45;&gt;Node28 -->
+<g id="edge71" class="edge">
+<title>Node92&#45;&gt;Node28</title>
+<path fill="none" stroke="#191970" d="M536.4669,-467.7817C578.777,-427.2849 685.7177,-324.9273 718.9729,-293.0974"/>
+<polygon fill="#191970" stroke="#191970" points="533.9328,-465.3623 529.1287,-474.8054 538.7731,-470.4192 533.9328,-465.3623"/>
+</g>
+<!-- Node92&#45;&gt;Node83 -->
+<g id="edge70" class="edge">
+<title>Node92&#45;&gt;Node83</title>
+<path fill="none" stroke="#191970" d="M471.9907,-472.4785C421.2109,-459.4929 341.317,-439.062 294.5556,-427.1039"/>
+<polygon fill="#191970" stroke="#191970" points="471.1846,-475.885 481.74,-474.9717 472.919,-469.1032 471.1846,-475.885"/>
+</g>
+<!-- Node160&#45;&gt;Node22 -->
+<g id="edge83" class="edge">
+<title>Node160&#45;&gt;Node22</title>
+<path fill="none" stroke="#191970" d="M2280.5583,-674.857C2099.7785,-663.0041 1656.4561,-633.9377 1487.7008,-622.8732"/>
+<polygon fill="#191970" stroke="#191970" points="2280.5782,-678.3657 2290.7858,-675.5275 2281.0362,-671.3807 2280.5782,-678.3657"/>
+</g>
+<!-- Node160&#45;&gt;Node92 -->
+<g id="edge84" class="edge">
+<title>Node160&#45;&gt;Node92</title>
+<path fill="none" stroke="#191970" d="M2280.7445,-678.0536C1998.286,-670.4866 1032.276,-639.7621 733,-567 658.7134,-548.9389 575.4817,-511.5632 538.5088,-494.0042"/>
+<polygon fill="#191970" stroke="#191970" points="2280.811,-681.5565 2290.9005,-678.3236 2280.9971,-674.559 2280.811,-681.5565"/>
+</g>
+<!-- Node161 -->
 <g id="node39" class="node">
-<title>Node159</title>
+<title>Node161</title>
 <g id="a_node39"><a xlink:href="script_2ir__builder_2base_8h.html" target="_top" xlink:title="include/tvm/script\l/ir_builder/base.h">
-<polygon fill="#ffffff" stroke="#ff0000" points="2832,-603.5 2832,-633.5 2936,-633.5 2936,-603.5 2832,-603.5"/>
-<text text-anchor="start" x="2840" y="-621.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/script</text>
-<text text-anchor="middle" x="2884" y="-610.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/ir_builder/base.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="2338,-603.5 2338,-633.5 2442,-633.5 2442,-603.5 2338,-603.5"/>
+<text text-anchor="start" x="2346" y="-621.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/script</text>
+<text text-anchor="middle" x="2390" y="-610.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/ir_builder/base.h</text>
 </a>
 </g>
 </g>
-<!-- Node158&#45;&gt;Node159 -->
-<g id="edge88" class="edge">
-<title>Node158&#45;&gt;Node159</title>
-<path fill="none" stroke="#191970" d="M2824.763,-664.0385C2837.0048,-654.6276 2852.4018,-642.7911 2864.4298,-633.5446"/>
-<polygon fill="#191970" stroke="#191970" points="2822.2949,-661.521 2816.5,-670.3906 2826.5613,-667.0707 2822.2949,-661.521"/>
+<!-- Node160&#45;&gt;Node161 -->
+<g id="edge85" class="edge">
+<title>Node160&#45;&gt;Node161</title>
+<path fill="none" stroke="#191970" d="M2368.4596,-661.2334C2372.9356,-652.3537 2378.2082,-641.8934 2382.4165,-633.5446"/>
+<polygon fill="#191970" stroke="#191970" points="2365.2196,-659.8855 2363.8438,-670.3906 2371.4704,-663.0363 2365.2196,-659.8855"/>
 </g>
-<!-- Node160 -->
+<!-- Node162 -->
 <g id="node40" class="node">
-<title>Node160</title>
-<g id="a_node40"><a xlink:href="ir__builder_2ir_2frame_8h.html" target="_top" xlink:title="include/tvm/script\l/ir_builder/ir/frame.h">
-<polygon fill="#ffffff" stroke="#ff0000" points="2858,-536.5 2858,-566.5 2974,-566.5 2974,-536.5 2858,-536.5"/>
-<text text-anchor="start" x="2866" y="-554.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/script</text>
-<text text-anchor="middle" x="2916" y="-543.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/ir_builder/ir/frame.h</text>
+<title>Node162</title>
+<g id="a_node40"><a xlink:href="ir_2frame_8h.html" target="_top" xlink:title="include/tvm/script\l/ir_builder/ir/frame.h">
+<polygon fill="#ffffff" stroke="#ff0000" points="2329,-536.5 2329,-566.5 2445,-566.5 2445,-536.5 2329,-536.5"/>
+<text text-anchor="start" x="2337" y="-554.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/script</text>
+<text text-anchor="middle" x="2387" y="-543.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/ir_builder/ir/frame.h</text>
 </a>
 </g>
 </g>
-<!-- Node158&#45;&gt;Node160 -->
-<g id="edge91" class="edge">
-<title>Node158&#45;&gt;Node160</title>
-<path fill="none" stroke="#191970" d="M2862.4542,-668.1751C2896.5694,-659.9561 2934.8492,-647.9889 2945,-634 2959.9796,-613.3564 2942.8092,-584.2777 2929.2345,-566.7255"/>
-<polygon fill="#191970" stroke="#191970" points="2861.4467,-664.8161 2852.5042,-670.4978 2863.038,-671.6329 2861.4467,-664.8161"/>
+<!-- Node160&#45;&gt;Node162 -->
+<g id="edge88" class="edge">
+<title>Node160&#45;&gt;Node162</title>
+<path fill="none" stroke="#191970" d="M2343.7753,-662.0933C2332.3361,-646.3503 2320.053,-622.9094 2329,-603 2335.7893,-587.8922 2349.4438,-575.4132 2361.7992,-566.5246"/>
+<polygon fill="#191970" stroke="#191970" points="2341.3356,-664.6563 2350.2461,-670.3881 2346.8549,-660.3507 2341.3356,-664.6563"/>
 </g>
-<!-- Node161 -->
+<!-- Node163 -->
 <g id="node41" class="node">
-<title>Node161</title>
+<title>Node163</title>
 <g id="a_node41"><a xlink:href="ir_2ir_8h.html" target="_top" xlink:title="include/tvm/script\l/ir_builder/ir/ir.h">
-<polygon fill="#ffffff" stroke="#000000" points="2907,-469.5 2907,-499.5 3011,-499.5 3011,-469.5 2907,-469.5"/>
-<text text-anchor="start" x="2915" y="-487.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/script</text>
-<text text-anchor="middle" x="2959" y="-476.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/ir_builder/ir/ir.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="2346,-469.5 2346,-499.5 2450,-499.5 2450,-469.5 2346,-469.5"/>
+<text text-anchor="start" x="2354" y="-487.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/script</text>
+<text text-anchor="middle" x="2398" y="-476.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/ir_builder/ir/ir.h</text>
 </a>
 </g>
 </g>
-<!-- Node158&#45;&gt;Node161 -->
-<g id="edge92" class="edge">
-<title>Node158&#45;&gt;Node161</title>
-<path fill="none" stroke="#191970" d="M2864.5975,-668.3231C2902.5946,-659.8803 2946.9516,-647.6025 2961,-634 2993.0698,-602.9481 2992.6566,-579.5826 2983,-536 2980.1406,-523.095 2973.699,-509.5896 2968.2087,-499.6481"/>
-<polygon fill="#191970" stroke="#191970" points="2863.621,-664.9534 2854.5896,-670.4928 2865.1042,-671.7945 2863.621,-664.9534"/>
+<!-- Node160&#45;&gt;Node163 -->
+<g id="edge89" class="edge">
+<title>Node160&#45;&gt;Node163</title>
+<path fill="none" stroke="#191970" d="M2408.0607,-666.6697C2424.4646,-659.6786 2441.0813,-649.2739 2451,-634 2474.7328,-597.4539 2471.5142,-575.9013 2454,-536 2447.4702,-521.1236 2434.2875,-508.6537 2422.3524,-499.7121"/>
+<polygon fill="#191970" stroke="#191970" points="2406.5265,-663.5111 2398.4791,-670.4025 2409.0676,-670.0336 2406.5265,-663.5111"/>
 </g>
-<!-- Node163 -->
+<!-- Node165 -->
 <g id="node42" class="node">
-<title>Node163</title>
+<title>Node165</title>
 <g id="a_node42"><a xlink:href="tir_2function_8h.html" target="_top" xlink:title="TIR Function. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="2525,-475 2525,-494 2665,-494 2665,-475 2525,-475"/>
-<text text-anchor="middle" x="2595" y="-482" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/tir/function.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="2019,-475 2019,-494 2159,-494 2159,-475 2019,-475"/>
+<text text-anchor="middle" x="2089" y="-482" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/tir/function.h</text>
 </a>
 </g>
 </g>
-<!-- Node158&#45;&gt;Node163 -->
-<g id="edge93" class="edge">
-<title>Node158&#45;&gt;Node163</title>
-<path fill="none" stroke="#191970" d="M2786.3137,-663.4561C2744.1893,-624.0527 2638.9444,-525.6059 2605.4269,-494.2534"/>
-<polygon fill="#191970" stroke="#191970" points="2783.9417,-666.0299 2793.6357,-670.3051 2788.7236,-660.9178 2783.9417,-666.0299"/>
-</g>
-<!-- Node159&#45;&gt;Node160 -->
-<g id="edge89" class="edge">
-<title>Node159&#45;&gt;Node160</title>
-<path fill="none" stroke="#191970" d="M2895.6092,-594.1932C2900.0075,-584.9844 2904.8855,-574.771 2908.7674,-566.6432"/>
-<polygon fill="#191970" stroke="#191970" points="2892.4129,-592.7646 2891.2613,-603.2967 2898.7294,-595.7815 2892.4129,-592.7646"/>
-</g>
-<!-- Node160&#45;&gt;Node161 -->
+<!-- Node160&#45;&gt;Node165 -->
 <g id="edge90" class="edge">
-<title>Node160&#45;&gt;Node161</title>
-<path fill="none" stroke="#191970" d="M2931.2275,-527.7735C2937.2334,-518.4154 2943.9551,-507.9421 2949.2812,-499.6432"/>
-<polygon fill="#191970" stroke="#191970" points="2928.2131,-525.9903 2925.7574,-536.2967 2934.1042,-529.7712 2928.2131,-525.9903"/>
+<title>Node160&#45;&gt;Node165</title>
+<path fill="none" stroke="#191970" d="M2338.7851,-664.0382C2304.2914,-636.966 2231.6165,-580.6391 2168,-536 2146.2888,-520.7654 2120.4201,-504.1889 2104.3501,-494.0695"/>
+<polygon fill="#191970" stroke="#191970" points="2336.6793,-666.8348 2346.7033,-670.2657 2341.0067,-661.3326 2336.6793,-666.8348"/>
 </g>
-<!-- Node163&#45;&gt;Node23 -->
-<g id="edge94" class="edge">
-<title>Node163&#45;&gt;Node23</title>
-<path fill="none" stroke="#191970" d="M2624.2361,-470.1711C2653.9853,-453.7573 2692.8433,-426.0285 2671,-402 2614.3897,-339.7265 2374.5925,-390.2396 2294,-366 2243.2054,-350.7226 2189.8536,-318.1 2160.8866,-298.7686"/>
-<polygon fill="#191970" stroke="#191970" points="2622.5585,-467.099 2615.3733,-474.8851 2625.8457,-473.2792 2622.5585,-467.099"/>
+<!-- Node161&#45;&gt;Node162 -->
+<g id="edge86" class="edge">
+<title>Node161&#45;&gt;Node162</title>
+<path fill="none" stroke="#191970" d="M2388.8593,-593.0249C2388.461,-584.128 2388.0267,-574.4287 2387.6781,-566.6432"/>
+<polygon fill="#191970" stroke="#191970" points="2385.3754,-593.4633 2389.3193,-603.2967 2392.3683,-593.1501 2385.3754,-593.4633"/>
 </g>
-<!-- Node163&#45;&gt;Node96 -->
-<g id="edge96" class="edge">
-<title>Node163&#45;&gt;Node96</title>
-<path fill="none" stroke="#191970" d="M2563.5647,-470.9118C2533.4385,-457.8895 2488.7358,-438.5664 2462.1665,-427.0817"/>
-<polygon fill="#191970" stroke="#191970" points="2562.2244,-474.1454 2572.7923,-474.9005 2565.0019,-467.72 2562.2244,-474.1454"/>
+<!-- Node162&#45;&gt;Node163 -->
+<g id="edge87" class="edge">
+<title>Node162&#45;&gt;Node163</title>
+<path fill="none" stroke="#191970" d="M2391.1344,-526.3179C2392.6081,-517.3414 2394.2215,-507.5143 2395.5138,-499.6432"/>
+<polygon fill="#191970" stroke="#191970" points="2387.6625,-525.8617 2389.4961,-536.2967 2394.57,-526.9958 2387.6625,-525.8617"/>
 </g>
-<!-- Node163&#45;&gt;Node146 -->
-<g id="edge95" class="edge">
-<title>Node163&#45;&gt;Node146</title>
-<path fill="none" stroke="#191970" d="M2514.8883,-479.7558C2376.9568,-471.4263 2085.7502,-453.1343 1840,-433 1811.9817,-430.7045 1781.2706,-427.8518 1754.1925,-425.2219"/>
-<polygon fill="#191970" stroke="#191970" points="2514.76,-483.2543 2524.9524,-480.3623 2515.1811,-476.267 2514.76,-483.2543"/>
+<!-- Node165&#45;&gt;Node23 -->
+<g id="edge91" class="edge">
+<title>Node165&#45;&gt;Node23</title>
+<path fill="none" stroke="#191970" d="M2107.7802,-467.8512C2125.775,-449.9334 2147.772,-421.4008 2130,-402 2099.11,-368.2788 1764.8012,-375.1741 1720,-366 1641.5277,-349.9309 1553.311,-317.5928 1504.8484,-298.5249"/>
+<polygon fill="#191970" stroke="#191970" points="2105.2808,-465.3981 2100.4408,-474.8228 2110.1018,-470.4734 2105.2808,-465.3981"/>
 </g>
-<!-- Node163&#45;&gt;Node151 -->
-<g id="edge97" class="edge">
-<title>Node163&#45;&gt;Node151</title>
-<path fill="none" stroke="#191970" d="M2595,-464.7758C2595,-454.4641 2595,-442.0437 2595,-432.5218"/>
-<polygon fill="#191970" stroke="#191970" points="2591.5001,-464.9005 2595,-474.9005 2598.5001,-464.9006 2591.5001,-464.9005"/>
+<!-- Node165&#45;&gt;Node96 -->
+<g id="edge93" class="edge">
+<title>Node165&#45;&gt;Node96</title>
+<path fill="none" stroke="#191970" d="M2046.7279,-472.0779C2002.4547,-459.0678 1933.8731,-438.9145 1893.6062,-427.0817"/>
+<polygon fill="#191970" stroke="#191970" points="2045.752,-475.4391 2056.3332,-474.9005 2047.7256,-468.7231 2045.752,-475.4391"/>
 </g>
-<!-- Node166&#45;&gt;Node21 -->
-<g id="edge101" class="edge">
-<title>Node166&#45;&gt;Node21</title>
-<path fill="none" stroke="#191970" d="M1751.7866,-729.7802C1804.35,-718.11 1884.0553,-700.4137 1932.9219,-689.5643"/>
-<polygon fill="#191970" stroke="#191970" points="1750.981,-726.3738 1741.9773,-731.9581 1752.4982,-733.2074 1750.981,-726.3738"/>
+<!-- Node165&#45;&gt;Node146 -->
+<g id="edge92" class="edge">
+<title>Node165&#45;&gt;Node146</title>
+<path fill="none" stroke="#191970" d="M2157.7004,-473.3278C2231.4257,-461.3385 2348.2245,-442.3446 2424.7437,-429.9009"/>
+<polygon fill="#191970" stroke="#191970" points="2156.9005,-469.9119 2147.592,-474.9717 2158.0242,-476.8211 2156.9005,-469.9119"/>
 </g>
-<!-- Node166&#45;&gt;Node22 -->
-<g id="edge102" class="edge">
-<title>Node166&#45;&gt;Node22</title>
-<path fill="none" stroke="#191970" d="M1712.1755,-723.4934C1731.5239,-697.0507 1766.9506,-648.6342 1781.9707,-628.1068"/>
-<polygon fill="#191970" stroke="#191970" points="1709.1427,-721.7113 1706.0621,-731.8484 1714.7919,-725.8449 1709.1427,-721.7113"/>
+<!-- Node165&#45;&gt;Node153 -->
+<g id="edge94" class="edge">
+<title>Node165&#45;&gt;Node153</title>
+<path fill="none" stroke="#191970" d="M2079.2086,-465.7565C2073.7253,-455.2598 2066.9801,-442.3476 2061.8472,-432.5218"/>
+<polygon fill="#191970" stroke="#191970" points="2076.2529,-467.6576 2083.9854,-474.9005 2082.4573,-464.4165 2076.2529,-467.6576"/>
 </g>
-<!-- Node166&#45;&gt;Node112 -->
-<g id="edge103" class="edge">
-<title>Node166&#45;&gt;Node112</title>
-<path fill="none" stroke="#191970" d="M1695.0799,-721.7396C1688.5052,-684.4379 1677.8446,-601.5151 1698,-536 1702.1323,-522.5678 1711.0933,-509.4972 1718.9133,-499.8848"/>
-<polygon fill="#191970" stroke="#191970" points="1691.6783,-722.5916 1696.9505,-731.7815 1698.56,-721.3097 1691.6783,-722.5916"/>
+<!-- Node168&#45;&gt;Node21 -->
+<g id="edge98" class="edge">
+<title>Node168&#45;&gt;Node21</title>
+<path fill="none" stroke="#191970" d="M1083.6861,-729.0488C1044.0074,-717.373 985.529,-700.1652 949.4269,-689.5419"/>
+<polygon fill="#191970" stroke="#191970" points="1082.7624,-732.4253 1093.3438,-731.8906 1084.7385,-725.71 1082.7624,-732.4253"/>
 </g>
-<!-- Node157&#45;&gt;Node53 -->
-<g id="edge114" class="edge">
-<title>Node157&#45;&gt;Node53</title>
-<path fill="none" stroke="#191970" d="M264.3225,-329.3469C250.3357,-319.3776 233.9692,-307.7121 221.2456,-298.6432"/>
-<polygon fill="#191970" stroke="#191970" points="262.4953,-332.3426 272.6699,-335.2967 266.5582,-326.6423 262.4953,-332.3426"/>
+<!-- Node168&#45;&gt;Node22 -->
+<g id="edge99" class="edge">
+<title>Node168&#45;&gt;Node22</title>
+<path fill="none" stroke="#191970" d="M1158.5444,-727.9307C1218.6825,-702.8561 1345.8246,-649.8443 1397.9594,-628.1068"/>
+<polygon fill="#191970" stroke="#191970" points="1157.0311,-724.7695 1149.1482,-731.8484 1159.725,-731.2304 1157.0311,-724.7695"/>
 </g>
-<!-- Node218&#45;&gt;Node23 -->
-<g id="edge122" class="edge">
-<title>Node218&#45;&gt;Node23</title>
-<path fill="none" stroke="#191970" d="M765.3181,-413.8835C875.4998,-407.5245 1099.1003,-392.4428 1287,-366 1358.2947,-355.9668 1374.5624,-343.9599 1446,-335 1724.7168,-300.0424 1799.2952,-341.2507 2077,-299 2079.8519,-298.5661 2082.7641,-298.0605 2085.6948,-297.5021"/>
-<polygon fill="#191970" stroke="#191970" points="764.7986,-410.4073 755.0144,-414.4717 765.1977,-417.3959 764.7986,-410.4073"/>
+<!-- Node168&#45;&gt;Node112 -->
+<g id="edge100" class="edge">
+<title>Node168&#45;&gt;Node112</title>
+<path fill="none" stroke="#191970" d="M1101.6735,-726.1122C1062.0288,-699.4073 984.9094,-640.2548 955,-567 949.792,-554.2445 950.7773,-549.1147 955,-536 959.3864,-522.3767 968.8592,-509.3091 977.1214,-499.7459"/>
+<polygon fill="#191970" stroke="#191970" points="1100.1682,-729.3123 1110.4422,-731.8999 1104.0243,-723.4701 1100.1682,-729.3123"/>
 </g>
-<!-- Node218&#45;&gt;Node27 -->
-<g id="edge135" class="edge">
-<title>Node218&#45;&gt;Node27</title>
-<path fill="none" stroke="#191970" d="M765.2722,-413.919C900.6106,-406.6116 1212.456,-388.7341 1474,-366 1503.6842,-363.4198 1536.7419,-359.9071 1563.6896,-356.8781"/>
-<polygon fill="#191970" stroke="#191970" points="764.8555,-410.4363 755.058,-414.4685 765.2316,-417.4262 764.8555,-410.4363"/>
+<!-- Node159&#45;&gt;Node53 -->
+<g id="edge111" class="edge">
+<title>Node159&#45;&gt;Node53</title>
+<path fill="none" stroke="#191970" d="M1039.5753,-330.3561C1057.0083,-320.1995 1077.8031,-308.0843 1093.88,-298.7177"/>
+<polygon fill="#191970" stroke="#191970" points="1037.6868,-327.4056 1030.8082,-335.4639 1041.2107,-333.454 1037.6868,-327.4056"/>
 </g>
-<!-- Node218&#45;&gt;Node45 -->
-<g id="edge123" class="edge">
-<title>Node218&#45;&gt;Node45</title>
-<path fill="none" stroke="#191970" d="M765.4493,-414.4087C864.466,-409.0875 1053.104,-395.7575 1211,-366 1301.4104,-348.961 1404.3462,-317.2752 1461.5899,-298.52"/>
-<polygon fill="#191970" stroke="#191970" points="765.0812,-410.9232 755.279,-414.944 765.4492,-417.9135 765.0812,-410.9232"/>
+<!-- Node214&#45;&gt;Node23 -->
+<g id="edge119" class="edge">
+<title>Node214&#45;&gt;Node23</title>
+<path fill="none" stroke="#191970" d="M2204.8395,-407.7742C2190.9968,-405.8332 2176.5337,-403.8297 2163,-402 2039.536,-385.3079 2007.955,-386.1011 1885,-366 1753.5379,-344.5082 1600.1525,-312.3095 1521.1136,-295.1867"/>
+<polygon fill="#191970" stroke="#191970" points="2204.3974,-411.2464 2214.7874,-409.1732 2205.3723,-404.3146 2204.3974,-411.2464"/>
 </g>
-<!-- Node218&#45;&gt;Node46 -->
-<g id="edge127" class="edge">
-<title>Node218&#45;&gt;Node46</title>
-<path fill="none" stroke="#191970" d="M646.9122,-398.2246C630.0334,-390.0609 611.958,-379.3134 598,-366 567.3033,-336.7209 550,-325.9212 550,-283.5 550,-283.5 550,-283.5 550,-216.5 550,-158.3676 613.7289,-117.985 655.8,-97.6597"/>
-<polygon fill="#191970" stroke="#191970" points="645.5874,-401.4681 656.1325,-402.494 648.5288,-395.1161 645.5874,-401.4681"/>
+<!-- Node214&#45;&gt;Node27 -->
+<g id="edge132" class="edge">
+<title>Node214&#45;&gt;Node27</title>
+<path fill="none" stroke="#191970" d="M2205.0299,-409.172C2181.3201,-406.5222 2154.5147,-403.8119 2130,-402 1726.7831,-372.1979 1621.8075,-419.1539 1221,-366 1216.4388,-365.3951 1211.7333,-364.6283 1207.0271,-363.7609"/>
+<polygon fill="#191970" stroke="#191970" points="2204.6552,-412.6518 2214.9859,-410.3011 2205.4441,-405.6964 2204.6552,-412.6518"/>
 </g>
-<!-- Node218&#45;&gt;Node47 -->
-<g id="edge133" class="edge">
-<title>Node218&#45;&gt;Node47</title>
-<path fill="none" stroke="#191970" d="M629.271,-400.9882C606.4955,-393.0841 582.0577,-381.7938 563,-366 529.8093,-338.4936 512,-326.6071 512,-283.5 512,-283.5 512,-283.5 512,-149.5 512,-93.579 557.1197,-92.2807 607,-67 654.04,-43.1589 712.1096,-30.2417 757.7401,-23.3065"/>
-<polygon fill="#191970" stroke="#191970" points="628.3164,-404.3588 638.9096,-404.1716 630.5118,-397.7119 628.3164,-404.3588"/>
+<!-- Node214&#45;&gt;Node45 -->
+<g id="edge120" class="edge">
+<title>Node214&#45;&gt;Node45</title>
+<path fill="none" stroke="#191970" d="M2205.0845,-405.9861C2150.7198,-396.4004 2072.6471,-381.806 2005,-366 1916.5124,-345.3246 1814.7624,-316.0591 1755.6841,-298.5121"/>
+<polygon fill="#191970" stroke="#191970" points="2204.4975,-409.4365 2214.9519,-407.7168 2205.7069,-402.5417 2204.4975,-409.4365"/>
 </g>
-<!-- Node218&#45;&gt;Node48 -->
-<g id="edge134" class="edge">
-<title>Node218&#45;&gt;Node48</title>
-<path fill="none" stroke="#191970" d="M628.5264,-411.5097C578.3015,-405.124 509.4775,-392.133 454,-366 433.969,-356.5643 434.19,-346.0461 415,-335 376.9461,-313.0956 347.7525,-335.2659 323,-299 315.2329,-287.6202 314.5862,-278.9103 323,-268 361.4458,-218.1465 399.843,-250.5633 460,-232 528.6247,-210.8237 607.0935,-181.9115 652.9319,-164.5366"/>
-<polygon fill="#191970" stroke="#191970" points="628.2825,-415.0057 638.6317,-412.7376 629.1269,-408.0569 628.2825,-415.0057"/>
+<!-- Node214&#45;&gt;Node46 -->
+<g id="edge124" class="edge">
+<title>Node214&#45;&gt;Node46</title>
+<path fill="none" stroke="#191970" d="M2340.9989,-409.5292C2364.711,-406.9091 2391.5105,-404.1263 2416,-402 2477.1228,-396.6931 2925.0551,-410.7748 2967,-366 3062.3102,-264.2595 2829.3022,-139.937 2740.0426,-97.5105"/>
+<polygon fill="#191970" stroke="#191970" points="2340.5917,-406.0529 2331.0413,-410.6398 2341.3676,-413.0097 2340.5917,-406.0529"/>
 </g>
-<!-- Node218&#45;&gt;Node49 -->
-<g id="edge125" class="edge">
-<title>Node218&#45;&gt;Node49</title>
-<path fill="none" stroke="#191970" d="M765.4035,-414.4394C879.141,-408.6348 1101.8771,-394.0241 1175,-366 1196.9995,-357.5688 1197.5319,-346.6671 1218,-335 1242.9534,-320.7762 1272.5869,-307.856 1295.9148,-298.5145"/>
-<polygon fill="#191970" stroke="#191970" points="764.8899,-410.9607 755.0778,-414.9575 765.2407,-417.9519 764.8899,-410.9607"/>
+<!-- Node214&#45;&gt;Node47 -->
+<g id="edge130" class="edge">
+<title>Node214&#45;&gt;Node47</title>
+<path fill="none" stroke="#191970" d="M2340.9957,-409.493C2364.7076,-406.8698 2391.5078,-404.0943 2416,-402 2479.6707,-396.5555 2944.5467,-410.9177 2990,-366 3088.28,-268.8781 2942.3444,-84.5665 2895.5987,-30.6631"/>
+<polygon fill="#191970" stroke="#191970" points="2340.5879,-406.0167 2331.0383,-410.6054 2341.3651,-412.9734 2340.5879,-406.0167"/>
 </g>
-<!-- Node218&#45;&gt;Node50 -->
+<!-- Node214&#45;&gt;Node48 -->
 <g id="edge131" class="edge">
-<title>Node218&#45;&gt;Node50</title>
-<path fill="none" stroke="#191970" d="M641.7544,-397.7188C627.9982,-390.0189 614.927,-379.669 607,-366 600.0881,-354.0814 600.09,-346.9197 607,-335 637.0529,-283.1585 668.4874,-290.5625 724,-268 763.6933,-251.8671 809.9091,-239.5739 847.9088,-231.0511"/>
-<polygon fill="#191970" stroke="#191970" points="640.3535,-400.9343 650.8458,-402.4044 643.5603,-394.712 640.3535,-400.9343"/>
+<title>Node214&#45;&gt;Node48</title>
+<path fill="none" stroke="#191970" d="M2235.8334,-397.1618C2222.3193,-388.6569 2207.5449,-377.9595 2196,-366 2163.7642,-332.6067 2094.245,-208.3137 2070.3023,-164.7241"/>
+<polygon fill="#191970" stroke="#191970" points="2234.1232,-400.2181 2244.4855,-402.4253 2237.7613,-394.2378 2234.1232,-400.2181"/>
 </g>
-<!-- Node218&#45;&gt;Node52 -->
-<g id="edge128" class="edge">
-<title>Node218&#45;&gt;Node52</title>
-<path fill="none" stroke="#191970" d="M733.7768,-397.2074C747.999,-388.5178 763.9567,-377.6806 777,-366 800.092,-345.3205 821.7302,-316.2531 833.8779,-298.7128"/>
-<polygon fill="#191970" stroke="#191970" points="731.984,-394.2013 725.1872,-402.3287 735.5688,-400.2138 731.984,-394.2013"/>
+<!-- Node214&#45;&gt;Node49 -->
+<g id="edge122" class="edge">
+<title>Node214&#45;&gt;Node49</title>
+<path fill="none" stroke="#191970" d="M2275.4407,-392.3415C2278.1114,-364.8131 2282.3065,-321.5714 2284.5194,-298.7614"/>
+<polygon fill="#191970" stroke="#191970" points="2271.9481,-392.0977 2274.466,-402.389 2278.9153,-392.7737 2271.9481,-392.0977"/>
 </g>
-<!-- Node218&#45;&gt;Node53 -->
-<g id="edge132" class="edge">
-<title>Node218&#45;&gt;Node53</title>
-<path fill="none" stroke="#191970" d="M628.7685,-413.0577C571.6052,-407.4073 488.8609,-394.6539 422,-366 400.3451,-356.7196 400.1898,-345.2983 379,-335 345.8441,-318.8861 306.7214,-306.911 273.521,-298.5841"/>
-<polygon fill="#191970" stroke="#191970" points="628.4952,-416.5474 638.7794,-414.0006 629.1517,-409.5782 628.4952,-416.5474"/>
+<!-- Node214&#45;&gt;Node50 -->
+<g id="edge128" class="edge">
+<title>Node214&#45;&gt;Node50</title>
+<path fill="none" stroke="#191970" d="M2248.0978,-395.5217C2215.8473,-364.444 2167.5176,-307.0948 2201,-268 2217.3851,-248.8684 2363.1911,-232.0446 2455.7404,-223.1673"/>
+<polygon fill="#191970" stroke="#191970" points="2245.808,-398.1728 2255.4938,-402.4666 2250.5997,-393.0698 2245.808,-398.1728"/>
 </g>
-<!-- Node218&#45;&gt;Node57 -->
-<g id="edge136" class="edge">
-<title>Node218&#45;&gt;Node57</title>
-<path fill="none" stroke="#191970" d="M765.3605,-414.6456C888.7284,-408.9847 1156.6274,-394.3601 1381,-366 1458.931,-356.1497 1476.9767,-344.0908 1555,-335 1840.6244,-301.7209 1916.6063,-341.5326 2201,-299 2204.0077,-298.5502 2207.0818,-298.0228 2210.1748,-297.4393"/>
-<polygon fill="#191970" stroke="#191970" points="765.1339,-411.1522 755.3029,-415.1019 765.4512,-418.145 765.1339,-411.1522"/>
+<!-- Node214&#45;&gt;Node52 -->
+<g id="edge125" class="edge">
+<title>Node214&#45;&gt;Node52</title>
+<path fill="none" stroke="#191970" d="M2206.4163,-399.7371C2172.5528,-390.3514 2130.8659,-378.2564 2094,-366 2029.6797,-344.6161 1956.2018,-315.8964 1913.0856,-298.5951"/>
+<polygon fill="#191970" stroke="#191970" points="2205.6332,-403.1517 2216.2037,-402.435 2207.4933,-396.4034 2205.6332,-403.1517"/>
 </g>
-<!-- Node218&#45;&gt;Node147 -->
-<g id="edge124" class="edge">
-<title>Node218&#45;&gt;Node147</title>
-<path fill="none" stroke="#191970" d="M746.6574,-398.7561C817.6926,-371.9428 946.9181,-323.1647 1012.1134,-298.5558"/>
-<polygon fill="#191970" stroke="#191970" points="745.1526,-395.583 737.0329,-402.389 747.6247,-402.132 745.1526,-395.583"/>
+<!-- Node214&#45;&gt;Node53 -->
+<g id="edge129" class="edge">
+<title>Node214&#45;&gt;Node53</title>
+<path fill="none" stroke="#191970" d="M2205.0283,-409.1927C2181.3185,-406.5446 2154.5134,-403.8302 2130,-402 2032.8549,-394.7472 1345.0784,-400.5615 1254,-366 1233.622,-358.2671 1234.1312,-347.0963 1216,-335 1195.3893,-321.2494 1170.7341,-308.0861 1151.5709,-298.5097"/>
+<polygon fill="#191970" stroke="#191970" points="2204.654,-412.6726 2214.9845,-410.3208 2205.4421,-405.7171 2204.654,-412.6726"/>
 </g>
-<!-- Node218&#45;&gt;Node149 -->
-<g id="edge126" class="edge">
-<title>Node218&#45;&gt;Node149</title>
-<path fill="none" stroke="#191970" d="M629.0186,-408.3651C589.0754,-400.9834 538.6175,-388.0627 498,-366 465.9633,-348.5983 436.3485,-317.3383 420.363,-298.6975"/>
-<polygon fill="#191970" stroke="#191970" points="628.477,-411.8236 638.9347,-410.1247 629.7001,-404.9313 628.477,-411.8236"/>
+<!-- Node214&#45;&gt;Node57 -->
+<g id="edge133" class="edge">
+<title>Node214&#45;&gt;Node57</title>
+<path fill="none" stroke="#191970" d="M2205.161,-401.2417C2180.1008,-393.2039 2152.3791,-381.7602 2130,-366 2104.0497,-347.7248 2082.427,-317.0541 2070.929,-298.7016"/>
+<polygon fill="#191970" stroke="#191970" points="2204.3942,-404.6676 2214.9814,-404.2646 2206.4537,-397.9774 2204.3942,-404.6676"/>
 </g>
-<!-- Node218&#45;&gt;Node150 -->
-<g id="edge129" class="edge">
-<title>Node218&#45;&gt;Node150</title>
-<path fill="none" stroke="#191970" d="M695.1207,-392.3179C694.4509,-383.3414 693.7175,-373.5143 693.1301,-365.6432"/>
-<polygon fill="#191970" stroke="#191970" points="691.6308,-392.5849 695.8654,-402.2967 698.6114,-392.0639 691.6308,-392.5849"/>
+<!-- Node214&#45;&gt;Node147 -->
+<g id="edge121" class="edge">
+<title>Node214&#45;&gt;Node147</title>
+<path fill="none" stroke="#191970" d="M2341.4235,-409.6255C2456.1952,-396.3178 2675.7635,-370.3928 2683,-366 2708.8942,-350.2813 2725.921,-317.8173 2734.1721,-298.6327"/>
+<polygon fill="#191970" stroke="#191970" points="2340.8576,-406.1675 2331.3268,-410.795 2341.6631,-413.121 2340.8576,-406.1675"/>
 </g>
-<!-- Node218&#45;&gt;Node157 -->
-<g id="edge130" class="edge">
-<title>Node218&#45;&gt;Node157</title>
-<path fill="none" stroke="#191970" d="M628.8319,-406.4344C565.4097,-396.1035 468.2822,-380.1912 384,-366 379.4659,-365.2366 374.8041,-364.4472 370.1017,-363.6475"/>
-<polygon fill="#191970" stroke="#191970" points="628.2818,-409.8908 638.7143,-408.0434 629.4067,-402.9818 628.2818,-409.8908"/>
+<!-- Node214&#45;&gt;Node149 -->
+<g id="edge123" class="edge">
+<title>Node214&#45;&gt;Node149</title>
+<path fill="none" stroke="#191970" d="M2306.6713,-397.0839C2351.409,-369.9579 2429.4128,-322.6615 2469.1692,-298.5558"/>
+<polygon fill="#191970" stroke="#191970" points="2304.6582,-394.2114 2297.9219,-402.389 2308.2875,-400.197 2304.6582,-394.2114"/>
 </g>
-<!-- Node195&#45;&gt;Node163 -->
-<g id="edge142" class="edge">
-<title>Node195&#45;&gt;Node163</title>
-<path fill="none" stroke="#191970" d="M2535.7315,-603.9305C2550.2382,-595.1429 2566.8654,-582.6162 2577,-567 2591.9238,-544.0042 2594.6494,-510.5831 2595.0345,-494.2376"/>
-<polygon fill="#191970" stroke="#191970" points="2533.981,-600.8996 2527.0502,-608.913 2537.4655,-606.9708 2533.981,-600.8996"/>
+<!-- Node214&#45;&gt;Node150 -->
+<g id="edge126" class="edge">
+<title>Node214&#45;&gt;Node150</title>
+<path fill="none" stroke="#191970" d="M2341.167,-410.0005C2455.7192,-397.3979 2686.0226,-372.0607 2805.7676,-358.8868"/>
+<polygon fill="#191970" stroke="#191970" points="2340.6749,-406.5334 2331.1176,-411.1061 2341.4404,-413.4915 2340.6749,-406.5334"/>
 </g>
-<!-- Node195&#45;&gt;Node196 -->
-<g id="edge138" class="edge">
-<title>Node195&#45;&gt;Node196</title>
-<path fill="none" stroke="#191970" d="M2507,-598.6079C2507,-586.214 2507,-570.8263 2507,-561.0817"/>
-<polygon fill="#191970" stroke="#191970" points="2503.5001,-598.9005 2507,-608.9005 2510.5001,-598.9006 2503.5001,-598.9005"/>
+<!-- Node214&#45;&gt;Node159 -->
+<g id="edge127" class="edge">
+<title>Node214&#45;&gt;Node159</title>
+<path fill="none" stroke="#191970" d="M2205.0349,-409.1026C2181.3256,-406.447 2154.5192,-403.7508 2130,-402 1668.6756,-369.0583 1549.8299,-415.6167 1090,-366 1087.1664,-365.6942 1084.2852,-365.3485 1081.379,-364.9704"/>
+<polygon fill="#191970" stroke="#191970" points="2204.6591,-412.5823 2214.9907,-410.2353 2205.4505,-405.6272 2204.6591,-412.5823"/>
 </g>
-<!-- Node196&#45;&gt;Node57 -->
+<!-- Node193&#45;&gt;Node165 -->
 <g id="edge139" class="edge">
-<title>Node196&#45;&gt;Node57</title>
-<path fill="none" stroke="#191970" d="M2566.5137,-539.8181C2608.3171,-530.4173 2659.2932,-516.1445 2674,-500 2703.6291,-467.4743 2714.5965,-437.0489 2688,-402 2643.0677,-342.7881 2421.6601,-305.1472 2317.8738,-290.4791"/>
-<polygon fill="#191970" stroke="#191970" points="2565.6757,-536.4186 2556.66,-541.9836 2567.1782,-543.2555 2565.6757,-536.4186"/>
+<title>Node193&#45;&gt;Node165</title>
+<path fill="none" stroke="#191970" d="M2042.8473,-600.138C2031.5802,-583.1362 2019.1952,-557.3372 2029,-536 2037.5979,-517.2895 2056.7195,-502.8242 2071.0805,-494.0531"/>
+<polygon fill="#191970" stroke="#191970" points="2040.2042,-602.4616 2048.8437,-608.5942 2045.9143,-598.4124 2040.2042,-602.4616"/>
 </g>
-<!-- Node196&#45;&gt;Node96 -->
-<g id="edge140" class="edge">
-<title>Node196&#45;&gt;Node96</title>
-<path fill="none" stroke="#191970" d="M2497.6382,-532.7765C2483.1425,-503.7849 2455.79,-449.08 2444.7748,-427.0496"/>
-<polygon fill="#191970" stroke="#191970" points="2494.5788,-534.484 2502.1815,-541.8631 2500.8398,-531.3535 2494.5788,-534.484"/>
+<!-- Node193&#45;&gt;Node194 -->
+<g id="edge135" class="edge">
+<title>Node193&#45;&gt;Node194</title>
+<path fill="none" stroke="#191970" d="M2068.2536,-600.1099C2075.9827,-587.4795 2085.9301,-571.2241 2092.1366,-561.0817"/>
+<polygon fill="#191970" stroke="#191970" points="2065.1086,-598.544 2062.8743,-608.9005 2071.0794,-602.1977 2065.1086,-598.544"/>
 </g>
-<!-- Node196&#45;&gt;Node163 -->
-<g id="edge141" class="edge">
-<title>Node196&#45;&gt;Node163</title>
-<path fill="none" stroke="#191970" d="M2527.8101,-535.6559C2544.775,-522.7395 2568.1796,-504.9201 2582.4151,-494.0817"/>
-<polygon fill="#191970" stroke="#191970" points="2525.4445,-533.058 2519.6083,-541.9005 2529.6849,-538.6275 2525.4445,-533.058"/>
+<!-- Node194&#45;&gt;Node57 -->
+<g id="edge136" class="edge">
+<title>Node194&#45;&gt;Node57</title>
+<path fill="none" stroke="#191970" d="M2129.3301,-537.0348C2143.9165,-528.4978 2159.8235,-516.1666 2168,-500 2174.2183,-487.7053 2170.0844,-482.6192 2168,-469 2163.279,-438.153 2165.6095,-427.0502 2147,-402 2130.0188,-379.1417 2111.7293,-388.2831 2094,-366 2077.8353,-345.6833 2069.0672,-316.237 2064.9746,-298.5691"/>
+<polygon fill="#191970" stroke="#191970" points="2127.5964,-533.9936 2120.4995,-541.8603 2130.9532,-540.1363 2127.5964,-533.9936"/>
+</g>
+<!-- Node194&#45;&gt;Node96 -->
+<g id="edge137" class="edge">
+<title>Node194&#45;&gt;Node96</title>
+<path fill="none" stroke="#191970" d="M2071.9474,-536.7698C2023.037,-509.1158 1918.7257,-450.1382 1877.89,-427.0496"/>
+<polygon fill="#191970" stroke="#191970" points="2070.5279,-539.988 2080.9555,-541.8631 2073.9732,-533.8945 2070.5279,-539.988"/>
+</g>
+<!-- Node194&#45;&gt;Node165 -->
+<g id="edge138" class="edge">
+<title>Node194&#45;&gt;Node165</title>
+<path fill="none" stroke="#191970" d="M2095.3788,-531.9863C2093.7053,-519.5286 2091.6094,-503.9258 2090.2871,-494.0817"/>
+<polygon fill="#191970" stroke="#191970" points="2091.9103,-532.4555 2096.7105,-541.9005 2098.848,-531.5236 2091.9103,-532.4555"/>
 </g>
 </g>
 </svg>
diff --git a/docs/reference/api/doxygen/c__runtime__api_8h.html b/docs/reference/api/doxygen/c__runtime__api_8h.html
index 380096fdb7..8d59221e1c 100644
--- a/docs/reference/api/doxygen/c__runtime__api_8h.html
+++ b/docs/reference/api/doxygen/c__runtime__api_8h.html
@@ -84,7 +84,7 @@ Include dependency graph for c_runtime_api.h:</div>
 </div><div class="textblock"><div class="dynheader">
 This graph shows which files directly or indirectly include this file:</div>
 <div class="dyncontent">
-<div class="center"><iframe scrolling="no" frameborder="0" src="c__runtime__api_8h__dep__incl.svg" width="3588" height="1124"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<div class="center"><iframe scrolling="no" frameborder="0" src="c__runtime__api_8h__dep__incl.svg" width="3306" height="1035"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </div>
 </div>
 </div>
diff --git a/docs/reference/api/doxygen/c__runtime__api_8h__dep__incl.svg b/docs/reference/api/doxygen/c__runtime__api_8h__dep__incl.svg
index be324d2a42..698e4586cf 100644
--- a/docs/reference/api/doxygen/c__runtime__api_8h__dep__incl.svg
+++ b/docs/reference/api/doxygen/c__runtime__api_8h__dep__incl.svg
@@ -4,1224 +4,1223 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: include/tvm/runtime/c_runtime_api.h Pages: 1 -->
-<svg width="2691pt" height="843pt"
- viewBox="0.00 0.00 2691.00 843.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
-<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 839)">
+<svg width="2479pt" height="776pt"
+ viewBox="0.00 0.00 2479.00 776.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 772)">
 <title>include/tvm/runtime/c_runtime_api.h</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-839 2687,-839 2687,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-772 2475,-772 2475,4 -4,4"/>
 <!-- Node4 -->
 <g id="node1" class="node">
 <title>Node4</title>
-<polygon fill="#bfbfbf" stroke="#000000" points="1500,-804.5 1500,-834.5 1616,-834.5 1616,-804.5 1500,-804.5"/>
-<text text-anchor="start" x="1508" y="-822.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
-<text text-anchor="middle" x="1558" y="-811.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/c_runtime_api.h</text>
+<polygon fill="#bfbfbf" stroke="#000000" points="949,-737.5 949,-767.5 1065,-767.5 1065,-737.5 949,-737.5"/>
+<text text-anchor="start" x="957" y="-755.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
+<text text-anchor="middle" x="1007" y="-744.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/c_runtime_api.h</text>
 </g>
 <!-- Node5 -->
 <g id="node2" class="node">
 <title>Node5</title>
 <g id="a_node2"><a xlink:href="compute__dag_8h.html" target="_top" xlink:title="The auto&#45;scheduler&#39;s computational graph and related program analyses. ">
-<polygon fill="#ffffff" stroke="#000000" points="1770,-469.5 1770,-499.5 1922,-499.5 1922,-469.5 1770,-469.5"/>
-<text text-anchor="start" x="1778" y="-487.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/auto_scheduler</text>
-<text text-anchor="middle" x="1846" y="-476.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/compute_dag.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="78,-402.5 78,-432.5 230,-432.5 230,-402.5 78,-402.5"/>
+<text text-anchor="start" x="86" y="-420.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/auto_scheduler</text>
+<text text-anchor="middle" x="154" y="-409.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/compute_dag.h</text>
 </a>
 </g>
 </g>
 <!-- Node4&#45;&gt;Node5 -->
 <g id="edge1" class="edge">
 <title>Node4&#45;&gt;Node5</title>
-<path fill="none" stroke="#191970" d="M1572.5071,-795.6108C1577.5776,-787.0009 1583.1988,-777.1514 1588,-768 1610.3535,-725.3927 1605.0574,-708.4368 1634,-670 1690.3362,-595.1835 1781.0179,-528.4985 1823.1367,-499.6449"/>
-<polygon fill="#191970" stroke="#191970" points="1569.397,-793.9929 1567.286,-804.3753 1575.4108,-797.5754 1569.397,-793.9929"/>
+<path fill="none" stroke="#191970" d="M938.6816,-750.0014C756.4636,-743.0573 270.8579,-722.6103 244,-701 214.2483,-677.0614 220,-656.6867 220,-618.5 220,-618.5 220,-618.5 220,-551.5 220,-504.3026 186.6415,-456.4861 167.3561,-432.7859"/>
+<polygon fill="#191970" stroke="#191970" points="938.8196,-753.509 948.945,-750.3904 939.0848,-746.5141 938.8196,-753.509"/>
 </g>
 <!-- Node13 -->
 <g id="node7" class="node">
 <title>Node13</title>
 <g id="a_node7"><a xlink:href="node_8h.html" target="_top" xlink:title="Definitions and helper macros for IR/AST nodes. ">
-<polygon fill="#ffffff" stroke="#000000" points="858.5,-475 858.5,-494 995.5,-494 995.5,-475 858.5,-475"/>
-<text text-anchor="middle" x="927" y="-482" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/node/node.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="958.5,-408 958.5,-427 1095.5,-427 1095.5,-408 958.5,-408"/>
+<text text-anchor="middle" x="1027" y="-415" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/node/node.h</text>
 </a>
 </g>
 </g>
 <!-- Node4&#45;&gt;Node13 -->
 <g id="edge6" class="edge">
 <title>Node4&#45;&gt;Node13</title>
-<path fill="none" stroke="#191970" d="M1489.7942,-818.327C1256.9775,-814.0592 505.6994,-798.0116 464,-768 427.4253,-741.6768 412.4032,-707.0867 438,-670 491.1883,-592.9365 776.0801,-519.6405 884.8007,-494.0629"/>
-<polygon fill="#191970" stroke="#191970" points="1489.7677,-821.827 1499.8297,-818.5095 1489.895,-814.8282 1489.7677,-821.827"/>
+<path fill="none" stroke="#191970" d="M938.664,-748.1742C871.5815,-742.3338 776.0598,-729.2045 753,-701 698.563,-634.4179 576.8657,-697.7106 840,-469 859.824,-451.7694 924.9277,-436.4147 972.7651,-427.048"/>
+<polygon fill="#191970" stroke="#191970" points="938.4999,-751.6726 948.7565,-749.0171 939.0826,-744.6969 938.4999,-751.6726"/>
 </g>
-<!-- Node180 -->
+<!-- Node178 -->
 <g id="node20" class="node">
-<title>Node180</title>
+<title>Node178</title>
 <g id="a_node20"><a xlink:href="tir_2expr_8h.html" target="_top" xlink:title="TIR expressions. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="157.5,-140 157.5,-159 278.5,-159 278.5,-140 157.5,-140"/>
-<text text-anchor="middle" x="218" y="-147" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/tir/expr.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="966.5,-73 966.5,-92 1087.5,-92 1087.5,-73 966.5,-73"/>
+<text text-anchor="middle" x="1027" y="-80" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/tir/expr.h</text>
 </a>
 </g>
 </g>
-<!-- Node4&#45;&gt;Node180 -->
+<!-- Node4&#45;&gt;Node178 -->
 <g id="edge121" class="edge">
-<title>Node4&#45;&gt;Node180</title>
-<path fill="none" stroke="#191970" d="M1489.7755,-818.3853C1231.2208,-813.961 319.8704,-796.4816 193,-768 101.9794,-747.5665 0,-778.786 0,-685.5 0,-685.5 0,-685.5 0,-278 0,-204.632 92.7704,-172.3028 157.2748,-158.6916"/>
-<polygon fill="#191970" stroke="#191970" points="1489.727,-821.8849 1499.7852,-818.5556 1489.8462,-814.8859 1489.727,-821.8849"/>
+<title>Node4&#45;&gt;Node178</title>
+<path fill="none" stroke="#191970" d="M938.6073,-749.8407C762.0603,-742.6877 303.0134,-722.1432 278,-701 249.1859,-676.6442 258,-656.2287 258,-618.5 258,-618.5 258,-618.5 258,-417.5 258,-254.4057 344.5769,-205.8334 491,-134 580.8661,-89.9128 840.7229,-110.7934 940,-98 952.4773,-96.3921 965.8875,-94.224 978.3528,-92.0195"/>
+<polygon fill="#191970" stroke="#191970" points="938.6831,-753.3465 948.816,-750.2522 938.9651,-746.3522 938.6831,-753.3465"/>
 </g>
-<!-- Node194 -->
-<g id="node29" class="node">
-<title>Node194</title>
-<g id="a_node29"><a xlink:href="reflection_8h.html" target="_top" xlink:title="Reflection and serialization of compiler IR/AST nodes. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="861.5,-542 861.5,-561 1020.5,-561 1020.5,-542 861.5,-542"/>
-<text text-anchor="middle" x="941" y="-549" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/node/reflection.h</text>
+<!-- Node191 -->
+<g id="node28" class="node">
+<title>Node191</title>
+<g id="a_node28"><a xlink:href="reflection_8h.html" target="_top" xlink:title="Reflection and serialization of compiler IR/AST nodes. ">
+<polygon fill="#ffffff" stroke="#ff0000" points="1172.5,-475 1172.5,-494 1331.5,-494 1331.5,-475 1172.5,-475"/>
+<text text-anchor="middle" x="1252" y="-482" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/node/reflection.h</text>
 </a>
 </g>
 </g>
-<!-- Node4&#45;&gt;Node194 -->
-<g id="edge57" class="edge">
-<title>Node4&#45;&gt;Node194</title>
-<path fill="none" stroke="#191970" d="M1489.6438,-818.9697C1271.756,-816.8874 602.6937,-807.3259 514,-768 475.5024,-750.9305 463.1156,-739.9065 447,-701 441.7275,-688.271 438.265,-680.6549 447,-670 498.7235,-606.9074 547.5117,-652.374 627,-634 729.693,-610.2622 849.9035,-577.1831 907.2441,-561.0759"/>
-<polygon fill="#191970" stroke="#191970" points="1489.642,-822.4698 1499.6742,-819.0633 1489.7073,-815.4701 1489.642,-822.4698"/>
+<!-- Node4&#45;&gt;Node191 -->
+<g id="edge52" class="edge">
+<title>Node4&#45;&gt;Node191</title>
+<path fill="none" stroke="#191970" d="M938.6457,-747.5195C874.3446,-741.2439 784.776,-727.8519 763,-701 754.3217,-690.2988 756.2038,-681.985 763,-670 820.224,-569.0862 878.6547,-577.4665 987,-536 1046.9668,-513.0492 1118.6443,-499.9137 1172.3244,-492.661"/>
+<polygon fill="#191970" stroke="#191970" points="938.4511,-751.0165 948.7345,-748.4665 939.1054,-744.0472 938.4511,-751.0165"/>
 </g>
-<!-- Node200 -->
+<!-- Node196 -->
 <g id="node31" class="node">
-<title>Node200</title>
+<title>Node196</title>
 <g id="a_node31"><a xlink:href="serialization_8h.html" target="_top" xlink:title="include/tvm/node/serialization.h">
-<polygon fill="#ffffff" stroke="#000000" points="488.5,-676 488.5,-695 661.5,-695 661.5,-676 488.5,-676"/>
-<text text-anchor="middle" x="575" y="-683" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/node/serialization.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="1326.5,-609 1326.5,-628 1499.5,-628 1499.5,-609 1326.5,-609"/>
+<text text-anchor="middle" x="1413" y="-616" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/node/serialization.h</text>
 </a>
 </g>
 </g>
-<!-- Node4&#45;&gt;Node200 -->
-<g id="edge61" class="edge">
-<title>Node4&#45;&gt;Node200</title>
-<path fill="none" stroke="#191970" d="M1489.5909,-816.4183C1336.914,-809.2314 972.2842,-790.2689 851,-768 756.3107,-750.6141 647.9656,-712.7267 600.0422,-695.0007"/>
-<polygon fill="#191970" stroke="#191970" points="1489.6212,-819.9234 1499.7739,-816.8949 1489.9485,-812.9311 1489.6212,-819.9234"/>
+<!-- Node4&#45;&gt;Node196 -->
+<g id="edge58" class="edge">
+<title>Node4&#45;&gt;Node196</title>
+<path fill="none" stroke="#191970" d="M1075.0781,-745.9254C1130.1168,-739.0722 1208.8784,-725.7423 1274,-701 1324.1046,-681.9633 1376.8189,-645.4088 1400.3577,-628.0733"/>
+<polygon fill="#191970" stroke="#191970" points="1074.5946,-742.4582 1065.0858,-747.1309 1075.4331,-749.4078 1074.5946,-742.4582"/>
 </g>
 <!-- Node24 -->
 <g id="node32" class="node">
 <title>Node24</title>
 <g id="a_node32"><a xlink:href="relay_2qnn_2transform_8h.html" target="_top" xlink:title="include/tvm/relay/qnn\l/transform.h">
-<polygon fill="#ffffff" stroke="#000000" points="1815.5,-737.5 1815.5,-767.5 1938.5,-767.5 1938.5,-737.5 1815.5,-737.5"/>
-<text text-anchor="start" x="1823.5" y="-755.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/qnn</text>
-<text text-anchor="middle" x="1877" y="-744.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/transform.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="286.5,-670.5 286.5,-700.5 409.5,-700.5 409.5,-670.5 286.5,-670.5"/>
+<text text-anchor="start" x="294.5" y="-688.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/qnn</text>
+<text text-anchor="middle" x="348" y="-677.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/transform.h</text>
 </a>
 </g>
 </g>
 <!-- Node4&#45;&gt;Node24 -->
-<g id="edge62" class="edge">
+<g id="edge59" class="edge">
 <title>Node4&#45;&gt;Node24</title>
-<path fill="none" stroke="#191970" d="M1626.1779,-805.5368C1676.0203,-795.2814 1745.2416,-780.9403 1806,-768 1809.085,-767.343 1812.2445,-766.6664 1815.4347,-765.9805"/>
-<polygon fill="#191970" stroke="#191970" points="1625.2516,-802.154 1616.1615,-807.5964 1626.6615,-809.0105 1625.2516,-802.154"/>
+<path fill="none" stroke="#191970" d="M938.4087,-749.3361C828.3492,-743.622 605.7113,-729.4383 419,-701 416.0065,-700.5441 412.9503,-700.0374 409.8694,-699.4935"/>
+<polygon fill="#191970" stroke="#191970" points="938.5373,-752.8471 948.7032,-749.8628 938.8951,-745.8563 938.5373,-752.8471"/>
 </g>
-<!-- Node201 -->
+<!-- Node197 -->
 <g id="node33" class="node">
-<title>Node201</title>
+<title>Node197</title>
 <g id="a_node33"><a xlink:href="builtin__fp16_8h.html" target="_top" xlink:title="Functions for conversion between fp32 and fp16. ">
-<polygon fill="#ffffff" stroke="#000000" points="1957,-737.5 1957,-767.5 2073,-767.5 2073,-737.5 1957,-737.5"/>
-<text text-anchor="start" x="1965" y="-755.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
-<text text-anchor="middle" x="2015" y="-744.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/builtin_fp16.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="428,-670.5 428,-700.5 544,-700.5 544,-670.5 428,-670.5"/>
+<text text-anchor="start" x="436" y="-688.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
+<text text-anchor="middle" x="486" y="-677.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/builtin_fp16.h</text>
 </a>
 </g>
 </g>
-<!-- Node4&#45;&gt;Node201 -->
-<g id="edge63" class="edge">
-<title>Node4&#45;&gt;Node201</title>
-<path fill="none" stroke="#191970" d="M1626.4254,-812.3798C1704.3544,-803.8218 1835.3473,-788.0716 1947,-768 1950.2177,-767.4216 1953.513,-766.7927 1956.8339,-766.1305"/>
-<polygon fill="#191970" stroke="#191970" points="1625.8179,-808.9252 1616.2563,-813.4887 1626.5769,-815.8839 1625.8179,-808.9252"/>
+<!-- Node4&#45;&gt;Node197 -->
+<g id="edge60" class="edge">
+<title>Node4&#45;&gt;Node197</title>
+<path fill="none" stroke="#191970" d="M938.6653,-747.2016C849.5159,-739.7291 688.8773,-724.345 553,-701 550.0583,-700.4946 547.0522,-699.9401 544.0223,-699.3509"/>
+<polygon fill="#191970" stroke="#191970" points="938.3932,-750.6909 948.6485,-748.0303 938.9724,-743.7149 938.3932,-750.6909"/>
 </g>
-<!-- Node202 -->
+<!-- Node198 -->
 <g id="node34" class="node">
-<title>Node202</title>
+<title>Node198</title>
 <g id="a_node34"><a xlink:href="c__backend__api_8h.html" target="_top" xlink:title="TVM runtime backend API. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="2091,-737.5 2091,-767.5 2207,-767.5 2207,-737.5 2091,-737.5"/>
-<text text-anchor="start" x="2099" y="-755.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
-<text text-anchor="middle" x="2149" y="-744.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/c_backend_api.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="562,-670.5 562,-700.5 678,-700.5 678,-670.5 562,-670.5"/>
+<text text-anchor="start" x="570" y="-688.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
+<text text-anchor="middle" x="620" y="-677.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/c_backend_api.h</text>
 </a>
 </g>
 </g>
-<!-- Node4&#45;&gt;Node202 -->
-<g id="edge64" class="edge">
-<title>Node4&#45;&gt;Node202</title>
-<path fill="none" stroke="#191970" d="M1626.0861,-815.6303C1726.3954,-809.2922 1919.5734,-794.6551 2082,-768 2084.9454,-767.5166 2087.9545,-766.9799 2090.9867,-766.4047"/>
-<polygon fill="#191970" stroke="#191970" points="1625.8476,-812.1383 1616.0849,-816.2541 1626.2834,-819.1247 1625.8476,-812.1383"/>
+<!-- Node4&#45;&gt;Node198 -->
+<g id="edge61" class="edge">
+<title>Node4&#45;&gt;Node198</title>
+<path fill="none" stroke="#191970" d="M938.8329,-742.3617C874.9149,-732.6172 776.7587,-717.0416 692,-701 687.4772,-700.144 682.8026,-699.2222 678.1084,-698.2701"/>
+<polygon fill="#191970" stroke="#191970" points="938.3777,-745.8326 948.79,-743.8747 939.4294,-738.912 938.3777,-745.8326"/>
 </g>
-<!-- Node206 -->
+<!-- Node202 -->
 <g id="node35" class="node">
-<title>Node206</title>
+<title>Node202</title>
 <g id="a_node35"><a xlink:href="graph__executor_8h.html" target="_top" xlink:title="Tiny AoT executor. ">
-<polygon fill="#ffffff" stroke="#000000" points="2304.5,-603.5 2304.5,-633.5 2423.5,-633.5 2423.5,-603.5 2304.5,-603.5"/>
-<text text-anchor="start" x="2312.5" y="-621.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
-<text text-anchor="middle" x="2364" y="-610.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/crt/graph_executor.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="54.5,-536.5 54.5,-566.5 173.5,-566.5 173.5,-536.5 54.5,-536.5"/>
+<text text-anchor="start" x="62.5" y="-554.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
+<text text-anchor="middle" x="114" y="-543.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/crt/graph_executor.h</text>
 </a>
 </g>
 </g>
-<!-- Node4&#45;&gt;Node206 -->
-<g id="edge65" class="edge">
-<title>Node4&#45;&gt;Node206</title>
-<path fill="none" stroke="#191970" d="M1626.2715,-817.7835C1820.1927,-812.5781 2362.0458,-795.6145 2388,-768 2423.106,-730.6482 2390.805,-663.4573 2373.4184,-633.5574"/>
-<polygon fill="#191970" stroke="#191970" points="1626.1046,-814.2866 1616.2014,-818.0516 1626.291,-821.2841 1626.1046,-814.2866"/>
+<!-- Node4&#45;&gt;Node202 -->
+<g id="edge62" class="edge">
+<title>Node4&#45;&gt;Node202</title>
+<path fill="none" stroke="#191970" d="M938.8692,-752.1262C756.2,-750.5678 265.3105,-742.5204 206,-701 167.0351,-673.7225 189.3442,-642.6018 163,-603 154.1139,-589.6419 141.6169,-576.5701 131.4679,-566.9386"/>
+<polygon fill="#191970" stroke="#191970" points="938.8658,-755.6262 948.894,-752.2078 938.9229,-748.6264 938.8658,-755.6262"/>
 </g>
-<!-- Node205 -->
+<!-- Node201 -->
 <g id="node36" class="node">
-<title>Node205</title>
+<title>Node201</title>
 <g id="a_node36"><a xlink:href="crt_2packed__func_8h.html" target="_top" xlink:title="Type&#45;erased function used across TVM API. ">
-<polygon fill="#ffffff" stroke="#000000" points="2263,-670.5 2263,-700.5 2379,-700.5 2379,-670.5 2263,-670.5"/>
-<text text-anchor="start" x="2271" y="-688.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
-<text text-anchor="middle" x="2321" y="-677.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/crt/packed_func.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="38,-603.5 38,-633.5 154,-633.5 154,-603.5 38,-603.5"/>
+<text text-anchor="start" x="46" y="-621.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
+<text text-anchor="middle" x="96" y="-610.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/crt/packed_func.h</text>
 </a>
 </g>
 </g>
-<!-- Node4&#45;&gt;Node205 -->
-<g id="edge66" class="edge">
-<title>Node4&#45;&gt;Node205</title>
-<path fill="none" stroke="#191970" d="M1626.163,-816.4728C1783.4186,-809.1594 2162.2712,-789.5349 2216,-768 2236.2314,-759.8911 2236.9134,-750.5318 2254,-737 2269.6417,-724.6125 2287.5733,-710.852 2300.9472,-700.6735"/>
-<polygon fill="#191970" stroke="#191970" points="1625.9588,-812.9785 1616.1311,-816.9367 1626.2821,-819.971 1625.9588,-812.9785"/>
+<!-- Node4&#45;&gt;Node201 -->
+<g id="edge63" class="edge">
+<title>Node4&#45;&gt;Node201</title>
+<path fill="none" stroke="#191970" d="M938.8803,-751.3226C751.2634,-747.6602 236.3454,-734.6304 168,-701 138.1499,-686.3118 115.4648,-653.1528 104.0978,-633.6399"/>
+<polygon fill="#191970" stroke="#191970" points="938.8334,-754.8222 948.8989,-751.5153 938.9681,-747.8235 938.8334,-754.8222"/>
 </g>
-<!-- Node207 -->
+<!-- Node203 -->
 <g id="node37" class="node">
-<title>Node207</title>
+<title>Node203</title>
 <g id="a_node37"><a xlink:href="page__allocator_8h.html" target="_top" xlink:title="An implementation of a dynamic memory allocator for microcontrollers. ">
-<polygon fill="#ffffff" stroke="#000000" points="2435,-737.5 2435,-767.5 2551,-767.5 2551,-737.5 2435,-737.5"/>
-<text text-anchor="start" x="2443" y="-755.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
-<text text-anchor="middle" x="2493" y="-744.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/crt/page_allocator.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="772,-670.5 772,-700.5 888,-700.5 888,-670.5 772,-670.5"/>
+<text text-anchor="start" x="780" y="-688.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
+<text text-anchor="middle" x="830" y="-677.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/crt/page_allocator.h</text>
 </a>
 </g>
 </g>
-<!-- Node4&#45;&gt;Node207 -->
-<g id="edge68" class="edge">
-<title>Node4&#45;&gt;Node207</title>
-<path fill="none" stroke="#191970" d="M1626.3199,-816.7715C1802.2609,-809.5286 2267.8125,-788.9353 2421,-768 2425.5607,-767.3767 2430.2625,-766.6222 2434.9756,-765.7857"/>
-<polygon fill="#191970" stroke="#191970" points="1626.0032,-813.2815 1616.155,-817.1884 1626.29,-820.2756 1626.0032,-813.2815"/>
+<!-- Node4&#45;&gt;Node203 -->
+<g id="edge65" class="edge">
+<title>Node4&#45;&gt;Node203</title>
+<path fill="none" stroke="#191970" d="M957.7702,-733.865C930.064,-723.3773 895.9165,-710.4515 869.8955,-700.6017"/>
+<polygon fill="#191970" stroke="#191970" points="956.6863,-737.197 967.2777,-737.4639 959.1644,-730.6503 956.6863,-737.197"/>
 </g>
-<!-- Node208 -->
+<!-- Node204 -->
 <g id="node38" class="node">
-<title>Node208</title>
+<title>Node204</title>
 <g id="a_node38"><a xlink:href="platform_8h.html" target="_top" xlink:title="The virtual memory manager for micro&#45;controllers. ">
-<polygon fill="#ffffff" stroke="#000000" points="2263,-737.5 2263,-767.5 2379,-767.5 2379,-737.5 2263,-737.5"/>
-<text text-anchor="start" x="2271" y="-755.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
-<text text-anchor="middle" x="2321" y="-744.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/crt/platform.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="0,-670.5 0,-700.5 116,-700.5 116,-670.5 0,-670.5"/>
+<text text-anchor="start" x="8" y="-688.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
+<text text-anchor="middle" x="58" y="-677.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/crt/platform.h</text>
 </a>
 </g>
 </g>
-<!-- Node4&#45;&gt;Node208 -->
-<g id="edge69" class="edge">
-<title>Node4&#45;&gt;Node208</title>
-<path fill="none" stroke="#191970" d="M1626.2139,-817.5278C1750.5859,-813.242 2022.3006,-800.691 2249,-768 2253.556,-767.343 2258.2544,-766.5643 2262.9651,-765.711"/>
-<polygon fill="#191970" stroke="#191970" points="1625.9545,-814.0345 1616.0783,-817.8699 1626.1907,-821.0305 1625.9545,-814.0345"/>
+<!-- Node4&#45;&gt;Node204 -->
+<g id="edge66" class="edge">
+<title>Node4&#45;&gt;Node204</title>
+<path fill="none" stroke="#191970" d="M938.7922,-749.8507C760.9395,-742.7252 286.0294,-722.2483 130,-701 125.439,-700.3789 120.737,-699.6259 116.0237,-698.7905"/>
+<polygon fill="#191970" stroke="#191970" points="938.678,-753.3488 948.8096,-750.2505 938.9572,-746.3544 938.678,-753.3488"/>
 </g>
-<!-- Node209 -->
+<!-- Node205 -->
 <g id="node39" class="node">
-<title>Node209</title>
+<title>Node205</title>
 <g id="a_node39"><a xlink:href="data__type_8h.html" target="_top" xlink:title="include/tvm/runtime\l/data_type.h">
-<polygon fill="#ffffff" stroke="#ff0000" points="202,-737.5 202,-767.5 318,-767.5 318,-737.5 202,-737.5"/>
-<text text-anchor="start" x="210" y="-755.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
-<text text-anchor="middle" x="260" y="-744.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/data_type.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="906,-670.5 906,-700.5 1022,-700.5 1022,-670.5 906,-670.5"/>
+<text text-anchor="start" x="914" y="-688.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
+<text text-anchor="middle" x="964" y="-677.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/data_type.h</text>
 </a>
 </g>
 </g>
-<!-- Node4&#45;&gt;Node209 -->
-<g id="edge71" class="edge">
-<title>Node4&#45;&gt;Node209</title>
-<path fill="none" stroke="#191970" d="M1489.5664,-818.1584C1266.0139,-813.535 558.6086,-796.9377 332,-768 327.434,-767.4169 322.7283,-766.6913 318.0126,-765.8748"/>
-<polygon fill="#191970" stroke="#191970" points="1489.7723,-821.6633 1499.8421,-818.3696 1489.9162,-814.6648 1489.7723,-821.6633"/>
+<!-- Node4&#45;&gt;Node205 -->
+<g id="edge68" class="edge">
+<title>Node4&#45;&gt;Node205</title>
+<path fill="none" stroke="#191970" d="M991.7725,-728.7735C985.7666,-719.4154 979.0449,-708.9421 973.7188,-700.6432"/>
+<polygon fill="#191970" stroke="#191970" points="988.8958,-730.7712 997.2426,-737.2967 994.7869,-726.9903 988.8958,-730.7712"/>
 </g>
-<!-- Node212 -->
+<!-- Node208 -->
 <g id="node40" class="node">
-<title>Node212</title>
+<title>Node208</title>
 <g id="a_node40"><a xlink:href="ndarray_8h.html" target="_top" xlink:title="A device&#45;independent managed NDArray abstraction. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="1014,-670.5 1014,-700.5 1130,-700.5 1130,-670.5 1014,-670.5"/>
-<text text-anchor="start" x="1022" y="-688.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
-<text text-anchor="middle" x="1072" y="-677.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/ndarray.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="1116,-603.5 1116,-633.5 1232,-633.5 1232,-603.5 1116,-603.5"/>
+<text text-anchor="start" x="1124" y="-621.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
+<text text-anchor="middle" x="1174" y="-610.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/ndarray.h</text>
 </a>
 </g>
 </g>
-<!-- Node4&#45;&gt;Node212 -->
-<g id="edge100" class="edge">
-<title>Node4&#45;&gt;Node212</title>
-<path fill="none" stroke="#191970" d="M1489.6894,-813.4807C1426.2466,-806.6398 1329.6992,-793.0212 1249,-768 1193.1404,-750.6804 1132.1945,-719.243 1098.3796,-700.5776"/>
-<polygon fill="#191970" stroke="#191970" points="1489.6577,-816.9966 1499.9687,-814.561 1490.3893,-810.0349 1489.6577,-816.9966"/>
+<!-- Node4&#45;&gt;Node208 -->
+<g id="edge99" class="edge">
+<title>Node4&#45;&gt;Node208</title>
+<path fill="none" stroke="#191970" d="M1033.942,-730.8819C1067.9669,-703.5805 1125.6333,-657.3092 1155.2364,-633.5558"/>
+<polygon fill="#191970" stroke="#191970" points="1031.4416,-728.4007 1025.8324,-737.389 1035.8224,-733.8605 1031.4416,-728.4007"/>
 </g>
-<!-- Node214 -->
+<!-- Node210 -->
 <g id="node41" class="node">
-<title>Node214</title>
+<title>Node210</title>
 <g id="a_node41"><a xlink:href="device__api_8h.html" target="_top" xlink:title="Abstract device memory management API. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="1327,-536.5 1327,-566.5 1443,-566.5 1443,-536.5 1327,-536.5"/>
-<text text-anchor="start" x="1335" y="-554.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
-<text text-anchor="middle" x="1385" y="-543.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/device_api.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="1784,-469.5 1784,-499.5 1900,-499.5 1900,-469.5 1784,-469.5"/>
+<text text-anchor="start" x="1792" y="-487.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
+<text text-anchor="middle" x="1842" y="-476.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/device_api.h</text>
 </a>
 </g>
 </g>
-<!-- Node4&#45;&gt;Node214 -->
-<g id="edge95" class="edge">
-<title>Node4&#45;&gt;Node214</title>
-<path fill="none" stroke="#191970" d="M1550.5323,-794.9129C1536.7167,-752.3471 1503.9449,-663.6728 1454,-603 1442.0298,-588.4587 1425.1276,-575.7557 1411.0481,-566.604"/>
-<polygon fill="#191970" stroke="#191970" points="1547.2011,-795.9871 1553.5621,-804.46 1553.8732,-793.8697 1547.2011,-795.9871"/>
+<!-- Node4&#45;&gt;Node210 -->
+<g id="edge94" class="edge">
+<title>Node4&#45;&gt;Node210</title>
+<path fill="none" stroke="#191970" d="M1075.4346,-750.1545C1243.3476,-744.0363 1666.3378,-726.2934 1725,-701 1805.8786,-666.1275 1835.5599,-647.6315 1871,-567 1881.0798,-544.0671 1866.1189,-516.4239 1854.209,-499.6691"/>
+<polygon fill="#191970" stroke="#191970" points="1075.0959,-746.6644 1065.2289,-750.5235 1075.3488,-753.6598 1075.0959,-746.6644"/>
 </g>
-<!-- Node215 -->
+<!-- Node211 -->
 <g id="node42" class="node">
-<title>Node215</title>
+<title>Node211</title>
 <g id="a_node42"><a xlink:href="profiling_8h.html" target="_top" xlink:title="Runtime profiling including timers. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="1318,-469.5 1318,-499.5 1434,-499.5 1434,-469.5 1318,-469.5"/>
-<text text-anchor="start" x="1326" y="-487.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
-<text text-anchor="middle" x="1376" y="-476.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/profiling.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="1945,-402.5 1945,-432.5 2061,-432.5 2061,-402.5 1945,-402.5"/>
+<text text-anchor="start" x="1953" y="-420.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
+<text text-anchor="middle" x="2003" y="-409.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/profiling.h</text>
 </a>
 </g>
 </g>
-<!-- Node4&#45;&gt;Node215 -->
+<!-- Node4&#45;&gt;Node211 -->
 <g id="edge117" class="edge">
-<title>Node4&#45;&gt;Node215</title>
-<path fill="none" stroke="#191970" d="M1563.2316,-794.0743C1569.9357,-763.7705 1582.8931,-711.9611 1601,-670 1626.9259,-609.919 1703.9705,-586.2031 1662,-536 1633.5642,-501.9865 1509.3762,-490.4024 1434.3104,-486.4822"/>
-<polygon fill="#191970" stroke="#191970" points="1559.7331,-793.6947 1561.0466,-804.2078 1566.5758,-795.1702 1559.7331,-793.6947"/>
+<title>Node4&#45;&gt;Node211</title>
+<path fill="none" stroke="#191970" d="M1075.4091,-750.2005C1257.2301,-743.796 1743.324,-724.6604 1812,-701 1865.77,-682.4751 2076.6346,-539.6477 2097,-500 2103.2952,-487.7445 2104.022,-480.854 2097,-469 2087.0419,-452.1895 2069.4391,-440.5051 2052.1379,-432.5698"/>
+<polygon fill="#191970" stroke="#191970" points="1075.0409,-746.7111 1065.1695,-750.559 1075.2859,-753.7068 1075.0409,-746.7111"/>
 </g>
-<!-- Node217 -->
+<!-- Node213 -->
 <g id="node43" class="node">
-<title>Node217</title>
+<title>Node213</title>
 <g id="a_node43"><a xlink:href="packed__func_8h.html" target="_top" xlink:title="Type&#45;erased function used across TVM API. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="1329,-603.5 1329,-633.5 1445,-633.5 1445,-603.5 1329,-603.5"/>
-<text text-anchor="start" x="1337" y="-621.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
-<text text-anchor="middle" x="1387" y="-610.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/packed_func.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="1746,-536.5 1746,-566.5 1862,-566.5 1862,-536.5 1746,-536.5"/>
+<text text-anchor="start" x="1754" y="-554.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
+<text text-anchor="middle" x="1804" y="-543.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/packed_func.h</text>
 </a>
 </g>
 </g>
-<!-- Node4&#45;&gt;Node217 -->
+<!-- Node4&#45;&gt;Node213 -->
 <g id="edge116" class="edge">
-<title>Node4&#45;&gt;Node217</title>
-<path fill="none" stroke="#191970" d="M1512.6297,-799.8571C1497.169,-791.6181 1480.6202,-780.9167 1468,-768 1428.1844,-727.2492 1402.3649,-662.971 1392.0682,-633.802"/>
-<polygon fill="#191970" stroke="#191970" points="1511.0713,-802.9912 1521.5692,-804.4217 1514.2547,-796.7569 1511.0713,-802.9912"/>
+<title>Node4&#45;&gt;Node213</title>
+<path fill="none" stroke="#191970" d="M1075.4925,-749.9058C1237.4722,-743.405 1634.6986,-725.1905 1690,-701 1749.5091,-674.9689 1785.3862,-599.0707 1798.4125,-566.5627"/>
+<polygon fill="#191970" stroke="#191970" points="1075.0223,-746.4216 1065.1695,-750.317 1075.3009,-753.4161 1075.0223,-746.4216"/>
 </g>
-<!-- Node218 -->
+<!-- Node214 -->
 <g id="node44" class="node">
-<title>Node218</title>
+<title>Node214</title>
 <g id="a_node44"><a xlink:href="runtime_2module_8h.html" target="_top" xlink:title="Runtime container of the functions generated by TVM, This is used to support dynamically link...">
-<polygon fill="#ffffff" stroke="#ff0000" points="1537,-536.5 1537,-566.5 1653,-566.5 1653,-536.5 1537,-536.5"/>
-<text text-anchor="start" x="1545" y="-554.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
-<text text-anchor="middle" x="1595" y="-543.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/module.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="1956,-469.5 1956,-499.5 2072,-499.5 2072,-469.5 1956,-469.5"/>
+<text text-anchor="start" x="1964" y="-487.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
+<text text-anchor="middle" x="2014" y="-476.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/module.h</text>
 </a>
 </g>
 </g>
-<!-- Node4&#45;&gt;Node218 -->
-<g id="edge99" class="edge">
-<title>Node4&#45;&gt;Node218</title>
-<path fill="none" stroke="#191970" d="M1557.0717,-793.7247C1556.4068,-763.7999 1556.5568,-713.0424 1563,-670 1568.6787,-632.065 1582.192,-588.8915 1589.7151,-566.6006"/>
-<polygon fill="#191970" stroke="#191970" points="1553.5836,-794.2133 1557.355,-804.1141 1560.581,-794.0224 1553.5836,-794.2133"/>
+<!-- Node4&#45;&gt;Node214 -->
+<g id="edge98" class="edge">
+<title>Node4&#45;&gt;Node214</title>
+<path fill="none" stroke="#191970" d="M1075.2722,-750.1772C1250.3532,-743.8974 1706.0757,-725.3991 1770,-701 1880.316,-658.8937 1972.791,-541.7628 2003.31,-499.7459"/>
+<polygon fill="#191970" stroke="#191970" points="1075.0176,-746.6839 1065.1486,-750.5378 1075.2669,-753.6795 1075.0176,-746.6839"/>
 </g>
-<!-- Node223 -->
+<!-- Node219 -->
 <g id="node45" class="node">
-<title>Node223</title>
+<title>Node219</title>
 <g id="a_node45"><a xlink:href="serializer_8h.html" target="_top" xlink:title="Serializer extension to support TVM data types Include this file to enable serialization of DLDataTyp...">
-<polygon fill="#ffffff" stroke="#000000" points="1119,-603.5 1119,-633.5 1235,-633.5 1235,-603.5 1119,-603.5"/>
-<text text-anchor="start" x="1127" y="-621.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
-<text text-anchor="middle" x="1177" y="-610.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/serializer.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="1096,-536.5 1096,-566.5 1212,-566.5 1212,-536.5 1096,-536.5"/>
+<text text-anchor="start" x="1104" y="-554.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
+<text text-anchor="middle" x="1154" y="-543.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/serializer.h</text>
 </a>
 </g>
 </g>
-<!-- Node4&#45;&gt;Node223 -->
+<!-- Node4&#45;&gt;Node219 -->
 <g id="edge118" class="edge">
-<title>Node4&#45;&gt;Node223</title>
-<path fill="none" stroke="#191970" d="M1511.4471,-800.5202C1489.5206,-791.2419 1463.1559,-779.5941 1440,-768 1350.0655,-722.97 1247.5135,-661.6528 1201.5459,-633.6131"/>
-<polygon fill="#191970" stroke="#191970" points="1510.2898,-803.8303 1520.865,-804.4757 1513.0005,-797.3764 1510.2898,-803.8303"/>
+<title>Node4&#45;&gt;Node219</title>
+<path fill="none" stroke="#191970" d="M1021.4737,-728.5286C1040.0456,-698.3458 1073.8375,-645.3222 1107,-603 1117.0043,-590.2325 1129.4127,-576.7206 1138.9674,-566.7418"/>
+<polygon fill="#191970" stroke="#191970" points="1018.4478,-726.7681 1016.2162,-737.1252 1024.4195,-730.4203 1018.4478,-726.7681"/>
 </g>
-<!-- Node224 -->
+<!-- Node220 -->
 <g id="node46" class="node">
-<title>Node224</title>
+<title>Node220</title>
 <g id="a_node46"><a xlink:href="memory__manager_8h.html" target="_top" xlink:title="Abstract device memory management API. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="849.5,-603.5 849.5,-633.5 986.5,-633.5 986.5,-603.5 849.5,-603.5"/>
-<text text-anchor="start" x="857.5" y="-621.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
-<text text-anchor="middle" x="918" y="-610.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/vm/memory_manager.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="1344.5,-536.5 1344.5,-566.5 1481.5,-566.5 1481.5,-536.5 1344.5,-536.5"/>
+<text text-anchor="start" x="1352.5" y="-554.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
+<text text-anchor="middle" x="1413" y="-543.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/vm/memory_manager.h</text>
 </a>
 </g>
 </g>
-<!-- Node4&#45;&gt;Node224 -->
+<!-- Node4&#45;&gt;Node220 -->
 <g id="edge119" class="edge">
-<title>Node4&#45;&gt;Node224</title>
-<path fill="none" stroke="#191970" d="M1489.5912,-817.0723C1317.8091,-810.6865 871.5715,-792.2295 725,-768 613.2014,-749.5187 547.7975,-791.0411 479,-701 414.679,-616.8177 351.8214,-687.7094 797,-634 813.926,-631.9579 832.1718,-629.6675 849.2335,-627.4868"/>
-<polygon fill="#191970" stroke="#191970" points="1489.648,-820.5767 1499.7705,-817.4485 1489.9066,-813.5814 1489.648,-820.5767"/>
+<title>Node4&#45;&gt;Node220</title>
+<path fill="none" stroke="#191970" d="M1046.8859,-732.7535C1127.9567,-692.6175 1310.4072,-602.291 1382.6837,-566.5088"/>
+<polygon fill="#191970" stroke="#191970" points="1044.925,-729.8188 1037.516,-737.3923 1048.0308,-736.0921 1044.925,-729.8188"/>
 </g>
-<!-- Node172 -->
+<!-- Node174 -->
 <g id="node47" class="node">
-<title>Node172</title>
+<title>Node174</title>
 <g id="a_node47"><a xlink:href="metadata_8h.html" target="_top" xlink:title="Defines types which can be used in Metadata. ">
-<polygon fill="#ffffff" stroke="#000000" points="1643,-670.5 1643,-700.5 1759,-700.5 1759,-670.5 1643,-670.5"/>
-<text text-anchor="start" x="1651" y="-688.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
-<text text-anchor="middle" x="1701" y="-677.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/metadata.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="2204,-603.5 2204,-633.5 2320,-633.5 2320,-603.5 2204,-603.5"/>
+<text text-anchor="start" x="2212" y="-621.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
+<text text-anchor="middle" x="2262" y="-610.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/metadata.h</text>
 </a>
 </g>
 </g>
-<!-- Node4&#45;&gt;Node172 -->
-<g id="edge96" class="edge">
-<title>Node4&#45;&gt;Node172</title>
-<path fill="none" stroke="#191970" d="M1590.134,-798.5005C1602.7463,-789.678 1617.0353,-778.9336 1629,-768 1652.436,-746.5838 1675.9351,-718.001 1689.4417,-700.7155"/>
-<polygon fill="#191970" stroke="#191970" points="1587.9808,-795.7333 1581.7195,-804.2801 1591.944,-801.5034 1587.9808,-795.7333"/>
+<!-- Node4&#45;&gt;Node174 -->
+<g id="edge95" class="edge">
+<title>Node4&#45;&gt;Node174</title>
+<path fill="none" stroke="#191970" d="M1075.5575,-751.0672C1341.1172,-745.3578 2288.4217,-723.448 2310,-701 2329.4073,-680.8105 2302.2722,-651.3234 2281.5874,-633.6132"/>
+<polygon fill="#191970" stroke="#191970" points="1075.2057,-747.5738 1065.283,-751.2873 1075.3557,-754.5722 1075.2057,-747.5738"/>
 </g>
-<!-- Node226 -->
+<!-- Node222 -->
 <g id="node48" class="node">
-<title>Node226</title>
+<title>Node222</title>
 <g id="a_node48"><a xlink:href="metadata__types_8h.html" target="_top" xlink:title="Defines types which can be used in metadata here which are also shared between C and C++ code bases...">
-<polygon fill="#ffffff" stroke="#ff0000" points="1681,-737.5 1681,-767.5 1797,-767.5 1797,-737.5 1681,-737.5"/>
-<text text-anchor="start" x="1689" y="-755.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
-<text text-anchor="middle" x="1739" y="-744.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/metadata_types.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="2185,-670.5 2185,-700.5 2301,-700.5 2301,-670.5 2185,-670.5"/>
+<text text-anchor="start" x="2193" y="-688.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
+<text text-anchor="middle" x="2243" y="-677.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/metadata_types.h</text>
 </a>
 </g>
 </g>
-<!-- Node4&#45;&gt;Node226 -->
-<g id="edge97" class="edge">
-<title>Node4&#45;&gt;Node226</title>
-<path fill="none" stroke="#191970" d="M1608.3423,-800.865C1636.6747,-790.3773 1671.5938,-777.4515 1698.2029,-767.6017"/>
-<polygon fill="#191970" stroke="#191970" points="1606.783,-797.71 1598.6199,-804.4639 1609.2131,-804.2747 1606.783,-797.71"/>
+<!-- Node4&#45;&gt;Node222 -->
+<g id="edge96" class="edge">
+<title>Node4&#45;&gt;Node222</title>
+<path fill="none" stroke="#191970" d="M1075.2461,-750.986C1291.0401,-745.9616 1956.955,-728.5763 2171,-701 2175.5654,-700.4118 2180.2705,-699.6826 2184.9859,-698.8635"/>
+<polygon fill="#191970" stroke="#191970" points="1074.9263,-747.4924 1065.01,-751.2229 1075.0883,-754.4905 1074.9263,-747.4924"/>
 </g>
-<!-- Node228 -->
+<!-- Node224 -->
 <g id="node49" class="node">
-<title>Node228</title>
+<title>Node224</title>
 <g id="a_node49"><a xlink:href="object_8h.html" target="_top" xlink:title="A managed object in the TVM runtime. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="860,-737.5 860,-767.5 976,-767.5 976,-737.5 860,-737.5"/>
-<text text-anchor="start" x="868" y="-755.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
-<text text-anchor="middle" x="918" y="-744.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/object.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="1565,-670.5 1565,-700.5 1681,-700.5 1681,-670.5 1565,-670.5"/>
+<text text-anchor="start" x="1573" y="-688.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
+<text text-anchor="middle" x="1623" y="-677.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/object.h</text>
 </a>
 </g>
 </g>
-<!-- Node4&#45;&gt;Node228 -->
-<g id="edge101" class="edge">
-<title>Node4&#45;&gt;Node228</title>
-<path fill="none" stroke="#191970" d="M1489.8393,-812.3644C1363.4117,-799.129 1094.6799,-770.9962 976.1898,-758.5917"/>
-<polygon fill="#191970" stroke="#191970" points="1489.4818,-815.846 1499.7919,-813.4063 1490.2107,-808.8841 1489.4818,-815.846"/>
+<!-- Node4&#45;&gt;Node224 -->
+<g id="edge100" class="edge">
+<title>Node4&#45;&gt;Node224</title>
+<path fill="none" stroke="#191970" d="M1075.1968,-745.0825C1197.1494,-731.8182 1450.5065,-704.2615 1564.8185,-691.8282"/>
+<polygon fill="#191970" stroke="#191970" points="1074.7921,-741.6058 1065.2292,-746.1666 1075.549,-748.5647 1074.7921,-741.6058"/>
 </g>
-<!-- Node242 -->
+<!-- Node238 -->
 <g id="node50" class="node">
-<title>Node242</title>
+<title>Node238</title>
 <g id="a_node50"><a xlink:href="parallel__for_8h.html" target="_top" xlink:title="An implementation to run loop in parallel. ">
-<polygon fill="#ffffff" stroke="#000000" points="2569,-737.5 2569,-767.5 2683,-767.5 2683,-737.5 2569,-737.5"/>
-<text text-anchor="start" x="2577" y="-755.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/support</text>
-<text text-anchor="middle" x="2626" y="-744.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/parallel_for.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="2357,-670.5 2357,-700.5 2471,-700.5 2471,-670.5 2357,-670.5"/>
+<text text-anchor="start" x="2365" y="-688.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/support</text>
+<text text-anchor="middle" x="2414" y="-677.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/parallel_for.h</text>
 </a>
 </g>
 </g>
-<!-- Node4&#45;&gt;Node242 -->
+<!-- Node4&#45;&gt;Node238 -->
 <g id="edge120" class="edge">
-<title>Node4&#45;&gt;Node242</title>
-<path fill="none" stroke="#191970" d="M1626.2523,-817.5884C1821.3569,-811.8702 2379.0302,-793.6891 2560,-768 2562.9133,-767.5864 2565.887,-767.1058 2568.8816,-766.5752"/>
-<polygon fill="#191970" stroke="#191970" points="1626.0258,-814.0934 1616.132,-817.8833 1626.2297,-821.0904 1626.0258,-814.0934"/>
+<title>Node4&#45;&gt;Node238</title>
+<path fill="none" stroke="#191970" d="M1075.5081,-751.4569C1312.1144,-747.6044 2093.9967,-732.727 2343,-701 2347.5056,-700.4259 2352.148,-699.7068 2356.7997,-698.8947"/>
+<polygon fill="#191970" stroke="#191970" points="1075.263,-747.9603 1065.3208,-751.6214 1075.3761,-754.9594 1075.263,-747.9603"/>
 </g>
 <!-- Node6 -->
 <g id="node3" class="node">
 <title>Node6</title>
 <g id="a_node3"><a xlink:href="auto__scheduler_2cost__model_8h.html" target="_top" xlink:title="Cost models that estimate the performance of programs. ">
-<polygon fill="#ffffff" stroke="#000000" points="1718,-397 1718,-427 1870,-427 1870,-397 1718,-397"/>
-<text text-anchor="start" x="1726" y="-415" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/auto_scheduler</text>
-<text text-anchor="middle" x="1794" y="-404" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/cost_model.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="438,-330 438,-360 590,-360 590,-330 438,-330"/>
+<text text-anchor="start" x="446" y="-348" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/auto_scheduler</text>
+<text text-anchor="middle" x="514" y="-337" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/cost_model.h</text>
 </a>
 </g>
 </g>
 <!-- Node5&#45;&gt;Node6 -->
 <g id="edge2" class="edge">
 <title>Node5&#45;&gt;Node6</title>
-<path fill="none" stroke="#191970" d="M1829.3918,-461.3443C1821.3269,-450.1 1811.8693,-436.9139 1804.7606,-427.0028"/>
-<polygon fill="#191970" stroke="#191970" points="1826.5532,-463.3919 1835.2256,-469.478 1832.2413,-459.3121 1826.5532,-463.3919"/>
+<path fill="none" stroke="#191970" d="M238.5727,-400.468C299.5205,-388.1938 381.0016,-371.7844 439.5033,-360.0028"/>
+<polygon fill="#191970" stroke="#191970" points="237.7044,-397.0725 228.5922,-402.478 239.0864,-403.9347 237.7044,-397.0725"/>
 </g>
... 154325 lines suppressed ...