You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tvm.apache.org by le...@apache.org on 2023/01/16 13:34:14 UTC

[tvm] branch ci-docker-staging updated (ddf6f35f11 -> dad13d1c1b)

This is an automated email from the ASF dual-hosted git repository.

leandron pushed a change to branch ci-docker-staging
in repository https://gitbox.apache.org/repos/asf/tvm.git


 discard ddf6f35f11 Apply PR 13394.
     add 37a885553c [microNPU] Fix Cascader code generation without StorageRewrite (#13365)
     add 490e0e3120 [ci] Split out C++ unittests (#13335)
     add 4f35b4ded1 [TIR][Hexagon] Async DMA fixups (#13436)
     add bfb4c00197 [OpenCL] Improve OpenCL version detection (#13434)
     add 1888dbd72d [DOCKER][ADRENO] Enable autotvm tuning cache from tophub for Adreno GPU (#13427)
     add 8eff158470 [RUNTIME][CLML] Add fixes to clml runtime api (#13426)
     add b023e9539c [Fix,Roofline] Handle zero length features in roofline (#13424)
     add 80f8e8bb74 [TE] Remove binding to temporary variable (#13430)
     add c1b8721695 [microNPU] Upgrade Vela to v3.5.0 (#13394)
     add 1ff6e99556 [OPENCL][TEXTURE] Test case enhancements and fixes for RPC (#13408)
     add 24b7d9fdd0 [MetaSchedule] Fix Dynamic Loop from AutoBinding (#13421)
     add 76a6e7141f [TVMScript] Output elif where possible (#13433)
     add 26d9b5a7c6 [Hotfix] Skip Flaky Tests (Tracked in #13443) (#13444)
     add d6632070a0 [Fix] Fix IndexDataTypeNormalizer to avoid redundant casting (#13449)
     add 41b04007aa [FIX][ONNX][Relay] onnx converter on matmul with scalar; bring back nn.matmul check (#13448)
     add edfeba5c3a [Roofline] Allow user choice of pass for saving lowered TIR (#13437)
     add 8136173a63 [ci][docs] Fix docs deploy (#13442)
     add b419c4b4de [Roofline] Add fma (non-tensorcore) peak flops for CUDA (#13419)
     add 77f9c49b4e [Fix][MetaSchedule] Param for rule AutoBind on Python side (#13454)
     add 1b3d77a897 [RPC] Fix tracker connection termination (#13420)
     add 9778907811 [RUNTIME][CLML] Fix Dense layer crash issue (#13451)
     add 3ccc3009a6 Fix building static tvm_runtime on windows (#13445)
     add 545f8dc927 [TOPI] Add handwritten matvec for dynamic cases (#13423)
     add 7cfa62e255 [Meta Schedule] Patch ICHECK for `target_has_vnni` to avoid seg fault (#13441)
     add 3ad425fbdf [Docker]Add privileged option for hardware with USB access testing (#13460)
     add 723a13ac08 [MetaSchedule] TorchBench tuning script: add option to disallow operators in sub graph (#13453)
     add 27d8d4153b Bump tensorflow from 2.9.1 to 2.9.3 in /apps/android_camera/models (#13455)
     add f38dbbbd32 Bump tflite from 2.4.0 to 2.10.0 in /apps/microtvm/cmsisnn (#13457)
     add e662970647 [microTVM] enable building microTVM components by default (#13073)
     add 1c67768483 [MetaSchedule][Minor] Unify Cuda-TensorCore Naming in Schedule Rule Kind (#13473)
     add a41e192dc8 [MetaSchedule] TorchBench tuning script: add task extraction mode (#13452)
     add e2fc4d7e98 [TVMScript] Improvements tvm.script.highlight (#13438)
     add 8cccc253da [ci] Enable CRT tests for CPU minimal build (#13471)
     add b2058f4dd2 [CI][Docker] Store GTest sources in GPU docker image (#13468)
     add ca5bc958d1 [tir]delete useless param in driver_api.cc (#13474)
     add dcea36e76d Add python venvs to demo_ images that build (#13435)
     add 3680b3cb49 [microTVM][Zephyr] Add 'serial_number' option (#13377)
     add fae4c5f1b3 [TIR] Fix an error when the result of compute_at has unit loop (#13481)
     add 2105b937cd [OpenCL][Adreno] Remove PrimFunc parameters annotation (#13483)
     add f5a102c83c [CMSIS-NN] Support for int16 in fully connected layer (#13484)
     add 101e3a4ade [TIR][Transform] Optional data-flow analysis in RemoveNoOp (#13217)
     add fc606c09b2 [TIR][TVMScript] Cleaner printing of And/Or chains (#13432)
     add 9098b497bb [TIR] Correct type annotation for `rfactor` (#13485)
     add 61a4f21412 [RUNTIME] Correctly handling export_module when exporting modules of different type (#13489)
     add ae4fd7df7d [Relay][Pattern] Enable rewrite_once in class:DFPatternRewrite (#13490)
     add b8d7cd7fe0 [DOCKER][ADRENO] we don't need microtvm being built for android cross… (#13486)
     add c38a0c50a7 [Adreno] Add documentation for Adreno deployment (#13393)
     add 5d8fc204a5 [ACL] Enable int8 data type in QNN DENSE (#13487)
     add b6151bcaa2 Fix typo in golang sample (#13476)
     add fc59b6dbdf Bump pillow from 8.3.2 to 9.3.0 in /apps/microtvm/ethosu (#13464)
     add a92a3dd84f [ACL] Enable int8 data type in QNN CONV2D (#13496)
     add 61144f9d87 [ACL] Enable int8 data type in CONCATENATE (#13497)
     add 25e98dd5e4 [ACL] Enable int8 data type in pooling operators (#13488)
     add 1e5fc25649 [Hexagon]Call Acquire/Release resources API in Hexagon Launcher durin… (#13495)
     add 449d674e8d [TIR][Schedule] Add condition to check buffer type (#13429)
     add 2a812f90f3 Bump pillow from 8.3.2 to 9.3.0 in /apps/microtvm/cmsisnn (#13465)
     add 36d18e905b [TIR] Fix buffer shape and IndexMap indices dtype mismatch (#13463)
     add 3252362d94 [MetaSchedule] Enhance Database Validation Script (#13459)
     add 5b1a1e3d39 [skip ci][ci][wasm] Add package-lock.json to git (#13505)
     add 435df5081a [microTVM] Use `serial_number` in Zephyr tutorials (#13479)
     add 40d0ec7515 [ci][docker] Update Docker image tags (#13504)
     add 95d2e9fa35 [ARM] Add dynamic matvec support (#13502)
     add 57de9e7f3d Revert "[microTVM] enable building microTVM components by default" (#13503)
     add c0ba8a1951 [docs][Adreno] Remove unnecessary compilation flag (#13509)
     add f6f7feafb2 [CMSIS-NN] Support int16 handling for pooling functions (#13498)
     add b9f89a2eb9 [Docs][Bug] Fix broken link to tvmc python (#13499)
     add b587e33027 [microTVM][Tutorial] Fix micro_aot and micro_autotune tutorials (#13513)
     add 694d4bf5ea [tir] Add copy on write to all nodes (#13512)
     add e47eed13d9 Add methods to get the size of VTCM on device as well as the allocated size of the HexagonVtcmPool (#13511)
     add 8dc8d248a1 [LLVM] Switch to using New Pass Manager (NPM) with LLVM 16+ (#13515)
     add c2dd53d531 [TE][TIR] Improved naming when converting TE to schedulable TIR (#13431)
     add ab3f54d3f7 [CI] Add `set -x` for demo scripts (#13523)
     add 6782a35018 [TIR] Disable RewriteSimplifier extensions in RemoveNoOp (#13524)
     add c7d7164c42 [QNN] support zero points as variable scalar for QnnBatchMatMul op (#13469)
     add bf16b42edb [Hexagon] Add HVX quant conv2d implementation (#13256)
     add afbfb7aa7e [TIR][Analysis][Hexagon] Add vtcm memory capacity verification for Hexagon target (#13349)
     add a923ed3c89 [TIR] Introduce ReduceBranchingThroughOvercompute (#13299)
     add 72c3399207 [ci][docker] Update NRF command line tools URL (#13541)
     add d31a1fb0db [ci] Dis-allow any non-S3 URLs in CI (#13283)
     add 7bc41ecca2 [Logging] Mark LogFatal::~LogFatal as [[noreturn]] (#13542)
     add 3a81aef40b [Fix] Use proper target in VerifyGPUCode (#13548)
     add e7160d569a Add recursive on loop with marked kUnrolled (#13536)
     add 012551ffda [microNPU] Fix cascade scheduling stability (#13428)
     add 2b110367d1 [microTVM][Arduino]Add `serial_number` to project options and tests (#13518)
     add 3e956ce9da [DNNL][BYOC] enable dense_bias_sum fusion (#13550)
     add 3482eab1c5 [Fix Bug]fix the bug of schedule batch_matmul_int8 on cuda (#13551)
     add 965490e618 [Relay] Optimize transform shape (#13519)
     add 6574e16034 [MetaSchedule][Hexagon] Add postproc for verifying VTCM usage (#13538)
     add 8d31b25bb8 [TIR] [Hexagon] Add vdmpy intrinsic and transform_layout for tests (#13557)
     add bbba8d97fe [microTVM] Modernize Arm Cortex-M convolution schedules (#13242)
     add 8d04e1ea3c [ci] Fix upload_ci_resource.yml and update related documentation (#13562)
     add a1d46645d2 [TIR] Fix remaining dtype mismatch issue caused by SubspaceDivide (#13558)
     add 8826c1c386 Mark base64.h encode and decode API's as inline (#13556)
     add eb2781f7c9 [MetaSchedule] Make `MultiLevelTiling` apply condition customizable (#13535)
     add 9374738b29 [ci] Split Jenkinsfile into platform-specific jobs (#13300)
     add 1a99d7587f [ci] Remove Jenkinsfile for migration to platform-specific jobs (#13316)
     add da73295aa0 [TOPI] Fix tuple unpacking in conv2d int8 compute (#13566)
     add 3e42506a97 [TIR] Fix predefined inverse map in layout transform dtype legalization (#13565)
     add 3f662ddee6 [TIR] Fix layout free annotation in CreatePrimFunc (#13554)
     add 3745ccf161 [ci] Fix docs deploy (#13570)
     add acef2ed97d [ci] Make tvm-bot aware of platform specific jobs (#13571)
     add 7065a7fa11 [ci] Add a workflow to update a nightly branch (#13564)
     add 5a58c581f5 Added macro generation in MLF export (#12789)
     add eda84e7804 [CodegenC] Explicit forward function declarations (#13522)
     add f674e12d1a [ETHOSN] Remove inference test (#13576)
     add 739356747c [COMMUNITY] Gavin Uberti -> Committer (#13575)
     add 22ff38dff8 [docs] Make building the cpu-only docs build explicit (#13315)
     add 6069292ff9 [OpenCL]  Introduction of weights on buffers (#13563)
     add 75fd531124 [bfloat16] Fixed dtype conversion in the arm_cpu injective schedule (#13417)
     add f4cfcafba5 [Adreno] Add global pooling schedule (#13573)
     add 3168e612c7 [Hexagon] Use get_hexagon_target in test, NFC (#13584)
     add 9e7920b581 [Hexagon] Lookup intrinsic by name instead of using enum value (#13583)
     add da2a6379ab [Test] Make tests work with older numpy versions (#13582)
     add c5b8ab2307 [ci][docker] Read docker image tags during CI runs (#13572)
     add 3b001efcc9 [MetaSchedule] Restore `num_threads` parameter in tuning API  (#13561)
     add 8545297a5e [TIR] Add preserve_unit_iters option to blockize/tensorize (#13579)
     add 3af50e0fce [TIR][Transform] Keep the allocate buffers order after update buffer allocation location (#13560)
     add fbb31fefda [Hexagon] Skip test if "onnx" module not available (#13585)
     add 02820ad283 [FQ2I] Support converting `dense` -> `add` to `qnn.dense` -> `add` -> `requantize` (#13578)
     add 0dc26dd870 [ci][docker] Allow usage of ECR images in PRs (#13590)
     add 970110302d [TIR][Schedule] Support for specific consumer block targeting in cache_write (#13510)
     add ae07437a32 [LLVM] Fix get tm allow_missing check pos (#13591)
     add 760b10ae0e [Torch] Stable diffusion support (#13594)
     add fe1d7ad4f2 [OpenCL][CI] Enable OpenCL cpp tests in CI (#13400)
     add 51431d5a8c [Relay] Bug fix in relay.squeeze function for issue #12400 (#12684)
     add ec9fcc0dac [Relay] Fix `CombineParallelDense` slicing axis  (#13597)
     add b7015bb388 [Fix] Task scheduler error prompt upon build/run failure (#13601)
     add 1d9863470e [TIR] Fix PlanAndUpdateBufferAllocationLocation not visiting constant buffer (#13605)
     add 12311dcdef [Hexagon] Enable depthwise conv2d NHWC with an HWIO kernel layout (#13414)
     add c547bbb13d [Relay][Frontend][Onnx] SequenceAt and SplitToSequence Operators (#13602)
     add c6652bca87 [Relay][TIR] Add utility to lower Relay func to TIR prim func (#13606)
     add 866bec03ad [microNPU] Disable copying weights to SRAM for FullyConnected ops in CopyConstants scheduler (#13588)
     add 949089d698 [microTVM][Zephyr] Fix TVMC test on hardware (#13598)
     add 496ca9a479 [LLVM] Use std::nullopt instead of llvm::None (#13617)
     add 795945be4e [Hexagon] Switch from default_rng to random in Hexagon tests (#13616)
     add 57a13a2324 [Metaschedule] Aligning get_top_k logic in MemoryDatabase and JSONDatabase (#13611)
     add 7fd0cdb230 [TOPI] Fix batch_matmul tensorcore legalize for transpose_b = False case (#13618)
     add cc0f27a8b1 [Relay] Remove overwriting of matmul shapes when they are static (#13615)
     add 06be0b3b26 [Frontend] [ONNX] Support sequence_lens of GRU (#13587)
     add c5911a6f23 [ETHOSN] Add support for experimental compiler option (#13410)
     add ce97138ebe [TVMScript] Fix print round-tripable multi thread env binding (#13622)
     add cdb4eea138 [TOPI][Hexagon] Implement global_avg_pool2d for hexagon (#13614)
     add 7674ea84fe Add check for non-contiguous memory access when lowering to async dma… (#13613)
     add 37f6aa0c7e [MetaSchedule] Fix tensorcore winograd task extraction (#13625)
     add e9b331831a [COMMUNITY] Min Chen -> Reviewer (#13628)
     add 0eabbac216 [BugFix][UMA] Protect target registration (#13624)
     add cded048c74 [Arith] Allow const folding on fp16 involving one and zero (#13631)
     add c932777d48 [Hexagon][runtime] Make HexagonThreadManager::CheckSemaphore thread safe (#13609)
     add e3afc92818 [Relay][Testing][Bugfix] `py_converter` should use correct AST for versions above 3.8 too (#13635)
     add 564934d5eb [Relay][Runtime] Add `set_input/output_zero_copy` in python (#13623)
     add 4096548d13 [BugFix][TVMScript] Parser crash (#13630)
     add ddb006ed31 [TRANSFORM] Fix virtual device annotation issue with BYOC subgraphs (#13325)
     add 6161a8d552 [BugFix][TVMScript]fix var capturing order error (#13640)
     add ab28afbab2 [TVMScript] Remove obsolete modules (#13638)
     add 26a205c9fa [CI] Fix android build by constraining numpy version (#13648)
     add 5019dcee0e [ETHOSN] Update driver stack version to 22.11 (#13637)
     add a4156cd935 [CMSIS-NN] Add Cortex-M85 support (#13644)
     add 36d89a28fb [ETHOSN] Fix for the mock inference after NPU driver update (#13650)
     add 209845fb91 [RPC] Add fail-guard for termination time exception (#13651)
     add e2680142ef [CMSIS-NN] Global function that provides range based on dtype (#13652)
     add 7a38477b2f [Pytorch][Relay] aten::_weight_norm implementation (#13661)
     add 520f2c594b [Relay][Frontend] Span filling common API (#13402)
     add f83055f90a [QNN] Change in Pass Context for lookup table calculation (#13660)
     add ece99a243b [CLML][RELAY] Enable Pad and Conv2d layer fusion (#13649)
     add dc3fc36d0b [TVMScript] More accurate hints for ImportError (#13662)
     add d6507b256f [TIR] Create Layout with specified axis dtype (#13663)
     add 8551a5c71f [MetaSchedule] Add "disabled_pass" option in tuning API (#13659)
     add d582b7e511 [CONTAINER] Struct Hash/Equal and JSON support for ShapeTuple (#13671)
     add cef3f0d7d5 [RUNTIME][OPENCL] OpenCL host pointer support to acheive zero copy (#13413)
     add 6f6bf1912e [BugFix] Pylance emits the warnning 'Code is unreachable'  (#13673)
     add 579c9708c6 [TOPI][bugfix] Fix a bug in arm_cpu int8 dotprod schedule and modernize tests (#13669)
     add 4021eec821 [fix] MXNet dot for all tensor dimensions (#11760)
     add fba91656cf [Build] Expose missing USE_VERILATOR in cmake (#13676)
     add b6e4cea6f2 [BENCHMARK][ADRENO] Adreno Benchmarks with texture (#13675)
     add cca84d3082 [Bug][CodeGen,Cuda]fix cast fp16 to int8/uint8 in cuda (#13641)
     add e5a7f5fb5f [TOPI] Expose mem_scope from generic conv2d variants to be more reusable (#13680)
     add b6851f344e [CLML] Version compatibility and various test cases  (#13670)
     add 6dbb7e1a5e [BENCHMARKS][ADRENO] Documentation for Adreno (Texture) benchmarks (#13679)
     add e24d4fb78b [ONNX] Add converter for QAttention from Microsoft onnxruntime contrib opset (#13654)
     add 45a8a44b86 [BugFix][Runtime] Add missing check for `PackedFunc` (#13687)
     add 17f88eaf78 [COMMUNITY] @blackkker -> Reviewer (#13686)
     add d8357a08c0 [Git] Ignore python/requirements directory (#13684)
     add 49ed54478b [Schedule][Bugfix] Fix decompose padding wrt the single child subtree (#13646)
     add 48ef3a2e4b [Hexagon] Remove temporary VTCM workspace APIs (#13681)
     add 231882a26a [Contrib][Sort] Faster Top-K Implementation (#13599)
     add 8088dcb444 [Build][Bugfix] Use CMAKE_ prefix for <LANG>_COMPILER_LAUNCHER (#13697)
     add 52fe268574 [microTVM]Add default value to unspecified project options in project API (#13610)
     add 116453f5cf Add header files for GraphExecutorDebug (#13694)
     add 724757a6e5 [Relay][Docs] Fixed examples in relay/transform.py documentation (#13682)
     add 0e64dba2d4 [Hexagon] Denote DMA cache bypass as experimental feature (#13699)
     add aec46dc092 [BENCHMARKS][CLML] Adreno benchmarks with CLML BYOC path added (#13696)
     add bf0607bd31 [VTA] Provide zero-initialization for VTAGenericInsn (#13698)
     add 07a5a9eadb [Tensorize][runtime] Add support for AMX(Advanced Matrix Extensions) through Tensor intrinsics (#13642)
     add 048028b72b [BugFix][TVMScript] Fix the roundtripability of  intrinsic pow (#13692)
     add 721f1151b1 [BugFix][Runtime] Fix Incorrect node information (#13693)
     add 39dbce1f9c [microTVM] Build standalone_crt with cmake instead of makefile (#13600)
     add 21d7968b61 [microTVM] Fix MacOS build with USE_MICRO=ON (#13711)
     add 123f1f5e2c [tir] Add line level debug info (#13012)
     add 52d8485e48 [microTVM][Zephyr]Add project files for mlperftiny submission  (#13690)
     add 30abbe9832 [docs] Add "Open with Colab" button to documentation (#13627)
     add 088bc118c7 [TIR] Fix dtype mismatch error due to LetStmt (#13710)
     add 843310b03c [Fix,AutoScheduler] Handle 0-dim buffers in featurization (#13718)
     add 875296c762 [TVMScript] Linter-friendly function definitions (#13713)
     add bd37515cfb [Tests] Replace pytest.main with tvm.testing.main (#13717)
     add a99f0c1545 [TVMScript] Refactor IRDocsifier (#13593)
     add a435cbb3b1 [TIR][Arith] Add common sub expr analyzer (#13702)
     add 6bc72bbca3 [microTVM] Replace arm_nnsupportfunctions.h with arm_acle.h (#13363)
     add 614e16d73a [AOT] Added a test for detecting output size post MLF export (#13655)
     add ce7d8c691a [Fix,Roofline] Fix roofline handling of multiple peak flops (#13716)
     add 329584bbf0 Add support for named outputs in MLF archive (#13704)
     add 5db453e18a [HotFix][docs] Use correct Colab button URL (#13725)
     add 6b65a590df [microNPU] Add support for TFLite PAD (#13732)
     add 10452a3444 [Web] Try to upgrade WebGPU API usage to the latest (#13731)
     add a2daffbe11 [Relay][Frontend] Span Filling TFLite (#13727)
     add b2da9453c6 [Relay][Frontend] Span Filling TensorFlow 1 (#13728)
     add d2ee4ec97e Add DisallowAsyncStridedMemCopy post processor to rem  (#13720)
     add 687ec7883b [microTVM][Zephyr] Fix flash command for nrfjprog (#13723)
     add d00168ffbf [CI] Fix MLF input and output name map (#13740)
     add 8d53c0aa8a [TE][PrimFunc] Fix create primfunc from te extern with explicit buffer load (#13729)
     add 1265eb93e7 Add Name Transforms for Rust style (#13706)
     add db920ddcde [COLLAGE] Add more customization to support more targets (#13450)
     add 3b5baf48d2 [Docker update] Update ci_cpu tag to the latest from tlcpackstaging (#13748)
     add 746fcaaaa3 [CMSIS-NN] Support CMSIS NN from new GitHub location (#13656)
     add 4cb75b97cd [microNPU] Add a legalization test for TFLite PAD (#13750)
     add 68c917d9b4 [Arith] Use ConstIntBound to remove negative numerator when lowering (#13724)
     add 92da138bcb [Profiler] Allow user to flush L2 cache in `time_evalutor` function for profiling CUDA kernels (#13726)
     add d979949993 [CMake][OpenCL] Remove warning for OpenCL wrapper (#13683)
     add bf4fedd06e [AOT]Aot module post-test error workaround (#13685)
     add c2bc1ec95e [microTVM] tuning on micro targets with meta-schedule (#13514)
     add 77b6f0eec3 [Tensorize][TOPI] Add AMX Tensorizing for int8 batch matmul (#13745)
     add 15e185d922 [Hexagon][QNN] Improve performance wo QNN canonicalization (#13734)
     add a9c6f137d8 [CI][Docker][Cortex-M]Update scripts to update ci_cortexm to Ubuntu 20.04 (#13736)
     add 48842d78e7 [Fix,TOPI] Consolidate generic and x86 scatter nd (#13755)
     add f1e4cd720b [Docs] Add `typing-extensions` dependency guide (#13730)
     add f71841a199 [MetaSchedule] Add pass instrument to MetaSchedule api (#13688)
     add 079876ed54 [Relay][Frontend] Span Filling ONNX (#13767)
     add fd2fd85079 [TOPI][OP] Support grouped conv2d_NCHWc (#13733)
     add f06e78b61a [CI][microTVM]Update ci_cortexm image (#13764)
     add e0c2181af8 [microNPU] Add relu6 relu_n1_to_1 test cases for Ethos-U (#13645)
     add 665dd413bc [docs] Remove empty code blocks (#13689)
     add c3c6276c06 [ci][docker] Make branch names valid before using them as tags (#13738)
     add be25803178 [ONNX,FIX] onnx Pad operator `constant_value` omit use default value 0 (#13758)
     add bd3d93b6a4 Remove tutorials CMSIS dependency when not needed (#13762)
     add 5878f6090b [Target] Make `key=arm_cpu` --> `key=arm_cpu,cpu` on AArch64 (#13775)
     add 287597b45d [CI] Update ci_minimal docker image to cross-compile TVM to aarch64 (#13776)
     add f9759920e0 [UnitTest] Parametrized test_arith_iter_affine_map::test_padding (#13774)
     add 60c723ec26 [ETHOSN] Remove support for NPU driver 22.08 (#13763)
     add c452e6966c [TVMScript] IR Fragment Printing (#13742)
     add 9a973b7036 [COMMUNITY] Hongyi Jin -> Committer (#13784)
     add fe01c5a749 [COMMUNITY] Yaxing Cai -> Committer (#13787)
     add b0b8d3eda0 [MeteSchedule] Bugfix: Add checks for nullable `run_secs` (#13790)
     new dad13d1c1b Testing PR13529.

This update added new revisions after undoing existing revisions.
That is to say, some revisions that were in the old version of the
branch are not in the new version.  This situation occurs
when a user force-pushes a change (git push --force) and generates
a repository containing something like this:

 * -- * -- B -- O -- O -- O   (ddf6f35f11)
            \
             N -- N -- N   refs/heads/ci-docker-staging (dad13d1c1b)

You should already have received notification emails for all of the O
revisions, and so the following emails describe only the N revisions
from the common base, B.

Any revisions marked "omit" are not gone; other references still
refer to them.  Any revisions marked "discard" are gone forever.

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .asf.yaml                                          |    14 +-
 .gitattributes                                     |     1 +
 .../workflows/update_nightly_branch.yml            |    37 +-
 .github/workflows/upload_ci_resource.yml           |     2 +-
 .gitignore                                         |     9 +
 3rdparty/mlperftiny/README.md                      |     2 +
 3rdparty/mlperftiny/api/internally_implemented.cpp |   325 +
 3rdparty/mlperftiny/api/internally_implemented.h   |    62 +
 3rdparty/mlperftiny/api/submitter_implemented.h    |    85 +
 3rdparty/nvbench/l2_cache_flush.h                  |    74 +
 CMakeLists.txt                                     |    21 +-
 CONTRIBUTORS.md                                    |     5 +
 Jenkinsfile                                        |  4769 ------
 LICENSE                                            |     2 +-
 apps/android_camera/models/requirements.txt        |     2 +-
 apps/benchmark/README.md                           |    29 +-
 apps/benchmark/adreno/adreno_gpu_bench_clml.py     |   282 +
 apps/benchmark/adreno/adreno_gpu_bench_texture.py  |   278 +
 .../benchmark/adreno/bench.sh                      |    21 +-
 apps/dso_plugin_module/plugin_module.cc            |     1 -
 apps/hexagon_launcher/launcher_hexagon.cc          |     6 +
 .../template_project/microtvm_api_server.py        |    95 +-
 apps/microtvm/cmsisnn/Makefile                     |    28 +-
 apps/microtvm/cmsisnn/README.md                    |     1 +
 apps/microtvm/cmsisnn/requirements.txt             |   125 +-
 apps/microtvm/cmsisnn/run_demo.sh                  |     1 +
 apps/microtvm/cmsisnn/src/demo_bare_metal.c        |     4 +-
 apps/microtvm/ethosu/Makefile                      |    33 +-
 apps/microtvm/ethosu/README.md                     |     1 +
 apps/microtvm/ethosu/requirements.txt              |   119 +-
 apps/microtvm/ethosu/run_demo.sh                   |     1 +
 apps/microtvm/ethosu/src/demo_bare_metal.c         |     4 +-
 apps/microtvm/ethosu/src/demo_freertos.c           |     4 +-
 apps/microtvm/poetry.lock                          |   556 +-
 apps/microtvm/pyproject.toml                       |     1 +
 .../reference-vm/base-box/base_box_setup_common.sh |     2 +-
 .../template_project/CMakeLists.txt.template       |    41 +-
 .../zephyr/template_project/microtvm_api_server.py |   310 +-
 .../template_project/src/mlperftiny}/README.md     |    21 +-
 .../zephyr/template_project/src/mlperftiny/main.cc |    45 +-
 .../src/mlperftiny/submitter_implemented.cc        |   218 +
 .../template_project/src/mlperftiny/tvmruntime.cc  |   164 +
 .../template_project/src/mlperftiny/tvmruntime.h   |    62 +
 .../template_project/src/mlperftiny/zephyr_uart.cc |    89 +
 .../template_project/src/mlperftiny/zephyr_uart.h  |    51 +
 apps/microtvm/zephyr_cmsisnn/CMakeLists.txt        |    44 +-
 apps/microtvm/zephyr_cmsisnn/README.md             |     5 +
 ci/jenkins/Build.groovy.j2                         |   236 -
 ci/jenkins/DockerBuild.groovy.j2                   |   118 -
 ci/jenkins/Lint.groovy.j2                          |    19 -
 ci/jenkins/Test.groovy.j2                          |   319 -
 ci/jenkins/data.py                                 |   122 +
 .../__init__.py => ci/jenkins/docker-images.ini    |    16 +-
 ci/jenkins/generate.py                             |   146 +-
 ci/jenkins/generated/arm_jenkinsfile.groovy        |   986 ++
 ci/jenkins/generated/cortexm_jenkinsfile.groovy    |  1166 ++
 ci/jenkins/generated/cpu_jenkinsfile.groovy        |   884 ++
 ci/jenkins/generated/docker_jenkinsfile.groovy     |   965 ++
 ci/jenkins/generated/gpu_jenkinsfile.groovy        |  1343 ++
 ci/jenkins/generated/hexagon_jenkinsfile.groovy    |   973 ++
 ci/jenkins/generated/i386_jenkinsfile.groovy       |   735 +
 ci/jenkins/generated/lint_jenkinsfile.groovy       |   587 +
 ci/jenkins/generated/minimal_jenkinsfile.groovy    |   631 +
 ci/jenkins/generated/riscv_jenkinsfile.groovy      |   636 +
 ci/jenkins/generated/wasm_jenkinsfile.groovy       |   572 +
 ci/jenkins/macros.j2                               |   222 -
 ci/jenkins/requirements.txt                        |     1 -
 ci/jenkins/templates/arm_jenkinsfile.groovy.j2     |    99 +
 ci/jenkins/templates/cortexm_jenkinsfile.groovy.j2 |    64 +
 ci/jenkins/templates/cpu_jenkinsfile.groovy.j2     |    97 +
 .../docker_jenkinsfile.groovy.j2}                  |   159 +-
 ci/jenkins/templates/gpu_jenkinsfile.groovy.j2     |   217 +
 ci/jenkins/templates/hexagon_jenkinsfile.groovy.j2 |    63 +
 ci/jenkins/templates/i386_jenkinsfile.groovy.j2    |    65 +
 ci/jenkins/templates/lint_jenkinsfile.groovy.j2    |    58 +
 ci/jenkins/templates/minimal_jenkinsfile.groovy.j2 |    55 +
 ci/jenkins/templates/riscv_jenkinsfile.groovy.j2   |    62 +
 ci/jenkins/templates/utils/Build.groovy.j2         |    57 +
 ci/jenkins/{ => templates/utils}/Prepare.groovy.j2 |    43 +-
 ci/jenkins/templates/utils/Test.groovy.j2          |    13 +
 .../utils/base.groovy.j2}                          |    65 +-
 ci/jenkins/templates/utils/macros.j2               |   138 +
 ci/jenkins/templates/wasm_jenkinsfile.groovy.j2    |    40 +
 ci/scripts/github/github_tvmbot.py                 |    28 +-
 ci/scripts/jenkins/cmd_utils.py                    |    11 +
 ci/scripts/jenkins/determine_docker_images.py      |    31 +-
 ci/scripts/jenkins/git_utils.py                    |     2 +-
 ci/scripts/jenkins/open_docker_update_pr.py        |    56 +-
 ci/scripts/jenkins/s3.py                           |   145 +
 cmake/config.cmake                                 |     8 +
 cmake/modules/Hexagon.cmake                        |     9 +
 cmake/modules/LibInfo.cmake                        |     3 +
 cmake/modules/OpenCL.cmake                         |    17 +-
 cmake/modules/StandaloneCrt.cmake                  |   256 +-
 cmake/modules/Zephyr.cmake                         |     3 +
 .../Makefile => cmake/modules/contrib/AMX.cmake    |    16 +-
 cmake/modules/contrib/CLML.cmake                   |    20 +-
 cmake/utils/CCache.cmake                           |    10 +-
 cmake/utils/FindEthosN.cmake                       |    16 +-
 cmake/utils/Summary.cmake                          |     2 +-
 docker/Dockerfile.ci_arm                           |     2 +-
 docker/Dockerfile.ci_cortexm                       |     8 +-
 docker/Dockerfile.ci_cpu                           |     2 +-
 docker/Dockerfile.ci_gpu                           |     4 +-
 docker/Dockerfile.ci_hexagon                       |     2 +-
 docker/Dockerfile.ci_i386                          |     2 +-
 docker/Dockerfile.ci_lint                          |     2 +-
 docker/Dockerfile.ci_minimal                       |    10 +-
 docker/Dockerfile.ci_riscv                         |     2 +-
 docker/Dockerfile.ci_wasm                          |     2 +-
 docker/Dockerfile.demo_android                     |     9 +-
 docker/Dockerfile.demo_rocm                        |     8 +-
 docker/Dockerfile.demo_vitis_ai                    |     6 +-
 docker/bash.sh                                     |    16 +-
 docker/dev_common.sh                               |    19 +-
 .../ubuntu1804_install_aarch64_cross_compile.sh    |    51 +
 ...ubuntu1804_manual_install_llvm_cross_aarch64.sh |    65 +
 .../install/ubuntu2004_install_llvm.sh             |    24 +-
 docker/install/ubuntu_install_cmsis.sh             |     4 +
 .../install/ubuntu_install_ethosn_driver_stack.sh  |     2 +-
 .../install/ubuntu_install_ethosu_driver_stack.sh  |    15 +-
 docker/install/ubuntu_install_googletest.sh        |    21 +-
 docker/install/ubuntu_install_nrfjprog.sh          |    24 +-
 docker/install/ubuntu_install_python.sh            |    23 +-
 docker/install/ubuntu_install_python_package.sh    |     1 -
 docs/README.md                                     |    41 +-
 docs/conf.py                                       |   218 +-
 docs/contribute/ci.rst                             |    34 +-
 docs/contribute/pull_request.rst                   |     2 +-
 docs/how_to/deploy/adreno.rst                      |   336 +
 docs/how_to/deploy/index.rst                       |     1 +
 docs/install/from_source.rst                       |     6 +
 gallery/how_to/compile_models/from_coreml.py       |    12 +-
 gallery/how_to/compile_models/from_darknet.py      |    11 +-
 gallery/how_to/compile_models/from_keras.py        |    10 +-
 gallery/how_to/compile_models/from_mxnet.py        |    15 +-
 gallery/how_to/compile_models/from_oneflow.py      |     7 +-
 gallery/how_to/compile_models/from_onnx.py         |    14 +-
 gallery/how_to/compile_models/from_paddle.py       |    11 +-
 gallery/how_to/compile_models/from_pytorch.py      |    18 +-
 gallery/how_to/compile_models/from_tensorflow.py   |    11 +-
 gallery/how_to/compile_models/from_tflite.py       |    11 +-
 .../how_to/deploy_models/deploy_model_on_adreno.py |   346 +
 .../deploy_models/deploy_model_on_android.py       |     5 -
 .../how_to/deploy_models/deploy_model_on_nano.py   |     5 +-
 .../how_to/deploy_models/deploy_model_on_rasp.py   |     6 -
 .../deploy_object_detection_pytorch.py             |    10 +-
 .../how_to/deploy_models/deploy_prequantized.py    |     5 -
 .../deploy_models/deploy_prequantized_tflite.py    |     5 -
 gallery/how_to/deploy_models/deploy_quantized.py   |     5 -
 gallery/how_to/deploy_models/deploy_sparse.py      |     5 -
 gallery/how_to/deploy_models/deploy_ssd_gluoncv.py |     5 -
 .../how_to/extend_tvm/bring_your_own_datatypes.py  |     7 +-
 gallery/how_to/extend_tvm/low_level_custom_pass.py |     5 -
 gallery/how_to/extend_tvm/use_pass_infra.py        |     5 -
 gallery/how_to/extend_tvm/use_pass_instrument.py   |     5 -
 gallery/how_to/optimize_operators/opt_conv_cuda.py |     9 +-
 .../optimize_operators/opt_conv_tensorcore.py      |     5 +-
 gallery/how_to/optimize_operators/opt_gemm.py      |     5 -
 .../tune_conv2d_layer_cuda.py                      |     5 +-
 .../tune_with_autoscheduler/tune_network_arm.py    |     5 -
 .../tune_with_autoscheduler/tune_network_cuda.py   |     5 -
 .../tune_with_autoscheduler/tune_network_mali.py   |     5 -
 .../tune_with_autoscheduler/tune_network_x86.py    |     5 -
 .../tune_with_autoscheduler/tune_sparse_x86.py     |     5 -
 .../how_to/tune_with_autotvm/tune_conv2d_cuda.py   |     5 +-
 gallery/how_to/tune_with_autotvm/tune_relay_arm.py |     5 -
 .../how_to/tune_with_autotvm/tune_relay_cuda.py    |     5 +-
 .../tune_with_autotvm/tune_relay_mobile_gpu.py     |     5 -
 gallery/how_to/tune_with_autotvm/tune_relay_x86.py |     5 -
 .../how_to/work_with_microtvm/install_cmsis.rst    |    39 +
 .../work_with_microtvm/install_dependencies.rst    |    33 +
 .../how_to/work_with_microtvm/install_zephyr.rst   |    52 +
 gallery/how_to/work_with_microtvm/micro_aot.py     |    36 +-
 .../how_to/work_with_microtvm/micro_autotune.py    |    34 +-
 gallery/how_to/work_with_microtvm/micro_ethosu.py  |    10 +-
 gallery/how_to/work_with_microtvm/micro_pytorch.py |     9 +-
 .../work_with_microtvm/micro_reference_vm.py       |     6 -
 gallery/how_to/work_with_microtvm/micro_tflite.py  |   128 +-
 gallery/how_to/work_with_microtvm/micro_train.py   |    15 +-
 gallery/how_to/work_with_pytorch/using_as_torch.py |    18 +-
 .../work_with_pytorch/using_optimized_torch.py     |    28 +-
 gallery/how_to/work_with_relay/build_gcn.py        |    13 +-
 .../how_to/work_with_relay/using_external_lib.py   |     5 -
 .../work_with_relay/using_pipeline_executor.py     |    10 -
 gallery/how_to/work_with_relay/using_relay_viz.py  |    12 +-
 gallery/how_to/work_with_schedules/extern_op.py    |     5 -
 gallery/how_to/work_with_schedules/intrin_math.py  |     7 +-
 gallery/how_to/work_with_schedules/reduction.py    |     4 +-
 gallery/how_to/work_with_schedules/scan.py         |     4 +-
 .../work_with_schedules/schedule_primitives.py     |     5 -
 gallery/how_to/work_with_schedules/tedd.py         |     5 -
 gallery/how_to/work_with_schedules/tensorize.py    |     5 -
 gallery/how_to/work_with_schedules/tuple_inputs.py |     5 -
 gallery/tutorial/auto_scheduler_matmul_x86.py      |     5 -
 gallery/tutorial/autotvm_matmul_x86.py             |     5 -
 gallery/tutorial/autotvm_relay_x86.py              |     5 -
 gallery/tutorial/cross_compilation_and_rpc.py      |     5 -
 gallery/tutorial/install.py                        |     6 -
 gallery/tutorial/intro_topi.py                     |     5 +-
 gallery/tutorial/introduction.py                   |     5 -
 gallery/tutorial/relay_quick_start.py              |     9 +-
 gallery/tutorial/tensor_expr_get_started.py        |     5 -
 gallery/tutorial/tensor_ir_blitz_course.py         |     5 +-
 gallery/tutorial/tvmc_command_line_driver.py       |    13 +-
 gallery/tutorial/tvmc_python.py                    |     5 -
 gallery/tutorial/uma.py                            |     8 +-
 golang/sample/complex.go                           |     2 +-
 include/tvm/arith/iter_affine_map.h                |     5 +-
 include/tvm/ir/expr.h                              |     2 +
 include/tvm/meta_schedule/mutator.h                |     2 +
 include/tvm/meta_schedule/postproc.h               |    13 +
 .../tvm/meta_schedule/schedule/cuda/thread_bind.h  |     1 +
 include/tvm/meta_schedule/schedule_rule.h          |     9 +-
 include/tvm/relay/transform.h                      |     4 +
 include/tvm/runtime/crt/crt.h                      |     0
 include/tvm/runtime/data_type.h                    |     3 +-
 include/tvm/runtime/device_api.h                   |     1 -
 include/tvm/runtime/logging.h                      |     6 +-
 include/tvm/runtime/packed_func.h                  |     6 +-
 include/tvm/script/printer.h                       |    56 -
 include/tvm/script/printer/doc.h                   |    70 +-
 include/tvm/script/printer/doc_printer.h           |    48 -
 include/tvm/script/printer/frame.h                 |   140 -
 include/tvm/script/printer/ir_docsifier.h          |   314 +-
 include/tvm/script/printer/ir_docsifier_functor.h  |   163 +
 include/tvm/script/printer/printer.h               |    91 +
 include/tvm/script/printer/traced_object.h         |   484 -
 include/tvm/script/printer/traced_object_functor.h |   175 -
 include/tvm/script/printer/var_table.h             |   155 -
 include/tvm/support/with.h                         |    29 -
 include/tvm/tir/analysis.h                         |    24 +
 include/tvm/tir/data_layout.h                      |     4 +-
 include/tvm/tir/data_type_rewriter.h               |     6 +-
 include/tvm/tir/expr.h                             |    33 +
 include/tvm/tir/expr_functor.h                     |     1 -
 include/tvm/tir/op.h                               |     4 +-
 include/tvm/tir/op_attr_types.h                    |     6 +-
 include/tvm/tir/schedule/schedule.h                |    15 +-
 include/tvm/tir/stmt.h                             |    16 +-
 include/tvm/tir/stmt_functor.h                     |     1 -
 include/tvm/tir/transform.h                        |     7 +
 LICENSE => licenses/LICENSE.l2_cache_flush.txt     |    99 +-
 python/gen_requirements.py                         |     5 +-
 python/tvm/arith/__init__.py                       |     2 +-
 python/tvm/arith/iter_affine_map.py                |    15 +-
 python/tvm/arith/pattern.py                        |    23 +
 python/tvm/auto_scheduler/feature.py               |     2 +
 python/tvm/autotvm/measure/measure_methods.py      |    33 +-
 python/tvm/autotvm/tophub.py                       |     2 +
 python/tvm/contrib/debugger/debug_result.py        |    24 +-
 python/tvm/contrib/graph_executor.py               |    58 +-
 python/tvm/contrib/hexagon/meta_schedule.py        |    21 +-
 python/tvm/contrib/hexagon/session.py              |     5 +-
 .../micro/meta_schedule/local_builder_micro.py     |    84 +
 .../micro/meta_schedule/rpc_runner_micro.py        |   233 +
 python/tvm/contrib/torch/as_torch.py               |     4 +-
 python/tvm/ir/base.py                              |     4 +-
 python/tvm/ir/module.py                            |    16 +-
 python/tvm/meta_schedule/cost_model/cost_model.py  |     5 +
 python/tvm/meta_schedule/cost_model/xgb_model.py   |     7 +-
 python/tvm/meta_schedule/postproc/__init__.py      |     2 +
 .../postproc/disallow_async_strided_mem_copy.py}   |    35 +-
 .../meta_schedule/postproc/verify_vtcm_limit.py    |    21 +-
 python/tvm/meta_schedule/relay_integration.py      |    77 +-
 python/tvm/meta_schedule/runner/runner.py          |     2 +
 .../tvm/meta_schedule/schedule_rule/auto_bind.py   |     4 +
 .../schedule_rule/multi_level_tiling.py            |    10 +-
 python/tvm/meta_schedule/testing/torchbench/run.py |    42 +-
 .../tvm/meta_schedule/testing/torchbench/utils.py  |    63 +-
 .../tvm/meta_schedule/testing/validate_database.py |   781 +-
 python/tvm/meta_schedule/tir_integration.py        |     8 +-
 python/tvm/meta_schedule/tune.py                   |    12 +-
 python/tvm/micro/model_library_format.py           |   119 +-
 python/tvm/micro/project.py                        |    15 +-
 python/tvm/micro/testing/aot_test_utils.py         |     8 +-
 python/tvm/micro/testing/pytest_plugin.py          |    13 +
 .../tvm/relay/backend/contrib/ethosu/legalize.py   |    57 +
 .../relay/backend/contrib/ethosu/tir/compiler.py   |     6 +-
 .../tvm/relay/backend/contrib/ethosu/tir/passes.py |     5 +
 .../relay/backend/contrib/ethosu/tir/scheduler.py  |    10 +-
 python/tvm/relay/backend/contrib/ethosu/util.py    |    10 +
 python/tvm/relay/backend/contrib/uma/backend.py    |     7 +-
 python/tvm/relay/backend/executor.py               |    13 +
 python/tvm/relay/backend/te_compiler.py            |    23 +
 python/tvm/relay/collage/__init__.py               |     1 +
 python/tvm/relay/collage/collage.py                |     8 +
 python/tvm/relay/expr.py                           |   202 +-
 python/tvm/relay/frontend/common.py                |   222 +-
 python/tvm/relay/frontend/mxnet.py                 |    45 +-
 python/tvm/relay/frontend/onnx.py                  |   549 +-
 python/tvm/relay/frontend/pytorch.py               |    33 +-
 python/tvm/relay/frontend/tensorflow.py            |    56 +-
 python/tvm/relay/frontend/tflite.py                |    59 +-
 python/tvm/relay/function.py                       |     7 +-
 python/tvm/relay/loops.py                          |     2 +-
 python/tvm/relay/op/_transform.py                  |     2 -
 python/tvm/relay/op/contrib/arm_compute_lib.py     |    21 +-
 python/tvm/relay/op/contrib/clml.py                |   201 +-
 python/tvm/relay/op/contrib/cmsisnn.py             |    45 +-
 python/tvm/relay/op/contrib/dnnl.py                |    23 +
 python/tvm/relay/op/contrib/ethosn.py              |     2 +-
 python/tvm/relay/op/contrib/ethosu.py              |    85 +
 python/tvm/relay/op/nn/_nn.py                      |    17 +
 python/tvm/relay/op/strategy/adreno.py             |     7 +
 python/tvm/relay/op/strategy/arm_cpu.py            |    20 +-
 python/tvm/relay/op/strategy/cuda.py               |     2 +
 python/tvm/relay/op/strategy/hexagon.py            |     3 +-
 python/tvm/relay/op/strategy/x86.py                |    45 +-
 python/tvm/relay/op/transform.py                   |   359 +-
 python/tvm/relay/qnn/op/_qnn.py                    |    10 +-
 python/tvm/relay/qnn/op/canonicalizations.py       |    23 +-
 python/tvm/relay/qnn/op/legalizations.py           |    70 +
 python/tvm/relay/qnn/strategy/__init__.py          |     1 +
 python/tvm/relay/qnn/strategy/arm_cpu.py           |    72 +
 python/tvm/relay/qnn/strategy/hexagon.py           |    13 +
 python/tvm/relay/testing/py_converter.py           |     4 +-
 python/tvm/relay/testing/tflite.py                 |    10 +
 .../transform/fake_quantization_to_integer.py      |    31 +
 python/tvm/rpc/base.py                             |    36 +-
 python/tvm/rpc/proxy.py                            |    13 +-
 python/tvm/rpc/server.py                           |    39 +-
 python/tvm/rpc/server_ios_launcher.py              |     5 +-
 python/tvm/rpc/tracker.py                          |    27 +-
 python/tvm/runtime/module.py                       |    17 +-
 python/tvm/script/__init__.py                      |     5 +-
 python/tvm/script/highlight.py                     |   275 +-
 python/tvm/script/ir_builder/tir/ir.py             |   328 +-
 python/tvm/script/parser/core/parser.py            |     8 +-
 python/tvm/script/parser/core/utils.py             |     2 +-
 python/tvm/script/parser_v1/context_maintainer.py  |   248 -
 python/tvm/script/parser_v1/diagnostics.py         |    55 -
 python/tvm/script/parser_v1/meta_unparser.py       |    45 -
 python/tvm/script/parser_v1/parser.py              |  1391 --
 python/tvm/script/parser_v1/registry.py            |    62 -
 python/tvm/script/parser_v1/tir/__init__.pyi       |   475 -
 python/tvm/script/parser_v1/tir/intrin.py          |   307 -
 python/tvm/script/parser_v1/tir/node.py            |   218 -
 python/tvm/script/parser_v1/tir/scope_handler.py   |   793 -
 python/tvm/script/parser_v1/tir/special_stmt.py    |   927 --
 python/tvm/script/parser_v1/tir/ty.py              |   226 -
 python/tvm/script/parser_v1/utils.py               |   105 -
 python/tvm/script/printer/__init__.py              |     8 +-
 python/tvm/script/printer/default.py               |    83 +
 python/tvm/script/printer/entry.py                 |    71 -
 python/tvm/script/printer/frame.py                 |    81 -
 python/tvm/script/printer/ir_docsifier.py          |   245 -
 python/tvm/script/printer/printer.py               |    54 +
 python/tvm/script/printer/var_table.py             |   118 -
 python/tvm/target/target.py                        |     8 +
 python/tvm/testing/aot.py                          |    31 +-
 python/tvm/testing/utils.py                        |    22 +
 python/tvm/tir/__init__.py                         |     2 +-
 python/tvm/tir/analysis/analysis.py                |    16 +
 python/tvm/tir/data_layout.py                      |     8 +-
 python/tvm/tir/function.py                         |    16 +-
 python/tvm/tir/op.py                               |    22 +
 python/tvm/tir/schedule/schedule.py                |    34 +-
 python/tvm/tir/schedule/trace.py                   |    14 +-
 python/tvm/tir/tensor_intrin/hexagon.py            |    46 +
 python/tvm/tir/transform/transform.py              |    34 +
 python/tvm/topi/adreno/conv2d_nchw.py              |     5 +-
 python/tvm/topi/adreno/conv2d_nhwc.py              |     5 +-
 python/tvm/topi/adreno/conv2d_winograd_common.py   |     2 +
 python/tvm/topi/adreno/depthwise_conv2d_nchw.py    |     5 +-
 python/tvm/topi/adreno/depthwise_conv2d_nhwc.py    |     5 +-
 python/tvm/topi/adreno/pooling.py                  |   107 +
 python/tvm/topi/arm_cpu/__init__.py                |     2 +
 python/tvm/topi/arm_cpu/conv2d.py                  |    18 -
 python/tvm/topi/arm_cpu/conv2d_int8.py             |     2 +-
 python/tvm/topi/arm_cpu/depthwise_conv2d.py        |    20 -
 python/tvm/topi/arm_cpu/injective.py               |     3 +-
 .../arm_cpu/mprofile/dsp/micro_kernel/avg_pool.py  |     6 +-
 .../arm_cpu/mprofile/dsp/micro_kernel/common.py    |    34 +-
 .../topi/arm_cpu/mprofile/dsp/micro_kernel/gemm.py |    66 +-
 .../arm_cpu/mprofile/dsp/micro_kernel/max_pool.py  |    10 +-
 .../dsp/micro_kernel/multi_channel_convolve.py     |    19 +-
 .../arm_cpu/mprofile/dsp/micro_kernel/tensordot.py |   469 +-
 .../topi/arm_cpu/mprofile/dsp/tensordot_conv2ds.py |   296 -
 python/tvm/topi/arm_cpu/qnn.py                     |   370 +
 python/tvm/topi/arm_cpu/qnn_alter_op.py            |   122 +
 python/tvm/topi/cuda/batch_matmul.py               |     6 +-
 python/tvm/topi/cuda/tensorcore_alter_op.py        |    32 +-
 python/tvm/topi/generic/conv2d.py                  |     6 +-
 python/tvm/topi/hexagon/qnn/__init__.py            |     2 +
 python/tvm/topi/hexagon/qnn/conv2d_alter_op.py     |    53 +
 python/tvm/topi/hexagon/qnn/global_avg_pool2d.py   |    95 +
 python/tvm/topi/hexagon/qnn/nn.py                  |   208 +-
 python/tvm/topi/hexagon/slice_ops/__init__.py      |     1 +
 .../topi/hexagon/slice_ops/global_avg_pool2d.py    |    52 +
 python/tvm/topi/hexagon/utils.py                   |    12 +
 python/tvm/topi/nn/conv2d.py                       |    46 +-
 python/tvm/topi/nn/depthwise_conv2d.py             |    19 +-
 python/tvm/topi/nn/qnn.py                          |    67 +
 python/tvm/topi/nn/winograd_util.py                |     1 +
 python/tvm/topi/scatter.py                         |    55 +-
 python/tvm/topi/testing/depthwise_conv2d_python.py |     2 +-
 python/tvm/topi/utils.py                           |     7 +-
 python/tvm/topi/x86/__init__.py                    |     1 -
 python/tvm/topi/x86/batch_matmul.py                |    53 +-
 python/tvm/topi/x86/dense.py                       |   222 +-
 python/tvm/topi/x86/dense_alter_op.py              |    10 +-
 python/tvm/topi/x86/scatter.py                     |   119 -
 python/tvm/topi/x86/tensor_intrin.py               |   226 +-
 python/tvm/topi/x86/utils.py                       |     7 +
 python/tvm/utils/roofline/__init__.py              |    23 +-
 python/tvm/utils/roofline/cuda.py                  |    94 +-
 src/arith/bound_deducer.cc                         |     1 -
 src/arith/const_fold.h                             |     8 -
 src/arith/detect_common_subexpr.cc                 |    74 +
 src/arith/iter_affine_map.cc                       |    35 +-
 src/arith/narrow_predicate_expression.cc           |     1 -
 src/arith/rewrite_simplify.cc                      |    26 +-
 src/arith/transitive_comparison_analyzer.cc        |     3 -
 src/auto_scheduler/compute_dag.cc                  |    27 +-
 src/auto_scheduler/feature.cc                      |    17 +-
 src/auto_scheduler/search_policy/utils.h           |     7 +-
 src/auto_scheduler/utils.h                         |     1 -
 src/contrib/ethosu/cascader/pareto.cc              |     6 +
 src/contrib/torch/base64.h                         |     4 +-
 src/driver/driver_api.cc                           |    35 +-
 src/ir/expr.cc                                     |    35 -
 src/ir/function.cc                                 |     1 -
 src/ir/module.cc                                   |     1 -
 src/ir/span.cc                                     |     4 +
 src/ir/transform.cc                                |     1 +
 src/ir/type.cc                                     |    28 -
 src/meta_schedule/database/json_database.cc        |    10 +-
 src/meta_schedule/database/memory_database.cc      |    12 +-
 .../measure_callback/remove_build_artifact.cc      |     2 +-
 .../measure_callback/update_cost_model.cc          |     3 +-
 src/meta_schedule/module_equality.cc               |     1 -
 src/meta_schedule/mutator/mutator.cc               |     8 +
 .../postproc/disallow_async_strided_mem_copy.cc    |   189 +
 src/meta_schedule/postproc/postproc.cc             |     9 +-
 src/meta_schedule/postproc/verify_gpu_code.cc      |    11 +-
 src/meta_schedule/postproc/verify_vtcm_limit.cc    |   104 +
 src/meta_schedule/schedule/cuda/thread_bind.cc     |     2 +-
 src/meta_schedule/schedule/cuda/winograd.cc        |     3 +-
 src/meta_schedule/schedule_rule/auto_inline.cc     |     5 +
 .../schedule_rule/multi_level_tiling.cc            |    21 +-
 .../schedule_rule/multi_level_tiling.h             |     2 +
 src/meta_schedule/schedule_rule/schedule_rule.cc   |    27 +
 .../space_generator/space_generator.cc             |    13 +-
 src/meta_schedule/task_scheduler/task_scheduler.cc |     4 +-
 src/meta_schedule/utils.h                          |     2 +
 src/node/structural_hash.cc                        |    44 +
 src/parser/meta_ref.cc                             |     1 -
 src/parser/parser.cc                               |     1 -
 src/parser/token.h                                 |     2 -
 src/printer/text_printer.h                         |    40 +-
 src/printer/tir_text_printer.cc                    |    53 +-
 src/printer/tir_text_printer_debug.cc              |    97 +
 src/printer/tir_text_printer_debug.h               |    70 +
 src/printer/tvmscript_printer.cc                   |    28 +-
 src/relay/backend/aot/aot_lower_main.cc            |     1 -
 src/relay/backend/aot_executor_codegen.cc          |     1 -
 src/relay/backend/contrib/clml/codegen.cc          |    30 +-
 .../legalize.cc => backend/contrib/clml/target.cc} |    30 +-
 src/relay/backend/contrib/cmsisnn/relay_to_tir.cc  |    64 +-
 .../backend/contrib/cmsisnn/tir_to_runtime.cc      |    13 +-
 src/relay/backend/contrib/codegen_c/codegen.cc     |     1 -
 .../backend/contrib/codegen_json/codegen_json.h    |     1 -
 src/relay/backend/contrib/cutlass/codegen.cc       |     2 -
 src/relay/backend/contrib/dnnl/codegen.cc          |     3 -
 src/relay/backend/contrib/ethosn/codegen.cc        |    10 +-
 src/relay/backend/contrib/ethosn/codegen_ethosn.h  |     4 +
 .../contrib/example_target_hooks/tir_to_runtime.cc |     3 +-
 src/relay/backend/contrib/uma/targets.cc           |    24 +-
 src/relay/backend/contrib/uma/tir_to_runtime.cc    |     7 +-
 src/relay/backend/graph_executor_codegen.cc        |     8 -
 src/relay/backend/interpreter.cc                   |     6 -
 src/relay/backend/name_transforms.cc               |    51 +-
 src/relay/backend/name_transforms.h                |    24 +
 src/relay/backend/task_extraction.cc               |     6 +-
 src/relay/backend/te_compiler_cache.cc             |    33 +-
 src/relay/backend/te_compiler_cache.h              |    20 +-
 src/relay/backend/vm/compiler.cc                   |     1 -
 src/relay/collage/collage_partitioner.cc           |     2 +-
 src/relay/collage/custom_cost_estimator.cc         |    60 +
 src/relay/collage/custom_cost_estimator.h          |    67 +
 src/relay/collage/gather_partition_specs.cc        |    35 +-
 src/relay/collage/utils.cc                         |    13 +
 src/relay/collage/utils.h                          |     6 +
 src/relay/ir/expr.cc                               |    88 +-
 src/relay/ir/function.cc                           |     4 +-
 src/relay/op/nn/nn.h                               |    34 +-
 src/relay/op/nn/sparse.cc                          |     2 -
 src/relay/qnn/op/batch_matmul.cc                   |    78 +-
 src/relay/qnn/op/convolution.cc                    |     8 +-
 src/relay/qnn/op/requantize.cc                     |     9 +-
 src/relay/qnn/pass/legalize.cc                     |    22 +-
 src/relay/qnn/utils.h                              |     2 -
 src/relay/transforms/annotate_texture_storage.cc   |    90 +-
 src/relay/transforms/combine_parallel_dense.cc     |    43 +-
 src/relay/transforms/defunctionalization.cc        |     1 -
 src/relay/transforms/device_aware_visitors.cc      |     3 -
 .../transforms/fake_quantization_to_integer.cc     |     2 +-
 src/relay/transforms/fold_constant.cc              |     1 -
 src/relay/transforms/fold_scale_axis.cc            |     2 -
 src/relay/transforms/infer_layout_utils.cc         |     1 -
 src/relay/transforms/simplify_expr.h               |     4 +-
 src/relay/transforms/to_mixed_precision.cc         |     1 -
 src/runtime/contrib/amx/amx_config.cc              |   135 +
 src/runtime/contrib/arm_compute_lib/acl_utils.cc   |     1 -
 src/runtime/contrib/bnns/bnns_json_runtime.cc      |     1 -
 src/runtime/contrib/clml/clml_runtime.cc           |   129 +-
 src/runtime/contrib/cublas/cublas_utils.h          |     1 -
 src/runtime/contrib/dnnl/dnnl_json_runtime.cc      |     4 +
 src/runtime/contrib/dnnl/dnnl_tensor_requisite.h   |     1 -
 src/runtime/contrib/ethosn/ethosn_device.cc        |    23 +-
 src/runtime/contrib/ethosn/ethosn_device.h         |     3 +-
 src/runtime/contrib/ethosn/ethosn_runtime.cc       |    10 +-
 src/runtime/contrib/ethosn/ethosn_runtime.h        |     2 +
 src/runtime/contrib/sort/sort.cc                   |    91 +-
 src/runtime/contrib/tflite/tflite_runtime.cc       |     1 -
 .../ethosn_device.h => cuda/l2_cache_flush.cc}     |    31 +-
 .../graph_executor/debug/graph_executor_debug.cc   |   528 +-
 .../graph_executor/debug/graph_executor_debug.h    |   147 +
 src/runtime/hexagon/hexagon_buffer.cc              |     1 -
 src/runtime/hexagon/hexagon_device_api.cc          |    26 +-
 src/runtime/hexagon/hexagon_device_api.h           |    14 -
 src/runtime/hexagon/hexagon_module.cc              |     1 -
 src/runtime/hexagon/hexagon_thread_manager.cc      |    10 +-
 src/runtime/hexagon/hexagon_thread_manager.h       |     3 +
 src/runtime/hexagon/hexagon_vtcm_pool.cc           |    15 +-
 src/runtime/hexagon/hexagon_vtcm_pool.h            |    14 +-
 {include/tvm => src}/runtime/hexagon/ops/conv2d.h  |   145 +-
 src/runtime/hexagon/ops/conv2d_fp16_hvx.cc         |    57 +-
 src/runtime/hexagon/ops/conv2d_quant_hvx.cc        |   319 +
 src/runtime/hexagon/ops/conv_utils.cc              |   170 +-
 src/runtime/logging.cc                             |     1 -
 src/runtime/module.cc                              |     2 -
 src/runtime/opencl/opencl_common.h                 |    17 +-
 src/runtime/opencl/opencl_device_api.cc            |   103 +-
 .../opencl/opencl_wrapper/opencl_wrapper.cc        |    31 +
 src/runtime/pack_args.h                            |     1 -
 src/runtime/pipeline/pipeline_executor.cc          |     1 -
 src/runtime/pipeline/pipeline_struct.h             |     3 -
 src/runtime/rpc/rpc_event_impl.cc                  |     6 +-
 src/runtime/rpc/rpc_module.cc                      |     1 -
 src/runtime/rpc/rpc_session.cc                     |     1 -
 src/runtime/stackvm/stackvm.cc                     |     1 -
 src/runtime/stackvm/stackvm.h                      |     4 -
 src/runtime/thread_storage_scope.h                 |     7 +-
 src/runtime/vm/executable.cc                       |     2 -
 src/runtime/vm/vm.cc                               |     1 -
 src/runtime/vulkan/vulkan_device.cc                |     1 -
 src/script/printer/doc.cc                          |    10 +-
 .../printer/{ => doc_printer}/base_doc_printer.cc  |     0
 .../printer/{ => doc_printer}/base_doc_printer.h   |     7 +-
 .../{ => doc_printer}/python_doc_printer.cc        |    29 +-
 src/script/printer/frame.cc                        |    50 -
 src/script/printer/ir/ir.cc                        |    74 +
 src/script/printer/ir/misc.cc                      |    77 +
 src/script/{printer.cc => printer/ir/utils.h}      |    49 +-
 src/script/printer/ir_docsifier.cc                 |   185 +-
 src/script/printer/printer.cc                      |    54 +
 src/script/printer/tir/block.cc                    |   148 +
 src/script/printer/tir/buffer.cc                   |   261 +
 src/script/printer/tir/expr.cc                     |   317 +
 src/script/printer/tir/for_loop.cc                 |   124 +
 src/script/printer/tir/function.cc                 |    87 +
 src/script/printer/tir/ir.cc                       |   113 +
 src/script/printer/tir/stmt.cc                     |   371 +
 src/script/printer/tir/utils.h                     |   223 +
 src/script/printer/traced_object_functor.cc        |    85 -
 src/script/printer/utils.h                         |    93 -
 src/script/printer/var_table.cc                    |   109 -
 src/support/base64.h                               |    10 +-
 src/support/libinfo.cc                             |    11 +
 src/support/scalars.cc                             |     2 -
 src/support/socket.h                               |     1 -
 src/target/llvm/codegen_amdgpu.cc                  |     4 +-
 src/target/llvm/codegen_cpu.cc                     |    86 +-
 src/target/llvm/codegen_cpu.h                      |     2 +
 src/target/llvm/codegen_llvm.cc                    |   138 +-
 src/target/llvm/codegen_llvm.h                     |    13 +-
 src/target/llvm/codegen_nvptx.cc                   |     4 +-
 src/target/llvm/llvm_instance.cc                   |     6 +-
 src/target/parsers/aprofile.cc                     |    10 +-
 src/target/parsers/mprofile.cc                     |     6 +-
 src/target/source/codegen_c.cc                     |     5 +-
 src/target/source/codegen_c.h                      |    13 +-
 src/target/source/codegen_c_host.cc                |    58 +-
 src/target/source/codegen_c_host.h                 |    11 +-
 src/target/source/codegen_cuda.cc                  |    19 +-
 src/target/source/codegen_cuda.h                   |     3 +-
 src/target/source/codegen_opencl.cc                |     2 +-
 src/target/source/codegen_opencl.h                 |     2 +-
 src/target/source/codegen_source_base.h            |     2 +
 src/target/source/codegen_vhls.cc                  |     2 +-
 src/target/source/codegen_vhls.h                   |     2 +-
 src/target/source/interface_c.cc                   |    34 +-
 src/target/source/ptx.cc                           |     2 -
 src/target/spirv/codegen_spirv.cc                  |     2 -
 src/target/target.cc                               |     1 -
 src/target/target_kind.cc                          |     1 +
 src/te/autodiff/jacobian.cc                        |     1 -
 src/te/operation/create_primfunc.cc                |    63 +-
 src/te/schedule/schedule_dataflow_rewrite.cc       |     1 -
 src/tir/analysis/calculate_allocated_memory.cc     |   126 +
 src/tir/analysis/control_flow_graph.cc             |   117 +-
 src/tir/analysis/control_flow_graph.h              |    12 +-
 src/tir/analysis/stmt_finding.cc                   |     1 -
 src/tir/ir/buffer.cc                               |     6 -
 src/tir/ir/data_layout.cc                          |    25 +-
 src/tir/ir/data_type_rewriter.cc                   |    53 +-
 src/tir/ir/expr.cc                                 |   351 -
 src/tir/ir/expr_functor.cc                         |     1 -
 src/tir/ir/function.cc                             |    15 -
 src/tir/ir/index_map.cc                            |     3 +-
 src/tir/ir/legacy_printer.cc                       |   270 +
 src/tir/ir/specialize.cc                           |     6 +-
 src/tir/ir/stmt.cc                                 |   405 +-
 src/tir/ir/stmt_functor.cc                         |     3 -
 src/tir/op/builtin.cc                              |    16 +-
 src/tir/op/op.cc                                   |    98 +-
 src/tir/schedule/concrete_schedule.cc              |    27 +-
 src/tir/schedule/concrete_schedule.h               |    10 +-
 src/tir/schedule/error.h                           |     2 +-
 src/tir/schedule/primitive.h                       |    10 +-
 src/tir/schedule/primitive/blockize_tensorize.cc   |    51 +-
 src/tir/schedule/primitive/cache_index.cc          |     1 -
 src/tir/schedule/primitive/cache_read_write.cc     |    75 +-
 src/tir/schedule/primitive/compute_at.cc           |     2 +-
 src/tir/schedule/primitive/compute_inline.cc       |     2 -
 src/tir/schedule/primitive/decompose_padding.cc    |    17 +-
 .../schedule/primitive/layout_transformation.cc    |    40 +-
 src/tir/schedule/schedule.cc                       |     6 +-
 src/tir/schedule/traced_schedule.cc                |    28 +-
 src/tir/schedule/traced_schedule.h                 |    10 +-
 src/tir/transforms/bf16_legalize.cc                |     2 -
 src/tir/transforms/bound_checker.cc                |     2 -
 src/tir/transforms/common_subexpr_elim.cc          |     4 +-
 src/tir/transforms/common_subexpr_elim_tools.cc    |    10 +-
 src/tir/transforms/common_subexpr_elim_tools.h     |     3 +-
 src/tir/transforms/inject_double_buffer.cc         |     2 -
 src/tir/transforms/inject_virtual_thread.cc        |     4 -
 src/tir/transforms/install_debug_spans.cc          |   150 +
 src/tir/transforms/install_debug_spans.h           |   132 +
 src/tir/transforms/ir_utils.cc                     |     2 -
 src/tir/transforms/lift_attr_scope.cc              |     1 -
 src/tir/transforms/loop_partition.cc               |     3 +-
 src/tir/transforms/lower_async_dma.cc              |    36 +-
 src/tir/transforms/lower_custom_datatypes.cc       |     2 -
 src/tir/transforms/lower_intrin.cc                 |   136 +-
 src/tir/transforms/lower_thread_allreduce.cc       |     2 -
 src/tir/transforms/lower_tvm_builtin.cc            |     6 +-
 src/tir/transforms/lower_warp_memory.cc            |     2 -
 .../merge_dynamic_shared_memory_allocations.cc     |     2 -
 src/tir/transforms/narrow_datatype.cc              |    13 +-
 .../plan_update_buffer_allocation_location.cc      |    74 +-
 .../reduce_branching_through_overcompute.cc        |   178 +
 src/tir/transforms/remove_no_op.cc                 |   232 +-
 src/tir/transforms/remove_no_op.h                  |    60 +
 src/tir/transforms/renew_defs.cc                   |     2 -
 src/tir/transforms/rewrite_unsafe_select.cc        |     1 -
 src/tir/transforms/simplify.cc                     |     1 -
 src/tir/{op/runtime.cc => transforms/simplify.h}   |    27 +-
 src/tir/transforms/split_host_device.cc            |     2 -
 src/tir/transforms/storage_flatten.cc              |     5 -
 src/tir/transforms/storage_rewrite.cc              |     4 -
 src/tir/transforms/thread_storage_sync.cc          |     2 -
 src/tir/transforms/unroll_loop.cc                  |     1 -
 src/tir/transforms/update_pointer_storage_scope.cc |     2 -
 src/tir/transforms/vectorize_loop.cc               |     6 -
 src/tir/usmp/algo/hill_climb.cc                    |     1 -
 .../cpp-runtime/hexagon/hexagon_conv_utils_test.h  |   102 +
 .../hexagon/hexagon_fp16_utils_tests.cc            |    96 +-
 ...utils_tests.cc => hexagon_quant_utils_tests.cc} |   175 +-
 .../cpp-runtime/hexagon/hexagon_vtcm_pool_tests.cc |     9 +-
 .../cpp-runtime/opencl/opencl_nativeptr.cc         |    35 +-
 tests/cpp-runtime/opencl/run_gtests.cc             |    60 -
 tests/cpp/expr_test.cc                             |     2 +-
 tests/cpp/name_transforms_test.cc                  |    50 +-
 tests/cpp/packed_func_test.cc                      |    12 +
 .../backend/contrib/cmsisnn/compiler_attrs_test.cc |    16 +-
 tests/cpp/runtime/contrib/ethosn/inference_test.cc |    12 -
 tests/cpp/target/parsers/aprofile_test.cc          |     3 +-
 tests/cpp/target/source/interface_c_test.cc        |   316 +-
 tests/cpp/traced_object_test.cc                    |   268 -
 tests/cpp/tvmscript_printer_irdocsifier_test.cc    |   117 -
 ...tvmscript_printer_traced_object_functor_test.cc |   188 -
 tests/cpp/tvmscript_printer_var_table_test.cc      |   158 -
 tests/lint/check_file_type.py                      |     4 +
 tests/lint/check_request_hook.py                   |   148 -
 tests/lint/rat-excludes                            |     9 +-
 tests/micro/arduino/README.md                      |     7 +
 tests/micro/arduino/conftest.py                    |    13 -
 .../micro/arduino/test_arduino_error_detection.py  |     5 +-
 tests/micro/arduino/test_arduino_rpc_server.py     |    77 +-
 tests/micro/arduino/test_arduino_workflow.py       |     7 +-
 tests/micro/arduino/test_utils.py                  |     4 +-
 tests/micro/common/test_tvmc.py                    |    46 +-
 .../micro/project_api/__init__.py                  |     4 +-
 .../test_arduino_microtvm_api_server.py            |     7 +-
 tests/micro/project_api/test_project_api.py        |    34 +-
 .../test_zephyr_microtvm_api_server.py}            |    19 +-
 .../project_api/{test_project_api.py => utils.py}  |    38 +-
 tests/micro/stm32/test_code_emitter.py             |     2 +-
 tests/micro/zephyr/README.md                       |     6 +
 .../parser_v1 => tests/micro/zephyr}/__init__.py   |     5 +-
 tests/micro/zephyr/conftest.py                     |     8 -
 tests/micro/zephyr/test_ms_tuning.py               |   171 +
 tests/micro/zephyr/test_zephyr.py                  |   125 +-
 tests/micro/zephyr/test_zephyr_aot_exec.py         |    34 +-
 .../zephyr/test_zephyr_aot_exec_standalone.py      |    31 +-
 tests/micro/zephyr/test_zephyr_armv7m.py           |    30 +-
 tests/micro/zephyr/{test_utils.py => utils.py}     |    27 +-
 tests/python/ci/test_ci.py                         |    16 +-
 tests/python/contrib/test_amx.py                   |   126 +
 .../contrib/test_arm_compute_lib/infrastructure.py |    18 +
 .../test_arm_compute_lib/test_concatenate.py       |    72 +-
 .../contrib/test_arm_compute_lib/test_conv2d.py    |    25 +-
 .../contrib/test_arm_compute_lib/test_dense.py     |    29 +-
 .../contrib/test_arm_compute_lib/test_pooling.py   |   414 +-
 tests/python/contrib/test_clml/infrastructure.py   |    60 +-
 .../test_clml/test_adreno_collage_targets.py       |   354 +
 .../test_clml/test_compiler.py}                    |    38 +-
 tests/python/contrib/test_clml/test_network.py     |    15 +-
 tests/python/contrib/test_clml/test_ops.py         |   379 +-
 .../python/contrib/test_cmsisnn/test_binary_ops.py |    14 +-
 tests/python/contrib/test_cmsisnn/test_conv2d.py   |    21 +-
 .../contrib/test_cmsisnn/test_fully_connected.py   |    35 +-
 .../python/contrib/test_cmsisnn/test_fuse_pads.py  |    22 +-
 .../test_cmsisnn/test_generate_constants.py        |     7 +-
 .../contrib/test_cmsisnn/test_invalid_graphs.py    |     5 +-
 tests/python/contrib/test_cmsisnn/test_networks.py |     6 +-
 tests/python/contrib/test_cmsisnn/test_pooling.py  |    16 +-
 .../contrib/test_cmsisnn/test_remove_reshapes.py   |     4 +-
 tests/python/contrib/test_cmsisnn/test_softmax.py  |     5 +-
 tests/python/contrib/test_cmsisnn/utils.py         |    38 +-
 tests/python/contrib/test_cublas.py                |     2 +-
 tests/python/contrib/test_dnnl.py                  |    26 +
 tests/python/contrib/test_ethosn/infrastructure.py |     1 -
 tests/python/contrib/test_ethosn/test_codegen.py   |    54 +
 tests/python/contrib/test_ethosn/test_conv2d.py    |     2 +-
 .../contrib/test_ethosn/test_conv2d_transpose.py   |    68 +-
 .../test_ethosn/test_convert_equivalents.py        |     4 +-
 .../python/contrib/test_ethosn/test_leaky_relu.py  |     4 +-
 tests/python/contrib/test_ethosn/test_tanh.py      |     3 +-
 .../test_ethosu_binary_elementwise_matcher.py      |     2 +-
 .../cascader/test_ethosu_block_config.py           |     2 +-
 .../cascader/test_ethosu_conv2d_matcher.py         |     2 +-
 .../cascader/test_ethosu_depthwise2d_matcher.py    |     2 +-
 .../cascader/test_ethosu_identity_matcher.py       |     2 +-
 .../cascader/test_ethosu_inline_matcher.py         |     2 +-
 .../test_ethosu/cascader/test_ethosu_part.py       |     2 +-
 .../cascader/test_ethosu_part_performance.py       |     2 +-
 .../cascader/test_ethosu_pooling_matcher.py        |     2 +-
 .../test_ethosu_unary_elementwise_matcher.py       |     2 +-
 .../contrib/test_ethosu/cascader/test_graph.py     |     2 +-
 .../test_ethosu/cascader/test_memory_reduction.py  |     8 +-
 .../contrib/test_ethosu/cascader/test_pareto.py    |     2 +-
 .../contrib/test_ethosu/cascader/test_plan.py      |     2 +-
 .../test_ethosu/cascader/test_plan_generator.py    |     2 +-
 .../test_ethosu/cascader/test_propagator.py        |     2 +-
 .../cascader/test_proposal_generator.py            |     2 +-
 .../contrib/test_ethosu/cascader/test_scheduler.py |     2 +-
 .../test_ethosu/cascader/test_stripe_config.py     |     2 +-
 .../test_ethosu/cascader/test_tensor_config.py     |     2 +-
 tests/python/contrib/test_ethosu/infra.py          |     4 +-
 .../contrib/test_ethosu/test_attr_passing.py       |     2 +-
 tests/python/contrib/test_ethosu/test_codegen.py   |    87 +-
 tests/python/contrib/test_ethosu/test_compiler.py  |     2 +-
 .../test_ethosu/test_copy_compute_reordering.py    |     2 +-
 .../contrib/test_ethosu/test_create_tiles.py       |     2 +-
 .../contrib/test_ethosu/test_encode_constants.py   |     2 +-
 .../contrib/test_ethosu/test_extract_constants.py  |     2 +-
 .../contrib/test_ethosu/test_layout_optimizer.py   |     2 +-
 tests/python/contrib/test_ethosu/test_legalize.py  |   102 +-
 .../contrib/test_ethosu/test_lookup_table.py       |     2 +-
 .../python/contrib/test_ethosu/test_lower_to_te.py |     2 +-
 tests/python/contrib/test_ethosu/test_networks.py  |     2 +-
 .../python/contrib/test_ethosu/test_preprocess.py  |     2 +-
 .../test_ethosu/test_remove_concatenates.py        |     2 +-
 .../test_ethosu/test_replace_binary_elementwise.py |     2 +-
 .../contrib/test_ethosu/test_replace_conv2d.py     |     2 +-
 .../contrib/test_ethosu/test_replace_copy.py       |     2 +-
 .../contrib/test_ethosu/test_replace_identity.py   |     2 +-
 .../contrib/test_ethosu/test_replace_pooling.py    |     2 +-
 .../test_ethosu/test_replace_unary_elementwise.py  |     2 +-
 .../contrib/test_ethosu/test_rolling_buffer.py     |     2 +-
 tests/python/contrib/test_ethosu/test_scheduler.py |    18 +-
 .../test_ethosu/test_tir_to_cs_translator.py       |     2 +-
 .../contrib/test_ethosu/test_type_inference.py     |     2 +-
 tests/python/contrib/test_ethosu/test_vela_api.py  |     2 +-
 .../python/contrib/test_hexagon/infrastructure.py  |    19 +-
 .../metaschedule_e2e/test_resnet50_int8.py         |    43 +-
 .../test_hexagon/test_async_dma_pipeline.py        |   221 +-
 tests/python/contrib/test_hexagon/test_launcher.py |     6 +-
 .../contrib/test_hexagon/test_meta_schedule.py     |     7 +-
 tests/python/contrib/test_hexagon/test_models.py   |     2 +-
 .../contrib/test_hexagon/test_parallel_hvx.py      |     6 +-
 .../test_hexagon/test_parallel_hvx_load_vtcm.py    |     5 +-
 .../contrib/test_hexagon/test_parallel_scalar.py   |     8 +-
 .../test_hexagon/test_software_pipeline_async.py   |     2 +-
 tests/python/contrib/test_hexagon/test_vtcm.py     |    55 +-
 .../contrib/test_hexagon/test_vtcm_bandwidth.py    |     4 +-
 .../test_hexagon/test_wo_qnn_canonicalization.py   |   121 +-
 .../topi/slice_op/test_conv2d_slice.py             |     4 +-
 .../topi/slice_op/test_global_avg_pool2d.py        |   167 +
 .../test_hexagon/topi/test_conv2d_quant_intrin.py  |   261 +
 tests/python/contrib/test_hexagon/topi/test_pad.py |     2 +-
 tests/python/contrib/test_libtorch_ops.py          |     2 +-
 tests/python/contrib/test_nnpack.py                |     2 +-
 .../python/contrib/test_opencl/test_run_gtests.py  |    56 -
 tests/python/contrib/test_uma/test_target.py       |    25 +-
 .../contrib/test_vitis_ai/test_vitis_ai_codegen.py |     2 +-
 .../test_vitis_ai_runtime_cpu_part.py              |     2 +-
 tests/python/driver/tvmc/test_pass_list.py         |     2 +-
 tests/python/driver/tvmc/test_shape_parser.py      |     5 +-
 tests/python/frontend/darknet/test_forward.py      |   200 +-
 tests/python/frontend/mxnet/test_forward.py        |    13 +-
 tests/python/frontend/onnx/test_forward.py         |   668 +-
 tests/python/frontend/paddlepaddle/test_forward.py |     2 +-
 tests/python/frontend/pytorch/test_forward.py      |    40 +-
 .../python/frontend/tensorflow/test_bn_dynamic.py  |     6 +-
 .../frontend/tensorflow/test_control_flow.py       |    10 +-
 tests/python/frontend/tensorflow/test_debugging.py |    11 +-
 tests/python/frontend/tensorflow/test_forward.py   |   260 +-
 tests/python/frontend/tensorflow/test_no_op.py     |     9 +-
 .../frontend/tensorflow2/test_functional_models.py |     2 +-
 .../frontend/tensorflow2/test_sequential_models.py |     2 +-
 tests/python/frontend/test_common.py               |   194 +-
 tests/python/frontend/tflite/test_forward.py       |   173 +-
 tests/python/integration/test_arm_aprofile.py      |    74 +
 tests/python/integration/test_reduce.py            |     3 +-
 tests/python/integration/test_winograd_nnpack.py   |     4 +-
 tests/python/relay/aot/corstone300.mk              |    42 +-
 tests/python/relay/aot/test_c_device_api.py        |    33 +-
 tests/python/relay/aot/test_crt_aot.py             |    92 +-
 .../relay/aot/test_crt_forward_declarations.py     |   251 +
 .../relay/collage/demo_collage_partitioner.py      |     6 +
 .../relay/opencl_texture}/conftest.py              |    38 +-
 .../opencl_texture/test_conv2d_nchw_texture.py     |   134 +-
 .../opencl_texture/test_conv2d_nhwc_texture.py     |    74 +-
 .../test_depthwise_conv2d_nchw_texture.py          |    24 +-
 .../test_depthwise_conv2d_nhwc_texture.py          |    24 +-
 tests/python/relay/opencl_texture/test_network.py  |    69 +
 .../relay/opencl_texture/test_pool_texture.py      |   135 +
 .../relay/opencl_texture/test_reduction_texture.py |    12 +-
 .../relay/opencl_texture/utils/adreno_utils.py     |   130 +-
 tests/python/relay/strategy/arm_cpu/test_conv2d.py |    22 -
 .../strategy/arm_cpu/test_depthwise_conv2d.py      |    31 -
 .../strategy/arm_cpu/test_generalized_conv2d.py    |    10 +-
 .../strategy/arm_cpu/test_quantized_convolution.py |   358 +
 tests/python/relay/test_adt.py                     |     2 +-
 .../relay/test_analysis_basic_block_normal_form.py |     2 +-
 .../test_analysis_extract_intermediate_expr.py     |     2 +-
 .../relay/test_analysis_extract_operators.py       |     2 +-
 tests/python/relay/test_any.py                     |     9 +
 .../relay/test_auto_scheduler_task_extraction.py   |     2 +-
 tests/python/relay/test_backend_graph_executor.py  |     2 +-
 tests/python/relay/test_backend_interpreter.py     |     2 +-
 tests/python/relay/test_call_graph.py              |     2 +-
 tests/python/relay/test_name_mangling.py           |     2 +-
 tests/python/relay/test_op_grad_level2.py          |     2 +-
 tests/python/relay/test_op_grad_level3.py          |     2 +-
 tests/python/relay/test_op_grad_level4.py          |     2 +-
 tests/python/relay/test_op_level1.py               |    57 +-
 tests/python/relay/test_op_level10.py              |    55 +
 tests/python/relay/test_op_level3.py               |    11 +-
 tests/python/relay/test_op_level6.py               |     2 +-
 tests/python/relay/test_pass_alter_op_layout.py    |     2 +-
 .../relay/test_pass_combine_parallel_dense.py      |    51 +-
 tests/python/relay/test_pass_convert_op_layout.py  |     2 +-
 .../python/relay/test_pass_defunctionalization.py  |     2 +-
 tests/python/relay/test_pass_defuse_ops.py         |     2 +-
 tests/python/relay/test_pass_dynamic_to_static.py  |     2 +-
 .../test_pass_fake_quantization_to_integer.py      |    43 +-
 .../relay/test_pass_flexible_shape_dispatch.py     |     2 +-
 tests/python/relay/test_pass_fuse_ops.py           |     2 +-
 tests/python/relay/test_pass_gradient.py           |     2 +-
 tests/python/relay/test_pass_inline.py             |     2 +-
 tests/python/relay/test_pass_lambda_lift.py        |     2 +-
 .../python/relay/test_pass_legalize_tensorcore.py  |    43 +-
 tests/python/relay/test_pass_manager.py            |     2 +-
 tests/python/relay/test_pass_merge_composite.py    |     2 +-
 .../relay/test_pass_remove_unused_functions.py     |     2 +-
 tests/python/relay/test_pass_simplify_expr.py      |     2 +-
 .../relay/test_pass_to_basic_block_normal_form.py  |     2 +-
 tests/python/relay/test_pass_unmatched_cases.py    |     2 +-
 tests/python/relay/test_pipeline_executor.py       |     2 +-
 tests/python/relay/test_tensor_array.py            |     2 +-
 tests/python/relay/test_to_mixed_precision.py      |     2 +-
 tests/python/relay/test_type_infer.py              |     4 +-
 tests/python/relay/test_vm_serialization.py        |     2 +-
 tests/python/relay/utils/tag_span.py               |   108 +
 tests/python/tir/test_debug_info.py                |   124 +
 tests/python/topi/python/test_topi_conv2d_NCHWc.py |    19 +-
 tests/python/topi/python/test_topi_conv2d_int8.py  |  1110 +-
 .../topi/python/test_topi_conv2d_tensordot_opts.py |   415 +
 tests/python/topi/python/test_topi_dense.py        |     3 +
 tests/python/topi/python/test_topi_scatter.py      |     4 -
 .../unittest/test_aot_legalize_packed_call.py      |     2 +-
 .../unittest/test_arith_canonical_simplify.py      |    29 +-
 tests/python/unittest/test_arith_deduce_bound.py   |     2 +-
 .../test_arith_detect_cse.py}                      |    23 +-
 .../python/unittest/test_arith_iter_affine_map.py  |   189 +-
 .../python/unittest/test_arith_rewrite_simplify.py |    11 +-
 .../unittest/test_arith_solve_linear_equations.py  |     2 +-
 .../unittest/test_arith_solve_linear_inequality.py |     2 +-
 .../python/unittest/test_auto_scheduler_feature.py |    26 +
 .../unittest/test_container_structural_equal.py    |    14 +
 ...egression.py => test_evaluator_with_preproc.py} |    45 +-
 tests/python/unittest/test_ir_container.py         |     2 +-
 .../python/unittest/test_meta_schedule_database.py |    39 +
 ...ule_postproc_disallow_async_strided_mem_copy.py |   110 +
 .../test_meta_schedule_postproc_rewrite_layout.py  |   130 +
 ...est_meta_schedule_postproc_verify_vtcm_limit.py |   127 +
 .../test_meta_schedule_relay_integration.py        |    82 +-
 .../test_meta_schedule_schedule_rule_mlt.py        |   168 +-
 .../test_meta_schedule_schedule_rule_mlt_intrin.py |    30 +-
 .../test_meta_schedule_schedule_rule_mlt_tc.py     |    41 +-
 .../test_meta_schedule_space_cpu_winograd.py       |     1 -
 .../test_meta_schedule_space_cuda_winograd.py      |   241 +
 .../unittest/test_meta_schedule_trace_apply.py     |   278 +-
 .../unittest/test_micro_model_library_format.py    |   145 +-
 tests/python/unittest/test_micro_ms_tuning.py      |   126 +
 tests/python/unittest/test_roofline.py             |     7 +
 .../unittest/test_rpc_base.py}                     |    43 +-
 tests/python/unittest/test_runtime_container.py    |     5 +
 tests/python/unittest/test_runtime_graph_debug.py  |    26 +-
 .../test_runtime_module_based_interface.py         |    39 +
 .../python/unittest/test_runtime_module_export.py  |     6 +-
 tests/python/unittest/test_runtime_rpc.py          |    13 +-
 tests/python/unittest/test_target_codegen_cuda.py  |     2 +-
 .../python/unittest/test_target_codegen_vulkan.py  |    42 +
 tests/python/unittest/test_te_create_primfunc.py   |   174 +-
 ...test_tir_analysis_calculate_allocated_memory.py |   101 +
 .../unittest/test_tir_analysis_stmt_finding.py     |     2 +-
 tests/python/unittest/test_tir_buffer.py           |     2 +-
 tests/python/unittest/test_tir_data_layout.py      |    27 +-
 tests/python/unittest/test_tir_nodes.py            |     2 +-
 .../python/unittest/test_tir_schedule_blockize.py  |    29 +-
 .../unittest/test_tir_schedule_cache_read_write.py |   103 +
 .../unittest/test_tir_schedule_compute_at.py       |    52 +
 .../test_tir_schedule_decompose_padding.py         |    63 +
 .../python/unittest/test_tir_schedule_tensorize.py |    42 +-
 .../unittest/test_tir_schedule_transform_layout.py |    48 +-
 .../python/unittest/test_tir_te_extern_primfunc.py |     2 +-
 .../test_tir_transform_convert_for_loops_serial.py |     2 +-
 .../python/unittest/test_tir_transform_hoist_if.py |     2 +-
 .../test_tir_transform_inject_rolling_buffer.py    |     2 +-
 .../test_tir_transform_inject_software_pipeline.py |     4 +-
 .../python/unittest/test_tir_transform_ir_utils.py |     2 +-
 .../unittest/test_tir_transform_loop_partition.py  |    69 +
 .../test_tir_transform_lower_warp_memory.py        |     2 +-
 .../test_tir_transform_make_unpacked_api.py        |     2 +-
 ...sform_plan_update_buffer_allocation_location.py |    81 +-
 ...ansform_reduce_branching_through_overcompute.py |   219 +
 .../unittest/test_tir_transform_remove_no_op.py    |   521 +
 .../python/unittest/test_tir_transform_simplify.py |     1 +
 .../test_tir_usmp_analysis_extract_bufferinfo.py   |     2 +-
 ...ransform_convert_pool_allocations_to_offsets.py |     2 +-
 tests/python/unittest/test_tir_usmp_utils.py       |     2 +-
 .../unittest/test_tvmscript_printer_frame.py       |    60 -
 .../unittest/test_tvmscript_printer_irdocsifier.py |   123 -
 .../test_tvmscript_printer_python_doc_printer.py   |     3 +-
 .../python/unittest/test_tvmscript_printer_tir.py  |   638 +
 .../unittest/test_tvmscript_printer_underlining.py |    12 +-
 .../unittest/test_tvmscript_printer_var_table.py   |    89 -
 tests/python/unittest/test_tvmscript_regression.py |    32 +
 tests/python/unittest/test_tvmscript_roundtrip.py  |    97 +
 tests/python/unittest/test_tvmscript_spans.py      |    73 -
 .../unittest/test_type_annotation_checker.py       |     3 +-
 tests/scripts/ci.py                                |    40 +-
 tests/scripts/request_hook/request_hook.py         |   165 +-
 tests/scripts/task_build_adreno_bins.sh            |     7 +-
 tests/scripts/task_ci_setup.sh                     |    41 -
 .../scripts/task_clear_pytest.sh                   |     9 +-
 tests/scripts/task_config_build_adreno.sh          |     4 +-
 tests/scripts/task_config_build_gpu.sh             |     1 +
 tests/scripts/task_config_build_minimal.sh         |     1 +
 tests/scripts/task_cpp_unittest.sh                 |    30 +-
 tests/scripts/task_lint.sh                         |     3 -
 ...build_minimal.sh => task_microtvm_cpp_tests.sh} |    32 +-
 ...build_adreno.sh => task_opencl_cpp_unittest.sh} |    30 +-
 tests/scripts/task_python_adreno.sh                |     3 +-
 vta/python/vta/transform.py                        |     2 +-
 vta/runtime/runtime.cc                             |     3 +-
 web/.gitignore                                     |     1 -
 web/emcc/webgpu_runtime.cc                         |    16 +-
 web/package-lock.json                              | 15462 +++++++++++++++++++
 web/package.json                                   |     5 +-
 web/src/webgpu.ts                                  |    32 +-
 988 files changed, 57492 insertions(+), 22491 deletions(-)
 rename python/tvm/script/parser_v1/tir/__init__.py => .github/workflows/update_nightly_branch.yml (57%)
 create mode 100644 3rdparty/mlperftiny/README.md
 create mode 100644 3rdparty/mlperftiny/api/internally_implemented.cpp
 create mode 100644 3rdparty/mlperftiny/api/internally_implemented.h
 create mode 100644 3rdparty/mlperftiny/api/submitter_implemented.h
 create mode 100644 3rdparty/nvbench/l2_cache_flush.h
 delete mode 100755 Jenkinsfile
 create mode 100755 apps/benchmark/adreno/adreno_gpu_bench_clml.py
 create mode 100755 apps/benchmark/adreno/adreno_gpu_bench_texture.py
 copy tests/scripts/task_python_adreno.sh => apps/benchmark/adreno/bench.sh (80%)
 copy {tests/micro/arduino => apps/microtvm/zephyr/template_project/src/mlperftiny}/README.md (61%)
 copy src/relay/qnn/pass/legalize.cc => apps/microtvm/zephyr/template_project/src/mlperftiny/main.cc (55%)
 create mode 100644 apps/microtvm/zephyr/template_project/src/mlperftiny/submitter_implemented.cc
 create mode 100644 apps/microtvm/zephyr/template_project/src/mlperftiny/tvmruntime.cc
 create mode 100644 apps/microtvm/zephyr/template_project/src/mlperftiny/tvmruntime.h
 create mode 100644 apps/microtvm/zephyr/template_project/src/mlperftiny/zephyr_uart.cc
 create mode 100644 apps/microtvm/zephyr/template_project/src/mlperftiny/zephyr_uart.h
 delete mode 100644 ci/jenkins/Build.groovy.j2
 delete mode 100644 ci/jenkins/DockerBuild.groovy.j2
 delete mode 100644 ci/jenkins/Lint.groovy.j2
 delete mode 100644 ci/jenkins/Test.groovy.j2
 create mode 100644 ci/jenkins/data.py
 copy python/tvm/script/parser_v1/__init__.py => ci/jenkins/docker-images.ini (56%)
 create mode 100644 ci/jenkins/generated/arm_jenkinsfile.groovy
 create mode 100644 ci/jenkins/generated/cortexm_jenkinsfile.groovy
 create mode 100644 ci/jenkins/generated/cpu_jenkinsfile.groovy
 create mode 100644 ci/jenkins/generated/docker_jenkinsfile.groovy
 create mode 100644 ci/jenkins/generated/gpu_jenkinsfile.groovy
 create mode 100644 ci/jenkins/generated/hexagon_jenkinsfile.groovy
 create mode 100644 ci/jenkins/generated/i386_jenkinsfile.groovy
 create mode 100644 ci/jenkins/generated/lint_jenkinsfile.groovy
 create mode 100644 ci/jenkins/generated/minimal_jenkinsfile.groovy
 create mode 100644 ci/jenkins/generated/riscv_jenkinsfile.groovy
 create mode 100644 ci/jenkins/generated/wasm_jenkinsfile.groovy
 delete mode 100644 ci/jenkins/macros.j2
 delete mode 100644 ci/jenkins/requirements.txt
 create mode 100644 ci/jenkins/templates/arm_jenkinsfile.groovy.j2
 create mode 100644 ci/jenkins/templates/cortexm_jenkinsfile.groovy.j2
 create mode 100644 ci/jenkins/templates/cpu_jenkinsfile.groovy.j2
 rename ci/jenkins/{Deploy.groovy.j2 => templates/docker_jenkinsfile.groovy.j2} (52%)
 create mode 100644 ci/jenkins/templates/gpu_jenkinsfile.groovy.j2
 create mode 100644 ci/jenkins/templates/hexagon_jenkinsfile.groovy.j2
 create mode 100644 ci/jenkins/templates/i386_jenkinsfile.groovy.j2
 create mode 100644 ci/jenkins/templates/lint_jenkinsfile.groovy.j2
 create mode 100644 ci/jenkins/templates/minimal_jenkinsfile.groovy.j2
 create mode 100644 ci/jenkins/templates/riscv_jenkinsfile.groovy.j2
 create mode 100644 ci/jenkins/templates/utils/Build.groovy.j2
 rename ci/jenkins/{ => templates/utils}/Prepare.groovy.j2 (85%)
 create mode 100644 ci/jenkins/templates/utils/Test.groovy.j2
 rename ci/jenkins/{Jenkinsfile.j2 => templates/utils/base.groovy.j2} (65%)
 create mode 100644 ci/jenkins/templates/utils/macros.j2
 create mode 100644 ci/jenkins/templates/wasm_jenkinsfile.groovy.j2
 create mode 100755 ci/scripts/jenkins/s3.py
 rename ci/jenkins/Makefile => cmake/modules/contrib/AMX.cmake (74%)
 create mode 100644 docker/install/ubuntu1804_install_aarch64_cross_compile.sh
 create mode 100644 docker/install/ubuntu1804_manual_install_llvm_cross_aarch64.sh
 copy tests/scripts/task_config_build_adreno.sh => docker/install/ubuntu2004_install_llvm.sh (60%)
 create mode 100644 docs/how_to/deploy/adreno.rst
 create mode 100644 gallery/how_to/deploy_models/deploy_model_on_adreno.py
 create mode 100644 gallery/how_to/work_with_microtvm/install_cmsis.rst
 create mode 100644 gallery/how_to/work_with_microtvm/install_dependencies.rst
 create mode 100644 gallery/how_to/work_with_microtvm/install_zephyr.rst
 mode change 100755 => 100644 include/tvm/runtime/crt/crt.h
 delete mode 100644 include/tvm/script/printer.h
 delete mode 100644 include/tvm/script/printer/doc_printer.h
 delete mode 100644 include/tvm/script/printer/frame.h
 create mode 100644 include/tvm/script/printer/ir_docsifier_functor.h
 create mode 100644 include/tvm/script/printer/printer.h
 delete mode 100644 include/tvm/script/printer/traced_object.h
 delete mode 100644 include/tvm/script/printer/traced_object_functor.h
 delete mode 100644 include/tvm/script/printer/var_table.h
 copy LICENSE => licenses/LICENSE.l2_cache_flush.txt (82%)
 create mode 100644 python/tvm/contrib/micro/meta_schedule/local_builder_micro.py
 create mode 100644 python/tvm/contrib/micro/meta_schedule/rpc_runner_micro.py
 rename python/tvm/{script/parser_v1/tir/prim_func.py => meta_schedule/postproc/disallow_async_strided_mem_copy.py} (52%)
 copy tests/python/unittest/test_tvmscript_printer_entry_point.py => python/tvm/meta_schedule/postproc/verify_vtcm_limit.py (60%)
 create mode 100644 python/tvm/relay/qnn/strategy/arm_cpu.py
 delete mode 100644 python/tvm/script/parser_v1/context_maintainer.py
 delete mode 100644 python/tvm/script/parser_v1/diagnostics.py
 delete mode 100644 python/tvm/script/parser_v1/meta_unparser.py
 delete mode 100644 python/tvm/script/parser_v1/parser.py
 delete mode 100644 python/tvm/script/parser_v1/registry.py
 delete mode 100644 python/tvm/script/parser_v1/tir/__init__.pyi
 delete mode 100644 python/tvm/script/parser_v1/tir/intrin.py
 delete mode 100644 python/tvm/script/parser_v1/tir/node.py
 delete mode 100644 python/tvm/script/parser_v1/tir/scope_handler.py
 delete mode 100644 python/tvm/script/parser_v1/tir/special_stmt.py
 delete mode 100644 python/tvm/script/parser_v1/tir/ty.py
 delete mode 100644 python/tvm/script/parser_v1/utils.py
 create mode 100644 python/tvm/script/printer/default.py
 delete mode 100644 python/tvm/script/printer/entry.py
 delete mode 100644 python/tvm/script/printer/frame.py
 delete mode 100644 python/tvm/script/printer/ir_docsifier.py
 create mode 100644 python/tvm/script/printer/printer.py
 delete mode 100644 python/tvm/script/printer/var_table.py
 delete mode 100644 python/tvm/topi/arm_cpu/mprofile/dsp/tensordot_conv2ds.py
 create mode 100644 python/tvm/topi/arm_cpu/qnn.py
 create mode 100644 python/tvm/topi/arm_cpu/qnn_alter_op.py
 create mode 100644 python/tvm/topi/hexagon/qnn/conv2d_alter_op.py
 create mode 100755 python/tvm/topi/hexagon/qnn/global_avg_pool2d.py
 create mode 100755 python/tvm/topi/hexagon/slice_ops/global_avg_pool2d.py
 delete mode 100644 python/tvm/topi/x86/scatter.py
 create mode 100644 src/arith/detect_common_subexpr.cc
 create mode 100644 src/meta_schedule/postproc/disallow_async_strided_mem_copy.cc
 create mode 100644 src/meta_schedule/postproc/verify_vtcm_limit.cc
 create mode 100644 src/printer/tir_text_printer_debug.cc
 create mode 100644 src/printer/tir_text_printer_debug.h
 copy src/relay/{qnn/pass/legalize.cc => backend/contrib/clml/target.cc} (61%)
 create mode 100644 src/relay/collage/custom_cost_estimator.cc
 create mode 100644 src/relay/collage/custom_cost_estimator.h
 create mode 100644 src/runtime/contrib/amx/amx_config.cc
 copy src/runtime/{contrib/ethosn/ethosn_device.h => cuda/l2_cache_flush.cc} (58%)
 create mode 100644 src/runtime/graph_executor/debug/graph_executor_debug.h
 rename {include/tvm => src}/runtime/hexagon/ops/conv2d.h (54%)
 create mode 100644 src/runtime/hexagon/ops/conv2d_quant_hvx.cc
 rename src/script/printer/{ => doc_printer}/base_doc_printer.cc (100%)
 rename src/script/printer/{ => doc_printer}/base_doc_printer.h (97%)
 rename src/script/printer/{ => doc_printer}/python_doc_printer.cc (96%)
 delete mode 100644 src/script/printer/frame.cc
 create mode 100644 src/script/printer/ir/ir.cc
 create mode 100644 src/script/printer/ir/misc.cc
 rename src/script/{printer.cc => printer/ir/utils.h} (50%)
 create mode 100644 src/script/printer/printer.cc
 create mode 100644 src/script/printer/tir/block.cc
 create mode 100644 src/script/printer/tir/buffer.cc
 create mode 100644 src/script/printer/tir/expr.cc
 create mode 100644 src/script/printer/tir/for_loop.cc
 create mode 100644 src/script/printer/tir/function.cc
 create mode 100644 src/script/printer/tir/ir.cc
 create mode 100644 src/script/printer/tir/stmt.cc
 create mode 100644 src/script/printer/tir/utils.h
 delete mode 100644 src/script/printer/traced_object_functor.cc
 delete mode 100644 src/script/printer/utils.h
 delete mode 100644 src/script/printer/var_table.cc
 create mode 100644 src/tir/analysis/calculate_allocated_memory.cc
 create mode 100644 src/tir/ir/legacy_printer.cc
 create mode 100644 src/tir/transforms/install_debug_spans.cc
 create mode 100644 src/tir/transforms/install_debug_spans.h
 create mode 100644 src/tir/transforms/reduce_branching_through_overcompute.cc
 create mode 100644 src/tir/transforms/remove_no_op.h
 rename src/tir/{op/runtime.cc => transforms/simplify.h} (60%)
 create mode 100644 tests/cpp-runtime/hexagon/hexagon_conv_utils_test.h
 copy tests/cpp-runtime/hexagon/{hexagon_fp16_utils_tests.cc => hexagon_quant_utils_tests.cc} (58%)
 copy src/relay/qnn/pass/legalize.cc => tests/cpp-runtime/opencl/opencl_nativeptr.cc (55%)
 delete mode 100644 tests/cpp-runtime/opencl/run_gtests.cc
 delete mode 100644 tests/cpp/traced_object_test.cc
 delete mode 100644 tests/cpp/tvmscript_printer_irdocsifier_test.cc
 delete mode 100644 tests/cpp/tvmscript_printer_traced_object_functor_test.cc
 delete mode 100644 tests/cpp/tvmscript_printer_var_table_test.cc
 delete mode 100644 tests/lint/check_request_hook.py
 rename python/tvm/script/parser_v1/_ffi_api.py => tests/micro/project_api/__init__.py (90%)
 rename tests/{python/unittest/test_tvmscript_printer_entry_point.py => micro/project_api/test_zephyr_microtvm_api_server.py} (69%)
 copy tests/micro/project_api/{test_project_api.py => utils.py} (66%)
 copy {python/tvm/script/parser_v1 => tests/micro/zephyr}/__init__.py (85%)
 create mode 100644 tests/micro/zephyr/test_ms_tuning.py
 rename tests/micro/zephyr/{test_utils.py => utils.py} (93%)
 create mode 100644 tests/python/contrib/test_amx.py
 create mode 100644 tests/python/contrib/test_clml/test_adreno_collage_targets.py
 copy tests/python/{unittest/test_tir_transform_ir_utils.py => contrib/test_clml/test_compiler.py} (57%)
 create mode 100755 tests/python/contrib/test_hexagon/topi/slice_op/test_global_avg_pool2d.py
 create mode 100644 tests/python/contrib/test_hexagon/topi/test_conv2d_quant_intrin.py
 delete mode 100644 tests/python/contrib/test_opencl/test_run_gtests.py
 create mode 100644 tests/python/integration/test_arm_aprofile.py
 create mode 100644 tests/python/relay/aot/test_crt_forward_declarations.py
 copy tests/{micro/arduino => python/relay/opencl_texture}/conftest.py (58%)
 create mode 100644 tests/python/relay/opencl_texture/test_network.py
 create mode 100644 tests/python/relay/opencl_texture/test_pool_texture.py
 create mode 100644 tests/python/relay/strategy/arm_cpu/test_quantized_convolution.py
 create mode 100644 tests/python/relay/utils/tag_span.py
 create mode 100644 tests/python/tir/test_debug_info.py
 create mode 100644 tests/python/topi/python/test_topi_conv2d_tensordot_opts.py
 copy tests/python/{frontend/test_common.py => unittest/test_arith_detect_cse.py} (69%)
 copy tests/python/unittest/{test_tvmscript_regression.py => test_evaluator_with_preproc.py} (51%)
 create mode 100644 tests/python/unittest/test_meta_schedule_postproc_disallow_async_strided_mem_copy.py
 create mode 100644 tests/python/unittest/test_meta_schedule_postproc_verify_vtcm_limit.py
 create mode 100644 tests/python/unittest/test_micro_ms_tuning.py
 copy tests/{micro/arduino/test_arduino_error_detection.py => python/unittest/test_rpc_base.py} (55%)
 create mode 100644 tests/python/unittest/test_tir_analysis_calculate_allocated_memory.py
 create mode 100644 tests/python/unittest/test_tir_transform_reduce_branching_through_overcompute.py
 delete mode 100644 tests/python/unittest/test_tvmscript_printer_frame.py
 delete mode 100644 tests/python/unittest/test_tvmscript_printer_irdocsifier.py
 create mode 100644 tests/python/unittest/test_tvmscript_printer_tir.py
 delete mode 100644 tests/python/unittest/test_tvmscript_printer_var_table.py
 delete mode 100644 tests/python/unittest/test_tvmscript_spans.py
 delete mode 100755 tests/scripts/task_ci_setup.sh
 rename python/tvm/script/parser_v1/__init__.py => tests/scripts/task_clear_pytest.sh (84%)
 mode change 100644 => 100755
 copy tests/scripts/{task_config_build_minimal.sh => task_microtvm_cpp_tests.sh} (61%)
 copy tests/scripts/{task_config_build_adreno.sh => task_opencl_cpp_unittest.sh} (64%)
 create mode 100644 web/package-lock.json


[tvm] 01/01: Testing PR13529.

Posted by le...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

leandron pushed a commit to branch ci-docker-staging
in repository https://gitbox.apache.org/repos/asf/tvm.git

commit dad13d1c1bc6381c07e2cb0c4d39e92b059e8a5d
Author: Leandro Nunes <le...@arm.com>
AuthorDate: Mon Jan 16 13:32:04 2023 +0000

    Testing PR13529.
---
 apps/microtvm/cmsisnn/Makefile                     | 19 ++++++++-----
 apps/microtvm/cmsisnn/src/demo_bare_metal.c        |  4 +--
 apps/microtvm/ethosu/Makefile                      | 24 ++++++++++-------
 apps/microtvm/ethosu/src/demo_bare_metal.c         |  4 +--
 apps/microtvm/ethosu/src/demo_freertos.c           |  4 +--
 .../install/ubuntu_install_ethosu_driver_stack.sh  | 12 ++++++---
 gallery/how_to/work_with_microtvm/micro_ethosu.py  |  4 +--
 python/tvm/micro/testing/aot_test_utils.py         |  8 +++---
 tests/python/contrib/test_cmsisnn/utils.py         |  4 +--
 tests/python/contrib/test_ethosu/infra.py          |  4 +--
 tests/python/relay/aot/corstone300.mk              | 31 +++++++++++++---------
 11 files changed, 69 insertions(+), 49 deletions(-)

diff --git a/apps/microtvm/cmsisnn/Makefile b/apps/microtvm/cmsisnn/Makefile
index e7d1b7081d..3a14c46779 100644
--- a/apps/microtvm/cmsisnn/Makefile
+++ b/apps/microtvm/cmsisnn/Makefile
@@ -40,15 +40,14 @@ PKG_CFLAGS = ${PKG_COMPILE_OPTS} \
 	-I${STANDALONE_CRT_PATH}/include \
 	-I${STANDALONE_CRT_PATH}/src/runtime/crt/include \
 	-I${PWD}/include \
-	-I${CORSTONE_300_PATH} \
+	-I${ETHOSU_PLATFORM_PATH}/drivers/uart/include \
 	-I${CMSIS_PATH}/Device/ARM/${ARM_CPU}/Include/ \
 	-I${CMSIS_PATH}/CMSIS/Core/Include \
 	-I${CMSIS_NN_PATH}/Include \
 	-I${CMSIS_PATH}/CMSIS/DSP/Include \
 	-I$(abspath $(BUILD_DIR))/codegen/host/include
-CMSIS_NN_CMAKE_FLAGS = -DCMAKE_TOOLCHAIN_FILE=$(abspath $(BUILD_DIR))/../arm-none-eabi-gcc.cmake \
-	-DTARGET_CPU=cortex-m55 \
-	-DBUILD_CMSIS_NN_FUNCTIONS=YES
+CMAKE_FLAGS = -DCMAKE_TOOLCHAIN_FILE=$(abspath $(BUILD_DIR))/../arm-none-eabi-gcc.cmake \
+	-DTARGET_CPU=cortex-m55
 PKG_LDFLAGS = -lm -specs=nosys.specs -static -T corstone300.ld
 
 $(ifeq VERBOSE,1)
@@ -62,7 +61,7 @@ CODEGEN_SRCS = $(wildcard $(abspath $(BUILD_DIR))/codegen/host/src/*.c)
 CODEGEN_OBJS = $(subst .c,.o,$(CODEGEN_SRCS))
 CMSIS_STARTUP_SRCS = $(wildcard ${CMSIS_PATH}/Device/ARM/${ARM_CPU}/Source/*.c)
 CMSIS_NN_SRCS = $(shell find ${CMSIS_NN_PATH}/Source/*/*.c)
-UART_SRCS = $(wildcard ${CORSTONE_300_PATH}/*.c)
+CORSTONE_300_SRCS = $(wildcard ${CORSTONE_300_PATH}/*.c)
 
 demo: $(BUILD_DIR)/demo
 
@@ -94,9 +93,15 @@ ${BUILD_DIR}/libcmsis_nn.a: $(CMSIS_NN_SRCS)
 	$(QUIET)$(AR) -cr $(abspath $(BUILD_DIR)/libcmsis_nn.a) $(abspath $(BUILD_DIR))/libcmsis_nn/*.o
 	$(QUIET)$(RANLIB) $(abspath $(BUILD_DIR)/libcmsis_nn.a)
 
+# Build UART driver
+${BUILD_DIR}/ethosu_core_platform/libethosu_uart_cmsdk_apb.a:
+	$(QUIET)mkdir -p $(@D)
+	$(QUIET)cd ${ETHOSU_PLATFORM_PATH}/drivers/uart && $(CMAKE) -B $(abspath $(BUILD_DIR)/ethosu_core_platform) $(CMAKE_FLAGS)
+	$(QUIET)cd $(abspath $(BUILD_DIR)/ethosu_core_platform) && $(MAKE)
+
 # Build demo application
-$(BUILD_DIR)/demo: $(DEMO_MAIN) $(UART_SRCS) $(BUILD_DIR)/stack_allocator.o $(BUILD_DIR)/crt_backend_api.o \
-	${BUILD_DIR}/libcodegen.a ${BUILD_DIR}/libcmsis_startup.a ${BUILD_DIR}/libcmsis_nn.a
+$(BUILD_DIR)/demo: $(DEMO_MAIN) $(CORSTONE_300_SRCS) $(BUILD_DIR)/stack_allocator.o $(BUILD_DIR)/crt_backend_api.o \
+	${BUILD_DIR}/libcodegen.a ${BUILD_DIR}/libcmsis_startup.a ${BUILD_DIR}/libcmsis_nn.a ${BUILD_DIR}/ethosu_core_platform/libethosu_uart_cmsdk_apb.a
 	$(QUIET)mkdir -p $(@D)
 	$(QUIET)$(CC) $(PKG_CFLAGS) $(FREERTOS_FLAGS) -o $@ -Wl,--whole-archive $^ -Wl,--no-whole-archive $(PKG_LDFLAGS)
 
diff --git a/apps/microtvm/cmsisnn/src/demo_bare_metal.c b/apps/microtvm/cmsisnn/src/demo_bare_metal.c
index f17fe859f2..80b298d8b2 100644
--- a/apps/microtvm/cmsisnn/src/demo_bare_metal.c
+++ b/apps/microtvm/cmsisnn/src/demo_bare_metal.c
@@ -21,14 +21,14 @@
 #include <tvm_runtime.h>
 #include <tvmgen_detection.h>
 
-#include "uart.h"
+#include "uart_stdout.h"
 
 // Header files generated by convert_image.py
 #include "inputs.h"
 #include "outputs.h"
 
 int main(int argc, char** argv) {
-  uart_init();
+  UartStdOutInit();
   printf("Starting Demo\n");
 
   printf("Running detection inference\n");
diff --git a/apps/microtvm/ethosu/Makefile b/apps/microtvm/ethosu/Makefile
index 63f8adbc27..f28777470e 100644
--- a/apps/microtvm/ethosu/Makefile
+++ b/apps/microtvm/ethosu/Makefile
@@ -41,20 +41,18 @@ PKG_CFLAGS = ${PKG_COMPILE_OPTS} \
 	-I${STANDALONE_CRT_PATH}/include \
 	-I${STANDALONE_CRT_PATH}/src/runtime/crt/include \
 	-I${PWD}/include \
-	-I${CORSTONE_300_PATH} \
-	-I${ETHOSU_PATH}/core_driver/include \
+	-I${ETHOSU_DRIVER_PATH}/include \
+	-I${ETHOSU_PLATFORM_PATH}/drivers/uart/include \
 	-I${CMSIS_PATH}/Device/ARM/${ARM_CPU}/Include/ \
 	-I${CMSIS_PATH}/CMSIS/Core/Include \
 	-I${CMSIS_NN_PATH}/Include \
 	-I${CMSIS_PATH}/CMSIS/DSP/Include \
 	-I$(abspath $(BUILD_DIR))/codegen/host/include \
 	-DETHOSU_TEST_RUNNER_TOL=${ETHOSU_TEST_RUNNER_TOL}
-DRIVER_CMAKE_FLAGS = -DCMAKE_TOOLCHAIN_FILE=$(abspath $(BUILD_DIR))/../arm-none-eabi-gcc.cmake \
-	-DETHOSU_LOG_SEVERITY=debug \
-	-DCMAKE_SYSTEM_PROCESSOR=cortex-m55
-CMSIS_NN_CMAKE_FLAGS = -DCMAKE_TOOLCHAIN_FILE=$(abspath $(BUILD_DIR))/../arm-none-eabi-gcc.cmake \
-	-DTARGET_CPU=cortex-m55 \
-	-DBUILD_CMSIS_NN_FUNCTIONS=YES
+CMAKE_FLAGS = -DCMAKE_TOOLCHAIN_FILE=$(abspath $(BUILD_DIR))/../arm-none-eabi-gcc.cmake \
+	-DTARGET_CPU=cortex-m55
+DRIVER_CMAKE_FLAGS = $(CMAKE_FLAGS) \
+	-DETHOSU_LOG_SEVERITY=debug
 PKG_LDFLAGS = -lm -specs=nosys.specs -static -T corstone300.ld
 
 $(ifeq VERBOSE,1)
@@ -84,7 +82,7 @@ CODEGEN_SRCS = $(wildcard $(abspath $(BUILD_DIR))/codegen/host/src/*.c)
 CODEGEN_OBJS = $(subst .c,.o,$(CODEGEN_SRCS))
 CMSIS_STARTUP_SRCS = $(wildcard ${CMSIS_PATH}/Device/ARM/${ARM_CPU}/Source/*.c)
 CMSIS_NN_SOFTMAX_SRCS = $(shell find ${CMSIS_NN_PATH}/Source/SoftmaxFunctions/*.c)
-UART_SRCS = $(wildcard ${CORSTONE_300_PATH}/*.c)
+CORSTONE_300_SRCS = $(wildcard ${CORSTONE_300_PATH}/*.c)
 
 demo: $(BUILD_DIR)/demo
 
@@ -122,7 +120,13 @@ ${BUILD_DIR}/libcmsis_nn_softmax.a: $(CMSIS_NN_SOFTMAX_SRCS)
 	$(QUIET)$(AR) -cr $(abspath $(BUILD_DIR)/libcmsis_nn_softmax.a) $(abspath $(BUILD_DIR))/libcmsis_nn/*.o
 	$(QUIET)$(RANLIB) $(abspath $(BUILD_DIR)/libcmsis_nn_softmax.a)
 
-$(BUILD_DIR)/demo: $(DEMO_MAIN) src/tvm_ethosu_runtime.c $(FREERTOS_SOURCES) $(UART_SRCS) $(BUILD_DIR)/stack_allocator.o $(BUILD_DIR)/crt_backend_api.o ${BUILD_DIR}/libcodegen.a ${BUILD_DIR}/libcmsis_startup.a ${BUILD_DIR}/ethosu_core_driver/libethosu_core_driver.a ${BUILD_DIR}/libcmsis_nn_softmax.a
+# Build UART driver
+${BUILD_DIR}/ethosu_core_platform/libethosu_uart_cmsdk_apb.a:
+	$(QUIET)mkdir -p $(@D)
+	$(QUIET)cd ${ETHOSU_PLATFORM_PATH}/drivers/uart && $(CMAKE) -B $(abspath $(BUILD_DIR)/ethosu_core_platform) $(CMAKE_FLAGS)
+	$(QUIET)cd $(abspath $(BUILD_DIR)/ethosu_core_platform) && $(MAKE)
+
+$(BUILD_DIR)/demo: $(DEMO_MAIN) src/tvm_ethosu_runtime.c $(FREERTOS_SOURCES) $(CORSTONE_300_SRCS) $(BUILD_DIR)/stack_allocator.o $(BUILD_DIR)/crt_backend_api.o ${BUILD_DIR}/libcodegen.a ${BUILD_DIR}/libcmsis_startup.a ${BUILD_DIR}/ethosu_core_driver/libethosu_core_driver.a ${BUILD_DIR}/libcmsis_nn_softmax.a ${BUILD_DIR}/ethosu_core_platform/libethosu_uart_cmsdk_apb.a
 	$(QUIET)mkdir -p $(@D)
 	$(QUIET)$(CC) $(PKG_CFLAGS) $(FREERTOS_FLAGS) -o $@ $^ $(PKG_LDFLAGS)
 
diff --git a/apps/microtvm/ethosu/src/demo_bare_metal.c b/apps/microtvm/ethosu/src/demo_bare_metal.c
index febc212129..1bef90cfb3 100644
--- a/apps/microtvm/ethosu/src/demo_bare_metal.c
+++ b/apps/microtvm/ethosu/src/demo_bare_metal.c
@@ -21,7 +21,7 @@
 #include <tvm_runtime.h>
 
 #include "ethosu_mod.h"
-#include "uart.h"
+#include "uart_stdout.h"
 
 // Header files generated by convert_image.py and convert_labels.py
 #include "inputs.h"
@@ -31,7 +31,7 @@
 int abs(int v) { return v * ((v > 0) - (v < 0)); }
 
 int main(int argc, char** argv) {
-  uart_init();
+  UartStdOutInit();
   printf("Starting Demo\n");
   EthosuInit();
 
diff --git a/apps/microtvm/ethosu/src/demo_freertos.c b/apps/microtvm/ethosu/src/demo_freertos.c
index 4fa363a50e..e59d7aeacc 100644
--- a/apps/microtvm/ethosu/src/demo_freertos.c
+++ b/apps/microtvm/ethosu/src/demo_freertos.c
@@ -24,7 +24,7 @@
 #include <tvm_runtime.h>
 
 #include "ethosu_mod.h"
-#include "uart.h"
+#include "uart_stdout.h"
 
 // Header files generated by convert_image.py and convert_labels.py
 #include "inputs.h"
@@ -46,7 +46,7 @@ static QueueHandle_t xQueue = NULL;
 
 int main(void) {
   // Platform UART
-  uart_init();
+  UartStdOutInit();
   // NPU
   EthosuInit();
 
diff --git a/docker/install/ubuntu_install_ethosu_driver_stack.sh b/docker/install/ubuntu_install_ethosu_driver_stack.sh
index 0fb35b13e7..b86569e3a9 100755
--- a/docker/install/ubuntu_install_ethosu_driver_stack.sh
+++ b/docker/install/ubuntu_install_ethosu_driver_stack.sh
@@ -23,7 +23,7 @@ set -o pipefail
 fvp_dir="/opt/arm/FVP_Corstone_SSE-300"
 cmake_dir="/opt/arm/cmake"
 ethosu_dir="/opt/arm/ethosu"
-ethosu_driver_ver="21.11"
+ethosu_driver_ver="22.08"
 
 mkdir -p /opt/arm
 
@@ -80,9 +80,13 @@ git clone --branch ${ethosu_driver_ver} "https://review.mlplatform.org/ml/ethos-
 git clone --branch ${ethosu_driver_ver} "https://review.mlplatform.org/ml/ethos-u/ethos-u-core-platform" core_platform
 
 # Build Driver
-mkdir ${ethosu_dir}/core_driver/build && cd ${ethosu_dir}/core_driver/build
-cmake -DCMAKE_TOOLCHAIN_FILE=${ethosu_dir}/core_platform/cmake/toolchain/arm-none-eabi-gcc.cmake -DETHOSU_LOG_SEVERITY=debug -DTARGET_CPU=cortex-m55 ..
-make
+NPU_VARIANTS=("u55" "u65")
+for i in ${NPU_VARIANTS[*]}
+do
+    mkdir ${ethosu_dir}/core_driver/build_${i} && cd ${ethosu_dir}/core_driver/build_${i}
+    cmake -DCMAKE_TOOLCHAIN_FILE=${ethosu_dir}/core_platform/cmake/toolchain/arm-none-eabi-gcc.cmake -DETHOSU_LOG_SEVERITY=debug -DTARGET_CPU=cortex-m55 -DETHOSU_TARGET_NPU_CONFIG=ethos-${i}-128 ..
+    make
+done
 
 # Build NN Library
 mkdir ${CMSIS_PATH}/CMSIS-NN/build/ && cd ${CMSIS_PATH}/CMSIS-NN/build/
diff --git a/gallery/how_to/work_with_microtvm/micro_ethosu.py b/gallery/how_to/work_with_microtvm/micro_ethosu.py
index f257507bb5..74a9d59d77 100644
--- a/gallery/how_to/work_with_microtvm/micro_ethosu.py
+++ b/gallery/how_to/work_with_microtvm/micro_ethosu.py
@@ -380,7 +380,7 @@ TVM to offload operators to the Ethos(TM)-U55 where possible.
 #     #include <tvm_runtime.h>
 #
 #     #include "ethosu_mod.h"
-#     #include "uart.h"
+#     #include "uart_stdout.h"
 #
 #     // Header files generated by convert_image.py and convert_labels.py
 #     #include "inputs.h"
@@ -390,7 +390,7 @@ TVM to offload operators to the Ethos(TM)-U55 where possible.
 #     int abs(int v) { return v * ((v > 0) - (v < 0)); }
 #
 #     int main(int argc, char** argv) {
-#       uart_init();
+#       UartStdOutInit();
 #       printf("Starting Demo\n");
 #       EthosuInit();
 #
diff --git a/python/tvm/micro/testing/aot_test_utils.py b/python/tvm/micro/testing/aot_test_utils.py
index 89c08395de..06cd0f1c9e 100644
--- a/python/tvm/micro/testing/aot_test_utils.py
+++ b/python/tvm/micro/testing/aot_test_utils.py
@@ -41,9 +41,9 @@ AOT_DEFAULT_RUNNER = AOTTestRunner()
 AOT_CORSTONE300_RUNNER = AOTTestRunner(
     makefile="corstone300",
     prologue="""
-    uart_init();
+    UartStdOutInit();
     """,
-    includes=["uart.h"],
+    includes=["uart_stdout.h"],
     pass_config={
         "relay.ext.cmsisnn.options": {
             "mcpu": "cortex-m55",
@@ -54,9 +54,9 @@ AOT_CORSTONE300_RUNNER = AOTTestRunner(
 AOT_USMP_CORSTONE300_RUNNER = AOTTestRunner(
     makefile="corstone300",
     prologue="""
-    uart_init();
+    UartStdOutInit();
     """,
-    includes=["uart.h"],
+    includes=["uart_stdout.h"],
     pass_config={
         "relay.ext.cmsisnn.options": {
             "mcpu": "cortex-m55",
diff --git a/tests/python/contrib/test_cmsisnn/utils.py b/tests/python/contrib/test_cmsisnn/utils.py
index 1ec3e609f1..74d9686a78 100644
--- a/tests/python/contrib/test_cmsisnn/utils.py
+++ b/tests/python/contrib/test_cmsisnn/utils.py
@@ -274,9 +274,9 @@ def create_test_runner(compiler_cpu="cortex-m55", cpu_flags=""):
     return AOTTestRunner(
         makefile="corstone300",
         prologue="""
-        uart_init();
+        UartStdOutInit();
         """,
-        includes=["uart.h"],
+        includes=["uart_stdout.h"],
         pass_config={
             "relay.ext.cmsisnn.options": {
                 "mcpu": compiler_cpu + cpu_flags,
diff --git a/tests/python/contrib/test_ethosu/infra.py b/tests/python/contrib/test_ethosu/infra.py
index b2bbcd377b..efab6e6911 100644
--- a/tests/python/contrib/test_ethosu/infra.py
+++ b/tests/python/contrib/test_ethosu/infra.py
@@ -133,7 +133,7 @@ def create_test_runner(
     ethosu_variant = ethosu_variant.upper()
 
     prologue = """
-    uart_init();
+    UartStdOutInit();
     EthosuInit();
 
     struct ethosu_driver* ethos_u = ethosu_reserve_driver();
@@ -158,7 +158,7 @@ def create_test_runner(
         epilogue="""
         ethosu_release_driver(ethos_u);
         """,
-        includes=["uart.h", "ethosu_55.h", "ethosu_mod.h", "hard_fault.h"],
+        includes=["uart_stdout.h", "ethosu_55.h", "ethosu_mod.h", "hard_fault.h"],
         parameters={
             "ETHOSU_TEST_ROOT": test_root,
             "NPU_MACS": ethosu_macs,
diff --git a/tests/python/relay/aot/corstone300.mk b/tests/python/relay/aot/corstone300.mk
index ebda50d9cf..f968a231af 100644
--- a/tests/python/relay/aot/corstone300.mk
+++ b/tests/python/relay/aot/corstone300.mk
@@ -41,7 +41,8 @@ DMLC_CORE=${TVM_ROOT}/3rdparty/dmlc-core
 ETHOSU_PATH=/opt/arm/ethosu
 DRIVER_PATH=${ETHOSU_PATH}/core_driver
 CMSIS_PATH=${ETHOSU_PATH}/cmsis
-PLATFORM_PATH=${ETHOSU_PATH}/core_platform/targets/corstone-300
+ETHOSU_PLATFORM_PATH=/opt/arm/ethosu/core_platform
+CORSTONE_300_PATH = ${ETHOSU_PLATFORM_PATH}/targets/corstone-300
 PKG_COMPILE_OPTS = -g -Wall -O2 -Wno-incompatible-pointer-types -Wno-format -Werror-implicit-function-declaration -mcpu=${MCPU}${MCPU_FLAGS} -mthumb -mfloat-abi=${MFLOAT_ABI} -std=gnu99
 CMAKE = /opt/arm/cmake/bin/cmake
 CC = arm-none-eabi-gcc
@@ -58,15 +59,14 @@ PKG_CFLAGS = ${PKG_COMPILE_OPTS} \
 	-I$(build_dir)/../include \
 	-I${TVM_ROOT}/src/runtime/contrib/ethosu/bare_metal \
 	-I$(CODEGEN_ROOT)/host/include \
-	-I${PLATFORM_PATH} \
+	-I${ETHOSU_PLATFORM_PATH}/drivers/uart/include \
 	-I${DRIVER_PATH}/include \
 	-I${CMSIS_PATH}/Device/ARM/${ARM_CPU}/Include/ \
 	-I${CMSIS_PATH}/CMSIS/Core/Include \
 	-I${CMSIS_NN_PATH}/Include \
 	-I${CMSIS_PATH}/CMSIS/DSP/Include \
 	-isystem$(STANDALONE_CRT_DIR)/include
-DRIVER_CMAKE_FLAGS = -DCMAKE_TOOLCHAIN_FILE=$(ETHOSU_TEST_ROOT)/arm-none-eabi-gcc.cmake \
-	-DETHOSU_LOG_SEVERITY=debug \
+CMAKE_FLAGS = -DCMAKE_TOOLCHAIN_FILE=${TVM_ROOT}/tests/python/contrib/test_ethosu/reference_system/arm-none-eabi-gcc.cmake \
 	-DCMAKE_SYSTEM_PROCESSOR=cortex-m55
 
 PKG_LDFLAGS = -lm -specs=nosys.specs -static -T ${AOT_TEST_ROOT}/corstone300.ld
@@ -84,10 +84,11 @@ C_CODEGEN_OBJS = $(subst .c,.o,$(C_CODEGEN_SRCS))
 CC_CODEGEN_OBJS = $(subst .cc,.o,$(CC_CODEGEN_SRCS))
 CMSIS_STARTUP_SRCS = $(shell find ${CMSIS_PATH}/Device/ARM/${ARM_CPU}/Source/*.c)
 CMSIS_NN_SRCS = $(shell find ${CMSIS_NN_PATH}/Source/*/*.c)
-UART_SRCS = $(shell find ${PLATFORM_PATH}/*.c)
+CORSTONE_300_SRCS = $(shell find ${CORSTONE_300_PATH}/*.c)
 
 ifdef ETHOSU_TEST_ROOT
-ETHOSU_DRIVER_LIBS = $(wildcard ${DRIVER_PATH}/build/*.a)
+NPU=$(shell echo "${NPU_VARIANT}" | tr '[:upper:]' '[:lower:]')
+ETHOSU_DRIVER_LIBS = ${DRIVER_PATH}/build_${NPU}/*.a
 ETHOSU_RUNTIME=$(build_dir)/tvm_ethosu_runtime.o
 ETHOSU_INCLUDE=-I$(ETHOSU_TEST_ROOT)
 endif
@@ -123,13 +124,19 @@ ${build_dir}/libcmsis_nn.a: $(CMSIS_NN_SRCS)
 	$(QUIET)$(AR) -cr $(abspath $(build_dir)/libcmsis_nn.a) $(abspath $(build_dir))/libcmsis_nn/*.o
 	$(QUIET)$(RANLIB) $(abspath $(build_dir)/libcmsis_nn.a)
 
-${build_dir}/libuart.a: $(UART_SRCS)
-	$(QUIET)mkdir -p $(abspath $(build_dir)/libuart)
-	$(QUIET)cd $(abspath $(build_dir)/libuart) && $(CC) -c $(PKG_CFLAGS) $^
-	$(QUIET)$(AR) -cr $(abspath $(build_dir)/libuart.a) $(abspath $(build_dir))/libuart/*.o
-	$(QUIET)$(RANLIB) $(abspath $(build_dir)/libuart.a)
+${build_dir}/libcorstone.a: $(CORSTONE_300_SRCS)
+	$(QUIET)mkdir -p $(abspath $(build_dir)/libcorstone)
+	$(QUIET)cd $(abspath $(build_dir)/libcorstone) && $(CC) -c $(PKG_CFLAGS) $^
+	$(QUIET)$(AR) -cr $(abspath $(build_dir)/libcorstone.a) $(abspath $(build_dir))/libcorstone/*.o
+	$(QUIET)$(RANLIB) $(abspath $(build_dir)/libcorstone.a)
 
-$(build_dir)/aot_test_runner: $(build_dir)/test.c $(build_dir)/crt_backend_api.o $(build_dir)/stack_allocator.o $(build_dir)/libcodegen.a ${build_dir}/libcmsis_startup.a ${build_dir}/libcmsis_nn.a ${build_dir}/libuart.a $(ETHOSU_DRIVER_LIBS) $(ETHOSU_RUNTIME)
+# Build UART driver
+${build_dir}/ethosu_core_platform/libethosu_uart_cmsdk_apb.a:
+	$(QUIET)mkdir -p $(@D)
+	$(QUIET)cd ${ETHOSU_PLATFORM_PATH}/drivers/uart && $(CMAKE) -B $(abspath $(build_dir)/ethosu_core_platform) $(CMAKE_FLAGS)
+	$(QUIET)cd $(abspath $(build_dir)/ethosu_core_platform) && $(MAKE)
+
+$(build_dir)/aot_test_runner: $(build_dir)/test.c $(build_dir)/crt_backend_api.o $(build_dir)/stack_allocator.o $(build_dir)/libcodegen.a ${build_dir}/libcmsis_startup.a ${build_dir}/libcmsis_nn.a ${build_dir}/libcorstone.a ${build_dir}/ethosu_core_platform/libethosu_uart_cmsdk_apb.a $(ETHOSU_DRIVER_LIBS) $(ETHOSU_RUNTIME)
 	$(QUIET)mkdir -p $(@D)
 	$(QUIET)$(CC) $(PKG_CFLAGS) $(ETHOSU_INCLUDE) -o $@ -Wl,--whole-archive $^ -Wl,--no-whole-archive $(PKG_LDFLAGS)