You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tvm.apache.org by ek...@apache.org on 2023/06/19 07:18:16 UTC

[tvm] branch main updated: [microNPU][ETHOSU] Fix CopyComputeReordering pass arguments (#15063)

This is an automated email from the ASF dual-hosted git repository.

ekalda pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git


The following commit(s) were added to refs/heads/main by this push:
     new e280e01fc1 [microNPU][ETHOSU] Fix CopyComputeReordering pass arguments (#15063)
e280e01fc1 is described below

commit e280e01fc1d2f79cc4f1cda2257a51bd20605bcd
Author: Aleksei-grovety <11...@users.noreply.github.com>
AuthorDate: Mon Jun 19 11:18:11 2023 +0400

    [microNPU][ETHOSU] Fix CopyComputeReordering pass arguments (#15063)
    
    No arguments were passed to CopyComputeReordering pass and the same parameters were used for all targets. This fix takes arguments for CopyComputeReordering pass from Vela.
    In networks tests, the amount of memory used for U65 has increased because now after CopyComputeReordering pass, the number of DMA commands executed in parallel has increased.
---
 .../relay/backend/contrib/ethosu/tir/compiler.py   |   7 +-
 .../tvm/relay/backend/contrib/ethosu/vela_api.py   |  15 ++-
 .../contrib/test_ethosu/test_encode_constants.py   | 102 ++++++++++-----------
 tests/python/contrib/test_ethosu/test_networks.py  |   6 +-
 4 files changed, 71 insertions(+), 59 deletions(-)

diff --git a/python/tvm/relay/backend/contrib/ethosu/tir/compiler.py b/python/tvm/relay/backend/contrib/ethosu/tir/compiler.py
index 2cf45170e4..d47b3d4a7d 100644
--- a/python/tvm/relay/backend/contrib/ethosu/tir/compiler.py
+++ b/python/tvm/relay/backend/contrib/ethosu/tir/compiler.py
@@ -18,12 +18,13 @@
 """The integration of the Arm(R) Ethos(TM)-U NPU TIR compiler."""
 import tvm
 from tvm import relay
-from tvm.relay.expr_functor import ExprMutator
 from tvm.driver.build_module import schedule_to_module
+from tvm.relay.backend.contrib.ethosu import vela_api as vapi
+from tvm.relay.expr_functor import ExprMutator
 
+from .. import util
 from . import passes as ethosu_passes
 from .scheduler import schedule
-from .. import util
 
 
 def lower_ethosu(sch, args, const_dict, name="main"):
@@ -92,7 +93,7 @@ def lower_ethosu(sch, args, const_dict, name="main"):
         mod = ethosu_passes.HoistAllocates()(mod)
         mod = tvm.tir.transform.RemoveNoOp()(mod)
         mod, const_dict = ethosu_passes.MergeConstants(const_dict)(mod)
-        mod = ethosu_passes.CopyComputeReordering()(mod)
+        mod = ethosu_passes.CopyComputeReordering(vapi.get_max_copy_movements())(mod)
 
         disable_storage_rewrite = curr_cfg.get("tir.disable_storage_rewrite", False)
         if not disable_storage_rewrite:
diff --git a/python/tvm/relay/backend/contrib/ethosu/vela_api.py b/python/tvm/relay/backend/contrib/ethosu/vela_api.py
index f241652e73..45c232a461 100644
--- a/python/tvm/relay/backend/contrib/ethosu/vela_api.py
+++ b/python/tvm/relay/backend/contrib/ethosu/vela_api.py
@@ -23,13 +23,15 @@ The following conversion APIs are added :
 """
 import logging
 import math
-from typing import Tuple, Optional, List
+from typing import List, Optional, Tuple
+
 import numpy as np  # type: ignore
 from ethosu.vela import api as vapi  # type: ignore
+from ethosu.vela.architecture_features import Accelerator, create_default_arch
 
 import tvm
-from tvm.relay.backend.contrib.ethosu import util  # type: ignore
 from tvm.relay.backend.contrib.ethosu import tir_to_cs_translator as tirtocs
+from tvm.relay.backend.contrib.ethosu import util  # type: ignore
 
 # pylint: disable=invalid-name
 logger = logging.getLogger("Ethos-U")
@@ -400,3 +402,12 @@ def get_accelerator_config() -> vapi.NpuAccelerator:
     accel_config_str = compiler_attrs.accelerator_config
     assert accel_config_str in npu_accel_str_map.keys(), f"{accel_config_str} is not supported"
     return npu_accel_str_map[accel_config_str]
+
+
+def get_max_copy_movements() -> int:
+    """Get maximum copy movements for CopyComputeReordering pass.
+    max_outstanding_dma from architecture features indicates how many
+    DMA operations can be in-progress.
+    """
+    arch = create_default_arch(Accelerator.from_npu_accelerator(get_accelerator_config()))
+    return arch.max_outstanding_dma
diff --git a/tests/python/contrib/test_ethosu/test_encode_constants.py b/tests/python/contrib/test_ethosu/test_encode_constants.py
index 6a8ff28e44..4341f367f0 100644
--- a/tests/python/contrib/test_ethosu/test_encode_constants.py
+++ b/tests/python/contrib/test_ethosu/test_encode_constants.py
@@ -66,30 +66,29 @@ class WeightStreamOnlyU55:
 @tvm.script.ir_module
 class WeightStreamOnlyU65:
     @T.prim_func
-    def main(input_placeholder: T.Buffer((1, 16, 16, 32), "int8"), input_ethosu_write: T.Buffer((1, 16, 16, 8), "int8")) -> None:
-        # function attr dict
-        T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
-        # buffer definition
-        placeholder = T.Buffer([8192], dtype="int8", data=input_placeholder.data)
-        ethosu_write = T.Buffer([2048], dtype="int8", data=input_ethosu_write.data)
-        buffer_encoded_1 = T.Buffer([192], dtype="uint8")
-        buffer_encoded_2_1 = T.Buffer([192], dtype="uint8")
-        buffer_encoded_4_1 = T.Buffer([208], dtype="uint8")
-        buffer_encoded_6_1 = T.Buffer([192], dtype="uint8")
-        # body
-        p1_data = T.allocate([208], "uint8", "global", annotations={"disable_lower_builtin":True})
-        p1 = T.Buffer([208], "uint8", data=p1_data)
-        p2_data = T.allocate([192], "uint8", "global", annotations={"disable_lower_builtin":True})
-        p2 = T.Buffer([192], "uint8", data=p2_data)
-        p3 = T.Buffer([192], dtype="uint8", data=p1.data)
-        T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_1[0], 192, p3[0], dtype="handle"))
-        T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_2_1[0], 192, p2[0], dtype="handle"))
-        T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p3[0], 80, p3[80], 80, 12, p3[160], 16, p3[176], 16, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
-        T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_4_1[0], 208, p1[0], dtype="handle"))
-        T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write[2], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p2[0], 80, p2[80], 80, 12, p2[160], 16, p2[176], 16, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
-        T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_6_1[0], 192, p2[0], dtype="handle"))
-        T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write[4], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p1[0], 96, p1[96], 80, 12, p1[176], 16, p1[192], 16, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
-        T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write[6], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p2[0], 80, p2[80], 80, 12, p2[160], 16, p2[176], 16, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
+    def main(ifm: T.Buffer((1, 16, 16, 32), "int8"), ethosu_write: T.Buffer((1, 16, 16, 8), "int8")):
+        T.func_attr({"from_legacy_te_schedule": T.bool(True), "global_symbol": "main", "tir.noalias": T.bool(True)})
+        p2_global_6 = T.allocate([192], "uint8", "global", annotations={"disable_lower_builtin": T.bool(True)})
+        p2_global_4 = T.allocate([192], "uint8", "global", annotations={"disable_lower_builtin": T.bool(True)})
+        p2_global_5 = T.allocate([208], "uint8", "global", annotations={"disable_lower_builtin": T.bool(True)})
+        buffer_encoded = T.Buffer((192,), "uint8")
+        p2_global_3 = T.Buffer((192,), "uint8", data=p2_global_6)
+        T.call_extern("handle", "ethosu_copy", buffer_encoded[0], 192, p2_global_3[0])
+        buffer_encoded_1 = T.Buffer((192,), "uint8")
+        p2_global_4_1 = T.Buffer((192,), "uint8", data=p2_global_4)
+        T.call_extern("handle", "ethosu_copy", buffer_encoded_1[0], 192, p2_global_4_1[0])
+        buffer_encoded_2 = T.Buffer((208,), "uint8")
+        p2_global_5_1 = T.Buffer((208,), "uint8", data=p2_global_5)
+        T.call_extern("handle", "ethosu_copy", buffer_encoded_2[0], 208, p2_global_5_1[0])
+        ifm_1 = T.Buffer((8192,), "int8", data=ifm.data)
+        ethosu_write_1 = T.Buffer((2048,), "int8", data=ethosu_write.data)
+        T.call_extern("handle", "ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, ifm_1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p2_global_3[0], 80, p2_global_3[80], 80, 12, p2_global_3[160], 16, p2_global_3[176], 16, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0)
+        buffer_encoded_3 = T.Buffer((192,), "uint8")
+        p2_global_6_1 = T.Buffer((192,), "uint8", data=p2_global_6)
+        T.call_extern("handle", "ethosu_copy", buffer_encoded_3[0], 192, p2_global_6_1[0])
+        T.call_extern("handle", "ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, ifm_1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write_1[2], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p2_global_4_1[0], 80, p2_global_4_1[80], 80, 12, p2_global_4_1[160], 16, p2_global_4_1[176], 16, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0)
+        T.call_extern("handle", "ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, ifm_1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write_1[4], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p2_global_5_1[0], 96, p2_global_5_1[96], 80, 12, p2_global_5_1[176], 16, p2_global_5_1[192], 16, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0)
+        T.call_extern("handle", "ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, ifm_1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write_1[6], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p2_global_6_1[0], 80, p2_global_6_1[80], 80, 12, p2_global_6_1[160], 16, p2_global_6_1[176], 16, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0)
     __tvm_meta__ = None
 # fmt: on
 
@@ -387,33 +386,34 @@ class MixedReadU55:
 @tvm.script.ir_module
 class MixedReadU65:
     @T.prim_func
-    def main(input_ifm: T.Buffer((1,16,16,32), "int8"), input_ethosu_write: T.Buffer((1,16,16,8), "int8")) -> None:
-        # function attr dict
-        T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
-        # buffer definition
-        ifm = T.Buffer([8192], dtype="int8", data=input_ifm.data)
-        ethosu_write = T.Buffer([2048], dtype="int8", data=input_ethosu_write.data)
-        buffer1 = T.Buffer([128], dtype="uint8")
-        buffer2 = T.Buffer([128], dtype="uint8")
-        buffer3 = T.Buffer([128], dtype="uint8")
-        buffer4 = T.Buffer([608], dtype="uint8")
-        buffer5 = T.Buffer([160], dtype="uint8")
-        buffer6 = T.Buffer([128], dtype="uint8")
-        p1_data = T.allocate([128], "uint8", "global", annotations={"disable_lower_builtin":True})
-        p1 = T.Buffer([128], "uint8", data=p1_data)
-        p2_data = T.allocate([4096], "int8", "global", annotations={"disable_lower_builtin":True})
-        p2 = T.Buffer([4096], "int8", data=p2_data)
-        p3_data = T.allocate([128], "uint8", "global", annotations={"disable_lower_builtin":True})
-        p3 = T.Buffer([128], "uint8", data=p3_data)
-        T.evaluate(T.call_extern("ethosu_copy", buffer1[0], 128, p1[0], dtype="handle"))
-        T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, ifm[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 16, 16, 0, 16, p2[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 256, 16, 1, 1, 1, 1, 1, 1, 1, buffer4[0], 304, buffer4[304], 304, 12, buffer5[0], 80, buffer5[80], 80, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
-        T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 128, p3[0], dtype="handle"))
-        T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 16, 16, 0, 16, p2[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 256, 16, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p1[0], 48, p1[48], 48, 12, p1[96], 16, p1[112], 16, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
-        T.evaluate(T.call_extern("ethosu_copy", buffer3[0], 128, p1[0], dtype="handle"))
-        T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 16, 16, 0, 16, p2[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 256, 16, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write[2], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p3[0], 48, p3[48], 48, 12, p3[96], 16, p3[112], 16, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
-        T.evaluate(T.call_extern("ethosu_copy", buffer6[0], 128, p3[0], dtype="handle"))
-        T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 16, 16, 0, 16, p2[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 256, 16, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write[4], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p1[0], 48, p1[48], 48, 12, p1[96], 16, p1[112], 16, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
-        T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 16, 16, 0, 16, p2[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 256, 16, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write[6], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p3[0], 48, p3[48], 48, 12, p3[96], 16, p3[112], 16, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
+    def main(ifm: T.Buffer((1, 16, 16, 32), "int8"), ethosu_write: T.Buffer((1, 16, 16, 8), "int8")):
+        T.func_attr({"from_legacy_te_schedule": T.bool(True), "global_symbol": "main", "tir.noalias": T.bool(True)})
+        p5_global = T.allocate([128], "uint8", "global", annotations={"disable_lower_builtin": T.bool(True)})
+        p5_global_1 = T.allocate([128], "uint8", "global", annotations={"disable_lower_builtin": T.bool(True)})
+        ethosu_write_1 = T.allocate([4096], "int8", "global", annotations={"disable_lower_builtin": T.bool(True)})
+        p5_global_2 = T.allocate([128], "uint8", "global", annotations={"disable_lower_builtin": T.bool(True)})
+        buffer_encoded = T.Buffer((128,), "uint8")
+        p5_global_3 = T.Buffer((128,), "uint8", data=p5_global)
+        T.call_extern("handle", "ethosu_copy", buffer_encoded[0], 128, p5_global_3[0])
+        buffer_encoded_1 = T.Buffer((128,), "uint8")
+        p5_global_4 = T.Buffer((128,), "uint8", data=p5_global_1)
+        T.call_extern("handle", "ethosu_copy", buffer_encoded_1[0], 128, p5_global_4[0])
+        ifm_1 = T.Buffer((8192,), "int8", data=ifm.data)
+        ethosu_write_2 = T.Buffer((4096,), "int8", data=ethosu_write_1)
+        p1_encoded = T.Buffer((608,), "uint8")
+        p2_encoded = T.Buffer((160,), "uint8")
+        T.call_extern("handle", "ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, ifm_1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 16, 16, 0, 16, ethosu_write_2[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 256, 16, 1, 1, 1, 1, 1, 1, 1, p1_encoded[0], 304, p1_encoded[304], 304, 12, p2_encoded[0], 80, p2_encoded[80], 80, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0)
+        buffer_encoded_2 = T.Buffer((128,), "uint8")
+        p5_global_5 = T.Buffer((128,), "uint8", data=p5_global_2)
+        T.call_extern("handle", "ethosu_copy", buffer_encoded_2[0], 128, p5_global_5[0])
+        ethosu_write_3 = T.Buffer((2048,), "int8", data=ethosu_write.data)
+        T.call_extern("handle", "ethosu_conv2d", "int8", 16, 16, 16, 16, 0, 16, ethosu_write_2[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 256, 16, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write_3[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p5_global_3[0], 48, p5_global_3[48], 48, 12, p5_global_3[96], 16, p5_global_3[112], 16, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0)
+        buffer_encoded_3 = T.Buffer((128,), "uint8")
+        p5_global_6 = T.Buffer((128,), "uint8", data=p5_global)
+        T.call_extern("handle", "ethosu_copy", buffer_encoded_3[0], 128, p5_global_6[0])
+        T.call_extern("handle", "ethosu_conv2d", "int8", 16, 16, 16, 16, 0, 16, ethosu_write_2[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 256, 16, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write_3[2], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p5_global_4[0], 48, p5_global_4[48], 48, 12, p5_global_4[96], 16, p5_global_4[112], 16, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0)
+        T.call_extern("handle", "ethosu_conv2d", "int8", 16, 16, 16, 16, 0, 16, ethosu_write_2[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 256, 16, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write_3[4], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p5_global_5[0], 48, p5_global_5[48], 48, 12, p5_global_5[96], 16, p5_global_5[112], 16, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0)
+        T.call_extern("handle", "ethosu_conv2d", "int8", 16, 16, 16, 16, 0, 16, ethosu_write_2[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 256, 16, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write_3[6], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p5_global_6[0], 48, p5_global_6[48], 48, 12, p5_global_6[96], 16, p5_global_6[112], 16, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0)
     __tvm_meta__ = None
 # fmt: on
 
diff --git a/tests/python/contrib/test_ethosu/test_networks.py b/tests/python/contrib/test_ethosu/test_networks.py
index 0df73d6dc5..a5490cbe2b 100644
--- a/tests/python/contrib/test_ethosu/test_networks.py
+++ b/tests/python/contrib/test_ethosu/test_networks.py
@@ -44,8 +44,8 @@ MOBILENET_V2_URL = (
 @pytest.mark.parametrize(
     "accel_type, model_url, workspace_size",
     [
-        ("ethos-u65-256", MOBILENET_V1_URL, 1793376),
-        ("ethos-u65-256", MOBILENET_V2_URL, 2217152),
+        ("ethos-u65-256", MOBILENET_V1_URL, 2338848),
+        ("ethos-u65-256", MOBILENET_V2_URL, 2264320),
         ("ethos-u55-256", MOBILENET_V1_URL, 1793376),
         ("ethos-u55-256", MOBILENET_V2_URL, 2217152),
         ("ethos-u55-128", MOBILENET_V2_URL, 2217152),
@@ -71,7 +71,7 @@ def test_networks_without_usmp(accel_type, model_url, workspace_size):
 @pytest.mark.parametrize(
     "accel_type, model_url, workspace_size",
     [
-        ("ethos-u65-256", MOBILENET_V1_URL, 1206880),
+        ("ethos-u65-256", MOBILENET_V1_URL, 1311200),
         ("ethos-u55-256", MOBILENET_V2_URL, 1509408),
     ],
 )