You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tvm.apache.org by ma...@apache.org on 2022/07/06 15:46:35 UTC
[tvm] branch main updated: [microNPU] Calculate memory pressure for microNPU external functions (#11209)
This is an automated email from the ASF dual-hosted git repository.
manupa pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git
The following commit(s) were added to refs/heads/main by this push:
new cfe8318990 [microNPU] Calculate memory pressure for microNPU external functions (#11209)
cfe8318990 is described below
commit cfe8318990e799215c9baac4ef2c4ecb20a91d9f
Author: Luke Hutton <lu...@arm.com>
AuthorDate: Wed Jul 6 16:46:29 2022 +0100
[microNPU] Calculate memory pressure for microNPU external functions (#11209)
* [microNPU] Calculate memory pressure for microNPU external functions
During the microNPU compilation stage, the "used_memory" annotations on
external microNPU functions are read to determine a memory pressure
value. This value is passed to the cascader to better approximate the
memory available for the optimization.
Change-Id: I11a311b0005e785637014cb451f4aed96edcda26
* fix get size from memory region
Change-Id: I41acfc83f05b2204075edb99f86a0eecaba00f71
* add test case for full offload
Change-Id: If3e672d402ab237fa82e34761bb972d2e9483ba9
---
python/tvm/contrib/ethosu/cascader/scheduler.py | 8 +-
python/tvm/relay/backend/contrib/ethosu/codegen.py | 59 +++++--
python/tvm/tir/usmp/utils.py | 9 +
.../cascader/test_calculate_memory_pressure.py | 186 +++++++++++++++++++++
.../test_ethosu/cascader/test_memory_reduction.py | 163 +++++++++++++++++-
5 files changed, 407 insertions(+), 18 deletions(-)
diff --git a/python/tvm/contrib/ethosu/cascader/scheduler.py b/python/tvm/contrib/ethosu/cascader/scheduler.py
index fd247e660a..2c804a3b3b 100644
--- a/python/tvm/contrib/ethosu/cascader/scheduler.py
+++ b/python/tvm/contrib/ethosu/cascader/scheduler.py
@@ -225,21 +225,21 @@ def choose_proposal(
return proposal_choice
-def extract_memory_info(memory_pool: PoolInfo) -> MemoryRegion:
+def extract_memory_info(memory_pool: PoolInfo, memory_pressure: int) -> MemoryRegion:
"Create a MemoryRegion based on the info in the memory pool"
- size = int(memory_pool.size_hint_bytes)
+ size = int(memory_pool.size_hint_bytes - memory_pressure)
read_bandwidth = int(memory_pool.read_bandwidth_bytes_per_cycle)
write_bandwidth = int(memory_pool.write_bandwidth_bytes_per_cycle)
for param in (size, read_bandwidth, write_bandwidth):
assert param != -1, f"{param} needs to be specified for the cascader."
- name_to_burst_lenght = {
+ name_to_burst_length = {
target.kind.name: burst for target, burst in memory_pool.target_burst_bytes.items()
}
try:
- burst_length = int(name_to_burst_lenght["ethos-u"])
+ burst_length = int(name_to_burst_length["ethos-u"])
except KeyError:
burst_length = 1
diff --git a/python/tvm/relay/backend/contrib/ethosu/codegen.py b/python/tvm/relay/backend/contrib/ethosu/codegen.py
index 423834daa8..5119c04edb 100644
--- a/python/tvm/relay/backend/contrib/ethosu/codegen.py
+++ b/python/tvm/relay/backend/contrib/ethosu/codegen.py
@@ -381,6 +381,46 @@ def _ethos_u55_cascader(sram, enable_striping) -> Callable:
)
+def _calculate_memory_pressure(mod: tvm.ir.IRModule) -> int:
+ """
+ Calculates a worst-case estimate of the memory consumed at the callsite of
+ each microNPU function. This value can be used as a hint to guide the cascader,
+ indicating how aggressively it will need to optimize the input module to fit
+ into the memory that remains in the memory workspace.
+
+ Parameters
+ ----------
+ mod : tvm.ir.IRModule
+ The input module
+
+ Returns
+ -------
+ int
+ Memory pressure value for the module.
+ """
+ memory_pressure = 0
+
+ @util.create_npu_function_pass(opt_level=1)
+ class CalculateMemoryPressure:
+ """
+ Traverse the module and get total memory used by external NPU functions.
+ """
+
+ def transform_npu_function(self, _, func: relay.Function) -> relay.Function:
+ nonlocal memory_pressure
+ max_val = max(func.attrs["used_memory"])
+ memory_pressure += max_val
+ return func
+
+ CalculateMemoryPressure()(mod) # pylint: disable=not-callable
+
+ io_used_memory = 0
+ if not tvm.tir.usmp.utils.use_workspace_io_is_enabled():
+ io_used_memory = int(mod["main"].attrs["io_used_memory"])
+
+ return memory_pressure - io_used_memory
+
+
@tvm._ffi.register_func("relay.ext.ethos-u.relay_to_tir")
def relay_to_tir(mod: tvm.ir.IRModule) -> tvm.ir.IRModule:
"""
@@ -413,21 +453,18 @@ def relay_to_tir(mod: tvm.ir.IRModule) -> tvm.ir.IRModule:
# Use the cascader if it is enabled for the U55 accelerator, otherwise use copy_constants
# scheduler
if util.is_cascader_enabled():
- assert (
- util.get_accelerator_config() != "ethos-u65-256"
- ), "Cascading is not supported for the U65 accelerator"
+ if util.get_accelerator_config() == "ethos-u65-256":
+ raise ValueError("Cascading is not supported for the U65 accelerator")
workspace_memory_pools = mod.attrs["workspace_memory_pools"]
- assert (
- workspace_memory_pools
- ), "Workspace memory pool needs to be provided for the U55 cascader"
-
- assert (
- len(workspace_memory_pools.pools) == 1
- ), "Exactly one workspace pool needs to be provided for the U55 cascader"
+ if not workspace_memory_pools:
+ raise ValueError("Workspace memory pool needs to be provided for the U55 cascader")
+ if len(workspace_memory_pools.pools) != 1:
+ raise ValueError("Exactly one workspace pool needs to be provided for the U55 cascader")
- sram = extract_memory_info(workspace_memory_pools.pools[0])
+ memory_pressure = _calculate_memory_pressure(mod)
+ sram = extract_memory_info(workspace_memory_pools.pools[0], memory_pressure)
tir_mod = LowerToTIR(_ethos_u55_cascader(sram, util.is_striping_enabled()))(mod)
else:
tir_mod = LowerToTIR(copy_constants())(mod)
diff --git a/python/tvm/tir/usmp/utils.py b/python/tvm/tir/usmp/utils.py
index a7221cfe6f..024922e85b 100644
--- a/python/tvm/tir/usmp/utils.py
+++ b/python/tvm/tir/usmp/utils.py
@@ -19,6 +19,7 @@
from typing import Optional, List
+import tvm
from tvm._ffi import register_object
from tvm.runtime import Object
from . import _ffi_api
@@ -31,6 +32,14 @@ from ...ir.memory_pools import PoolInfo
CANDIDATE_MEMORY_POOL_ATTR = "candidate_memory_pools"
+def use_workspace_io_is_enabled() -> bool:
+ """
+ Check whether placing I/O tensors in the workspace is enabled.
+ """
+ ctx = tvm.transform.PassContext.current()
+ return bool(ctx.config.get("tir.usmp.use_workspace_io", False))
+
+
@register_object("tir.usmp.BufferInfo")
class BufferInfo(Object):
"""BufferInfo object holds information related to buffers
diff --git a/tests/python/contrib/test_ethosu/cascader/test_calculate_memory_pressure.py b/tests/python/contrib/test_ethosu/cascader/test_calculate_memory_pressure.py
new file mode 100644
index 0000000000..255ec4bba8
--- /dev/null
+++ b/tests/python/contrib/test_ethosu/cascader/test_calculate_memory_pressure.py
@@ -0,0 +1,186 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=wrong-import-position
+
+"""
+Test memory pressure is calculated correctly from used memory annotations.
+"""
+
+import pytest
+
+pytest.importorskip("ethosu.vela")
+
+import tvm
+from tvm import relay
+from tvm.relay.backend.contrib.ethosu.codegen import _calculate_memory_pressure
+from tvm.contrib.ethosu.cascader.scheduler import extract_memory_info
+from tvm import WorkspacePoolInfo, PoolInfoProperties
+
+
+def _npu_and_non_npu_functions():
+ mod = tvm.IRModule({})
+
+ # NPU function 1
+ x = relay.var("x", shape=(1, 2, 2, 4), dtype="int8")
+ max_pool = relay.nn.max_pool2d(x)
+ composite_func = relay.Function([x], max_pool)
+ composite_func = composite_func.with_attr("Composite", "ethos-u.pooling")
+ inp = relay.var("input", shape=(1, 2, 2, 4), dtype="int8")
+ compiler_func = relay.Function([inp], composite_func)
+ compiler_func = compiler_func.with_attr("used_memory", [32])
+ npu_compiler_func1 = compiler_func.with_attr("Compiler", "ethos-u")
+ g1 = relay.GlobalVar("g1")
+ mod[g1] = npu_compiler_func1
+
+ # Non-NPU function
+ x = relay.var("x", shape=(1, 2, 2, 4), dtype="int8")
+ max_pool = relay.abs(x)
+ composite_func = relay.Function([x], max_pool)
+ composite_func = composite_func.with_attr("Composite", "foo.unary_elementwise")
+ inp = relay.var("input", shape=(1, 2, 2, 4), dtype="int8")
+ compiler_func = relay.Function([inp], composite_func)
+ compiler_func = compiler_func.with_attr("used_memory", [32])
+ non_npu_compiler_func = compiler_func.with_attr("Compiler", "foo")
+ g2 = relay.GlobalVar("g2")
+ mod[g2] = non_npu_compiler_func
+
+ # NPU function 2
+ x = relay.var("x", shape=(1, 2, 2, 4), dtype="int8")
+ max_pool = relay.abs(x)
+ composite_func = relay.Function([x], max_pool)
+ composite_func = composite_func.with_attr("Composite", "ethos-u.unary_elementwise")
+ inp = relay.var("input", shape=(1, 2, 2, 4), dtype="int8")
+ compiler_func = relay.Function([inp], composite_func)
+ compiler_func = compiler_func.with_attr("used_memory", [32])
+ npu_compiler_func2 = compiler_func.with_attr("Compiler", "ethos-u")
+ g3 = relay.GlobalVar("g3")
+ mod[g3] = npu_compiler_func2
+
+ # Main
+ inp = relay.var("main_input", shape=(1, 2, 2, 4), dtype="int8")
+ call1 = relay.Call(g1, [inp])
+ call2 = relay.Call(g2, [call1])
+ call3 = relay.Call(g3, [call2])
+ main_func = relay.Function([inp], call3)
+ main_func = main_func.with_attr("io_used_memory", 32)
+ mod["main"] = main_func
+ return mod
+
+
+def _parallel_npu_functions():
+ mod = tvm.IRModule({})
+
+ # NPU function 1
+ x = relay.var("x", shape=(1, 2, 2, 4), dtype="int8")
+ max_pool = relay.nn.max_pool2d(x)
+ composite_func = relay.Function([x], max_pool)
+ composite_func = composite_func.with_attr("Composite", "ethos-u.pooling")
+ inp = relay.var("input", shape=(1, 2, 2, 4), dtype="int8")
+ compiler_func = relay.Function([inp], composite_func)
+ compiler_func = compiler_func.with_attr("used_memory", [32])
+ npu_compiler_func1 = compiler_func.with_attr("Compiler", "ethos-u")
+ g1 = relay.GlobalVar("g1")
+ mod[g1] = npu_compiler_func1
+
+ # NPU function 2
+ x = relay.var("x", shape=(1, 2, 2, 4), dtype="int8")
+ abs_op = relay.abs(x)
+ composite_func = relay.Function([x], abs_op)
+ composite_func = composite_func.with_attr("Composite", "ethos-u.unary_elementwise")
+ inp = relay.var("input", shape=(1, 2, 2, 4), dtype="int8")
+ compiler_func = relay.Function([inp], composite_func)
+ compiler_func = compiler_func.with_attr("used_memory", [32 + 16])
+ npu_compiler_func2 = compiler_func.with_attr("Compiler", "ethos-u")
+ g2 = relay.GlobalVar("g2")
+ mod[g2] = npu_compiler_func2
+
+ # Main
+ inp = relay.var("main_input", shape=(1, 2, 2, 4), dtype="int8")
+ call1 = relay.Call(g1, [inp])
+ call2 = relay.Call(g2, [inp])
+ concat = relay.concatenate([call1, call2], axis=3)
+ main_func = relay.Function([inp], concat)
+ main_func = main_func.with_attr("io_used_memory", 32)
+ mod["main"] = main_func
+ return mod
+
+
+def _full_offload():
+ mod = tvm.IRModule({})
+
+ # NPU function
+ x = relay.var("x", shape=(1, 4, 4, 16), dtype="int8")
+ max_pool = relay.nn.max_pool2d(x)
+ composite_func = relay.Function([x], max_pool)
+ composite_func = composite_func.with_attr("Composite", "ethos-u.pooling")
+ inp = relay.var("input", shape=(1, 4, 4, 16), dtype="int8")
+ compiler_func = relay.Function([inp], composite_func)
+ compiler_func = compiler_func.with_attr("used_memory", [256 + 256])
+ npu_compiler_func = compiler_func.with_attr("Compiler", "ethos-u")
+ g1 = relay.GlobalVar("g1")
+ mod[g1] = npu_compiler_func
+
+ # Main
+ inp = relay.var("main_input", shape=(1, 4, 4, 16), dtype="int8")
+ call = relay.Call(g1, [inp])
+ main_func = relay.Function([inp], call)
+ main_func = main_func.with_attr("io_used_memory", 256 + 256)
+ mod["main"] = main_func
+ return mod
+
+
+@pytest.mark.parametrize(
+ "model_func,use_workspace_io,expected_memory_pressure",
+ [
+ (_npu_and_non_npu_functions, True, (16 + 16) + (16 + 16)),
+ (_npu_and_non_npu_functions, False, (16 + 16) + (16 + 16) - (16 + 16)),
+ (_parallel_npu_functions, True, (16 + 16) + (16 + 16 + 16)),
+ (_parallel_npu_functions, False, (16 + 16) + (16 + 16 + 16) - (16 + 16)),
+ (_full_offload, True, (256 + 256)),
+ (_full_offload, False, (256 + 256) - (256 + 256)),
+ ],
+)
+def test_calculate_memory_pressure_pass(model_func, use_workspace_io, expected_memory_pressure):
+ """
+ Test that memory pressure is correctly calculated for NPU external functions.
+ """
+
+ mod = model_func()
+ with tvm.transform.PassContext(config={"tir.usmp.use_workspace_io": use_workspace_io}):
+ memory_pressure = _calculate_memory_pressure(mod)
+ assert memory_pressure == expected_memory_pressure
+
+
+def test_extract_memory_info():
+ """
+ Test memory pressure value correctly reduces the workspace size.
+ """
+ initial_pool_size = 2000
+ memory_pressure = 500
+ memory_pool = WorkspacePoolInfo(
+ "SRAM",
+ [tvm.target.Target("c"), tvm.target.Target("ethos-u")],
+ PoolInfoProperties(
+ size_hint_bytes=initial_pool_size,
+ read_bandwidth_bytes_per_cycle=16,
+ write_bandwidth_bytes_per_cycle=16,
+ target_burst_bytes={tvm.target.Target("ethos-u"): 1},
+ ),
+ )
+
+ sram = extract_memory_info(memory_pool, memory_pressure)
+ assert sram.size == initial_pool_size - memory_pressure
diff --git a/tests/python/contrib/test_ethosu/cascader/test_memory_reduction.py b/tests/python/contrib/test_ethosu/cascader/test_memory_reduction.py
index 5c3b745cb4..e882822405 100644
--- a/tests/python/contrib/test_ethosu/cascader/test_memory_reduction.py
+++ b/tests/python/contrib/test_ethosu/cascader/test_memory_reduction.py
@@ -28,13 +28,12 @@ from tvm.micro import model_library_format as mlf
from tvm.relay.op.contrib.ethosu import partition_for_ethosu
import tvm
from tvm import WorkspaceMemoryPools, WorkspacePoolInfo, PoolInfoProperties
+from tvm.relay.backend.contrib.ethosu.codegen import extract_memory_info
from .. import infra
-def _get_ethosu_workspace_size(
- mod, params, accel_type, pool_size, enable_cascader, enable_striping
-):
+def _get_compilation_config(accel_type, enable_cascader, enable_striping):
enable_usmp = True
target = tvm.target.Target("c")
@@ -61,6 +60,17 @@ def _get_ethosu_workspace_size(
"tir.disable_storage_rewrite": enable_usmp,
}
+ return target, ethosu_target, runtime, executor, pass_config
+
+
+def _get_ethosu_workspace_size(
+ mod, params, accel_type, pool_size, enable_cascader, enable_striping
+):
+
+ target, ethosu_target, runtime, executor, pass_config = _get_compilation_config(
+ accel_type, enable_cascader, enable_striping
+ )
+
workspace_memory_pools = WorkspaceMemoryPools(
[
WorkspacePoolInfo(
@@ -234,3 +244,150 @@ def test_depthwise2d_conv2d_pooling(
assert workspace_size_cascader_disabled == expected_ws_size_without_striping
assert workspace_size_cascader_enabled_striping_enabled == expected_ws_size_with_striping
+
+
+def test_multiple_memory_pools():
+ """
+ The cascader does not support multiple workspace memory
+ pools. Check the correct error is thrown.
+ """
+ np.random.seed(2)
+ ifm_shape = (1, 80, 75, 3)
+
+ target, ethosu_target, runtime, executor, pass_config = _get_compilation_config(
+ "ethos-u55-256", True, True
+ )
+ workspace_memory_pools = WorkspaceMemoryPools(
+ [
+ WorkspacePoolInfo(
+ "SRAM",
+ [target, ethosu_target],
+ PoolInfoProperties(
+ size_hint_bytes=1,
+ read_bandwidth_bytes_per_cycle=16,
+ write_bandwidth_bytes_per_cycle=16,
+ target_burst_bytes={ethosu_target: 1},
+ ),
+ ),
+ WorkspacePoolInfo(
+ "SRAM",
+ [target, ethosu_target],
+ PoolInfoProperties(
+ size_hint_bytes=1,
+ read_bandwidth_bytes_per_cycle=16,
+ write_bandwidth_bytes_per_cycle=16,
+ target_burst_bytes={ethosu_target: 1},
+ ),
+ ),
+ ]
+ )
+
+ @tf.function
+ def tf_graph(x):
+ return tf.nn.max_pool(x, (3, 3), (1, 1), "SAME")
+
+ _, tflite_graph = infra.get_tflite_graph(tf_graph, [ifm_shape])
+ tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0)
+ relay_module, params = relay.frontend.from_tflite(tflite_model)
+ mod = partition_for_ethosu(relay_module, params)
+
+ with pytest.raises(ValueError) as e:
+ with tvm.transform.PassContext(opt_level=3, config=pass_config):
+ tvm.relay.build(
+ mod,
+ target,
+ executor=executor,
+ runtime=runtime,
+ workspace_memory_pools=workspace_memory_pools,
+ params=params,
+ )
+
+ expected_reason = "Exactly one workspace pool needs to be provided for the U55 cascader"
+ on_error = "A ValueError was caught but its reason is not the expected one."
+ assert expected_reason in str(e.value), on_error
+
+
+def test_missing_memory_pools():
+ """
+ The cascader requires memory pools to be present, check the correct error
+ is thrown when there aren't any.
+ """
+ np.random.seed(2)
+ ifm_shape = (1, 80, 75, 3)
+
+ target, _, runtime, executor, pass_config = _get_compilation_config("ethos-u55-256", True, True)
+
+ @tf.function
+ def tf_graph(x):
+ return tf.nn.max_pool(x, (3, 3), (1, 1), "SAME")
+
+ _, tflite_graph = infra.get_tflite_graph(tf_graph, [ifm_shape])
+ tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0)
+ relay_module, params = relay.frontend.from_tflite(tflite_model)
+ mod = partition_for_ethosu(relay_module, params)
+
+ with pytest.raises(ValueError) as e:
+ with tvm.transform.PassContext(opt_level=3, config=pass_config):
+ tvm.relay.build(
+ mod,
+ target,
+ executor=executor,
+ runtime=runtime,
+ workspace_memory_pools=None,
+ params=params,
+ )
+
+ expected_reason = "Workspace memory pool needs to be provided for the U55 cascader"
+ on_error = "A ValueError was caught but its reason is not the expected one."
+ assert expected_reason in str(e.value), on_error
+
+
+def test_invalid_accelerator():
+ """
+ Check an error is thrown when an unsupported accelerator configuration
+ is used.
+ """
+ np.random.seed(2)
+ ifm_shape = (1, 80, 75, 3)
+
+ target, ethosu_target, runtime, executor, pass_config = _get_compilation_config(
+ "ethos-u65-256", True, True
+ )
+ workspace_memory_pools = WorkspaceMemoryPools(
+ [
+ WorkspacePoolInfo(
+ "SRAM",
+ [target, ethosu_target],
+ PoolInfoProperties(
+ size_hint_bytes=1,
+ read_bandwidth_bytes_per_cycle=16,
+ write_bandwidth_bytes_per_cycle=16,
+ target_burst_bytes={ethosu_target: 1},
+ ),
+ ),
+ ]
+ )
+
+ @tf.function
+ def tf_graph(x):
+ return tf.nn.max_pool(x, (3, 3), (1, 1), "SAME")
+
+ _, tflite_graph = infra.get_tflite_graph(tf_graph, [ifm_shape])
+ tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0)
+ relay_module, params = relay.frontend.from_tflite(tflite_model)
+ mod = partition_for_ethosu(relay_module, params)
+
+ with pytest.raises(ValueError) as e:
+ with tvm.transform.PassContext(opt_level=3, config=pass_config):
+ tvm.relay.build(
+ mod,
+ target,
+ executor=executor,
+ runtime=runtime,
+ workspace_memory_pools=workspace_memory_pools,
+ params=params,
+ )
+
+ expected_reason = "Cascading is not supported for the U65 accelerator"
+ on_error = "A ValueError was caught but its reason is not the expected one."
+ assert expected_reason in str(e.value), on_error