You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tvm.apache.org by ju...@apache.org on 2023/01/25 05:24:26 UTC
[tvm] branch main updated: [TVMScript] Default to T.Buffer than T.buffer_decl (#13838)
This is an automated email from the ASF dual-hosted git repository.
junrushao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git
The following commit(s) were added to refs/heads/main by this push:
new 2c109c53e0 [TVMScript] Default to T.Buffer than T.buffer_decl (#13838)
2c109c53e0 is described below
commit 2c109c53e00e99e6e4e198aab7514a1871827a88
Author: Junru Shao <ju...@apache.org>
AuthorDate: Tue Jan 24 21:24:20 2023 -0800
[TVMScript] Default to T.Buffer than T.buffer_decl (#13838)
The TVMScript parser supports `T.Buffer` and `T.buffer_decl` interchangeably; both share the same semantics in the TIR AST. However, `T.buffer_decl` is easily confused with `T.decl_buffer`. To make the semantics clearer, the printer now emits `T.Buffer` instead.
Note that this PR is backward compatible with the previous behavior: the parser still accepts TVMScript written with `T.buffer_decl`, and the only difference is that the printer now produces `T.Buffer` by default.
---
python/tvm/script/ir_builder/tir/__init__.py | 1 +
python/tvm/script/parser/tir/entry.py | 4 +-
src/script/printer/tir/buffer.cc | 3 +-
src/script/printer/tir/ir.cc | 8 +-
src/script/printer/utils.h | 4 +
.../test_ethosu/test_copy_compute_reordering.py | 146 +++++++-------
.../contrib/test_ethosu/test_encode_constants.py | 130 ++++++-------
.../contrib/test_ethosu/test_hoist_allocates.py | 72 +++----
.../contrib/test_ethosu/test_merge_constants.py | 216 ++++++++++-----------
.../test_ethosu/test_remove_concatenates.py | 24 +--
.../contrib/test_ethosu/test_replace_conv2d.py | 130 ++++++-------
.../contrib/test_ethosu/test_replace_copy.py | 20 +-
tests/python/contrib/test_ethosu/test_scheduler.py | 16 +-
.../test_ethosu/test_tir_to_cs_translator.py | 64 +++---
tests/python/relay/aot/test_pass_aot_lower_main.py | 4 +-
tests/python/unittest/test_lower_build.py | 12 +-
tests/python/unittest/test_tir_renew_defs.py | 2 +-
.../unittest/test_tir_schedule_cache_read_write.py | 8 +-
.../test_tir_transform_common_subexpr_elim.py | 8 +-
.../test_tir_transform_extract_constants.py | 6 +-
.../unittest/test_tir_transform_flatten_buffer.py | 52 +++--
.../test_tir_transform_inject_rolling_buffer.py | 4 +-
.../test_tir_transform_inject_virtual_thread.py | 8 +-
.../unittest/test_tir_transform_loop_partition.py | 32 +--
...test_tir_transform_renormalize_split_pattern.py | 18 +-
.../unittest/test_tir_transform_storage_rewrite.py | 20 +-
.../unittest/test_tir_transform_thread_sync.py | 2 +-
...ransform_convert_pool_allocations_to_offsets.py | 48 ++---
.../unittest/test_tvmscript_ir_builder_tir.py | 18 +-
.../python/unittest/test_tvmscript_printer_tir.py | 14 +-
tests/python/unittest/test_tvmscript_roundtrip.py | 132 ++++++-------
.../python/unittest/test_tvmscript_syntax_sugar.py | 12 --
32 files changed, 607 insertions(+), 631 deletions(-)
diff --git a/python/tvm/script/ir_builder/tir/__init__.py b/python/tvm/script/ir_builder/tir/__init__.py
index 0a71af4db7..563ac56f7b 100644
--- a/python/tvm/script/ir_builder/tir/__init__.py
+++ b/python/tvm/script/ir_builder/tir/__init__.py
@@ -17,3 +17,4 @@
"""Package tvm.script.ir_builder.tir"""
from .ir import * # pylint: disable=wildcard-import,redefined-builtin
from .ir import boolean as bool # pylint: disable=redefined-builtin
+from .ir import buffer_decl as Buffer
diff --git a/python/tvm/script/parser/tir/entry.py b/python/tvm/script/parser/tir/entry.py
index a5c134a859..e7ec7cf886 100644
--- a/python/tvm/script/parser/tir/entry.py
+++ b/python/tvm/script/parser/tir/entry.py
@@ -55,7 +55,7 @@ class BufferProxy:
def __call__(
self,
shape,
- dtype=None,
+ dtype="float32",
data=None,
strides=None,
elem_offset=None,
@@ -65,8 +65,6 @@ class BufferProxy:
buffer_type="",
axis_separators=None,
) -> Buffer:
- if dtype is None:
- raise ValueError("Data type must be specified when constructing buffer")
return buffer_decl(
shape,
dtype=dtype,
diff --git a/src/script/printer/tir/buffer.cc b/src/script/printer/tir/buffer.cc
index b4429dc9af..19f3dc7ef5 100644
--- a/src/script/printer/tir/buffer.cc
+++ b/src/script/printer/tir/buffer.cc
@@ -209,8 +209,7 @@ TVM_STATIC_IR_FUNCTOR(IRDocsifier, vtable) //
if (!d->IsVarDefined(buffer)) {
if (Optional<Frame> opt_f = FindLowestVarDef(buffer, d)) {
ExprDoc lhs = DefineBuffer(buffer, opt_f.value(), d);
- ExprDoc rhs = BufferDecl(buffer, "buffer_decl", // TODO(@junrushao): name confusing
- {}, p, opt_f.value(), d);
+ ExprDoc rhs = BufferDecl(buffer, "Buffer", {}, p, opt_f.value(), d);
opt_f.value()->stmts.push_back(AssignDoc(lhs, rhs, NullOpt));
}
}
diff --git a/src/script/printer/tir/ir.cc b/src/script/printer/tir/ir.cc
index 76d3680fec..ce10ff6816 100644
--- a/src/script/printer/tir/ir.cc
+++ b/src/script/printer/tir/ir.cc
@@ -34,8 +34,7 @@ TVM_STATIC_IR_FUNCTOR(IRDocsifier, vtable)
} else if (dtype == DataType::Bool()) {
return LiteralDoc::Boolean(imm->value, imm_p->Attr("value"));
} else {
- return TIR(d, runtime::DLDataType2String(dtype)) //
- ->Call({LiteralDoc::Int(imm->value, imm_p->Attr("value"))});
+ return TIR(d, DType2Str(dtype))->Call({LiteralDoc::Int(imm->value, imm_p->Attr("value"))});
}
});
@@ -45,7 +44,7 @@ TVM_STATIC_IR_FUNCTOR(IRDocsifier, vtable)
if (dtype == d->cfg->float_dtype) {
return LiteralDoc::Float(imm->value, imm_p->Attr("value"));
} else {
- return TIR(d, runtime::DLDataType2String(dtype)) //
+ return TIR(d, DType2Str(dtype))
->Call({LiteralDoc::Float(imm->value, imm_p->Attr("value"))});
}
});
@@ -61,8 +60,7 @@ TVM_STATIC_IR_FUNCTOR(IRDocsifier, vtable)
TVM_STATIC_IR_FUNCTOR(IRDocsifier, vtable)
.set_dispatch<PrimType>("", [](PrimType ty, ObjectPath p, IRDocsifier d) -> Doc {
- std::string dtype = ty->dtype.is_void() ? "void" : runtime::DLDataType2String(ty->dtype);
- return TIR(d, dtype);
+ return TIR(d, DType2Str(ty->dtype));
});
TVM_STATIC_IR_FUNCTOR(IRDocsifier, vtable)
diff --git a/src/script/printer/utils.h b/src/script/printer/utils.h
index 5161f1f9a2..cb20eb363d 100644
--- a/src/script/printer/utils.h
+++ b/src/script/printer/utils.h
@@ -65,6 +65,10 @@ inline std::string Docsify(const ObjectRef& obj, const IRDocsifier& d, const Fra
return DocToPythonScript(StmtBlockDoc(f->stmts), cfg);
}
+inline std::string DType2Str(const runtime::DataType& dtype) {
+ return dtype.is_void() ? "void" : runtime::DLDataType2String(dtype);
+}
+
} // namespace printer
} // namespace script
} // namespace tvm
diff --git a/tests/python/contrib/test_ethosu/test_copy_compute_reordering.py b/tests/python/contrib/test_ethosu/test_copy_compute_reordering.py
index 586b8b380e..02b5f9f7f1 100644
--- a/tests/python/contrib/test_ethosu/test_copy_compute_reordering.py
+++ b/tests/python/contrib/test_ethosu/test_copy_compute_reordering.py
@@ -29,16 +29,16 @@ class AllOperatorsWithWeights:
def main() -> None:
# function attr dict
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
- buffer1 = T.buffer_decl([8192], "int8")
- buffer2 = T.buffer_decl([128], "uint8")
- buffer3 = T.buffer_decl([32], "uint8")
- buffer4 = T.buffer_decl([112], "uint8")
- buffer5 = T.buffer_decl([32], "uint8")
- buffer6 = T.buffer_decl([112], "uint8")
- buffer7 = T.buffer_decl([32], "uint8")
- buffer8 = T.buffer_decl([112], "uint8")
- buffer9 = T.buffer_decl([32], "uint8")
- buffer10 = T.buffer_decl([2048], "int8")
+ buffer1 = T.Buffer([8192], "int8")
+ buffer2 = T.Buffer([128], "uint8")
+ buffer3 = T.Buffer([32], "uint8")
+ buffer4 = T.Buffer([112], "uint8")
+ buffer5 = T.Buffer([32], "uint8")
+ buffer6 = T.Buffer([112], "uint8")
+ buffer7 = T.Buffer([32], "uint8")
+ buffer8 = T.Buffer([112], "uint8")
+ buffer9 = T.Buffer([32], "uint8")
+ buffer10 = T.Buffer([2048], "int8")
# body
p1 = T.decl_buffer([128], "uint8")
p2 = T.decl_buffer([112], "uint8")
@@ -77,16 +77,16 @@ def test_all_operators_with_weights_max_copy_movements_1():
def main() -> None:
# function attr dict
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
- buffer1 = T.buffer_decl([8192], "int8")
- buffer2 = T.buffer_decl([128], "uint8")
- buffer3 = T.buffer_decl([32], "uint8")
- buffer4 = T.buffer_decl([112], "uint8")
- buffer5 = T.buffer_decl([32], "uint8")
- buffer6 = T.buffer_decl([112], "uint8")
- buffer7 = T.buffer_decl([32], "uint8")
- buffer8 = T.buffer_decl([112], "uint8")
- buffer9 = T.buffer_decl([32], "uint8")
- buffer10 = T.buffer_decl([2048], "int8")
+ buffer1 = T.Buffer([8192], "int8")
+ buffer2 = T.Buffer([128], "uint8")
+ buffer3 = T.Buffer([32], "uint8")
+ buffer4 = T.Buffer([112], "uint8")
+ buffer5 = T.Buffer([32], "uint8")
+ buffer6 = T.Buffer([112], "uint8")
+ buffer7 = T.Buffer([32], "uint8")
+ buffer8 = T.Buffer([112], "uint8")
+ buffer9 = T.Buffer([32], "uint8")
+ buffer10 = T.Buffer([2048], "int8")
# body
p1 = T.decl_buffer([128], "uint8")
p2 = T.decl_buffer([112], "uint8")
@@ -123,16 +123,16 @@ def test_all_operators_with_weights_max_copy_movements_2():
def main() -> None:
# function attr dict
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
- buffer1 = T.buffer_decl([8192], "int8")
- buffer2 = T.buffer_decl([128], "uint8")
- buffer3 = T.buffer_decl([32], "uint8")
- buffer4 = T.buffer_decl([112], "uint8")
- buffer5 = T.buffer_decl([32], "uint8")
- buffer6 = T.buffer_decl([112], "uint8")
- buffer7 = T.buffer_decl([32], "uint8")
- buffer8 = T.buffer_decl([112], "uint8")
- buffer9 = T.buffer_decl([32], "uint8")
- buffer10 = T.buffer_decl([2048], "int8")
+ buffer1 = T.Buffer([8192], "int8")
+ buffer2 = T.Buffer([128], "uint8")
+ buffer3 = T.Buffer([32], "uint8")
+ buffer4 = T.Buffer([112], "uint8")
+ buffer5 = T.Buffer([32], "uint8")
+ buffer6 = T.Buffer([112], "uint8")
+ buffer7 = T.Buffer([32], "uint8")
+ buffer8 = T.Buffer([112], "uint8")
+ buffer9 = T.Buffer([32], "uint8")
+ buffer10 = T.Buffer([2048], "int8")
# body
p1 = T.decl_buffer([128], "uint8")
p2 = T.decl_buffer([112], "uint8")
@@ -167,8 +167,8 @@ class AllOperatorsWithoutWeights:
@T.prim_func
def main() -> None:
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
- buffer1 = T.buffer_decl([36], "int8")
- buffer2 = T.buffer_decl([9], "int8")
+ buffer1 = T.Buffer([36], "int8")
+ buffer2 = T.Buffer([9], "int8")
# body
p1 = T.decl_buffer([96], "int8")
T.evaluate(T.call_extern("ethosu_pooling", "int8", 3, 4, 3, 3, 0, 4, buffer1[0], 0, 0, 0, T.float32(1), 0, "NHWC", 12, 3, 1, "int8", 3, 2, 3, 3, 0, 2, p1[0], 0, 0, 0, T.float32(1), 0, "NHCWB16", 32, 16, 1, "MAX", 2, 1, 2, 1, 1, 1, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
@@ -189,11 +189,11 @@ class OperatorsWithAndWithoutWeights:
@T.prim_func
def main() -> None:
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
- buffer1 = T.buffer_decl([97156], "int8")
- buffer2 = T.buffer_decl([80], "uint8")
- buffer3 = T.buffer_decl([64], "uint8")
- buffer4 = T.buffer_decl([96], "uint8")
- buffer5 = T.buffer_decl([32], "uint8")
+ buffer1 = T.Buffer([97156], "int8")
+ buffer2 = T.Buffer([80], "uint8")
+ buffer3 = T.Buffer([64], "uint8")
+ buffer4 = T.Buffer([96], "uint8")
+ buffer5 = T.Buffer([32], "uint8")
# body
p1 = T.decl_buffer([390336], "int8")
p2 = T.decl_buffer([80], "uint8")
@@ -224,11 +224,11 @@ def test_operators_with_and_without_weights_max_copy_movements_1():
@T.prim_func
def main() -> None:
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
- buffer1 = T.buffer_decl([97156], "int8")
- buffer2 = T.buffer_decl([80], "uint8")
- buffer3 = T.buffer_decl([64], "uint8")
- buffer4 = T.buffer_decl([96], "uint8")
- buffer5 = T.buffer_decl([32], "uint8")
+ buffer1 = T.Buffer([97156], "int8")
+ buffer2 = T.Buffer([80], "uint8")
+ buffer3 = T.Buffer([64], "uint8")
+ buffer4 = T.Buffer([96], "uint8")
+ buffer5 = T.Buffer([32], "uint8")
# body
p1 = T.decl_buffer([390336], "int8")
p2 = T.decl_buffer([80], "uint8")
@@ -257,11 +257,11 @@ def test_operators_with_and_without_weights_max_copy_movements_2():
@T.prim_func
def main() -> None:
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
- buffer1 = T.buffer_decl([97156], "int8")
- buffer2 = T.buffer_decl([80], "uint8")
- buffer3 = T.buffer_decl([64], "uint8")
- buffer4 = T.buffer_decl([96], "uint8")
- buffer5 = T.buffer_decl([32], "uint8")
+ buffer1 = T.Buffer([97156], "int8")
+ buffer2 = T.Buffer([80], "uint8")
+ buffer3 = T.Buffer([64], "uint8")
+ buffer4 = T.Buffer([96], "uint8")
+ buffer5 = T.Buffer([32], "uint8")
# body
p1 = T.decl_buffer([390336], "int8")
p2 = T.decl_buffer([80], "uint8")
@@ -289,14 +289,14 @@ class CopyToBufferWithLocalScope:
@T.prim_func
def main() -> None:
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
- buffer1 = T.buffer_decl([64], "uint8")
- buffer2 = T.buffer_decl([48], "uint8")
- buffer3 = T.buffer_decl([48], "uint8")
- buffer4 = T.buffer_decl([256], "uint8")
- buffer5 = T.buffer_decl([16], "uint8")
- buffer6 = T.buffer_decl([48], "uint8")
- buffer7 = T.buffer_decl([256], "uint8")
- buffer8 = T.buffer_decl([64], "uint8")
+ buffer1 = T.Buffer([64], "uint8")
+ buffer2 = T.Buffer([48], "uint8")
+ buffer3 = T.Buffer([48], "uint8")
+ buffer4 = T.Buffer([256], "uint8")
+ buffer5 = T.Buffer([16], "uint8")
+ buffer6 = T.Buffer([48], "uint8")
+ buffer7 = T.Buffer([256], "uint8")
+ buffer8 = T.Buffer([64], "uint8")
# body
p1 = T.decl_buffer([48], "uint8")
p2 = T.decl_buffer([48], "uint8")
@@ -330,14 +330,14 @@ def test_copy_to_buffer_with_local_scope_max_copy_movements_n(max_copy_movements
@T.prim_func
def main() -> None:
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
- buffer1 = T.buffer_decl([64], "uint8")
- buffer2 = T.buffer_decl([48], "uint8")
- buffer3 = T.buffer_decl([48], "uint8")
- buffer4 = T.buffer_decl([256], "uint8")
- buffer5 = T.buffer_decl([16], "uint8")
- buffer6 = T.buffer_decl([48], "uint8")
- buffer7 = T.buffer_decl([256], "uint8")
- buffer8 = T.buffer_decl([64], "uint8")
+ buffer1 = T.Buffer([64], "uint8")
+ buffer2 = T.Buffer([48], "uint8")
+ buffer3 = T.Buffer([48], "uint8")
+ buffer4 = T.Buffer([256], "uint8")
+ buffer5 = T.Buffer([16], "uint8")
+ buffer6 = T.Buffer([48], "uint8")
+ buffer7 = T.Buffer([256], "uint8")
+ buffer8 = T.Buffer([64], "uint8")
# body
p1 = T.decl_buffer([48], "uint8")
p2 = T.decl_buffer([48], "uint8")
@@ -406,11 +406,11 @@ def test_default_max_copy_movements():
@T.prim_func
def main() -> None:
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
- buffer1 = T.buffer_decl([97156], "int8")
- buffer2 = T.buffer_decl([80], "uint8")
- buffer3 = T.buffer_decl([64], "uint8")
- buffer4 = T.buffer_decl([96], "uint8")
- buffer5 = T.buffer_decl([32], "uint8")
+ buffer1 = T.Buffer([97156], "int8")
+ buffer2 = T.Buffer([80], "uint8")
+ buffer3 = T.Buffer([64], "uint8")
+ buffer4 = T.Buffer([96], "uint8")
+ buffer5 = T.Buffer([32], "uint8")
# body
p1 = T.decl_buffer([390336], "int8")
p2 = T.decl_buffer([80], "uint8")
@@ -439,11 +439,11 @@ def test_pass_context_option_max_copy_movements():
@T.prim_func
def main() -> None:
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
- buffer1 = T.buffer_decl([97156], "int8")
- buffer2 = T.buffer_decl([80], "uint8")
- buffer3 = T.buffer_decl([64], "uint8")
- buffer4 = T.buffer_decl([96], "uint8")
- buffer5 = T.buffer_decl([32], "uint8")
+ buffer1 = T.Buffer([97156], "int8")
+ buffer2 = T.Buffer([80], "uint8")
+ buffer3 = T.Buffer([64], "uint8")
+ buffer4 = T.Buffer([96], "uint8")
+ buffer5 = T.Buffer([32], "uint8")
# body
p1 = T.decl_buffer([390336], "int8")
p2 = T.decl_buffer([80], "uint8")
diff --git a/tests/python/contrib/test_ethosu/test_encode_constants.py b/tests/python/contrib/test_ethosu/test_encode_constants.py
index 0728840ee9..871c7e29df 100644
--- a/tests/python/contrib/test_ethosu/test_encode_constants.py
+++ b/tests/python/contrib/test_ethosu/test_encode_constants.py
@@ -39,19 +39,19 @@ class WeightStreamOnlyU55:
def main(input_placeholder: T.Buffer[(1, 16, 16, 32), "int8"], input_ethosu_write: T.Buffer[(1, 16, 16, 8), "int8"]) -> None:
# function attr dict
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
- placeholder = T.buffer_decl([8192], "int8", data=input_placeholder.data)
- ethosu_write = T.buffer_decl([2048], "int8", data=input_ethosu_write.data)
- buffer1 = T.buffer_decl([160], "uint8")
- buffer3 = T.buffer_decl([144], "uint8")
- buffer5 = T.buffer_decl([144], "uint8")
- buffer7 = T.buffer_decl([144], "uint8")
- buffer8 = T.buffer_decl([32], "uint8")
+ placeholder = T.Buffer([8192], "int8", data=input_placeholder.data)
+ ethosu_write = T.Buffer([2048], "int8", data=input_ethosu_write.data)
+ buffer1 = T.Buffer([160], "uint8")
+ buffer3 = T.Buffer([144], "uint8")
+ buffer5 = T.Buffer([144], "uint8")
+ buffer7 = T.Buffer([144], "uint8")
+ buffer8 = T.Buffer([32], "uint8")
# body
p1_data = T.allocate([160], "uint8", "global", annotations={"disable_lower_builtin":True})
- p1 = T.buffer_decl([160], "uint8", data=p1_data)
+ p1 = T.Buffer([160], "uint8", data=p1_data)
p2_data = T.allocate([144], "uint8", "global", annotations={"disable_lower_builtin":True})
- p2 = T.buffer_decl([144], "uint8", data=p2_data)
- buffer9 = T.buffer_decl([144], "uint8", data=p1.data)
+ p2 = T.Buffer([144], "uint8", data=p2_data)
+ buffer9 = T.Buffer([144], "uint8", data=p1.data)
T.evaluate(T.call_extern("ethosu_copy", buffer1[0], 160, p1[0], dtype="handle"))
T.evaluate(T.call_extern("ethosu_copy", buffer3[0], 144, p2[0], dtype="handle"))
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p1[0], 128, T.int8(-1), T.int8(-1), 12, p1[128], 32, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
@@ -70,18 +70,18 @@ class WeightStreamOnlyU65:
# function attr dict
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
# buffer definition
- placeholder = T.buffer_decl([8192], dtype="int8", data=input_placeholder.data)
- ethosu_write = T.buffer_decl([2048], dtype="int8", data=input_ethosu_write.data)
- buffer_encoded_1 = T.buffer_decl([192], dtype="uint8")
- buffer_encoded_2_1 = T.buffer_decl([192], dtype="uint8")
- buffer_encoded_4_1 = T.buffer_decl([208], dtype="uint8")
- buffer_encoded_6_1 = T.buffer_decl([192], dtype="uint8")
+ placeholder = T.Buffer([8192], dtype="int8", data=input_placeholder.data)
+ ethosu_write = T.Buffer([2048], dtype="int8", data=input_ethosu_write.data)
+ buffer_encoded_1 = T.Buffer([192], dtype="uint8")
+ buffer_encoded_2_1 = T.Buffer([192], dtype="uint8")
+ buffer_encoded_4_1 = T.Buffer([208], dtype="uint8")
+ buffer_encoded_6_1 = T.Buffer([192], dtype="uint8")
# body
p1_data = T.allocate([208], "uint8", "global", annotations={"disable_lower_builtin":True})
- p1 = T.buffer_decl([208], "uint8", data=p1_data)
+ p1 = T.Buffer([208], "uint8", data=p1_data)
p2_data = T.allocate([192], "uint8", "global", annotations={"disable_lower_builtin":True})
- p2 = T.buffer_decl([192], "uint8", data=p2_data)
- p3 = T.buffer_decl([192], dtype="uint8", data=p1.data)
+ p2 = T.Buffer([192], "uint8", data=p2_data)
+ p3 = T.Buffer([192], dtype="uint8", data=p1.data)
T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_1[0], 192, p3[0], dtype="handle"))
T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_2_1[0], 192, p2[0], dtype="handle"))
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p3[0], 80, p3[80], 80, 12, p3[160], 16, p3[176], 16, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
@@ -157,14 +157,14 @@ class RereadWeightsU55:
def main(input_placeholder: T.Buffer[(1, 16, 16, 32), "int8"], input_ethosu_write: T.Buffer[(1, 16, 16, 8), "int8"]) -> None:
# function attr dict
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
- buffer1 = T.buffer_decl([384], "uint8")
- placeholder = T.buffer_decl([8192], "int8", data=input_placeholder.data)
- ethosu_write = T.buffer_decl([2048], "int8", data=input_ethosu_write.data)
+ buffer1 = T.Buffer([384], "uint8")
+ placeholder = T.Buffer([8192], "int8", data=input_placeholder.data)
+ ethosu_write = T.Buffer([2048], "int8", data=input_ethosu_write.data)
# body
p1_data = T.allocate([384], "uint8", "global", annotations={"disable_lower_builtin":True})
- p1 = T.buffer_decl([384], "uint8", data=p1_data)
+ p1 = T.Buffer([384], "uint8", data=p1_data)
p2_data = T.allocate([384], "uint8", "global", annotations={"disable_lower_builtin":True})
- p2 = T.buffer_decl([384], "uint8", data=p2_data)
+ p2 = T.Buffer([384], "uint8", data=p2_data)
T.evaluate(T.call_extern("ethosu_copy", buffer1[0], 384, p1[0], dtype="handle"))
T.evaluate(T.call_extern("ethosu_copy", buffer1[0], 384, p2[0], dtype="handle"))
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 8, 32, 16, 0, 8, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 8, 8, 16, 0, 8, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p1[0], 304, T.int8(-1), T.int8(-1), 12, p1[304], 80, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
@@ -179,14 +179,14 @@ class RereadWeightsU65:
# function attr dict
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
# buffer definition
- placeholder = T.buffer_decl([8192], dtype="int8", data=input_placeholder.data)
- ethosu_write = T.buffer_decl([2048], dtype="int8", data=input_ethosu_write.data)
- placeholder_encoded_1 = T.buffer_decl([464], "uint8")
+ placeholder = T.Buffer([8192], dtype="int8", data=input_placeholder.data)
+ ethosu_write = T.Buffer([2048], dtype="int8", data=input_ethosu_write.data)
+ placeholder_encoded_1 = T.Buffer([464], "uint8")
# body
p1_data = T.allocate([464], "uint8", "global", annotations={"disable_lower_builtin":True})
- p1 = T.buffer_decl([464], "uint8", data=p1_data)
+ p1 = T.Buffer([464], "uint8", data=p1_data)
p2_data = T.allocate([464], "uint8", "global", annotations={"disable_lower_builtin":True})
- p2 = T.buffer_decl([464], "uint8", data=p2_data)
+ p2 = T.Buffer([464], "uint8", data=p2_data)
T.evaluate(T.call_extern("ethosu_copy", placeholder_encoded_1[0], 464, p1[0], dtype="handle"))
T.evaluate(T.call_extern("ethosu_copy", placeholder_encoded_1[0], 464, p2[0], dtype="handle"))
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 8, 32, 16, 0, 8, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 8, 8, 16, 0, 8, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p1[0], 192, p1[192], 176, 12, p1[368], 48, p1[416], 48, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
@@ -259,15 +259,15 @@ class DirectReadOnlyU55:
def main(input_placeholder: T.Buffer[(1, 16, 16, 32), "int8"], input_ethosu_write: T.Buffer[(1, 16, 16, 8), "int8"]) -> None:
# function attr dict
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
- buffer = T.buffer_decl([592], "uint8")
- buffer_1 = T.buffer_decl([160], "uint8")
- buffer_2 = T.buffer_decl([160], "uint8")
- buffer_3 = T.buffer_decl([80], "uint8")
- placeholder = T.buffer_decl([8192], "int8", data=input_placeholder.data)
- ethosu_write = T.buffer_decl([2048], "int8", data=input_ethosu_write.data)
+ buffer = T.Buffer([592], "uint8")
+ buffer_1 = T.Buffer([160], "uint8")
+ buffer_2 = T.Buffer([160], "uint8")
+ buffer_3 = T.Buffer([80], "uint8")
+ placeholder = T.Buffer([8192], "int8", data=input_placeholder.data)
+ ethosu_write = T.Buffer([2048], "int8", data=input_ethosu_write.data)
# body
ethosu_write_1_data = T.allocate([4096], "int8", "global", annotations={"disable_lower_builtin":True})
- ethosu_write_1 = T.buffer_decl([4096], "int8", data=ethosu_write_1_data)
+ ethosu_write_1 = T.Buffer([4096], "int8", data=ethosu_write_1_data)
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 16, 16, 0, 16, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 256, 16, 1, 1, 1, 1, 1, 1, 1, buffer[0], 592, T.int8(-1), T.int8(-1), 12, buffer_1[0], 160, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 16, 16, 0, 16, ethosu_write_1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 256, 16, 1, "int8", 16, 16, 8, 16, 0, 16, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, buffer_2[0], 160, T.int8(-1), T.int8(-1), 12, buffer_3[0], 80, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
__tvm_meta__ = None
@@ -280,15 +280,15 @@ class DirectReadOnlyU65:
# function attr dict
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
# buffer definition
- placeholder_encoded = T.buffer_decl([608], dtype="uint8")
- placeholder_encoded_1 = T.buffer_decl([160], dtype="uint8")
- placeholder_encoded_2 = T.buffer_decl([208], dtype="uint8")
- placeholder_encoded_3 = T.buffer_decl([96], dtype="uint8")
- placeholder = T.buffer_decl([8192], dtype="int8", data=input_placeholder.data)
- ethosu_write = T.buffer_decl([2048], dtype="int8", data=input_ethosu_write.data)
+ placeholder_encoded = T.Buffer([608], dtype="uint8")
+ placeholder_encoded_1 = T.Buffer([160], dtype="uint8")
+ placeholder_encoded_2 = T.Buffer([208], dtype="uint8")
+ placeholder_encoded_3 = T.Buffer([96], dtype="uint8")
+ placeholder = T.Buffer([8192], dtype="int8", data=input_placeholder.data)
+ ethosu_write = T.Buffer([2048], dtype="int8", data=input_ethosu_write.data)
# body
ethosu_write_2_data = T.allocate([4096], "int8", "global", annotations={"disable_lower_builtin":True})
- ethosu_write_2 = T.buffer_decl([4096], "int8", data=ethosu_write_2_data)
+ ethosu_write_2 = T.Buffer([4096], "int8", data=ethosu_write_2_data)
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 16, 16, 0, 16, ethosu_write_2[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 256, 16, 1, 1, 1, 1, 1, 1, 1, placeholder_encoded[0], 304, placeholder_encoded[304], 304, 12, placeholder_encoded_1[0], 80, placeholder_encoded_1[80], 80, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 16, 16, 0, 16, ethosu_write_2[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 256, 16, 1, "int8", 16, 16, 8, 16, 0, 16, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, placeholder_encoded_2[0], 112, placeholder_encoded_2[112], 96, 12, placeholder_encoded_3[0], 48, placeholder_encoded_3[48], 48, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
__tvm_meta__ = None
@@ -357,21 +357,21 @@ class MixedReadU55:
def main(input_ifm: T.Buffer[(1,16,16,32), "int8"], input_ethosu_write: T.Buffer[(1,16,16,8), "int8"]) -> None:
# function attr dict
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
- buffer1 = T.buffer_decl([112], "uint8")
- buffer3 = T.buffer_decl([112], "uint8")
- buffer5 = T.buffer_decl([112], "uint8")
- buffer7 = T.buffer_decl([112], "uint8")
- buffer9 = T.buffer_decl([592], "uint8")
- buffer10 = T.buffer_decl([160], "uint8")
- ifm = T.buffer_decl([8192], "int8", data=input_ifm.data)
- ethosu_write = T.buffer_decl([2048], "int8", data=input_ethosu_write.data)
+ buffer1 = T.Buffer([112], "uint8")
+ buffer3 = T.Buffer([112], "uint8")
+ buffer5 = T.Buffer([112], "uint8")
+ buffer7 = T.Buffer([112], "uint8")
+ buffer9 = T.Buffer([592], "uint8")
+ buffer10 = T.Buffer([160], "uint8")
+ ifm = T.Buffer([8192], "int8", data=input_ifm.data)
+ ethosu_write = T.Buffer([2048], "int8", data=input_ethosu_write.data)
# body
p1_data = T.allocate([112], "uint8", "global", annotations={"disable_lower_builtin":True})
- p1 = T.buffer_decl([112], "uint8", data=p1_data)
+ p1 = T.Buffer([112], "uint8", data=p1_data)
p3_data = T.allocate([4096], "int8", "global", annotations={"disable_lower_builtin":True})
- p3 = T.buffer_decl([4096], "int8", data=p3_data)
+ p3 = T.Buffer([4096], "int8", data=p3_data)
p2_data = T.allocate([112], "uint8", "global", annotations={"disable_lower_builtin":True})
- p2 = T.buffer_decl([112], "uint8", data=p2_data)
+ p2 = T.Buffer([112], "uint8", data=p2_data)
T.evaluate(T.call_extern("ethosu_copy", buffer1[0], 112, p1[0], dtype="handle"))
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, ifm[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 16, 16, 0, 16, p3[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 256, 16, 1, 1, 1, 1, 1, 1, 1, buffer9[0], 592, T.int8(-1), T.int8(-1), 12, buffer10[0], 160, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
T.evaluate(T.call_extern("ethosu_copy", buffer3[0], 112, p2[0], dtype="handle"))
@@ -391,20 +391,20 @@ class MixedReadU65:
# function attr dict
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
# buffer definition
- ifm = T.buffer_decl([8192], dtype="int8", data=input_ifm.data)
- ethosu_write = T.buffer_decl([2048], dtype="int8", data=input_ethosu_write.data)
- buffer1 = T.buffer_decl([128], dtype="uint8")
- buffer2 = T.buffer_decl([128], dtype="uint8")
- buffer3 = T.buffer_decl([128], dtype="uint8")
- buffer4 = T.buffer_decl([608], dtype="uint8")
- buffer5 = T.buffer_decl([160], dtype="uint8")
- buffer6 = T.buffer_decl([128], dtype="uint8")
+ ifm = T.Buffer([8192], dtype="int8", data=input_ifm.data)
+ ethosu_write = T.Buffer([2048], dtype="int8", data=input_ethosu_write.data)
+ buffer1 = T.Buffer([128], dtype="uint8")
+ buffer2 = T.Buffer([128], dtype="uint8")
+ buffer3 = T.Buffer([128], dtype="uint8")
+ buffer4 = T.Buffer([608], dtype="uint8")
+ buffer5 = T.Buffer([160], dtype="uint8")
+ buffer6 = T.Buffer([128], dtype="uint8")
p1_data = T.allocate([128], "uint8", "global", annotations={"disable_lower_builtin":True})
- p1 = T.buffer_decl([128], "uint8", data=p1_data)
+ p1 = T.Buffer([128], "uint8", data=p1_data)
p2_data = T.allocate([4096], "int8", "global", annotations={"disable_lower_builtin":True})
- p2 = T.buffer_decl([4096], "int8", data=p2_data)
+ p2 = T.Buffer([4096], "int8", data=p2_data)
p3_data = T.allocate([128], "uint8", "global", annotations={"disable_lower_builtin":True})
- p3 = T.buffer_decl([128], "uint8", data=p3_data)
+ p3 = T.Buffer([128], "uint8", data=p3_data)
T.evaluate(T.call_extern("ethosu_copy", buffer1[0], 128, p1[0], dtype="handle"))
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, ifm[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 16, 16, 0, 16, p2[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 256, 16, 1, 1, 1, 1, 1, 1, 1, buffer4[0], 304, buffer4[304], 304, 12, buffer5[0], 80, buffer5[80], 80, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 128, p3[0], dtype="handle"))
diff --git a/tests/python/contrib/test_ethosu/test_hoist_allocates.py b/tests/python/contrib/test_ethosu/test_hoist_allocates.py
index 1508aa441c..ea1cae50e6 100644
--- a/tests/python/contrib/test_ethosu/test_hoist_allocates.py
+++ b/tests/python/contrib/test_ethosu/test_hoist_allocates.py
@@ -109,27 +109,27 @@ def test_double_convolution():
def main(input_placeholder: T.Buffer[(1, 27, 42, 3), "int8"], input_placeholder_encoded: T.Buffer[(3, 3, 2, 3), "uint8"], input_placeholder_encoded_1: T.Buffer[(3, 10), "uint8"], input_placeholder_encoded_2: T.Buffer[(3, 3, 2, 3), "uint8"], input_placeholder_encoded_3: T.Buffer[(3, 10), "uint8"], input_ethosu_write: T.Buffer[(1, 27, 42, 3), "int8"]) -> None:
# function attr dict
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
- placeholder = T.buffer_decl([3402], dtype="int8", data=input_placeholder.data)
- placeholder_encoded = T.buffer_decl([128], dtype="int8", data=input_placeholder_encoded.data)
- placeholder_encoded_1 = T.buffer_decl([32], dtype="uint8", data=input_placeholder_encoded_1.data)
- placeholder_encoded_2 = T.buffer_decl([128], dtype="int8", data=input_placeholder_encoded_2.data)
- placeholder_encoded_3 = T.buffer_decl([32], dtype="uint8", data=input_placeholder_encoded_3.data)
- ethosu_write = T.buffer_decl([3402], dtype="int8", data=input_ethosu_write.data)
+ placeholder = T.Buffer([3402], dtype="int8", data=input_placeholder.data)
+ placeholder_encoded = T.Buffer([128], dtype="int8", data=input_placeholder_encoded.data)
+ placeholder_encoded_1 = T.Buffer([32], dtype="uint8", data=input_placeholder_encoded_1.data)
+ placeholder_encoded_2 = T.Buffer([128], dtype="int8", data=input_placeholder_encoded_2.data)
+ placeholder_encoded_3 = T.Buffer([32], dtype="uint8", data=input_placeholder_encoded_3.data)
+ ethosu_write = T.Buffer([3402], dtype="int8", data=input_ethosu_write.data)
# body
placeholder_global_data = T.allocate([128], "uint8", "global")
- placeholder_global = T.buffer_decl([128], "uint8", data=placeholder_global_data)
+ placeholder_global = T.Buffer([128], "uint8", data=placeholder_global_data)
T.evaluate(T.call_extern("ethosu_copy", placeholder_encoded[0], 128, placeholder_global[0], dtype="handle"))
placeholder_d_global_data = T.allocate([32], "uint8", "global")
- placeholder_d_global = T.buffer_decl([32], "uint8", data=placeholder_d_global_data)
+ placeholder_d_global = T.Buffer([32], "uint8", data=placeholder_d_global_data)
T.evaluate(T.call_extern("ethosu_copy", placeholder_encoded_1[0], 32, placeholder_d_global[0], dtype="handle"))
ethosu_write_2_data = T.allocate([18144], "int8", "global")
- ethosu_write_2 = T.buffer_decl([18144], "int8", data=ethosu_write_2_data)
+ ethosu_write_2 = T.Buffer([18144], "int8", data=ethosu_write_2_data)
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 27, 42, 3, 27, 0, 42, placeholder[0], 0, 0, 0, T.float32(0.0039215646684169769), -128, "NHWC", 126, 3, 1, "int8", 27, 42, 3, 27, 0, 42, ethosu_write_2[0], 0, 0, 0, T.float32(0.031308155506849289), -128, "NHCWB16", 672, 16, 1, 2, 3, 1, 1, 1, 2, placeholder_global[0], 128, 0, placeholder_d_global[0], 32, 2, 0, 2, 1, "NONE", 0, 0, "TFL", "NONE", dtype="handle"))
placeholder_d_global_1_data = T.allocate([128], "uint8", "global")
- placeholder_d_global_1 = T.buffer_decl([128], "uint8", data=placeholder_d_global_1_data)
+ placeholder_d_global_1 = T.Buffer([128], "uint8", data=placeholder_d_global_1_data)
T.evaluate(T.call_extern("ethosu_copy", placeholder_encoded_2[0], 128, placeholder_d_global_1[0], dtype="handle"))
placeholder_d_global_2_data = T.allocate([32], "uint8", "global")
- placeholder_d_global_2 = T.buffer_decl([32], "uint8", data=placeholder_d_global_2_data)
+ placeholder_d_global_2 = T.Buffer([32], "uint8", data=placeholder_d_global_2_data)
T.evaluate(T.call_extern("ethosu_copy", placeholder_encoded_3[0], 32, placeholder_d_global_2[0], dtype="handle"))
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 27, 42, 3, 27, 0, 42, ethosu_write_2[0], 0, 0, 0, T.float32(0.031308155506849289), -128, "NHCWB16", 672, 16, 1, "int8", 27, 42, 3, 27, 0, 42, ethosu_write[0], 0, 0, 0, T.float32(0.23604340851306915), -128, "NHWC", 126, 3, 1, 2, 3, 1, 1, 1, 2, placeholder_d_global_1[0], 128, 0, placeholder_d_global_2[0], 32, 2, 0, 2, 1, "CLIP", -128, 127, "TFL", "NONE", dtype="handle"))
# fmt: on
@@ -153,20 +153,20 @@ def test_identities():
def main(input_placeholder: T.Buffer[(1, 2, 3, 4), "int8"], T_concat: T.Buffer[(24,), "int8"]) -> None:
# function attr dict
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
- placeholder = T.buffer_decl([24], dtype="int8", data=input_placeholder.data)
+ placeholder = T.Buffer([24], dtype="int8", data=input_placeholder.data)
# body
ethosu_write_data = T.allocate([12], "int8", "global")
- ethosu_write = T.buffer_decl([12], "int8", data=ethosu_write_data)
+ ethosu_write = T.Buffer([12], "int8", data=ethosu_write_data)
T.evaluate(T.call_extern("ethosu_identity", "int8", 1, 3, 4, 1, 0, 3, placeholder[12], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 4, 1, "int8", 1, 3, 4, 1, 0, 3, ethosu_write[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 4, 1, "AVG", 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", dtype="handle"))
ethosu_write_1_data = T.allocate([12], "int8", "global")
- ethosu_write_1 = T.buffer_decl([12], "int8", data=ethosu_write_1_data)
+ ethosu_write_1 = T.Buffer([12], "int8", data=ethosu_write_1_data)
T.evaluate(T.call_extern("ethosu_identity", "int8", 1, 3, 4, 1, 0, 3, ethosu_write[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 4, 1, "int8", 1, 3, 4, 1, 0, 3, ethosu_write_1[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 4, 1, "AVG", 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", dtype="handle"))
T.evaluate(T.call_extern("ethosu_identity", "int8", 12, 1, 1, 12, 0, 1, ethosu_write_1[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 1, 1, "int8", 12, 1, 1, 12, 0, 1, T_concat[12], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 1, 1, "AVG", 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", dtype="handle"))
ethosu_write_2_data = T.allocate([12], "int8", "global")
- ethosu_write_2 = T.buffer_decl([12], "int8", data=ethosu_write_2_data)
+ ethosu_write_2 = T.Buffer([12], "int8", data=ethosu_write_2_data)
T.evaluate(T.call_extern("ethosu_identity", "int8", 1, 3, 4, 1, 0, 3, placeholder[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 4, 1, "int8", 1, 3, 4, 1, 0, 3, ethosu_write_2[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 4, 1, "AVG", 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", dtype="handle"))
ethosu_write_3_data = T.allocate([12], "int8", "global")
- ethosu_write_3 = T.buffer_decl([12], "int8", data=ethosu_write_3_data)
+ ethosu_write_3 = T.Buffer([12], "int8", data=ethosu_write_3_data)
T.evaluate(T.call_extern("ethosu_identity", "int8", 1, 3, 4, 1, 0, 3, ethosu_write_2[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 4, 1, "int8", 1, 3, 4, 1, 0, 3, ethosu_write_3[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 4, 1, "AVG", 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", dtype="handle"))
T.evaluate(T.call_extern("ethosu_identity", "int8", 12, 1, 1, 12, 0, 1, ethosu_write_3[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 1, 1, "int8", 12, 1, 1, 12, 0, 1, T_concat[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 1, 1, "AVG", 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", dtype="handle"))
# fmt: on
@@ -190,35 +190,35 @@ def test_outer_seq_stmt():
def main(input_placeholder: T.Buffer[(1, 16, 16, 32), "int8"], input_ethosu_write: T.Buffer[(1, 16, 16, 8), "int8"], buffer_encoded: T.Buffer[(128,), "uint8"], buffer_encoded_1: T.Buffer[(32,), "uint8"], buffer_encoded_2: T.Buffer[(112,), "uint8"], buffer_encoded_3: T.Buffer[(32,), "uint8"], buffer_encoded_4: T.Buffer[(112,), "uint8"], buffer_encoded_5: T.Buffer[(32,), "uint8"], buffer_encoded_6: T.Buffer[(112,), "uint8"], buffer_encoded_7: T.Buffer[(32,), "uint8"]) -> None:
# function attr dict
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
- placeholder = T.buffer_decl([8192], dtype="int8", data=input_placeholder.data)
- ethosu_write = T.buffer_decl([2048], dtype="int8", data=input_ethosu_write.data)
+ placeholder = T.Buffer([8192], dtype="int8", data=input_placeholder.data)
+ ethosu_write = T.Buffer([2048], dtype="int8", data=input_ethosu_write.data)
# body
with T.allocate([128], "uint8", "global") as placeholder_global_data:
- placeholder_global = T.buffer_decl([128], "uint8", data=placeholder_global_data)
+ placeholder_global = T.Buffer([128], "uint8", data=placeholder_global_data)
T.evaluate(T.call_extern("ethosu_copy", buffer_encoded[0], 128, placeholder_global[0], dtype="handle"))
placeholder_d_global_data = T.allocate([32], "uint8", "global")
- placeholder_d_global = T.buffer_decl([32], "uint8", data=placeholder_d_global_data)
+ placeholder_d_global = T.Buffer([32], "uint8", data=placeholder_d_global_data)
T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_1[0], 32, placeholder_d_global[0], dtype="handle"))
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, placeholder_global[0], 128, 12, placeholder_d_global[0], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
with T.allocate([112], "uint8", "global") as placeholder_global_1_data:
- placeholder_global_1 = T.buffer_decl([112], "uint8", data=placeholder_global_1_data)
+ placeholder_global_1 = T.Buffer([112], "uint8", data=placeholder_global_1_data)
T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_2[0], 112, placeholder_global_1[0], dtype="handle"))
placeholder_d_global_1_data = T.allocate([32], "uint8", "global")
- placeholder_d_global_1 = T.buffer_decl([32], "uint8", data=placeholder_d_global_1_data)
+ placeholder_d_global_1 = T.Buffer([32], "uint8", data=placeholder_d_global_1_data)
T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_3[0], 32, placeholder_d_global_1[0], dtype="handle"))
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write[2], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, placeholder_global_1[0], 112, 12, placeholder_d_global_1[0], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
with T.allocate([112], "uint8", "global") as placeholder_global_2_data:
- placeholder_global_2 = T.buffer_decl([112], "uint8", data=placeholder_global_2_data)
+ placeholder_global_2 = T.Buffer([112], "uint8", data=placeholder_global_2_data)
T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_4[0], 112, placeholder_global_2[0], dtype="handle"))
placeholder_d_global_2_data = T.allocate([32], "uint8", "global")
- placeholder_d_global_2 = T.buffer_decl([32], "uint8", data=placeholder_d_global_2_data)
+ placeholder_d_global_2 = T.Buffer([32], "uint8", data=placeholder_d_global_2_data)
T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_5[0], 32, placeholder_d_global_2[0], dtype="handle"))
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write[4], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, placeholder_global_2[0], 112, 12, placeholder_d_global_2[0], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
placeholder_global_3_data = T.allocate([112], "uint8", "global")
- placeholder_global_3 = T.buffer_decl([112], "uint8", data=placeholder_global_3_data)
+ placeholder_global_3 = T.Buffer([112], "uint8", data=placeholder_global_3_data)
T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_6[0], 112, placeholder_global_3[0], dtype="handle"))
placeholder_d_global_3_data = T.allocate([32], "uint8", "global")
- placeholder_d_global_3 = T.buffer_decl([32], "uint8", data=placeholder_d_global_3_data)
+ placeholder_d_global_3 = T.Buffer([32], "uint8", data=placeholder_d_global_3_data)
T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_7[0], 32, placeholder_d_global_3[0], dtype="handle"))
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write[6], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, placeholder_global_3[0], 112, 12, placeholder_d_global_3[0], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
# fmt: on
@@ -240,23 +240,23 @@ def test_allocate_without_seq_stmt():
def main(input_placeholder: T.Buffer[(1, 16, 16, 32), "int8"], input_ethosu_write: T.Buffer[(1, 16, 16, 8), "int8"], buffer_encoded: T.Buffer[(128,), "uint8"], buffer_encoded_1: T.Buffer[(32,), "uint8"], buffer_encoded_2: T.Buffer[(112,), "uint8"], buffer_encoded_3: T.Buffer[(32,), "uint8"], buffer_encoded_4: T.Buffer[(112,), "uint8"], buffer_encoded_5: T.Buffer[(32,), "uint8"], buffer_encoded_6: T.Buffer[(112,), "uint8"], buffer_encoded_7: T.Buffer[(32,), "uint8"]) -> None:
# function attr dict
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
- placeholder = T.buffer_decl([8192], dtype="int8", data=input_placeholder.data)
- ethosu_write = T.buffer_decl([2048], dtype="int8", data=input_ethosu_write.data)
+ placeholder = T.Buffer([8192], dtype="int8", data=input_placeholder.data)
+ ethosu_write = T.Buffer([2048], dtype="int8", data=input_ethosu_write.data)
# body
placeholder_global_data = T.allocate([128], "uint8", "global")
- placeholder_global = T.buffer_decl([128], "uint8", data=placeholder_global_data)
+ placeholder_global = T.Buffer([128], "uint8", data=placeholder_global_data)
placeholder_global_1_data = T.allocate([112], "uint8", "global")
- placeholder_global_1 = T.buffer_decl([112], "uint8", data=placeholder_global_1_data)
+ placeholder_global_1 = T.Buffer([112], "uint8", data=placeholder_global_1_data)
placeholder_global_2_data = T.allocate([112], "uint8", "global")
- placeholder_global_2 = T.buffer_decl([112], "uint8", data=placeholder_global_2_data)
+ placeholder_global_2 = T.Buffer([112], "uint8", data=placeholder_global_2_data)
placeholder_d_global_data = T.allocate([32], "uint8", "global")
- placeholder_d_global = T.buffer_decl([32], "uint8", data=placeholder_d_global_data)
+ placeholder_d_global = T.Buffer([32], "uint8", data=placeholder_d_global_data)
placeholder_d_global_1_data = T.allocate([32], "uint8", "global")
- placeholder_d_global_1 = T.buffer_decl([32], "uint8", data=placeholder_d_global_1_data)
+ placeholder_d_global_1 = T.Buffer([32], "uint8", data=placeholder_d_global_1_data)
placeholder_d_global_2_data = T.allocate([32], "uint8", "global")
- placeholder_d_global_2 = T.buffer_decl([32], "uint8", data=placeholder_d_global_2_data)
+ placeholder_d_global_2 = T.Buffer([32], "uint8", data=placeholder_d_global_2_data)
placeholder_global_3_data = T.allocate([112], "uint8", "global")
- placeholder_global_3 = T.buffer_decl([112], "uint8", data=placeholder_global_3_data)
+ placeholder_global_3 = T.Buffer([112], "uint8", data=placeholder_global_3_data)
T.evaluate(T.call_extern("ethosu_copy", buffer_encoded[0], 128, placeholder_global[0], dtype="handle"))
T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_1[0], 32, placeholder_d_global[0], dtype="handle"))
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, placeholder_global[0], 128, 12, placeholder_d_global[0], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
@@ -266,7 +266,7 @@ def test_allocate_without_seq_stmt():
T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_4[0], 112, placeholder_global_2[0], dtype="handle"))
T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_5[0], 32, placeholder_d_global_2[0], dtype="handle"))
placeholder_d_global_3_data = T.allocate([32], "uint8", "global")
- placeholder_d_global_3 = T.buffer_decl([32], "uint8", data=placeholder_d_global_3_data)
+ placeholder_d_global_3 = T.Buffer([32], "uint8", data=placeholder_d_global_3_data)
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write[4], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, placeholder_global_2[0], 112, 12, placeholder_d_global_2[0], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_6[0], 112, placeholder_global_3[0], dtype="handle"))
T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_7[0], 32, placeholder_d_global_3[0], dtype="handle"))
diff --git a/tests/python/contrib/test_ethosu/test_merge_constants.py b/tests/python/contrib/test_ethosu/test_merge_constants.py
index ed1927b849..7465e22078 100644
--- a/tests/python/contrib/test_ethosu/test_merge_constants.py
+++ b/tests/python/contrib/test_ethosu/test_merge_constants.py
@@ -41,13 +41,13 @@ def test_only_one_operator():
def main(buffer2: T.Buffer[(128,), "uint8"], buffer3: T.Buffer[(32,), "uint8"]) -> None:
# function attr dict
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
- buffer1 = T.buffer_decl([8192], "int8")
- buffer10 = T.buffer_decl([2048], "int8")
+ buffer1 = T.Buffer([8192], "int8")
+ buffer10 = T.Buffer([2048], "int8")
# body
p1_data = T.allocate([128], "uint8", "global")
- p1 = T.buffer_decl([128], "uint8", data=p1_data)
+ p1 = T.Buffer([128], "uint8", data=p1_data)
p4_data = T.allocate([32], "uint8", "global")
- p4 = T.buffer_decl([32], "uint8", data=p4_data)
+ p4 = T.Buffer([32], "uint8", data=p4_data)
T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 128, p1[0], dtype="handle"))
T.evaluate(T.call_extern("ethosu_copy", buffer3[0], 32, p4[0], dtype="handle"))
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, buffer1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, buffer10[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p1[0], 128, 12, p4[0], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
@@ -59,11 +59,11 @@ def test_only_one_operator():
def main(buffer2: T.Buffer[(160,), "uint8"]) -> None:
# function attr dict
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
- buffer1 = T.buffer_decl([8192], "int8")
- buffer10 = T.buffer_decl([2048], "int8")
+ buffer1 = T.Buffer([8192], "int8")
+ buffer10 = T.Buffer([2048], "int8")
# body
p4_data = T.allocate([160], "uint8", "global")
- p4 = T.buffer_decl([160], "uint8", data=p4_data)
+ p4 = T.Buffer([160], "uint8", data=p4_data)
T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 160, p4[0], dtype="handle"))
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, buffer1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, buffer10[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p4[0], 128, 12, p4[128], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
# fmt: on
@@ -86,25 +86,25 @@ def test_all_operators_with_weights():
def main(buffer2: T.Buffer[(128,), "uint8"], buffer3: T.Buffer[(32,), "uint8"], buffer4: T.Buffer[(112,), "uint8"], buffer5: T.Buffer[(32,), "uint8"], buffer6: T.Buffer[(112,), "uint8"], buffer7: T.Buffer[(32,), "uint8"], buffer8: T.Buffer[(112,), "uint8"], buffer9: T.Buffer[(32,), "uint8"]) -> None:
# function attr dict
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
- buffer1 = T.buffer_decl([8192], "int8")
- buffer10 = T.buffer_decl([2048], "int8")
+ buffer1 = T.Buffer([8192], "int8")
+ buffer10 = T.Buffer([2048], "int8")
# body
p1_data = T.allocate([128], "uint8", "global")
- p1 = T.buffer_decl([128], "uint8", data=p1_data)
+ p1 = T.Buffer([128], "uint8", data=p1_data)
p2_data = T.allocate([112], "uint8", "global")
- p2 = T.buffer_decl([112], "uint8", data=p2_data)
+ p2 = T.Buffer([112], "uint8", data=p2_data)
p3_data = T.allocate([112], "uint8", "global")
- p3 = T.buffer_decl([112], "uint8", data=p3_data)
+ p3 = T.Buffer([112], "uint8", data=p3_data)
p4_data = T.allocate([32], "uint8", "global")
- p4 = T.buffer_decl([32], "uint8", data=p4_data)
+ p4 = T.Buffer([32], "uint8", data=p4_data)
p5_data = T.allocate([32], "uint8", "global")
- p5 = T.buffer_decl([32], "uint8", data=p5_data)
+ p5 = T.Buffer([32], "uint8", data=p5_data)
p6_data = T.allocate([32], "uint8", "global")
- p6 = T.buffer_decl([32], "uint8", data=p6_data)
+ p6 = T.Buffer([32], "uint8", data=p6_data)
p7_data = T.allocate([112], "uint8", "global")
- p7 = T.buffer_decl([112], "uint8", data=p7_data)
+ p7 = T.Buffer([112], "uint8", data=p7_data)
p8_data = T.allocate([3], "uint8", "global")
- p8 = T.buffer_decl([3], "uint8", data=p8_data)
+ p8 = T.Buffer([3], "uint8", data=p8_data)
T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 128, p1[0], dtype="handle"))
T.evaluate(T.call_extern("ethosu_copy", buffer3[0], 32, p4[0], dtype="handle"))
T.evaluate(T.call_extern("ethosu_copy", buffer4[0], 112, p2[0], dtype="handle"))
@@ -125,17 +125,17 @@ def test_all_operators_with_weights():
def main(buffer2: T.Buffer[(160,), "uint8"], buffer4: T.Buffer[(144,), "uint8"], buffer6: T.Buffer[(144,), "uint8"], buffer8: T.Buffer[(144,), "uint8"]) -> None:
# function attr dict
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
- buffer1 = T.buffer_decl([8192], "int8")
- buffer10 = T.buffer_decl([2048], "int8")
+ buffer1 = T.Buffer([8192], "int8")
+ buffer10 = T.Buffer([2048], "int8")
# body
p4_data = T.allocate([160], "uint8", "global")
- p4 = T.buffer_decl([160], "uint8", data=p4_data)
+ p4 = T.Buffer([160], "uint8", data=p4_data)
p7_data = T.allocate([144], "uint8", "global")
- p7 = T.buffer_decl([144], "uint8", data=p7_data)
+ p7 = T.Buffer([144], "uint8", data=p7_data)
p10_data = T.allocate([144], "uint8", "global")
- p10 = T.buffer_decl([144], "uint8", data=p10_data)
+ p10 = T.Buffer([144], "uint8", data=p10_data)
p11_data = T.allocate([144], "uint8", "global")
- p11 = T.buffer_decl([144], "uint8", data=p11_data)
+ p11 = T.Buffer([144], "uint8", data=p11_data)
T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 160, p4[0], dtype="handle"))
T.evaluate(T.call_extern("ethosu_copy", buffer4[0], 144, p7[0], dtype="handle"))
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, buffer1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, buffer10[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p4[0], 128, 12, p4[128], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
@@ -175,14 +175,14 @@ def test_operators_with_and_without_weights():
@T.prim_func
def main(buffer2: T.Buffer[(80,), "uint8"], buffer3: T.Buffer[(64,), "uint8"]) -> None:
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
- buffer0 = T.buffer_decl([390336], "int8")
- buffer1 = T.buffer_decl([97156], "int8")
- buffer6 = T.buffer_decl([390336], "int8")
+ buffer0 = T.Buffer([390336], "int8")
+ buffer1 = T.Buffer([97156], "int8")
+ buffer6 = T.Buffer([390336], "int8")
# body
p2_data = T.allocate([80], "uint8", "global")
- p2 = T.buffer_decl([80], "uint8", data=p2_data)
+ p2 = T.Buffer([80], "uint8", data=p2_data)
p3_data = T.allocate([64], "uint8", "global")
- p3 = T.buffer_decl([64], "uint8", data=p3_data)
+ p3 = T.Buffer([64], "uint8", data=p3_data)
T.evaluate(T.call_extern("ethosu_pooling", "int8", 214, 227, 2, 214, 0, 227, buffer1[0], 0, 0, 0, T.float32(1), 0, "NHWC", 454, 2, 1, "int8", 214, 114, 2, 214, 0, 114, buffer0[0], 0, 0, 0, T.float32(1), 0, "NHCWB16", 1824, 16, 1, "MAX", 2, 1, 2, 1, 1, 1, 0, 0, 0, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 80, p2[0], dtype="handle"))
T.evaluate(T.call_extern("ethosu_copy", buffer3[0], 64, p3[0], dtype="handle"))
@@ -194,12 +194,12 @@ def test_operators_with_and_without_weights():
@T.prim_func
def main(buffer2: T.Buffer[(144,), "uint8"]) -> None:
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
- buffer0 = T.buffer_decl([390336], "int8")
- buffer1 = T.buffer_decl([97156], "int8")
- buffer6 = T.buffer_decl([390336], "int8")
+ buffer0 = T.Buffer([390336], "int8")
+ buffer1 = T.Buffer([97156], "int8")
+ buffer6 = T.Buffer([390336], "int8")
# body
p3_data = T.allocate([144], "uint8", "global")
- p3 = T.buffer_decl([144], "uint8", data=p3_data)
+ p3 = T.Buffer([144], "uint8", data=p3_data)
T.evaluate(T.call_extern("ethosu_pooling", "int8", 214, 227, 2, 214, 0, 227, buffer1[0], 0, 0, 0, T.float32(1), 0, "NHWC", 454, 2, 1, "int8", 214, 114, 2, 214, 0, 114, buffer0[0], 0, 0, 0, T.float32(1), 0, "NHCWB16", 1824, 16, 1, "MAX", 2, 1, 2, 1, 1, 1, 0, 0, 0, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 144, p3[0], dtype="handle"))
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 214, 114, 2, 214, 0, 114, buffer0[0], 0, 0, 0, T.float32(0.00392157), -128, "NHCWB16", 1824, 16, 1, "int8", 214, 114, 5, 214, 0, 114, buffer6[0], 0, 0, 0, T.float32(0.0174839), -128, "NHCWB16", 1824, 16, 1, 3, 1, 1, 1, 1, 2, p3[0], 80, 0, p3[80], 64, 0, 1, 0, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
@@ -234,17 +234,17 @@ def test_copy_to_buffer_with_local_scope():
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
# body
p1_data = T.allocate([48], "uint8", "global")
- p1 = T.buffer_decl([48], "uint8", data=p1_data)
+ p1 = T.Buffer([48], "uint8", data=p1_data)
p2_data = T.allocate([48], "uint8", "global")
- p2 = T.buffer_decl([48], "uint8", data=p2_data)
+ p2 = T.Buffer([48], "uint8", data=p2_data)
p3_data = T.allocate([256], "int8", "local")
- p3 = T.buffer_decl([256], "int8", data=p3_data, scope="local")
+ p3 = T.Buffer([256], "int8", data=p3_data, scope="local")
p5_data = T.allocate([16], "uint8", "global")
- p5 = T.buffer_decl([16], "uint8", data=p5_data)
+ p5 = T.Buffer([16], "uint8", data=p5_data)
p6_data = T.allocate([48], "uint8", "global")
- p6 = T.buffer_decl([48], "uint8", data=p6_data)
+ p6 = T.Buffer([48], "uint8", data=p6_data)
p7_data = T.allocate([256], "int8", "local")
- p7 = T.buffer_decl([256], "int8", data=p7_data, scope="local")
+ p7 = T.Buffer([256], "int8", data=p7_data, scope="local")
T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 48, p1[0], dtype="handle"))
T.evaluate(T.call_extern("ethosu_copy", buffer3[0], 48, p2[0], dtype="handle"))
T.evaluate(T.call_extern("ethosu_copy", buffer4[0], 256, p3[0], dtype="handle")) # Local
@@ -269,13 +269,13 @@ def test_copy_to_buffer_with_local_scope():
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
# body
p1_data = T.allocate([96], "uint8", "global")
- p1 = T.buffer_decl([96], "uint8", data=p1_data)
+ p1 = T.Buffer([96], "uint8", data=p1_data)
p2_data = T.allocate([64], "uint8", "global")
- p2 = T.buffer_decl([64], "uint8", data=p2_data)
+ p2 = T.Buffer([64], "uint8", data=p2_data)
p3_data = T.allocate([256], "int8", "local")
- p3 = T.buffer_decl([256], "int8", data=p3_data, scope="local")
+ p3 = T.Buffer([256], "int8", data=p3_data, scope="local")
p7_data = T.allocate([256], "int8", "local")
- p7 = T.buffer_decl([256], "int8", data=p7_data, scope="local")
+ p7 = T.Buffer([256], "int8", data=p7_data, scope="local")
T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 96, p1[0], dtype="handle"))
T.evaluate(T.call_extern("ethosu_copy", buffer4[0], 256, p3[0], dtype="handle")) # Local
T.evaluate(T.call_extern("ethosu_copy", buffer5[0], 64, p2[0], dtype="handle"))
@@ -312,11 +312,11 @@ def test_no_copies():
def main() -> None:
# function attr dict
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
- placeholder = T.buffer_decl([20], "int8")
- ethosu_write = T.buffer_decl([16], "int8")
+ placeholder = T.Buffer([20], "int8")
+ ethosu_write = T.Buffer([16], "int8")
# body
ethosu_write_4_data = T.allocate([16], "int8", "global")
- ethosu_write_4 = T.buffer_decl([16], "int8", data=ethosu_write_4_data)
+ ethosu_write_4 = T.Buffer([16], "int8", data=ethosu_write_4_data)
T.evaluate(T.call_extern("ethosu_binary_elementwise", "int8", 1, 4, 4, 1, 0, 4, placeholder[0], 0, 0, 0, T.float32(0.00783747), -128, "NHWC", 1, 4, 1, "int8", 1, 4, 1, 1, 0, 4, placeholder[16], 0, 0, 0, T.float32(0.00783747), -128, "NHWC", 1, 1, 1, "int8", 1, 4, 4, 1, 0, 4, ethosu_write_4[0], 0, 0, 0, T.float32(0.00783747), -128, "NHWC", 1, 4, 1, "MAX", 0, "CLIP", -128, 127, "TFL", 1, 4, 4, dtype="handle"))
T.evaluate(T.call_extern("ethosu_identity", "int8", 1, 4, 4, 1, 0, 4, ethosu_write_4[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 4, 1, "int8", 1, 4, 4, 1, 0, 4, ethosu_write[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 4, 1, "AVG", 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
@@ -326,11 +326,11 @@ def test_no_copies():
def main() -> None:
# function attr dict
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
- placeholder = T.buffer_decl([20], "int8")
- ethosu_write = T.buffer_decl([16], "int8")
+ placeholder = T.Buffer([20], "int8")
+ ethosu_write = T.Buffer([16], "int8")
# body
ethosu_write_4_data = T.allocate([16], "int8", "global")
- ethosu_write_4 = T.buffer_decl([16], "int8", data=ethosu_write_4_data)
+ ethosu_write_4 = T.Buffer([16], "int8", data=ethosu_write_4_data)
T.evaluate(T.call_extern("ethosu_binary_elementwise", "int8", 1, 4, 4, 1, 0, 4, placeholder[0], 0, 0, 0, T.float32(0.00783747), -128, "NHWC", 1, 4, 1, "int8", 1, 4, 1, 1, 0, 4, placeholder[16], 0, 0, 0, T.float32(0.00783747), -128, "NHWC", 1, 1, 1, "int8", 1, 4, 4, 1, 0, 4, ethosu_write_4[0], 0, 0, 0, T.float32(0.00783747), -128, "NHWC", 1, 4, 1, "MAX", 0, "CLIP", -128, 127, "TFL", 1, 4, 4, dtype="handle"))
T.evaluate(T.call_extern("ethosu_identity", "int8", 1, 4, 4, 1, 0, 4, ethosu_write_4[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 4, 1, "int8", 1, 4, 4, 1, 0, 4, ethosu_write[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 4, 1, "AVG", 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
# fmt: on
@@ -351,13 +351,13 @@ def test_copies_to_the_same_buffer():
def main(buffer2: T.Buffer[(128,), "uint8"], buffer3: T.Buffer[(32,), "uint8"]) -> None:
# function attr dict
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
- buffer1 = T.buffer_decl([8192], "int8")
- buffer10 = T.buffer_decl([2048], "int8")
+ buffer1 = T.Buffer([8192], "int8")
+ buffer10 = T.Buffer([2048], "int8")
# body
p1_data = T.allocate([128], "uint8", "global")
- p1 = T.buffer_decl([128], "uint8", data=p1_data)
+ p1 = T.Buffer([128], "uint8", data=p1_data)
p4_data = T.allocate([32], "uint8", "global")
- p4 = T.buffer_decl([32], "uint8", data=p4_data)
+ p4 = T.Buffer([32], "uint8", data=p4_data)
T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 128, p1[0], dtype="handle"))
T.evaluate(T.call_extern("ethosu_copy", buffer3[0], 32, p4[0], dtype="handle"))
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, buffer1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, buffer10[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p1[0], 128, 12, p4[0], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
@@ -372,11 +372,11 @@ def test_copies_to_the_same_buffer():
def main(buffer2: T.Buffer[(160,), "uint8"]) -> None:
# function attr dict
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
- buffer1 = T.buffer_decl([8192], "int8")
- buffer10 = T.buffer_decl([2048], "int8")
+ buffer1 = T.Buffer([8192], "int8")
+ buffer10 = T.Buffer([2048], "int8")
# body
p5_data = T.allocate([160], "uint8", "global")
- p5 = T.buffer_decl([160], "uint8", data=p5_data)
+ p5 = T.Buffer([160], "uint8", data=p5_data)
T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 160, p5[0], dtype="handle"))
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, buffer1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, buffer10[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p5[0], 128, 12, p5[128], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 160, p5[0], dtype="handle"))
@@ -403,13 +403,13 @@ def test_read_from_the_same_buffer():
# function attr dict
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
# buffer definition
- placeholder = T.buffer_decl(8192, dtype="int8", data=input_placeholder.data)
- ethosu_write = T.buffer_decl(2048, dtype="int8", data=input_ethosu_write.data)
+ placeholder = T.Buffer(8192, dtype="int8", data=input_placeholder.data)
+ ethosu_write = T.Buffer(2048, dtype="int8", data=input_ethosu_write.data)
# body
p1_data = T.allocate([368], "uint8", "global")
- p1 = T.buffer_decl([368], "uint8", data=p1_data)
+ p1 = T.Buffer([368], "uint8", data=p1_data)
p2_data = T.allocate([96], "uint8", "global")
- p2 = T.buffer_decl([96], "uint8", data=p2_data)
+ p2 = T.Buffer([96], "uint8", data=p2_data)
T.evaluate(T.call_extern("ethosu_copy", buffer1[0], 368, p1[0], dtype="handle"))
T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 96, p2[0], dtype="handle"))
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 8, 32, 16, 0, 8, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 8, 8, 16, 0, 8, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p1[0], 192, p1[192], 176, 12, p2[0], 48, p2[48], 48, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
@@ -423,11 +423,11 @@ def test_read_from_the_same_buffer():
# function attr dict
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
# buffer definition
- placeholder = T.buffer_decl(8192, dtype="int8", data=input_placeholder.data)
- ethosu_write = T.buffer_decl(2048, dtype="int8", data=input_ethosu_write.data)
+ placeholder = T.Buffer(8192, dtype="int8", data=input_placeholder.data)
+ ethosu_write = T.Buffer(2048, dtype="int8", data=input_ethosu_write.data)
# body
p1_data = T.allocate([464], "uint8", "global")
- p1 = T.buffer_decl([464], "uint8", data=p1_data)
+ p1 = T.Buffer([464], "uint8", data=p1_data)
T.evaluate(T.call_extern("ethosu_copy", buffer1[0], 464, p1[0], dtype="handle"))
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 8, 32, 16, 0, 8, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 8, 8, 16, 0, 8, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p1[0], 192, p1[192], 176, 12, p1[368], 48, p1[416], 48, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
__tvm_meta__ = None
@@ -453,17 +453,17 @@ def test_arbitrary_argument_order():
# function attr dict
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
# buffer definition
- placeholder = T.buffer_decl(8192, dtype="int8", data=input_placeholder.data)
- ethosu_write = T.buffer_decl(2048, dtype="int8", data=input_ethosu_write.data)
+ placeholder = T.Buffer(8192, dtype="int8", data=input_placeholder.data)
+ ethosu_write = T.Buffer(2048, dtype="int8", data=input_ethosu_write.data)
# body
p1_data = T.allocate([368], "uint8", "global")
- p1 = T.buffer_decl([368], "uint8", data=p1_data)
+ p1 = T.Buffer([368], "uint8", data=p1_data)
p2_data = T.allocate([96], "uint8", "global")
- p2 = T.buffer_decl([96], "uint8", data=p2_data)
+ p2 = T.Buffer([96], "uint8", data=p2_data)
p3_data = T.allocate([368], "uint8", "global")
- p3 = T.buffer_decl([368], "uint8", data=p3_data)
+ p3 = T.Buffer([368], "uint8", data=p3_data)
p4_data = T.allocate([96], "uint8", "global")
- p4 = T.buffer_decl([96], "uint8", data=p4_data)
+ p4 = T.Buffer([96], "uint8", data=p4_data)
T.evaluate(T.call_extern("ethosu_copy", buffer1[0], 368, p1[0], dtype="handle"))
T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 96, p2[0], dtype="handle"))
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 8, 32, 16, 0, 8, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 8, 8, 16, 0, 8, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p1[0], 192, p1[192], 176, 12, p2[0], 48, p2[48], 48, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
@@ -480,13 +480,13 @@ def test_arbitrary_argument_order():
# function attr dict
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
# buffer definition
- placeholder = T.buffer_decl(8192, dtype="int8", data=input_placeholder.data)
- ethosu_write = T.buffer_decl(2048, dtype="int8", data=input_ethosu_write.data)
+ placeholder = T.Buffer(8192, dtype="int8", data=input_placeholder.data)
+ ethosu_write = T.Buffer(2048, dtype="int8", data=input_ethosu_write.data)
# body
p1_data = T.allocate([464], "uint8", "global")
- p1 = T.buffer_decl([464], "uint8", data=p1_data)
+ p1 = T.Buffer([464], "uint8", data=p1_data)
p2_data = T.allocate([464], "uint8", "global")
- p2 = T.buffer_decl([464], "uint8", data=p2_data)
+ p2 = T.Buffer([464], "uint8", data=p2_data)
T.evaluate(T.call_extern("ethosu_copy", buffer1[0], 464, p1[0], dtype="handle"))
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 8, 32, 16, 0, 8, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 8, 8, 16, 0, 8, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p1[0], 192, p1[192], 176, 12, p1[368], 48, p1[416], 48, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 464, p2[0], dtype="handle"))
@@ -519,17 +519,17 @@ def test_arbitrary_argument_order_const_split():
# function attr dict
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
# buffer definition
- placeholder = T.buffer_decl(8192, dtype="int8", data=input_placeholder.data)
- ethosu_write = T.buffer_decl(2048, dtype="int8", data=input_ethosu_write.data)
+ placeholder = T.Buffer(8192, dtype="int8", data=input_placeholder.data)
+ ethosu_write = T.Buffer(2048, dtype="int8", data=input_ethosu_write.data)
# body
p1_data = T.allocate([368], "uint8", "global")
- p1 = T.buffer_decl([368], "uint8", data=p1_data)
+ p1 = T.Buffer([368], "uint8", data=p1_data)
p2_data = T.allocate([96], "uint8", "global")
- p2 = T.buffer_decl([96], "uint8", data=p2_data)
+ p2 = T.Buffer([96], "uint8", data=p2_data)
p3_data = T.allocate([368], "uint8", "global")
- p3 = T.buffer_decl([368], "uint8", data=p3_data)
+ p3 = T.Buffer([368], "uint8", data=p3_data)
p4_data = T.allocate([96], "uint8", "global")
- p4 = T.buffer_decl([96], "uint8", data=p4_data)
+ p4 = T.Buffer([96], "uint8", data=p4_data)
T.evaluate(T.call_extern("ethosu_copy", buffer1[0], 368, p1[0], dtype="handle"))
T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 96, p2[0], dtype="handle"))
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 8, 32, 16, 0, 8, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 8, 8, 16, 0, 8, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p1[0], 192, p1[192], 176, 12, p2[0], 48, p2[48], 48, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
@@ -546,13 +546,13 @@ def test_arbitrary_argument_order_const_split():
# function attr dict
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
# buffer definition
- placeholder = T.buffer_decl(8192, dtype="int8", data=input_placeholder.data)
- ethosu_write = T.buffer_decl(2048, dtype="int8", data=input_ethosu_write.data)
+ placeholder = T.Buffer(8192, dtype="int8", data=input_placeholder.data)
+ ethosu_write = T.Buffer(2048, dtype="int8", data=input_ethosu_write.data)
# body
p1_data = T.allocate([464], "uint8", "global")
- p1 = T.buffer_decl([464], "uint8", data=p1_data)
+ p1 = T.Buffer([464], "uint8", data=p1_data)
p2_data = T.allocate([464], "uint8", "global")
- p2 = T.buffer_decl([464], "uint8", data=p2_data)
+ p2 = T.Buffer([464], "uint8", data=p2_data)
T.evaluate(T.call_extern("ethosu_copy", buffer1[0], 464, p1[0], dtype="handle"))
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 8, 32, 16, 0, 8, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 8, 8, 16, 0, 8, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p1[0], 192, p1[192], 176, 12, p1[368], 48, p1[416], 48, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 464, p2[0], dtype="handle"))
@@ -585,17 +585,17 @@ def test_arbitrary_argument_order_const_split_mixed():
# function attr dict
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
# buffer definition
- placeholder = T.buffer_decl(8192, dtype='int8', data=input_placeholder.data)
- ethosu_write = T.buffer_decl(4096, dtype='int8', data=input_ethosu_write.data)
+ placeholder = T.Buffer(8192, dtype='int8', data=input_placeholder.data)
+ ethosu_write = T.Buffer(4096, dtype='int8', data=input_ethosu_write.data)
# body
p1_data = T.allocate([368], "uint8", "global")
- p1 = T.buffer_decl([368], "uint8", data=p1_data)
+ p1 = T.Buffer([368], "uint8", data=p1_data)
p2_data = T.allocate([368], "uint8", "global")
- p2 = T.buffer_decl([368], "uint8", data=p2_data)
+ p2 = T.Buffer([368], "uint8", data=p2_data)
p3_data = T.allocate([96], "uint8", "global")
- p3 = T.buffer_decl([96], "uint8", data=p3_data)
+ p3 = T.Buffer([96], "uint8", data=p3_data)
p4_data = T.allocate([96], "uint8", "global")
- p4 = T.buffer_decl([96], "uint8", data=p4_data)
+ p4 = T.Buffer([96], "uint8", data=p4_data)
T.evaluate(T.call_extern("ethosu_copy", buffer1[0], 368, p1[0], dtype="handle"))
T.evaluate(T.call_extern("ethosu_copy", buffer3[0], 96, p3[0], dtype="handle"))
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 8, 32, 16, 0, 8, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 8, 8, 16, 0, 8, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p1[0], 192, p1[192], 176, 12, p3[0], 48, p3[48], 48, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
@@ -612,13 +612,13 @@ def test_arbitrary_argument_order_const_split_mixed():
# function attr dict
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
# buffer definition
- placeholder = T.buffer_decl(8192, dtype='int8', data=input_placeholder.data)
- ethosu_write = T.buffer_decl(4096, dtype='int8', data=input_ethosu_write.data)
+ placeholder = T.Buffer(8192, dtype='int8', data=input_placeholder.data)
+ ethosu_write = T.Buffer(4096, dtype='int8', data=input_ethosu_write.data)
# body
p1_data = T.allocate([464], "uint8", "global")
- p1 = T.buffer_decl([464], "uint8", data=p1_data)
+ p1 = T.Buffer([464], "uint8", data=p1_data)
p2_data = T.allocate([464], "uint8", "global")
- p2 = T.buffer_decl([464], "uint8", data=p2_data)
+ p2 = T.Buffer([464], "uint8", data=p2_data)
T.evaluate(T.call_extern("ethosu_copy", buffer1[0], 464, p1[0], dtype="handle"))
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 8, 32, 16, 0, 8, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 8, 8, 16, 0, 8, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p1[0], 192, p1[192], 176, 12, p1[368], 48, p1[416], 48, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 464, p2[0], dtype="handle"))
@@ -662,25 +662,25 @@ def test_cycle_count():
v4a = T.var("int32")
v4b = T.var("int32")
v4c = T.var("int32")
- buffer1 = T.buffer_decl([8192], "int8")
- buffer10 = T.buffer_decl([2048], "int8")
+ buffer1 = T.Buffer([8192], "int8")
+ buffer10 = T.Buffer([2048], "int8")
# body
p1_data = T.allocate([128], "uint8", "global")
- p1 = T.buffer_decl([128], "uint8", data=p1_data)
+ p1 = T.Buffer([128], "uint8", data=p1_data)
p2_data = T.allocate([112], "uint8", "global")
- p2 = T.buffer_decl([112], "uint8", data=p2_data)
+ p2 = T.Buffer([112], "uint8", data=p2_data)
p3_data = T.allocate([112], "uint8", "global")
- p3 = T.buffer_decl([112], "uint8", data=p3_data)
+ p3 = T.Buffer([112], "uint8", data=p3_data)
p4_data = T.allocate([32], "uint8", "global")
- p4 = T.buffer_decl([32], "uint8", data=p4_data)
+ p4 = T.Buffer([32], "uint8", data=p4_data)
p5_data = T.allocate([32], "uint8", "global")
- p5 = T.buffer_decl([32], "uint8", data=p5_data)
+ p5 = T.Buffer([32], "uint8", data=p5_data)
p6_data = T.allocate([32], "uint8", "global")
- p6 = T.buffer_decl([32], "uint8", data=p6_data)
+ p6 = T.Buffer([32], "uint8", data=p6_data)
p7_data = T.allocate([112], "uint8", "global")
- p7 = T.buffer_decl([112], "uint8", data=p7_data)
+ p7 = T.Buffer([112], "uint8", data=p7_data)
p8_data = T.allocate([3], "uint8", "global")
- p8 = T.buffer_decl([3], "uint8", data=p8_data)
+ p8 = T.Buffer([3], "uint8", data=p8_data)
with T.attr(T.iter_var(v1a, None, "DataPar", ""), "pragma_compute_cycles_hint", 100):
T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 128, p1[0], dtype="handle"))
with T.attr(T.iter_var(v1b, None, "DataPar", ""), "pragma_compute_cycles_hint", 101):
@@ -721,17 +721,17 @@ def test_cycle_count():
v3c = T.var("int32")
v4a = T.var("int32")
v4c = T.var("int32")
- buffer1 = T.buffer_decl([8192], "int8")
- buffer10 = T.buffer_decl([2048], "int8")
+ buffer1 = T.Buffer([8192], "int8")
+ buffer10 = T.Buffer([2048], "int8")
# body
p4_data = T.allocate([160], "uint8", "global")
- p4 = T.buffer_decl([160], "uint8", data=p4_data)
+ p4 = T.Buffer([160], "uint8", data=p4_data)
p7_data = T.allocate([144], "uint8", "global")
- p7 = T.buffer_decl([144], "uint8", data=p7_data)
+ p7 = T.Buffer([144], "uint8", data=p7_data)
p10_data = T.allocate([144], "uint8", "global")
- p10 = T.buffer_decl([144], "uint8", data=p10_data)
+ p10 = T.Buffer([144], "uint8", data=p10_data)
p11_data = T.allocate([144], "uint8", "global")
- p11 = T.buffer_decl([144], "uint8", data=p11_data)
+ p11 = T.Buffer([144], "uint8", data=p11_data)
with T.attr(T.iter_var(v1a, None, "DataPar", ""), "pragma_compute_cycles_hint", 201):
T.evaluate(T.call_extern("ethosu_copy", buffer2[0], 160, p4[0], dtype="handle"))
with T.attr(T.iter_var(v2a, None, "DataPar", ""), "pragma_compute_cycles_hint", 205):
diff --git a/tests/python/contrib/test_ethosu/test_remove_concatenates.py b/tests/python/contrib/test_ethosu/test_remove_concatenates.py
index b8ce7f0d60..64777aa0fb 100644
--- a/tests/python/contrib/test_ethosu/test_remove_concatenates.py
+++ b/tests/python/contrib/test_ethosu/test_remove_concatenates.py
@@ -35,21 +35,21 @@ class ReferenceModule:
# function attr dict
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
- placeholder = T.buffer_decl(1536, dtype="int8", data=input_placeholder.data)
- placeholder_1 = T.buffer_decl(1280, dtype="int8", data=input_placeholder_1.data)
- T_concat = T.buffer_decl(4096, dtype="int8", data=input_T_concat.data)
+ placeholder = T.Buffer(1536, dtype="int8", data=input_placeholder.data)
+ placeholder_1 = T.Buffer(1280, dtype="int8", data=input_placeholder_1.data)
+ T_concat = T.Buffer(4096, dtype="int8", data=input_T_concat.data)
- buffer = T.buffer_decl([2992], "uint8")
- buffer_1 = T.buffer_decl([160], "uint8")
- buffer_2 = T.buffer_decl([2992], "uint8")
- buffer_3 = T.buffer_decl([160], "uint8")
- buffer_4 = T.buffer_decl([2992], "uint8")
- buffer_5 = T.buffer_decl([160], "uint8")
- buffer_6 = T.buffer_decl([2992], "uint8")
- buffer_7 = T.buffer_decl([160], "uint8")
+ buffer = T.Buffer([2992], "uint8")
+ buffer_1 = T.Buffer([160], "uint8")
+ buffer_2 = T.Buffer([2992], "uint8")
+ buffer_3 = T.Buffer([160], "uint8")
+ buffer_4 = T.Buffer([2992], "uint8")
+ buffer_5 = T.Buffer([160], "uint8")
+ buffer_6 = T.Buffer([2992], "uint8")
+ buffer_7 = T.Buffer([160], "uint8")
# body
T_concat_1_data = T.allocate([2816], "int8", "global", annotations={"disable_lower_builtin":True})
- T_concat_1 = T.buffer_decl([2816], "int8", data=T_concat_1_data)
+ T_concat_1 = T.Buffer([2816], "int8", data=T_concat_1_data)
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 8, 10, 16, 8, 0, 10, placeholder_1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 160, 16, 1, "int8", 8, 10, 16, 8, 0, 10, T_concat_1[192], 0, 0, 0, T.float32(0.25), 14, "NHWC", 352, 16, 1, 3, 3, 1, 1, 1, 1, buffer[0], 2992, T.int8(-1), T.int8(-1), 12, buffer_1[0], 160, T.int8(-1), T.int8(-1), 1, 1, 1, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 8, 10, 16, 8, 0, 10, T_concat_1[192], 0, 0, 0, T.float32(0.5), 10, "NHWC", 352, 16, 1, "int8", 8, 10, 16, 8, 0, 10, T_concat[352], 0, 0, 0, T.float32(0.25), 14, "NHWC", 512, 16, 1, 3, 3, 1, 1, 1, 1, buffer_2[0], 2992, T.int8(-1), T.int8(-1), 12, buffer_3[0], 160, T.int8(-1), T.int8(-1), 1, 1, 1, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 8, 12, 16, 8, 0, 12, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 192, 16, 1, "int8", 8, 12, 16, 8, 0, 12, T_concat_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 352, 16, 1, 3, 3, 1, 1, 1, 1, buffer_4[0], 2992, T.int8(-1), T.int8(-1), 12, buffer_5[0], 160, T.int8(-1), T.int8(-1), 1, 1, 1, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
diff --git a/tests/python/contrib/test_ethosu/test_replace_conv2d.py b/tests/python/contrib/test_ethosu/test_replace_conv2d.py
index bdc0447bc7..ffa6d6effd 100644
--- a/tests/python/contrib/test_ethosu/test_replace_conv2d.py
+++ b/tests/python/contrib/test_ethosu/test_replace_conv2d.py
@@ -370,15 +370,15 @@ class Conv2dDoubleCascade1:
def main(input_placeholder_5: T.Buffer[(1, 8, 8, 3), "int8"], input_ethosu_write_1: T.Buffer[(1, 8, 8, 8), "int8"]) -> None:
# function attr dict
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
- buffer = T.buffer_decl([304], "uint8")
- buffer_1 = T.buffer_decl([80], "uint8")
- buffer_2 = T.buffer_decl([320], "uint8")
- buffer_3 = T.buffer_decl([160], "uint8")
- placeholder_5 = T.buffer_decl([192], 'int8', data=input_placeholder_5.data)
- ethosu_write_1 = T.buffer_decl([512], 'int8', data=input_ethosu_write_1.data)
+ buffer = T.Buffer([304], "uint8")
+ buffer_1 = T.Buffer([80], "uint8")
+ buffer_2 = T.Buffer([320], "uint8")
+ buffer_3 = T.Buffer([160], "uint8")
+ placeholder_5 = T.Buffer([192], 'int8', data=input_placeholder_5.data)
+ ethosu_write_1 = T.Buffer([512], 'int8', data=input_ethosu_write_1.data)
# body
ethosu_write_2_data = T.allocate([1024], "int8", "global", annotations={"disable_lower_builtin": True})
- ethosu_write_2 = T.buffer_decl([1024], "int8", data=ethosu_write_2_data)
+ ethosu_write_2 = T.Buffer([1024], "int8", data=ethosu_write_2_data)
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 8, 4, 3, 8, 0, 4, placeholder_5[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 3, 1, "int8", 8, 4, 32, 8, 0, 4, ethosu_write_2[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 32, 1, 1, 1, 1, 1, 1, 1, buffer_3[0], 160, T.int8(-1), T.int8(-1), 12, buffer_2[0], 320, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 8, 4, 32, 8, 0, 4, ethosu_write_2[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 128, 32, 1, "int8", 8, 4, 8, 8, 0, 4, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 64, 8, 1, 1, 1, 1, 1, 1, 1, buffer[0], 304, T.int8(-1), T.int8(-1), 12, buffer_1[0], 80, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 8, 4, 3, 8, 0, 4, placeholder_5[12], 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 3, 1, "int8", 8, 4, 32, 8, 0, 4, ethosu_write_2[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 32, 1, 1, 1, 1, 1, 1, 1, buffer_3[0], 160, T.int8(-1), T.int8(-1), 12, buffer_2[0], 320, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
@@ -392,15 +392,15 @@ class Conv2dDoubleCascade2:
def main(input_placeholder_5: T.Buffer[(1, 8, 8, 3), "int8"], input_ethosu_write_1: T.Buffer[(1, 8, 8, 8), "int8"]) -> None:
# function attr dict
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
- buffer = T.buffer_decl([80], "uint8")
- buffer_1 = T.buffer_decl([320], "uint8")
- buffer_2 = T.buffer_decl([1312], "uint8")
- buffer_3 = T.buffer_decl([2608], "uint8")
- placeholder_5 = T.buffer_decl([192], 'int8', data=input_placeholder_5.data)
- ethosu_write_1 = T.buffer_decl([512], 'int8', data=input_ethosu_write_1.data)
+ buffer = T.Buffer([80], "uint8")
+ buffer_1 = T.Buffer([320], "uint8")
+ buffer_2 = T.Buffer([1312], "uint8")
+ buffer_3 = T.Buffer([2608], "uint8")
+ placeholder_5 = T.Buffer([192], 'int8', data=input_placeholder_5.data)
+ ethosu_write_1 = T.Buffer([512], 'int8', data=input_ethosu_write_1.data)
# body
ethosu_write_2_data = T.allocate([1536], "int8", "global", annotations={"disable_lower_builtin": True})
- ethosu_write_2 = T.buffer_decl([1536], "int8", data=ethosu_write_2_data)
+ ethosu_write_2 = T.Buffer([1536], "int8", data=ethosu_write_2_data)
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 6, 8, 3, 6, 0, 8, placeholder_5[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 3, 1, "int8", 5, 8, 32, 5, 0, 8, ethosu_write_2[256], 0, 0, 0, T.float32(0.25), 14, "NHWC", 256, 32, 1, 3, 3, 1, 1, 1, 1, buffer_2[0], 1312, T.int8(-1), T.int8(-1), 12, buffer_1[0], 320, T.int8(-1), T.int8(-1), 1, 1, 0, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 5, 8, 32, 5, 0, 8, ethosu_write_2[256], 0, 0, 0, T.float32(0.5), 10, "NHWC", 256, 32, 1, "int8", 4, 8, 8, 4, 0, 8, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 64, 8, 1, 3, 3, 1, 1, 1, 1, buffer_3[0], 2608, T.int8(-1), T.int8(-1), 12, buffer[0], 80, T.int8(-1), T.int8(-1), 1, 1, 0, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 6, 8, 3, 6, 0, 8, placeholder_5[48], 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 3, 1, "int8", 5, 8, 32, 5, 0, 8, ethosu_write_2[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 256, 32, 1, 3, 3, 1, 1, 1, 1, buffer_2[0], 1312, T.int8(-1), T.int8(-1), 12, buffer_1[0], 320, T.int8(-1), T.int8(-1), 0, 1, 1, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
@@ -414,16 +414,16 @@ class Conv2dDoubleCascade3:
def main(input_placeholder_5: T.Buffer[(1, 16, 16, 3), "int8"], input_ethosu_write_1: T.Buffer[(1, 20, 4, 8), "int8"]) -> None:
# function attr dict
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
- buffer = T.buffer_decl([1744], "uint8")
- buffer_1 = T.buffer_decl([80], "uint8")
- buffer_2 = T.buffer_decl([320], "uint8")
- buffer_3 = T.buffer_decl([880], "uint8")
- placeholder_5 = T.buffer_decl([768], 'int8', data=input_placeholder_5.data)
- ethosu_write_1 = T.buffer_decl([640], 'int8', data=input_ethosu_write_1.data)
+ buffer = T.Buffer([1744], "uint8")
+ buffer_1 = T.Buffer([80], "uint8")
+ buffer_2 = T.Buffer([320], "uint8")
+ buffer_3 = T.Buffer([880], "uint8")
+ placeholder_5 = T.Buffer([768], 'int8', data=input_placeholder_5.data)
+ ethosu_write_1 = T.Buffer([640], 'int8', data=input_ethosu_write_1.data)
# body
ethosu_write_2_data = T.allocate([2560], "int8", "global", annotations={"disable_lower_builtin": True})
- ethosu_write_2 = T.buffer_decl([2560], "int8", data=ethosu_write_2_data)
+ ethosu_write_2 = T.Buffer([2560], "int8", data=ethosu_write_2_data)
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 8, 16, 3, 8, 0, 16, placeholder_5[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 48, 3, 1, "int8", 8, 8, 32, 8, 0, 8, ethosu_write_2[512], 0, 0, 0, T.float32(0.25), 14, "NHWC", 256, 32, 1, 2, 3, 2, 1, 2, 1, buffer_3[0], 880, T.int8(-1), T.int8(-1), 12, buffer_2[0], 320, T.int8(-1), T.int8(-1), 2, 1, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 8, 8, 32, 8, 0, 8, ethosu_write_2[512], 0, 0, 0, T.float32(0.5), 10, "NHWC", 256, 32, 1, "int8", 8, 4, 8, 8, 0, 4, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 32, 8, 1, 2, 3, 2, 1, 2, 1, buffer[0], 1744, T.int8(-1), T.int8(-1), 12, buffer_1[0], 80, T.int8(-1), T.int8(-1), 2, 1, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 12, 16, 3, 12, 0, 16, placeholder_5[192], 0, 0, 0, T.float32(0.5), 10, "NHWC", 48, 3, 1, "int8", 10, 8, 32, 10, 0, 8, ethosu_write_2[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 256, 32, 1, 2, 3, 2, 1, 2, 1, buffer_3[0], 880, T.int8(-1), T.int8(-1), 12, buffer_2[0], 320, T.int8(-1), T.int8(-1), 0, 1, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
@@ -439,15 +439,15 @@ class Conv2dDoubleCascade4:
def main(input_placeholder_5: T.Buffer[(1, 8, 1, 8, 16), "int8"], input_ethosu_write_1: T.Buffer[(1, 8, 2, 8, 16), "int8"]) -> None:
# function attr dict
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
- buffer = T.buffer_decl([1456], "uint8")
- buffer_1 = T.buffer_decl([352], "uint8")
- buffer_2 = T.buffer_decl([272], "uint8")
- buffer_3 = T.buffer_decl([11040], "uint8")
- placeholder_5 = T.buffer_decl([1024], 'int8', data=input_placeholder_5.data)
- ethosu_write_1 = T.buffer_decl([2048], 'int8', data=input_ethosu_write_1.data)
+ buffer = T.Buffer([1456], "uint8")
+ buffer_1 = T.Buffer([352], "uint8")
+ buffer_2 = T.Buffer([272], "uint8")
+ buffer_3 = T.Buffer([11040], "uint8")
+ placeholder_5 = T.Buffer([1024], 'int8', data=input_placeholder_5.data)
+ ethosu_write_1 = T.Buffer([2048], 'int8', data=input_ethosu_write_1.data)
# body
ethosu_write_2_data = T.allocate([2304], "int8", "global", annotations={"disable_lower_builtin": True})
- ethosu_write_2 = T.buffer_decl((2304,), "int8", data=ethosu_write_2_data)
+ ethosu_write_2 = T.Buffer((2304,), "int8", data=ethosu_write_2_data)
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 6, 8, 3, 6, 0, 8, placeholder_5[0], 0, 0, 0, T.float32(0.5), 10, "NHCWB16", 128, 16, 1, "int8", 5, 8, 35, 5, 0, 8, ethosu_write_2[384], 0, 0, 0, T.float32(0.25), 14, "NHCWB16", 384, 16, 128, 3, 3, 1, 1, 1, 1, buffer[0], 1456, T.int8(-1), T.int8(-1), 12, buffer_1[0], 352, T.int8(-1), T.int8(-1), 1, 1, 0, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 5, 8, 35, 5, 0, 8, ethosu_write_2[384], 0, 0, 0, T.float32(0.5), 10, "NHCWB16", 384, 16, 128, "int8", 4, 8, 26, 4, 0, 8, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHCWB16", 256, 16, 128, 3, 3, 1, 1, 1, 1, buffer_3[0], 11040, T.int8(-1), T.int8(-1), 12, buffer_2[0], 272, T.int8(-1), T.int8(-1), 1, 1, 0, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 6, 8, 3, 6, 0, 8, placeholder_5[256], 0, 0, 0, T.float32(0.5), 10, "NHCWB16", 128, 16, 1, "int8", 5, 8, 35, 5, 0, 8, ethosu_write_2[0], 0, 0, 0, T.float32(0.25), 14, "NHCWB16", 384, 16, 128, 3, 3, 1, 1, 1, 1, buffer[0], 1456, T.int8(-1), T.int8(-1), 12, buffer_1[0], 352, T.int8(-1), T.int8(-1), 0, 1, 1, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
@@ -461,15 +461,15 @@ class Conv2dDoubleCascade5:
def main(input_placeholder: T.Buffer[(1, 8, 8, 3), "int8"], input_ethosu_write: T.Buffer[(1, 32, 32, 8), "int8"]) -> None:
# function attr dict
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
- buffer = T.buffer_decl([160], "uint8")
- buffer_1 = T.buffer_decl([320], "uint8")
- buffer_2 = T.buffer_decl([304], "uint8")
- buffer_3 = T.buffer_decl([80], "uint8")
- placeholder = T.buffer_decl([192], 'int8', data=input_placeholder.data)
- ethosu_write = T.buffer_decl([8192], 'int8', data=input_ethosu_write.data)
+ buffer = T.Buffer([160], "uint8")
+ buffer_1 = T.Buffer([320], "uint8")
+ buffer_2 = T.Buffer([304], "uint8")
+ buffer_3 = T.Buffer([80], "uint8")
+ placeholder = T.Buffer([192], 'int8', data=input_placeholder.data)
+ ethosu_write = T.Buffer([8192], 'int8', data=input_ethosu_write.data)
# body
ethosu_write_1_data = T.allocate([4096], "int8", "global", annotations={"disable_lower_builtin":True})
- ethosu_write_1 = T.buffer_decl([4096], "int8", data=ethosu_write_1_data)
+ ethosu_write_1 = T.Buffer([4096], "int8", data=ethosu_write_1_data)
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 4, 8, 3, 4, 0, 8, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 3, 1, "int8", 8, 16, 32, 8, 0, 16, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 512, 32, 1, 1, 1, 1, 1, 1, 1, buffer[0], 160, T.int8(-1), T.int8(-1), 12, buffer_1[0], 320, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "ZEROS", 0, 0, 0, dtype="handle"))
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 8, 16, 32, 8, 0, 16, ethosu_write_1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 32, 8, 16, 0, 32, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 256, 8, 1, 1, 1, 1, 1, 1, 1, buffer_2[0], 304, T.int8(-1), T.int8(-1), 12, buffer_3[0], 80, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "ZEROS", 0, 0, 0, dtype="handle"))
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 4, 8, 3, 4, 0, 8, placeholder[96], 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 3, 1, "int8", 8, 16, 32, 8, 0, 16, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 512, 32, 1, 1, 1, 1, 1, 1, 1, buffer[0], 160, T.int8(-1), T.int8(-1), 12, buffer_1[0], 320, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "ZEROS", 0, 0, 0, dtype="handle"))
@@ -483,15 +483,15 @@ class Conv2dDoubleCascade6:
def main(input_placeholder: T.Buffer[(1, 8, 1, 8, 16), "int8"], input_ethosu_write: T.Buffer[(1, 32, 2, 32, 16), "int8"]) -> None:
# function attr dict
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
- buffer = T.buffer_decl([1456], "uint8")
- buffer_1 = T.buffer_decl([352], "uint8")
- buffer_2 = T.buffer_decl([11040], "uint8")
- buffer_3 = T.buffer_decl([272], "uint8")
- placeholder = T.buffer_decl([1024], 'int8', data=input_placeholder.data)
- ethosu_write = T.buffer_decl([32768], 'int8', data=input_ethosu_write.data)
+ buffer = T.Buffer([1456], "uint8")
+ buffer_1 = T.Buffer([352], "uint8")
+ buffer_2 = T.Buffer([11040], "uint8")
+ buffer_3 = T.Buffer([272], "uint8")
+ placeholder = T.Buffer([1024], 'int8', data=input_placeholder.data)
+ ethosu_write = T.Buffer([32768], 'int8', data=input_ethosu_write.data)
# body
ethosu_write_1_data = T.allocate([12288], "int8", "global", annotations={"disable_lower_builtin":True})
- ethosu_write_1 = T.buffer_decl([12288], "int8", data=ethosu_write_1_data)
+ ethosu_write_1 = T.Buffer([12288], "int8", data=ethosu_write_1_data)
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 8, 8, 3, 8, 0, 8, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHCWB16", 128, 16, 1, "int8", 16, 16, 35, 16, 0, 16, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHCWB16", 768, 16, 256, 3, 3, 1, 1, 1, 1, buffer[0], 1456, T.int8(-1), T.int8(-1), 12, buffer_1[0], 352, T.int8(-1), T.int8(-1), 1, 1, 1, 1, "NONE", 0, 0, "TFL", "NEAREST", 0, 0, 0, dtype="handle"))
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 35, 16, 0, 16, ethosu_write_1[0], 0, 0, 0, T.float32(0.5), 10, "NHCWB16", 768, 16, 256, "int8", 32, 32, 26, 32, 0, 32, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHCWB16", 1024, 16, 512, 3, 3, 1, 1, 1, 1, buffer_2[0], 11040, T.int8(-1), T.int8(-1), 12, buffer_3[0], 272, T.int8(-1), T.int8(-1), 1, 1, 1, 1, "NONE", 0, 0, "TFL", "NEAREST", 0, 0, 0, dtype="handle"))
__tvm_meta__ = None
@@ -647,10 +647,10 @@ class Conv2dInlineCopy1:
def main(input_placeholder_3: T.Buffer[(1, 10, 12, 8), "int8"], input_ethosu_write_1: T.Buffer[(1, 8, 8, 16), "int8"]) -> None:
# function attr dict
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
- buffer = T.buffer_decl([848], "uint8")
- buffer_1 = T.buffer_decl([160], "uint8")
- placeholder_3 = T.buffer_decl([960], 'int8', data=input_placeholder_3.data)
- ethosu_write_1 = T.buffer_decl([1024], 'int8', data=input_ethosu_write_1.data)
+ buffer = T.Buffer([848], "uint8")
+ buffer_1 = T.Buffer([160], "uint8")
+ placeholder_3 = T.Buffer([960], 'int8', data=input_placeholder_3.data)
+ ethosu_write_1 = T.Buffer([1024], 'int8', data=input_ethosu_write_1.data)
# body
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 8, 8, 4, 8, 0, 8, placeholder_3[120], 0, 0, 0, T.float32(0.5), 10, "NHWC", 96, 8, 1, "int8", 8, 8, 16, 8, 0, 8, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 16, 1, 3, 3, 1, 1, 1, 1, buffer[0], 848, T.int8(-1), T.int8(-1), 12, buffer_1[0], 160, T.int8(-1), T.int8(-1), 1, 1, 1, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
__tvm_meta__ = None
@@ -662,10 +662,10 @@ class Conv2dInlineCopy2:
def main(input_placeholder_3: T.Buffer[(1, 7, 9, 5), "int8"], input_ethosu_write_1: T.Buffer[(1, 3, 5, 16), "int8"]) -> None:
# function attr dict
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
- buffer = T.buffer_decl([160], "uint8")
- buffer_1 = T.buffer_decl([656], "uint8")
- placeholder_3 = T.buffer_decl([315], 'int8', data=input_placeholder_3.data)
- ethosu_write_1 = T.buffer_decl([240], 'int8', data=input_ethosu_write_1.data)
+ buffer = T.Buffer([160], "uint8")
+ buffer_1 = T.Buffer([656], "uint8")
+ placeholder_3 = T.Buffer([315], 'int8', data=input_placeholder_3.data)
+ ethosu_write_1 = T.Buffer([240], 'int8', data=input_ethosu_write_1.data)
# body
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 3, 5, 3, 3, 0, 5, placeholder_3[146], 0, 0, 0, T.float32(0.5), 10, "NHWC", 45, 5, 1, "int8", 3, 5, 16, 3, 0, 5, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 80, 16, 1, 3, 3, 1, 1, 1, 1, buffer_1[0], 656, T.int8(-1), T.int8(-1), 12, buffer[0], 160, T.int8(-1), T.int8(-1), 1, 1, 1, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
__tvm_meta__ = None
@@ -706,10 +706,10 @@ class Conv2dInlineReshape1:
def main(input_placeholder_3: T.Buffer[(4, 6, 8, 1), "int8"], input_ethosu_write_1: T.Buffer[(1, 8, 6, 16), "int8"]) -> None:
# function attr dict
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
- buffer = T.buffer_decl([160], "uint8")
- buffer_1 = T.buffer_decl([848], "uint8")
- placeholder_3 = T.buffer_decl([192], 'int8', data=input_placeholder_3.data)
- ethosu_write_1 = T.buffer_decl([768], 'int8', data=input_ethosu_write_1.data)
+ buffer = T.Buffer([160], "uint8")
+ buffer_1 = T.Buffer([848], "uint8")
+ placeholder_3 = T.Buffer([192], 'int8', data=input_placeholder_3.data)
+ ethosu_write_1 = T.Buffer([768], 'int8', data=input_ethosu_write_1.data)
# body
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 5, 6, 4, 5, 0, 6, placeholder_3[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 4, 1, "int8", 4, 6, 16, 4, 0, 6, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 96, 16, 1, 3, 3, 1, 1, 1, 1, buffer_1[0], 848, T.int8(-1), T.int8(-1), 12, buffer[0], 160, T.int8(-1), T.int8(-1), 1, 1, 0, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 5, 6, 4, 5, 0, 6, placeholder_3[72], 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 4, 1, "int8", 4, 6, 16, 4, 0, 6, ethosu_write_1[384], 0, 0, 0, T.float32(0.25), 14, "NHWC", 96, 16, 1, 3, 3, 1, 1, 1, 1, buffer_1[0], 848, T.int8(-1), T.int8(-1), 12, buffer[0], 160, T.int8(-1), T.int8(-1), 0, 1, 1, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
@@ -722,10 +722,10 @@ class Conv2dInlineReshape2:
def main(input_placeholder_3: T.Buffer[(1, 24, 8), "int8"], input_ethosu_write_1: T.Buffer[(1, 8, 6, 16), "int8"]) -> None:
# function attr dict
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
- buffer = T.buffer_decl([160], "uint8")
- buffer_1 = T.buffer_decl([848], "uint8")
- placeholder_3 = T.buffer_decl([192], 'int8', data=input_placeholder_3.data)
- ethosu_write_1 = T.buffer_decl([768], 'int8', data=input_ethosu_write_1.data)
+ buffer = T.Buffer([160], "uint8")
+ buffer_1 = T.Buffer([848], "uint8")
+ placeholder_3 = T.Buffer([192], 'int8', data=input_placeholder_3.data)
+ ethosu_write_1 = T.Buffer([768], 'int8', data=input_ethosu_write_1.data)
# body
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 5, 6, 4, 5, 0, 6, placeholder_3[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 4, 1, "int8", 4, 6, 16, 4, 0, 6, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 96, 16, 1, 3, 3, 1, 1, 1, 1, buffer_1[0], 848, T.int8(-1), T.int8(-1), 12, buffer[0], 160, T.int8(-1), T.int8(-1), 1, 1, 0, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 5, 6, 4, 5, 0, 6, placeholder_3[72], 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 4, 1, "int8", 4, 6, 16, 4, 0, 6, ethosu_write_1[384], 0, 0, 0, T.float32(0.25), 14, "NHWC", 96, 16, 1, 3, 3, 1, 1, 1, 1, buffer_1[0], 848, T.int8(-1), T.int8(-1), 12, buffer[0], 160, T.int8(-1), T.int8(-1), 0, 1, 1, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
@@ -738,10 +738,10 @@ class Conv2dInlineReshape3:
def main(input_placeholder_3: T.Buffer[(192, 1), "int8"], input_ethosu_write_1: T.Buffer[(1, 8, 6, 16), "int8"]) -> None:
# function attr dict
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
- buffer = T.buffer_decl([160], "uint8")
- buffer_1 = T.buffer_decl([848], "uint8")
- placeholder_3 = T.buffer_decl([192], 'int8', data=input_placeholder_3.data)
- ethosu_write_1 = T.buffer_decl([768], 'int8', data=input_ethosu_write_1.data)
+ buffer = T.Buffer([160], "uint8")
+ buffer_1 = T.Buffer([848], "uint8")
+ placeholder_3 = T.Buffer([192], 'int8', data=input_placeholder_3.data)
+ ethosu_write_1 = T.Buffer([768], 'int8', data=input_ethosu_write_1.data)
# body
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 5, 6, 4, 5, 0, 6, placeholder_3[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 4, 1, "int8", 4, 6, 16, 4, 0, 6, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 96, 16, 1, 3, 3, 1, 1, 1, 1, buffer_1[0], 848, T.int8(-1), T.int8(-1), 12, buffer[0], 160, T.int8(-1), T.int8(-1), 1, 1, 0, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 5, 6, 4, 5, 0, 6, placeholder_3[72], 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 4, 1, "int8", 4, 6, 16, 4, 0, 6, ethosu_write_1[384], 0, 0, 0, T.float32(0.25), 14, "NHWC", 96, 16, 1, 3, 3, 1, 1, 1, 1, buffer_1[0], 848, T.int8(-1), T.int8(-1), 12, buffer[0], 160, T.int8(-1), T.int8(-1), 0, 1, 1, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
@@ -754,9 +754,9 @@ class Conv2dInlineReshape4:
def main(placeholder_3: T.Buffer[(192,), "int8"], input_ethosu_write_1: T.Buffer[(1, 8, 6, 16), "int8"]) -> None:
# function attr dict
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
- buffer = T.buffer_decl([160], "uint8")
- buffer_1 = T.buffer_decl([848], "uint8")
- ethosu_write_1 = T.buffer_decl([768], 'int8', data=input_ethosu_write_1.data)
+ buffer = T.Buffer([160], "uint8")
+ buffer_1 = T.Buffer([848], "uint8")
+ ethosu_write_1 = T.Buffer([768], 'int8', data=input_ethosu_write_1.data)
# body
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 5, 6, 4, 5, 0, 6, placeholder_3[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 4, 1, "int8", 4, 6, 16, 4, 0, 6, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 96, 16, 1, 3, 3, 1, 1, 1, 1, buffer_1[0], 848, T.int8(-1), T.int8(-1), 12, buffer[0], 160, T.int8(-1), T.int8(-1), 1, 1, 0, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 5, 6, 4, 5, 0, 6, placeholder_3[72], 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 4, 1, "int8", 4, 6, 16, 4, 0, 6, ethosu_write_1[384], 0, 0, 0, T.float32(0.25), 14, "NHWC", 96, 16, 1, 3, 3, 1, 1, 1, 1, buffer_1[0], 848, T.int8(-1), T.int8(-1), 12, buffer[0], 160, T.int8(-1), T.int8(-1), 0, 1, 1, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
diff --git a/tests/python/contrib/test_ethosu/test_replace_copy.py b/tests/python/contrib/test_ethosu/test_replace_copy.py
index e23954f4cb..29e1f9814c 100644
--- a/tests/python/contrib/test_ethosu/test_replace_copy.py
+++ b/tests/python/contrib/test_ethosu/test_replace_copy.py
@@ -37,12 +37,12 @@ class ReferenceModule:
def main(input_placeholder_3: T.Buffer[(1, 16, 16, 32), "int8"], input_ethosu_write_1: T.Buffer[(1, 16, 16, 8), "int8"]) -> None:
# function attr dict
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
- buffer_1 = T.buffer_decl([384], "uint8")
- placeholder_3 = T.buffer_decl([8192], dtype="int8", data=input_placeholder_3.data)
- ethosu_write_1 = T.buffer_decl([2048], dtype="int8", data=input_ethosu_write_1.data)
+ buffer_1 = T.Buffer([384], "uint8")
+ placeholder_3 = T.Buffer([8192], dtype="int8", data=input_placeholder_3.data)
+ ethosu_write_1 = T.Buffer([2048], dtype="int8", data=input_ethosu_write_1.data)
# body
placeholder_global_data = T.allocate([384], "uint8", "global", annotations={"disable_lower_builtin": True})
- placeholder_global = T.buffer_decl([384], "uint8", data=placeholder_global_data)
+ placeholder_global = T.Buffer([384], "uint8", data=placeholder_global_data)
T.evaluate(T.call_extern("ethosu_copy", buffer_1[0], 384, placeholder_global[0], dtype="handle"))
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, placeholder_3[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 8, 16, 0, 16, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, placeholder_global[0], 304, T.int8(-1), T.int8(-1), 12, placeholder_global[304], 80, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
__tvm_meta__ = None
@@ -81,15 +81,15 @@ class WeightStream:
def main(input_placeholder_5: T.Buffer[(1, 16, 16, 32), "int8"], input_ethosu_write_1: T.Buffer[(1, 16, 16, 16), "int8"]) -> None:
# function attr dict
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
- buffer = T.buffer_decl([528], "uint8")
- buffer_2 = T.buffer_decl([336], "uint8")
- placeholder_5 = T.buffer_decl([8192], dtype="int8", data=input_placeholder_5.data)
- ethosu_write_1 = T.buffer_decl([4096], dtype="int8", data=input_ethosu_write_1.data)
+ buffer = T.Buffer([528], "uint8")
+ buffer_2 = T.Buffer([336], "uint8")
+ placeholder_5 = T.Buffer([8192], dtype="int8", data=input_placeholder_5.data)
+ ethosu_write_1 = T.Buffer([4096], dtype="int8", data=input_ethosu_write_1.data)
# body
placeholder_d_global_data = T.allocate([528], "uint8", "global", annotations={"disable_lower_builtin": True})
- placeholder_d_global = T.buffer_decl([528], "uint8", data=placeholder_d_global_data)
+ placeholder_d_global = T.Buffer([528], "uint8", data=placeholder_d_global_data)
placeholder_d_global_1_data = T.allocate([336], "uint8", "global", annotations={"disable_lower_builtin": True})
- placeholder_d_global_1 = T.buffer_decl([336], "uint8", data=placeholder_d_global_1_data)
+ placeholder_d_global_1 = T.Buffer([336], "uint8", data=placeholder_d_global_1_data)
T.evaluate(T.call_extern("ethosu_copy", buffer[0], 528, placeholder_d_global[0], dtype="handle"))
T.evaluate(T.call_extern("ethosu_copy", buffer_2[0], 336, placeholder_d_global_1[0], dtype="handle"))
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, placeholder_5[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 10, 16, 0, 16, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 256, 16, 1, 1, 1, 1, 1, 1, 1, placeholder_d_global[0], 416, T.int8(-1), T.int8(-1), 12, placeholder_d_global[416], 112, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
diff --git a/tests/python/contrib/test_ethosu/test_scheduler.py b/tests/python/contrib/test_ethosu/test_scheduler.py
index 1e9b43b47a..c6f6bc2c6c 100644
--- a/tests/python/contrib/test_ethosu/test_scheduler.py
+++ b/tests/python/contrib/test_ethosu/test_scheduler.py
@@ -182,18 +182,18 @@ class DiamondGraphTir:
@T.prim_func
def main(input_placeholder: T.Buffer[(1, 56, 56, 96), "int8"], input_ethosu_write: T.Buffer[(1, 56, 56, 24), "int8"]) -> None:
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
- placeholder = T.buffer_decl([301056], dtype='int8', data=input_placeholder.data)
- ethosu_write = T.buffer_decl([75264], dtype='int8', data=input_ethosu_write.data)
- buffer1 = T.buffer_decl([2848], "uint8")
- buffer3 = T.buffer_decl([976], "uint8")
+ placeholder = T.Buffer([301056], dtype='int8', data=input_placeholder.data)
+ ethosu_write = T.Buffer([75264], dtype='int8', data=input_ethosu_write.data)
+ buffer1 = T.Buffer([2848], "uint8")
+ buffer3 = T.Buffer([976], "uint8")
p1_data = T.allocate([2848], "uint8", "global", annotations={"disable_lower_builtin":True})
- p1 = T.buffer_decl([2848], "uint8", data=p1_data)
+ p1 = T.Buffer([2848], "uint8", data=p1_data)
p2_data = T.allocate([976], "uint8", "global", annotations={"disable_lower_builtin":True})
- p2 = T.buffer_decl([976], "uint8", data=p2_data)
+ p2 = T.Buffer([976], "uint8", data=p2_data)
p5_data = T.allocate([75264], "int8", "global", annotations={"disable_lower_builtin":True})
- p5 = T.buffer_decl([75264], "int8", data=p5_data)
+ p5 = T.Buffer([75264], "int8", data=p5_data)
p6_data = T.allocate([75264], "int8", "global", annotations={"disable_lower_builtin":True})
- p6 = T.buffer_decl([75264], "int8", data=p6_data)
+ p6 = T.Buffer([75264], "int8", data=p6_data)
T.evaluate(T.call_extern("ethosu_copy", buffer1[0], 2848, p1[0], dtype="handle"))
T.evaluate(T.call_extern("ethosu_copy", buffer3[0], 976, p2[0], dtype="handle"))
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 56, 56, 96, 56, 0, 56, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 5376, 96, 1, "int8", 56, 56, 24, 56, 0, 56, p5[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 1344, 24, 1, 1, 1, 1, 1, 1, 1, p1[0], 2608, T.int8(-1), T.int8(-1), 12, p1[2608], 240, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
diff --git a/tests/python/contrib/test_ethosu/test_tir_to_cs_translator.py b/tests/python/contrib/test_ethosu/test_tir_to_cs_translator.py
index f205bc3b26..d68c806f72 100644
--- a/tests/python/contrib/test_ethosu/test_tir_to_cs_translator.py
+++ b/tests/python/contrib/test_ethosu/test_tir_to_cs_translator.py
@@ -36,8 +36,8 @@ class SingleEthosUConv2D:
def main(placeholder_3: T.Buffer[(8192,), "int8"], ethosu_conv2d_1: T.Buffer[(1024,), "int8"]) -> None:
# function attr dict
T.func_attr({"global_symbol": "main", "tir.noalias": True})
- placeholder_4 = T.buffer_decl([1], "uint8")
- placeholder_5 = T.buffer_decl([1], "uint8")
+ placeholder_4 = T.Buffer([1], "uint8")
+ placeholder_5 = T.Buffer([1], "uint8")
# body
T.evaluate(T.call_extern("ethosu_conv2d", "uint8", 8, 8, 3, 8, 0, 8, placeholder_3[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 3, 1, "uint8", 8, 8, 16, 8, 0, 8, ethosu_conv2d_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 16, 1, 1, 1, 1, 1, 1, 1, placeholder_4[0], 0, T.int8(-1), T.int8(-1), 12, placeholder_5[0], 0, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "CLIP", 0, 255, "TFL", "NONE", 0, 0, 0, dtype="uint8"))
# fmt: on
@@ -51,10 +51,10 @@ class MultiEthosUConv2D:
def main(placeholder_6: T.Buffer[(192,), "int8"], ethosu_conv2d_1: T.Buffer[(512,), "int8"]) -> None:
# function attr dict
T.func_attr({"global_symbol": "main", "tir.noalias": True})
- placeholder_9 = T.buffer_decl([1], "uint8")
- placeholder_7 = T.buffer_decl([1], "uint8")
- placeholder_8 = T.buffer_decl([1], "uint8")
- placeholder_5 = T.buffer_decl([1], "uint8")
+ placeholder_9 = T.Buffer([1], "uint8")
+ placeholder_7 = T.Buffer([1], "uint8")
+ placeholder_8 = T.Buffer([1], "uint8")
+ placeholder_5 = T.Buffer([1], "uint8")
# body
ethosu_conv2d_2 = T.decl_buffer([1024], "uint8")
ethosu_conv2d_3 = T.decl_buffer([2048], "uint8")
@@ -73,8 +73,8 @@ class MultiEthosUCopy:
def main(placeholder_3: T.Buffer[(8192,), "int8"], ethosu_conv2d_1: T.Buffer[(2048,), "int8"]) -> None:
# function attr dict
T.func_attr({"global_symbol": "main", "tir.noalias": True})
- placeholder_5 = T.buffer_decl([1], "int32")
- placeholder_4 = T.buffer_decl([1], "uint8")
+ placeholder_5 = T.Buffer([1], "int32")
+ placeholder_4 = T.Buffer([1], "uint8")
# body
placeholder_global = T.decl_buffer([256], "uint8")
placeholder_d_global = T.decl_buffer([8], "int32")
@@ -90,14 +90,14 @@ class MultiEthosUCopy:
class WeightStreamOnly:
@T.prim_func
def main(placeholder: T.Buffer[(8192,), "int8"], ethosu_write: T.Buffer[(2048,), "int8"]) -> None:
- buffer = T.buffer_decl([1], "uint8")
- buffer_1 = T.buffer_decl([1], "uint8")
- buffer_2 = T.buffer_decl([1], "uint8")
- buffer_3 = T.buffer_decl([1], "uint8")
- buffer_4 = T.buffer_decl([1], "uint8")
- buffer_5 = T.buffer_decl([1], "uint8")
- buffer_6 = T.buffer_decl([1], "uint8")
- buffer_7 = T.buffer_decl([1], "uint8")
+ buffer = T.Buffer([1], "uint8")
+ buffer_1 = T.Buffer([1], "uint8")
+ buffer_2 = T.Buffer([1], "uint8")
+ buffer_3 = T.Buffer([1], "uint8")
+ buffer_4 = T.Buffer([1], "uint8")
+ buffer_5 = T.Buffer([1], "uint8")
+ buffer_6 = T.Buffer([1], "uint8")
+ buffer_7 = T.Buffer([1], "uint8")
# function attr dict
T.func_attr({"from_legacy_te_schedule": True,
"global_symbol": "main", "tir.noalias": True,
@@ -136,16 +136,16 @@ class WeightStreamOnly:
class MixedRead:
@T.prim_func
def main(placeholder: T.Buffer[(8192,), "int8"], ethosu_write: T.Buffer[(2048,), "int8"]) -> None:
- buffer = T.buffer_decl([1], "uint8")
- buffer_1 = T.buffer_decl([1], "uint8")
- buffer_2 = T.buffer_decl([1], "uint8")
- buffer_3 = T.buffer_decl([1], "uint8")
- buffer_4 = T.buffer_decl([1], "uint8")
- buffer_5 = T.buffer_decl([1], "uint8")
- buffer_6 = T.buffer_decl([1], "uint8")
- buffer_7 = T.buffer_decl([1], "uint8")
- buffer_8 = T.buffer_decl([1], "uint8")
- buffer_9 = T.buffer_decl([1], "uint8")
+ buffer = T.Buffer([1], "uint8")
+ buffer_1 = T.Buffer([1], "uint8")
+ buffer_2 = T.Buffer([1], "uint8")
+ buffer_3 = T.Buffer([1], "uint8")
+ buffer_4 = T.Buffer([1], "uint8")
+ buffer_5 = T.Buffer([1], "uint8")
+ buffer_6 = T.Buffer([1], "uint8")
+ buffer_7 = T.Buffer([1], "uint8")
+ buffer_8 = T.Buffer([1], "uint8")
+ buffer_9 = T.Buffer([1], "uint8")
# function attr dict
T.func_attr({"from_legacy_te_schedule": True,
"global_symbol": "main", "tir.noalias": True,
@@ -161,11 +161,11 @@ class MixedRead:
buffer_9.name: buffer_9}})
# body
ethosu_write_1_data = T.allocate([4096], "int8", "global", annotations={"disable_lower_builtin":True})
- ethosu_write_1 = T.buffer_decl([4096], "int8", data=ethosu_write_1_data)
+ ethosu_write_1 = T.Buffer([4096], "int8", data=ethosu_write_1_data)
placeholder_global_data = T.allocate([80], "uint8", "global", annotations={"disable_lower_builtin":True})
- placeholder_global = T.buffer_decl([80], "uint8", data=placeholder_global_data)
+ placeholder_global = T.Buffer([80], "uint8", data=placeholder_global_data)
placeholder_d_global_data = T.allocate([32], "uint8", "global", annotations={"disable_lower_builtin":True})
- placeholder_d_global = T.buffer_decl([32], "uint8", data=placeholder_d_global_data)
+ placeholder_d_global = T.Buffer([32], "uint8", data=placeholder_d_global_data)
T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 16, 16, 0, 16, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 256, 16, 1, 1, 1, 1, 1, 1, 1, buffer[0], 592, T.int8(-1), T.int8(-1), 12, buffer_1[0], 160, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))
T.evaluate(T.call_extern("ethosu_copy", buffer_2[0], 80, placeholder_global[0], dtype="handle"))
T.evaluate(T.call_extern("ethosu_copy", buffer_3[0], 32, placeholder_d_global[0], dtype="handle"))
@@ -673,9 +673,9 @@ def test_translate_ethosu_copy():
class MixedConstantDatatypes:
@T.prim_func
def main(placeholder_4: T.Buffer[(2048,), "int8"], ethosu_write_1: T.Buffer[(16,), "int8"]) -> None:
- buffer = T.buffer_decl([1], "uint8")
- buffer_1 = T.buffer_decl([1], "uint8")
- buffer_2 = T.buffer_decl([1], "int16")
+ buffer = T.Buffer([1], "uint8")
+ buffer_1 = T.Buffer([1], "uint8")
+ buffer_2 = T.Buffer([1], "int16")
# function attr dict
T.func_attr({"from_legacy_te_schedule": True,
"global_symbol": "main", "tir.noalias": True,
diff --git a/tests/python/relay/aot/test_pass_aot_lower_main.py b/tests/python/relay/aot/test_pass_aot_lower_main.py
index 093305203a..b523e01929 100644
--- a/tests/python/relay/aot/test_pass_aot_lower_main.py
+++ b/tests/python/relay/aot/test_pass_aot_lower_main.py
@@ -180,12 +180,12 @@ def @main(%a: Tensor[(5, 7), float32]) -> Tensor[(5, 7), float32] {
T.func_attr({"global_symbol": "test_mod___tvm_main__", "runner_function": True, "target": T.target({"kind":"llvm", "tag":"", "keys":["cpu"]}), "input_vars": [a], "output_vars": [output], "devices": []})
tmp_read = T.buffer_var("uint8", "")
# buffer definition
- tmp_read_1 = T.buffer_decl([T.uint64(140)], dtype="uint8", data=tmp_read)
+ tmp_read_1 = T.Buffer([T.uint64(140)], dtype="uint8", data=tmp_read)
a_buffer = T.match_buffer(a, [5, 7], dtype="float32", align=16)
output_buffer = T.match_buffer(output, [5, 7], dtype="float32", align=16)
# body
tmp_write: T.Ptr[T.uint8] = output_buffer.data
- tmp_write_1 = T.buffer_decl([T.uint64(140)], dtype="uint8", data=tmp_write)
+ tmp_write_1 = T.Buffer([T.uint64(140)], dtype="uint8", data=tmp_write)
for i in T.serial(140):
tmp_write_1[i] = T.let(tmp_read, a_buffer.data, tmp_read_1[i])
# fmt: on
diff --git a/tests/python/unittest/test_lower_build.py b/tests/python/unittest/test_lower_build.py
index 665697b84b..4c188d2f83 100644
--- a/tests/python/unittest/test_lower_build.py
+++ b/tests/python/unittest/test_lower_build.py
@@ -60,9 +60,9 @@ class LoweredModule:
) -> None:
# function attr dict
T.func_attr({"global_symbol": "main", "from_legacy_te_schedule": True, "tir.noalias": True})
- A_flat = T.buffer_decl([16384], data=A.data)
- B_flat = T.buffer_decl([16384], data=B.data)
- C_flat = T.buffer_decl([16384], data=C.data)
+ A_flat = T.Buffer([16384], data=A.data)
+ B_flat = T.Buffer([16384], data=B.data)
+ C_flat = T.Buffer([16384], data=C.data)
# body
for x, y in T.grid(128, 128):
C_flat[x * 128 + y] = 0.0
@@ -82,9 +82,9 @@ class LoweredTIRModule:
) -> None:
# function attr dict
T.func_attr({"global_symbol": "main", "tir.noalias": True})
- A_flat = T.buffer_decl([16384], data=A.data)
- B_flat = T.buffer_decl([16384], data=B.data)
- C_flat = T.buffer_decl([16384], data=C.data)
+ A_flat = T.Buffer([16384], data=A.data)
+ B_flat = T.Buffer([16384], data=B.data)
+ C_flat = T.Buffer([16384], data=C.data)
# body
for x, y in T.grid(128, 128):
C_flat[x * 128 + y] = 0.0
diff --git a/tests/python/unittest/test_tir_renew_defs.py b/tests/python/unittest/test_tir_renew_defs.py
index 28b440a608..65f81499bd 100644
--- a/tests/python/unittest/test_tir_renew_defs.py
+++ b/tests/python/unittest/test_tir_renew_defs.py
@@ -136,7 +136,7 @@ def test_undefined_buffer():
def access_alloc():
# Buffer A should be remapped
A_data = T.allocate([128], "float16", "global")
- A = T.buffer_decl(shape=[128], dtype="float16", data=A_data)
+ A = T.Buffer(shape=[128], dtype="float16", data=A_data)
# check if buffer var also get remapped
T.evaluate(A.data)
for i in range(128):
diff --git a/tests/python/unittest/test_tir_schedule_cache_read_write.py b/tests/python/unittest/test_tir_schedule_cache_read_write.py
index 6a75057e72..bcb214594c 100644
--- a/tests/python/unittest/test_tir_schedule_cache_read_write.py
+++ b/tests/python/unittest/test_tir_schedule_cache_read_write.py
@@ -1011,9 +1011,9 @@ def cache_write_allocate_const(
):
B = T.alloc_buffer([128, 128], dtype="float32")
const = T.allocate_const([0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7], "float32", [8])
- const_1 = T.buffer_decl([8], dtype="float32", data=const)
+ const_1 = T.Buffer([8], dtype="float32", data=const)
const2 = T.allocate_const([0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7], "float32", [8])
- const_2 = T.buffer_decl([8], dtype="float32", data=const)
+ const_2 = T.Buffer([8], dtype="float32", data=const)
for i, j in T.grid(128, 128):
for x in range(8):
with T.block("B"):
@@ -1037,8 +1037,8 @@ def cache_write_allocate_const_output(
A_global = T.alloc_buffer([128, 128], dtype="float32")
C_global = T.alloc_buffer([128, 128], dtype="float16")
const_2 = T.allocate_const([0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7], "float32", [8])
- const_1 = T.buffer_decl([8], dtype="float32", data=const_2)
- const_2_1 = T.buffer_decl([8], dtype="float32", data=const_2)
+ const_1 = T.Buffer([8], dtype="float32", data=const_2)
+ const_2_1 = T.Buffer([8], dtype="float32", data=const_2)
const2 = T.allocate_const([0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7], "float32", [8])
for ax0, ax1 in T.grid(128, 128):
with T.block("A_global"):
diff --git a/tests/python/unittest/test_tir_transform_common_subexpr_elim.py b/tests/python/unittest/test_tir_transform_common_subexpr_elim.py
index be229a580f..113d9f0474 100644
--- a/tests/python/unittest/test_tir_transform_common_subexpr_elim.py
+++ b/tests/python/unittest/test_tir_transform_common_subexpr_elim.py
@@ -349,7 +349,7 @@ def test_no_normalization_without_commoning():
# -------------------------------------------------
@T.prim_func
def func_distributivity(i1: T.int32, i2: T.int32, x: T.int32, y: T.int32, z: T.int32) -> None:
- B = T.buffer_decl((50,), "int32")
+ B = T.Buffer((50,), "int32")
B[i1] = x * (y + z)
B[i2] = x * y + x * z
@@ -358,7 +358,7 @@ def func_distributivity(i1: T.int32, i2: T.int32, x: T.int32, y: T.int32, z: T.i
def func_distributivity_expected(
i1: T.int32, i2: T.int32, x: T.int32, y: T.int32, z: T.int32
) -> None:
- B = T.buffer_decl((50,), "int32")
+ B = T.Buffer((50,), "int32")
cse_var_1 = T.var("int32")
with T.let(cse_var_1, x * y + x * z):
B[i1] = cse_var_1
@@ -367,7 +367,7 @@ def func_distributivity_expected(
@T.prim_func
def func_associativity(i1: T.int32, i2: T.int32, x: T.int32, y: T.int32, z: T.int32) -> None:
- B = T.buffer_decl((50,), "int32")
+ B = T.Buffer((50,), "int32")
B[i1] = (x + y) + z
B[i2] = x + (y + z)
@@ -376,7 +376,7 @@ def func_associativity(i1: T.int32, i2: T.int32, x: T.int32, y: T.int32, z: T.in
def func_associativity_expected(
i1: T.int32, i2: T.int32, x: T.int32, y: T.int32, z: T.int32
) -> None:
- B = T.buffer_decl((50,), "int32")
+ B = T.Buffer((50,), "int32")
cse_var_1 = T.var("int32")
with T.let(cse_var_1, (x + y) + z):
B[i1] = cse_var_1
diff --git a/tests/python/unittest/test_tir_transform_extract_constants.py b/tests/python/unittest/test_tir_transform_extract_constants.py
index 5de06e38a5..b3e0aa74f9 100644
--- a/tests/python/unittest/test_tir_transform_extract_constants.py
+++ b/tests/python/unittest/test_tir_transform_extract_constants.py
@@ -28,7 +28,7 @@ class Module4:
A = T.match_buffer(a, (10), "int32")
B = T.alloc_buffer((10), "int32")
K_data = T.allocate_const([1, 1, 1, 1, 1, 1, 1, 1, 1, 1], "int32", [10])
- K = T.buffer_decl(shape=(10), dtype="int32", data=K_data)
+ K = T.Buffer(shape=(10), dtype="int32", data=K_data)
for x in T.serial(0, 10):
B[x] = A[x] + K[x]
@@ -37,7 +37,7 @@ class Module4:
A = T.match_buffer(a, (10), "int32")
B = T.alloc_buffer((10), "int32")
K_data = T.allocate_const([1, 1, 1, 1, 1, 1, 1, 1, 1, 1], "int32", [10])
- K = T.buffer_decl(shape=(10), dtype="int32", data=K_data)
+ K = T.Buffer(shape=(10), dtype="int32", data=K_data)
for x in T.serial(0, 10):
B[x] = A[x] + K[x]
@@ -46,7 +46,7 @@ class Module4:
A = T.match_buffer(a, (10), "int32")
B = T.alloc_buffer((10), "int32")
K_data = T.allocate_const([1, 2, 3, 1, 1, 1, 1, 1, 1, 1], "int32", [10])
- K = T.buffer_decl(shape=(10), dtype="int32", data=K_data)
+ K = T.Buffer(shape=(10), dtype="int32", data=K_data)
for x in T.serial(0, 10):
B[x] = A[x] + K[x]
diff --git a/tests/python/unittest/test_tir_transform_flatten_buffer.py b/tests/python/unittest/test_tir_transform_flatten_buffer.py
index 513e04dc20..12523fbdb2 100644
--- a/tests/python/unittest/test_tir_transform_flatten_buffer.py
+++ b/tests/python/unittest/test_tir_transform_flatten_buffer.py
@@ -41,11 +41,11 @@ class TestElementwise(BaseCompare):
C[i, j] = B_new[0, j] * 2.0
def expected(input_A: T.Buffer[(16, 16), "float32"], input_C: T.Buffer[(16, 16), "float32"]):
- A = T.buffer_decl(256, dtype="float32", data=input_A.data)
- C = T.buffer_decl(256, dtype="float32", data=input_C.data)
+ A = T.Buffer(256, dtype="float32", data=input_A.data)
+ C = T.Buffer(256, dtype="float32", data=input_C.data)
for i in T.serial(0, 16):
B_new_data = T.allocate([16], "float32", scope="global")
- B_new = T.buffer_decl([16], "float32", scope="global", data=B_new_data)
+ B_new = T.Buffer([16], "float32", scope="global", data=B_new_data)
for j in T.serial(0, 16):
B_new[j] = A[((i * 16) + j)] + 1.0
for j in T.serial(0, 16):
@@ -56,7 +56,7 @@ class TestElementwiseWithoutDeclBuffer(BaseCompare):
"""2-d buffers are flattened to 1-d
Like TestElementwise, but the TIR doesn't have the DeclBuffer
- node. The T.buffer_decl declaration applies only during the
+ node. The T.Buffer declaration applies only during the
parsing the TVMScript, and doesn't occur in the TIR itself. In
this case, the allocation should be assumed to be targeting flat
memory, and should be flattened to a 1-d allocation.
@@ -65,18 +65,18 @@ class TestElementwiseWithoutDeclBuffer(BaseCompare):
def before(A: T.Buffer[(16, 16), "float32"], C: T.Buffer[(16, 16), "float32"]):
for i in T.serial(0, 16):
B_new_data = T.allocate([1, 16], "float32", "global")
- B_new = T.buffer_decl([1, 16], "float32", data=B_new_data)
+ B_new = T.Buffer([1, 16], "float32", data=B_new_data)
for j in T.serial(0, 16):
B_new[0, j] = A[i, j] + 1.0
for j in T.serial(0, 16):
C[i, j] = B_new[0, j] * 2.0
def expected(input_A: T.Buffer[(16, 16), "float32"], input_C: T.Buffer[(16, 16), "float32"]):
- A = T.buffer_decl(256, dtype="float32", data=input_A.data)
- C = T.buffer_decl(256, dtype="float32", data=input_C.data)
+ A = T.Buffer(256, dtype="float32", data=input_A.data)
+ C = T.Buffer(256, dtype="float32", data=input_C.data)
for i in T.serial(0, 16):
B_new_data = T.allocate([16], "float32", "global")
- B_new = T.buffer_decl(16, "float32", data=B_new_data)
+ B_new = T.Buffer(16, "float32", data=B_new_data)
for j in T.serial(0, 16):
B_new[j] = A[((i * 16) + j)] + 1.0
for j in T.serial(0, 16):
@@ -101,8 +101,8 @@ class TestGPU(BaseCompare):
C[i0 * 4 + i1 * 2 + i2, j] = B[0, j] * 2.0
def expected(input_A: T.Buffer[(16, 16), "float32"], input_C: T.Buffer[(16, 16), "float32"]):
- A = T.buffer_decl(256, dtype="float32", data=input_A.data)
- C = T.buffer_decl(256, dtype="float32", data=input_C.data)
+ A = T.Buffer(256, dtype="float32", data=input_A.data)
+ C = T.Buffer(256, dtype="float32", data=input_C.data)
i0 = T.env_thread("blockIdx.x")
i1 = T.env_thread("threadIdx.x")
@@ -112,7 +112,7 @@ class TestGPU(BaseCompare):
T.launch_thread(i1, 2)
T.launch_thread(i2, 2)
B_data = T.allocate([16], "float32", scope="local")
- B = T.buffer_decl([16], "float32", scope="local", data=B_data)
+ B = T.Buffer([16], "float32", scope="local", data=B_data)
for j in range(0, 16):
B[j] = A[i0 * 64 + i1 * 32 + i2 * 16 + j] + 1.0
for j in range(0, 16):
@@ -136,12 +136,12 @@ class TestSymbolic(BaseCompare):
def expected(a: T.handle, c: T.handle, n: T.int32, m: T.int32) -> None:
input_A = T.match_buffer(a, (n, m), "float32")
input_C = T.match_buffer(c, (n, m), "float32")
- A = T.buffer_decl(n * m, "float32", data=input_A.data)
- C = T.buffer_decl(n * m, "float32", data=input_C.data)
+ A = T.Buffer(n * m, "float32", data=input_A.data)
+ C = T.Buffer(n * m, "float32", data=input_C.data)
for i in range(0, n):
B_data = T.allocate([m], "float32", scope="global")
- B = T.buffer_decl([m], "float32", scope="global", data=B_data)
+ B = T.Buffer([m], "float32", scope="global", data=B_data)
for j in range(0, m):
B[j] = A[i * m + j] + 1.0
for j in range(0, m):
@@ -160,14 +160,14 @@ class TestMultiAlloc(BaseCompare):
D[i, j] = C[i, j] * 2.0
def expected(input_A: T.Buffer[(4, 32), "float32"], input_D: T.Buffer[(4, 32), "float32"]):
- A = T.buffer_decl(128, "float32", data=input_A.data)
- D = T.buffer_decl(128, "float32", data=input_D.data)
+ A = T.Buffer(128, "float32", data=input_A.data)
+ D = T.Buffer(128, "float32", data=input_D.data)
for i, j in T.grid(4, 32):
B_data = T.allocate([128], "float32", scope="global")
- B = T.buffer_decl([128], "float32", scope="global", data=B_data)
+ B = T.Buffer([128], "float32", scope="global", data=B_data)
C_data = T.allocate([128], "float32", scope="global")
- C = T.buffer_decl([128], "float32", scope="global", data=C_data)
+ C = T.Buffer([128], "float32", scope="global", data=C_data)
B[i * 32 + j] = A[i * 32 + j] + 1.0
C[i * 32 + j] = A[i * 32 + j] + B[i * 32 + j]
D[i * 32 + j] = C[i * 32 + j] * 2.0
@@ -179,18 +179,18 @@ class TestStrided(BaseCompare):
def before(A: T.Buffer[(16, 16), "float32"], C: T.Buffer[(16, 16), "float32"]):
for i0 in T.serial(4):
B = T.decl_buffer([4, 17], "float32")
- B_1 = T.buffer_decl([4, 16], dtype="float32", data=B.data, strides=[17, 1])
+ B_1 = T.Buffer([4, 16], dtype="float32", data=B.data, strides=[17, 1])
for i1, j in T.grid(4, 16):
B_1[i1, j] = A[i0 * 4 + i1, j] + 1.0
for i1, j in T.grid(4, 16):
C[i0 * 4 + i1, j] = B_1[i1, j] * 2.0
def expected(input_A: T.Buffer[(16, 16), "float32"], input_C: T.Buffer[(16, 16), "float32"]):
- A = T.buffer_decl(256, dtype="float32", data=input_A.data)
- C = T.buffer_decl(256, dtype="float32", data=input_C.data)
+ A = T.Buffer(256, dtype="float32", data=input_A.data)
+ C = T.Buffer(256, dtype="float32", data=input_C.data)
for i0 in T.serial(0, 4):
B_new_data = T.allocate([68], "float32", scope="global")
- B_new = T.buffer_decl([68], "float32", scope="global", data=B_new_data)
+ B_new = T.Buffer([68], "float32", scope="global", data=B_new_data)
for i1 in T.serial(0, 4):
for j in T.serial(0, 16):
B_new[i1 * 17 + j] = A[i0 * 64 + i1 * 16 + j] + 1.0
@@ -207,8 +207,8 @@ class TestBoolean(BaseCompare):
B[i0] = A[i0]
def expected(input_A: T.Buffer[10, "bool"], input_B: T.Buffer[10, "bool"]) -> None:
- A = T.buffer_decl(10, dtype="int8", data=input_A.data)
- B = T.buffer_decl(10, dtype="int8", data=input_B.data)
+ A = T.Buffer(10, dtype="int8", data=input_A.data)
+ B = T.Buffer(10, dtype="int8", data=input_B.data)
# body
for i0 in T.serial(10):
B[i0] = T.cast(T.cast(A[i0], "bool"), "int8")
@@ -285,9 +285,7 @@ class TestFlattenDeclBufferWithAxisSeparators(BaseCompare):
def expected():
A_data = T.allocate([30, 1001], dtype="float32", scope="global")
- A = T.buffer_decl(
- [30, 1001], dtype="float32", scope="global", axis_separators=[1], data=A_data
- )
+ A = T.Buffer([30, 1001], dtype="float32", scope="global", axis_separators=[1], data=A_data)
for i0, i1, i2, i3, i4, i5 in T.grid(2, 3, 5, 7, 11, 13):
T.evaluate(A[i0 * 15 + i1 * 5 + i2, i3 * 143 + i4 * 13 + i5])
diff --git a/tests/python/unittest/test_tir_transform_inject_rolling_buffer.py b/tests/python/unittest/test_tir_transform_inject_rolling_buffer.py
index d75fb2b03e..b7bd6cb46f 100644
--- a/tests/python/unittest/test_tir_transform_inject_rolling_buffer.py
+++ b/tests/python/unittest/test_tir_transform_inject_rolling_buffer.py
@@ -207,7 +207,7 @@ class PreRollingBuffer:
# function attr dict
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
# buffer definition
- tensor_2 = T.buffer_decl([1, 10, 12, 16], dtype="int8", elem_offset=0, align=64, offset_factor=1)
+ tensor_2 = T.Buffer([1, 10, 12, 16], dtype="int8", elem_offset=0, align=64, offset_factor=1)
A_1 = T.match_buffer(A, [1, 12, 14, 16], dtype="int8", elem_offset=0, align=64, offset_factor=1)
tensor_1 = T.match_buffer(tensor, [1, 8, 8, 16], dtype="int8", elem_offset=0, align=64, offset_factor=1)
# body
@@ -239,7 +239,7 @@ class PostRollingBuffer:
# function attr dict
T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True})
# buffer definition
- tensor_2 = T.buffer_decl([1, 10, 12, 16], dtype="int8", elem_offset=0, align=64, offset_factor=1)
+ tensor_2 = T.Buffer([1, 10, 12, 16], dtype="int8", elem_offset=0, align=64, offset_factor=1)
A_1 = T.match_buffer(A, [1, 12, 14, 16], dtype="int8", elem_offset=0, align=64, offset_factor=1)
tensor_1 = T.match_buffer(tensor, [1, 8, 8, 16], dtype="int8", elem_offset=0, align=64, offset_factor=1)
# body
diff --git a/tests/python/unittest/test_tir_transform_inject_virtual_thread.py b/tests/python/unittest/test_tir_transform_inject_virtual_thread.py
index eb5ed08bb5..d327149384 100644
--- a/tests/python/unittest/test_tir_transform_inject_virtual_thread.py
+++ b/tests/python/unittest/test_tir_transform_inject_virtual_thread.py
@@ -146,13 +146,13 @@ def test_vthread_simplified():
vthread = T.env_thread("vthread")
T.launch_thread(vthread, 4)
B_data = T.allocate([4], "int32", scope="shared")
- B = T.buffer_decl([4], "int32", data=B_data, scope="shared")
+ B = T.Buffer([4], "int32", data=B_data, scope="shared")
B[0:4] = T.broadcast(vthread, 4)
@T.prim_func
def expected_func():
B_data = T.allocate([16], "int32", scope="shared")
- B = T.buffer_decl([16], "int32", data=B_data, scope="shared")
+ B = T.Buffer([16], "int32", data=B_data, scope="shared")
# The indices for B should each be a single Ramp node, and
# should not be the sum of a Ramp and Broadcast node.
B[T.Mul(0, 4) : T.Mul(0, 4) + 4] = T.broadcast(0, 4)
@@ -175,13 +175,13 @@ def test_vthread_vectorized():
vthread = T.env_thread("vthread")
T.launch_thread(vthread, 4)
B_data = T.allocate([4], "int32", "shared")
- B = T.buffer_decl([4], "int32", data=B_data, scope="shared")
+ B = T.Buffer([4], "int32", data=B_data, scope="shared")
B[0:4] = T.broadcast(vthread, 4)
@T.prim_func
def expected_func():
B_data = T.allocate([4], "int32x4", "shared")
- B = T.buffer_decl([4], "int32x4", data=B_data, scope="shared")
+ B = T.Buffer([4], "int32x4", data=B_data, scope="shared")
B[T.Mul(0, 4) / 4] = T.broadcast(0, 4)
B[T.Mul(1, 4) / 4] = T.broadcast(1, 4)
B[T.Mul(2, 4) / 4] = T.broadcast(2, 4)
diff --git a/tests/python/unittest/test_tir_transform_loop_partition.py b/tests/python/unittest/test_tir_transform_loop_partition.py
index 7dd8e79410..1a40f52140 100644
--- a/tests/python/unittest/test_tir_transform_loop_partition.py
+++ b/tests/python/unittest/test_tir_transform_loop_partition.py
@@ -583,10 +583,10 @@ def partitioned_concat_3(
placeholder_2: T.Buffer[(1, 32, 28, 28), "int8"],
T_concat: T.Buffer[(1, 128, 28, 28), "int8"],
) -> None:
- placeholder_flat = T.buffer_decl([50176], "int8", data=placeholder.data)
- placeholder_1_flat = T.buffer_decl([25088], "int8", data=placeholder_1.data)
- placeholder_2_flat = T.buffer_decl([25088], "int8", data=placeholder_2.data)
- T_concat_flat = T.buffer_decl([100352], "int8", data=T_concat.data)
+ placeholder_flat = T.Buffer([50176], "int8", data=placeholder.data)
+ placeholder_1_flat = T.Buffer([25088], "int8", data=placeholder_1.data)
+ placeholder_2_flat = T.Buffer([25088], "int8", data=placeholder_2.data)
+ T_concat_flat = T.Buffer([100352], "int8", data=T_concat.data)
for i1, i2, i3 in T.grid(64, 28, 28):
T_concat_flat[i1 * 784 + i2 * 28 + i3] = placeholder_flat[i1 * 784 + i2 * 28 + i3]
for i1, i2, i3 in T.grid(32, 28, 28):
@@ -602,10 +602,10 @@ def concat_func_3(
placeholder_2: T.Buffer[(1, 32, 28, 28), "int8"],
T_concat: T.Buffer[(1, 128, 28, 28), "int8"],
) -> None:
- placeholder_flat = T.buffer_decl([50176], "int8", data=placeholder.data)
- placeholder_1_flat = T.buffer_decl([25088], "int8", data=placeholder_1.data)
- placeholder_2_flat = T.buffer_decl([25088], "int8", data=placeholder_2.data)
- T_concat_flat = T.buffer_decl([100352], "int8", data=T_concat.data)
+ placeholder_flat = T.Buffer([50176], "int8", data=placeholder.data)
+ placeholder_1_flat = T.Buffer([25088], "int8", data=placeholder_1.data)
+ placeholder_2_flat = T.Buffer([25088], "int8", data=placeholder_2.data)
+ T_concat_flat = T.Buffer([100352], "int8", data=T_concat.data)
for i1 in T.serial(128, annotations={"pragma_loop_partition_hint": 1}):
for i2, i3 in T.grid(28, 28):
if 96 <= i1:
@@ -632,8 +632,8 @@ def test_loop_partition_unroll_hint():
def main(
A_arg: T.Buffer[(1, 3, 224, 224), "int8"], B_arg: T.Buffer[(1, 224, 7, 16), "int8"]
) -> None:
- A = T.buffer_decl(150528, "int8", data=A_arg.data)
- B = T.buffer_decl(25088, "int8", data=B_arg.data)
+ A = T.Buffer(150528, "int8", data=A_arg.data)
+ B = T.Buffer(25088, "int8", data=B_arg.data)
for ax0 in T.serial(
112,
annotations={"pragma_loop_partition_hint": True},
@@ -646,8 +646,8 @@ def test_loop_partition_unroll_hint():
def partitioned_main(
A_arg: T.Buffer[(1, 3, 224, 224), "int8"], B_arg: T.Buffer[(1, 224, 7, 16), "int8"]
) -> None:
- A = T.buffer_decl(150528, dtype="int8", data=A_arg.data)
- B = T.buffer_decl(25088, dtype="int8", data=B_arg.data)
+ A = T.Buffer(150528, dtype="int8", data=A_arg.data)
+ B = T.Buffer(25088, dtype="int8", data=B_arg.data)
# body
for ax1, ax2, ax3 in T.grid(224, 7, 16):
if 3 <= ax2 and ax3 < 3:
@@ -706,11 +706,11 @@ def test_loop_partition_recursive_unroll_hint():
@T.prim_func
def partitioned_main():
placeholder_0_dm = T.allocate([16384], "int8", "global")
- placeholder_0_dm_1 = T.buffer_decl([16384], dtype="int8", data=placeholder_0_dm)
+ placeholder_0_dm_1 = T.Buffer([16384], dtype="int8", data=placeholder_0_dm)
for i3_0 in T.unroll(2):
for i2_0 in T.unroll(2):
pad_temp = T.allocate([4096], "int8", "global")
- pad_temp_1 = T.buffer_decl([4096], dtype="int8", data=pad_temp)
+ pad_temp_1 = T.Buffer([4096], dtype="int8", data=pad_temp)
for ax0, ax1, ax2 in T.grid(16, 16, 16):
if 6 <= i2_0 * 4 + ax0 and 6 <= i3_0 * 4 + ax1:
pad_temp_1[ax0 * 256 + ax1 * 16 + ax2] = placeholder_0_dm_1[
@@ -718,7 +718,7 @@ def test_loop_partition_recursive_unroll_hint():
]
for i2_0 in T.unroll(2):
pad_temp_2 = T.allocate([4096], "int8", "global")
- pad_temp_3 = T.buffer_decl([4096], dtype="int8", data=pad_temp_2)
+ pad_temp_3 = T.Buffer([4096], dtype="int8", data=pad_temp_2)
for ax0, ax1, ax2 in T.grid(16, 16, 16):
if 6 <= i2_0 * 4 + ax0:
pad_temp_3[ax0 * 256 + ax1 * 16 + ax2] = placeholder_0_dm_1[
@@ -727,7 +727,7 @@ def test_loop_partition_recursive_unroll_hint():
for i3_0 in T.unroll(2):
for i2_0 in T.unroll(2):
pad_temp_4 = T.allocate([4096], "int8", "global")
- pad_temp_5 = T.buffer_decl([4096], dtype="int8", data=pad_temp_4)
+ pad_temp_5 = T.Buffer([4096], dtype="int8", data=pad_temp_4)
for ax0, ax1, ax2 in T.grid(16, 16, 16):
if 6 <= i2_0 * 4 + ax0 and i3_0 * 4 + ax1 < 14:
pad_temp_5[ax0 * 256 + ax1 * 16 + ax2] = placeholder_0_dm_1[
diff --git a/tests/python/unittest/test_tir_transform_renormalize_split_pattern.py b/tests/python/unittest/test_tir_transform_renormalize_split_pattern.py
index 635badb847..5cdc272440 100644
--- a/tests/python/unittest/test_tir_transform_renormalize_split_pattern.py
+++ b/tests/python/unittest/test_tir_transform_renormalize_split_pattern.py
@@ -28,9 +28,9 @@ class Before:
def main(inputs: T.Buffer[(1, 4, 4, 512), "float32"], weight: T.Buffer[(4, 4, 512, 256), "float32"], conv2d_transpose_nhwc: T.Buffer[(1, 8, 8, 256), "float32"]) -> None:
# function attr dict
T.func_attr({"global_symbol": "main", "tir.noalias": True})
- inputs_flat = T.buffer_decl([8192], dtype="float32", data=inputs.data)
- weight_flat = T.buffer_decl([2097152], dtype="float32", data=weight.data)
- conv2d_transpose_nhwc_flat = T.buffer_decl([16384], dtype="float32", data=conv2d_transpose_nhwc.data)
+ inputs_flat = T.Buffer([8192], dtype="float32", data=inputs.data)
+ weight_flat = T.Buffer([2097152], dtype="float32", data=weight.data)
+ conv2d_transpose_nhwc_flat = T.Buffer([16384], dtype="float32", data=conv2d_transpose_nhwc.data)
# var definition
threadIdx_x = T.env_thread("threadIdx.x")
blockIdx_x = T.env_thread("blockIdx.x")
@@ -59,9 +59,9 @@ class After:
def main(inputs: T.Buffer[(1, 4, 4, 512), "float32"], weight: T.Buffer[(4, 4, 512, 256), "float32"], conv2d_transpose_nhwc: T.Buffer[(1, 8, 8, 256), "float32"]) -> None:
# function attr dict
T.func_attr({"global_symbol": "main", "tir.noalias": True})
- inputs_flat = T.buffer_decl([8192], dtype="float32", data=inputs.data)
- weight_flat = T.buffer_decl([2097152], dtype="float32", data=weight.data)
- conv2d_transpose_nhwc_flat = T.buffer_decl([16384], dtype="float32", data=conv2d_transpose_nhwc.data)
+ inputs_flat = T.Buffer([8192], dtype="float32", data=inputs.data)
+ weight_flat = T.Buffer([2097152], dtype="float32", data=weight.data)
+ conv2d_transpose_nhwc_flat = T.Buffer([16384], dtype="float32", data=conv2d_transpose_nhwc.data)
# var definition
threadIdx_x = T.env_thread("threadIdx.x")
blockIdx_x = T.env_thread("blockIdx.x")
@@ -93,9 +93,9 @@ class After_simplified:
# var definition
threadIdx_x = T.env_thread("threadIdx.x")
blockIdx_x = T.env_thread("blockIdx.x")
- inputs_flat = T.buffer_decl([8192], dtype="float32", data=inputs.data)
- weight_flat = T.buffer_decl([2097152], dtype="float32", data=weight.data)
- conv2d_transpose_nhwc_flat = T.buffer_decl([16384], dtype="float32", data=conv2d_transpose_nhwc.data)
+ inputs_flat = T.Buffer([8192], dtype="float32", data=inputs.data)
+ weight_flat = T.Buffer([2097152], dtype="float32", data=weight.data)
+ conv2d_transpose_nhwc_flat = T.Buffer([16384], dtype="float32", data=conv2d_transpose_nhwc.data)
# body
T.launch_thread(blockIdx_x, 64)
conv2d_transpose_nhwc_local = T.decl_buffer([8], "float32", scope="local")
diff --git a/tests/python/unittest/test_tir_transform_storage_rewrite.py b/tests/python/unittest/test_tir_transform_storage_rewrite.py
index 533a835e0f..2ed2e6ec6d 100644
--- a/tests/python/unittest/test_tir_transform_storage_rewrite.py
+++ b/tests/python/unittest/test_tir_transform_storage_rewrite.py
@@ -655,7 +655,7 @@ def test_access_in_let_value():
def func(A: T.Buffer[(8,), "float32"]):
for i in range(8):
B_data = T.allocate((1,), "float32", "global")
- B = T.buffer_decl(shape=[1], dtype="float32", data=B_data)
+ B = T.Buffer(shape=[1], dtype="float32", data=B_data)
B[0] = 3.14
x: T.float32 = T.exp(B[0], dtype="float32")
A[i] = (x + 1.0) / (x - 1.0)
@@ -663,7 +663,7 @@ def test_access_in_let_value():
@T.prim_func
def func_rewritten(A: T.Buffer[(8,), "float32"]) -> None:
B_data = T.allocate((1,), "float32", "global")
- B = T.buffer_decl(shape=[1], dtype="float32", data=B_data)
+ B = T.Buffer(shape=[1], dtype="float32", data=B_data)
for i in range(8):
B[0] = 3.14
x: T.float32 = T.exp(B[0], dtype="float32")
@@ -690,12 +690,12 @@ class TestLetBufferRewrite(BaseCompare):
def before() -> None:
A_data: T.Ptr[T.int32] = T.call_extern("dummy_func", dtype="handle")
- A = T.buffer_decl([8], "int32", data=A_data)
+ A = T.Buffer([8], "int32", data=A_data)
A[0:8] = T.broadcast(42, 8)
def expected() -> None:
A_data: T.Ptr[T.int32x8] = T.call_extern("dummy_func", dtype="handle")
- A = T.buffer_decl([1], "int32x8", data=A_data)
+ A = T.Buffer([1], "int32x8", data=A_data)
A[0] = T.broadcast(42, 8)
@@ -708,7 +708,7 @@ class TestRewriteInPlaceUseOfNonFlatBuffer(BaseCompare):
dtype="float32",
scope="global",
)
- B = T.buffer_decl(
+ B = T.Buffer(
[16, 16],
dtype="float32",
axis_separators=[1],
@@ -719,7 +719,7 @@ class TestRewriteInPlaceUseOfNonFlatBuffer(BaseCompare):
dtype="float32",
scope="global",
)
- C = T.buffer_decl(
+ C = T.Buffer(
[16, 16],
dtype="float32",
axis_separators=[1],
@@ -741,8 +741,8 @@ class TestRewriteInPlaceUseOfNonFlatBuffer(BaseCompare):
dtype="float32",
scope="global",
)
- B = T.buffer_decl([16, 16], dtype="float32", axis_separators=[1], data=B_data)
- C = T.buffer_decl(
+ B = T.Buffer([16, 16], dtype="float32", axis_separators=[1], data=B_data)
+ C = T.Buffer(
[16, 16],
dtype="float32",
axis_separators=[1],
@@ -777,7 +777,7 @@ class TestNoRewriteOfSharedNonFlatBuffer(BaseCompare):
dtype="float32",
scope="global",
)
- B = T.buffer_decl(
+ B = T.Buffer(
[16, 16],
dtype="float32",
axis_separators=[1],
@@ -788,7 +788,7 @@ class TestNoRewriteOfSharedNonFlatBuffer(BaseCompare):
dtype="float32",
scope="global",
)
- C = T.buffer_decl(
+ C = T.Buffer(
[20, 20],
dtype="float32",
axis_separators=[1],
diff --git a/tests/python/unittest/test_tir_transform_thread_sync.py b/tests/python/unittest/test_tir_transform_thread_sync.py
index b2a0581d69..b7caf04d65 100644
--- a/tests/python/unittest/test_tir_transform_thread_sync.py
+++ b/tests/python/unittest/test_tir_transform_thread_sync.py
@@ -101,7 +101,7 @@ def test_sync_read_thread_id_independent_location():
def func(p0_arg: T.Buffer[(1, 2, 1, 1), "float32"], p1: T.Buffer[2, "float32"]) -> None:
threadIdx_x = T.env_thread("threadIdx.x")
blockIdx_x = T.env_thread("blockIdx.x")
- p0 = T.buffer_decl([2], dtype="float32", data=p0_arg.data)
+ p0 = T.Buffer([2], dtype="float32", data=p0_arg.data)
result_local = T.alloc_buffer([1], dtype="float32", scope="local")
temp_shared = T.alloc_buffer([1], dtype="float32", scope="shared")
T.launch_thread(blockIdx_x, 8)
diff --git a/tests/python/unittest/test_tir_usmp_transform_convert_pool_allocations_to_offsets.py b/tests/python/unittest/test_tir_usmp_transform_convert_pool_allocations_to_offsets.py
index 25e8955735..6145c39b87 100644
--- a/tests/python/unittest/test_tir_usmp_transform_convert_pool_allocations_to_offsets.py
+++ b/tests/python/unittest/test_tir_usmp_transform_convert_pool_allocations_to_offsets.py
@@ -92,13 +92,13 @@ class LinearStructure:
T_cast_21 = T.match_buffer(T_cast_20, [289], dtype="uint8", elem_offset=0, align=64, offset_factor=1)
# body
PaddedInput_7_data = T.allocate([157323], "int16", "global")
- PaddedInput_7 = T.buffer_decl(shape=[157323], dtype="int16", data=PaddedInput_7_data)
+ PaddedInput_7 = T.Buffer(shape=[157323], dtype="int16", data=PaddedInput_7_data)
for i0_i1_fused_7 in T.serial(0, 229):
for i2_7, i3_7 in T.grid(229, 3):
PaddedInput_7[(((i0_i1_fused_7*687) + (i2_7*3)) + i3_7)] = T.if_then_else(((((2 <= i0_i1_fused_7) and (i0_i1_fused_7 < 226)) and (2 <= i2_7)) and (i2_7 < 226)), placeholder_65[((((i0_i1_fused_7*672) + (i2_7*3)) + i3_7) - 1350)], T.int16(0), dtype="int16")
for ax0_ax1_fused_ax2_fused_7 in T.serial(0, 12544):
Conv2dOutput_7_data = T.allocate([64], "int32", "global")
- Conv2dOutput_7 = T.buffer_decl(shape=[64], dtype="int32", data=Conv2dOutput_7_data)
+ Conv2dOutput_7 = T.Buffer(shape=[64], dtype="int32", data=Conv2dOutput_7_data)
for ff_3 in T.serial(0, 64):
Conv2dOutput_7[ff_3] = 0
for ry_2, rx_2, rc_7 in T.grid(7, 7, 3):
@@ -114,7 +114,7 @@ class LinearStructure:
T_cast_7 = T.match_buffer(T_cast_6, [177], dtype="int16", elem_offset=0, align=64, offset_factor=1)
# body
tensor_2_data = T.allocate([200704], "uint8", "global")
- tensor_2 = T.buffer_decl(shape=[200704], dtype="uint8", data=tensor_2_data)
+ tensor_2 = T.Buffer(shape=[200704], dtype="uint8", data=tensor_2_data)
for ax0_ax1_fused_4 in T.serial(0, 56):
for ax2_4 in T.serial(0, 56):
for ax3_init in T.serial(0, 64):
@@ -163,7 +163,7 @@ class LinearStructurePlanned:
fast_memory_6_buffer_var = T.match_buffer(fast_memory_6_var, [200704], dtype="uint8", strides=[1], elem_offset=0, align=16)
slow_memory_7_buffer_var = T.match_buffer(slow_memory_7_var, [1418528], dtype="uint8", strides=[1], elem_offset=0, align=16)
# body
- tensor_2_let = T.buffer_decl([200704], dtype="uint8")
+ tensor_2_let = T.Buffer([200704], dtype="uint8")
with T.let(tensor_2_let.data, T.address_of(fast_memory_6_buffer_var[0], dtype="handle")):
for ax0_ax1_fused_4, ax2_4 in T.grid(56, 56):
for ax3_init in T.serial(0, 64):
@@ -193,12 +193,12 @@ class LinearStructurePlanned:
fast_memory_4_buffer_var = T.match_buffer(fast_memory_4_var, [200704], dtype="uint8", strides=[1], elem_offset=0, align=16)
slow_memory_5_buffer_var = T.match_buffer(slow_memory_5_var, [1418528], dtype="uint8", strides=[1], elem_offset=0, align=16)
# body
- PaddedInput_7_let = T.buffer_decl([157323], "int16")
+ PaddedInput_7_let = T.Buffer([157323], "int16")
with T.let(PaddedInput_7_let.data, T.address_of(slow_memory_5_buffer_var[802816], dtype="handle")):
for i0_i1_fused_7, i2_7, i3_7 in T.grid(229, 229, 3):
PaddedInput_7_let[i0_i1_fused_7 * 687 + i2_7 * 3 + i3_7] = T.if_then_else(2 <= i0_i1_fused_7 and i0_i1_fused_7 < 226 and 2 <= i2_7 and i2_7 < 226, placeholder_65[i0_i1_fused_7 * 672 + i2_7 * 3 + i3_7 - 1350], T.int16(0), dtype="int16")
for ax0_ax1_fused_ax2_fused_7 in T.serial(0, 12544):
- Conv2dOutput_7_let = T.buffer_decl([64], "int32")
+ Conv2dOutput_7_let = T.Buffer([64], "int32")
with T.let(Conv2dOutput_7_let.data, T.address_of(fast_memory_4_buffer_var[0], dtype="handle")):
for ff_3 in T.serial(0, 64):
Conv2dOutput_7_let[ff_3] = 0
@@ -272,12 +272,12 @@ class ResnetStructure:
T_cast_5 = T.match_buffer(T_cast_4, [215], dtype="int16")
# body
PaddedInput_1_data = T.allocate([379456], "int16", "global")
- PaddedInput_1 = T.buffer_decl(shape=[379456], dtype="int16", data=PaddedInput_1_data)
+ PaddedInput_1 = T.Buffer(shape=[379456], dtype="int16", data=PaddedInput_1_data)
for i0_i1_fused_1, i2_1, i3_1 in T.grid(77, 77, 64):
PaddedInput_1[i0_i1_fused_1 * 4928 + i2_1 * 64 + i3_1] = T.if_then_else(1 <= i0_i1_fused_1 and i0_i1_fused_1 < 76 and 1 <= i2_1 and i2_1 < 76, placeholder_13[i0_i1_fused_1 * 4800 + i2_1 * 64 + i3_1 - 4864], T.int16(0), dtype="int16")
for ax0_ax1_fused_ax2_fused_1 in T.serial(0, 5625):
Conv2dOutput_1_data = T.allocate([64], "int32", "global")
- Conv2dOutput_1 = T.buffer_decl(shape=[64], dtype="int32", data=Conv2dOutput_1_data)
+ Conv2dOutput_1 = T.Buffer(shape=[64], dtype="int32", data=Conv2dOutput_1_data)
for ff_1 in T.serial(0, 64):
Conv2dOutput_1[ff_1] = 0
for ry, rx, rc_1 in T.grid(3, 3, 64):
@@ -295,12 +295,12 @@ class ResnetStructure:
T_add_1 = T.match_buffer(T_add, [407], dtype="int32")
# body
PaddedInput_2_data = T.allocate([360000], "int16", "global")
- PaddedInput_2 = T.buffer_decl(shape=[360000], dtype="int16", data=PaddedInput_2_data)
+ PaddedInput_2 = T.Buffer(shape=[360000], dtype="int16", data=PaddedInput_2_data)
for i0_i1_fused_2, i2_2, i3_2 in T.grid(75, 75, 64):
PaddedInput_2[i0_i1_fused_2 * 4800 + i2_2 * 64 + i3_2] = placeholder_19[i0_i1_fused_2 * 4800 + i2_2 * 64 + i3_2]
for ax0_ax1_fused_ax2_fused_2 in T.serial(0, 5625):
Conv2dOutput_2_data = T.allocate([64], "int32", "global")
- Conv2dOutput_2 = T.buffer_decl(shape=[64], dtype="int32", data=Conv2dOutput_2_data)
+ Conv2dOutput_2 = T.Buffer(shape=[64], dtype="int32", data=Conv2dOutput_2_data)
for ax3_outer_1 in T.serial(0, 4):
for ff_2 in T.serial(0, 64):
Conv2dOutput_2[ff_2] = 0
@@ -320,12 +320,12 @@ class ResnetStructure:
T_cast_7 = T.match_buffer(T_cast_6, [407], dtype="uint8")
# body
PaddedInput_3_data = T.allocate([360000], "int16", "global")
- PaddedInput_3 = T.buffer_decl(shape=[360000], dtype="int16", data=PaddedInput_3_data)
+ PaddedInput_3 = T.Buffer(shape=[360000], dtype="int16", data=PaddedInput_3_data)
for i0_i1_fused_3, i2_3, i3_3 in T.grid(75, 75, 64):
PaddedInput_3[i0_i1_fused_3 * 4800 + i2_3 * 64 + i3_3] = placeholder_29[i0_i1_fused_3 * 4800 + i2_3 * 64 + i3_3]
for ax0_ax1_fused_ax2_fused_3 in T.serial(0, 5625):
Conv2dOutput_3_data = T.allocate([64], "int32", "global")
- Conv2dOutput_3 = T.buffer_decl(shape=[64], dtype="int32", data=Conv2dOutput_3_data)
+ Conv2dOutput_3 = T.Buffer(shape=[64], dtype="int32", data=Conv2dOutput_3_data)
for ax3_outer_2 in T.serial(0, 4):
for ff_3 in T.serial(0, 64):
Conv2dOutput_3[ff_3] = 0
@@ -361,12 +361,12 @@ class ResnetStructure:
T_cast_3 = T.match_buffer(T_cast_2, [215], dtype="int16")
# body
PaddedInput_data = T.allocate([360000], "int16", "global")
- PaddedInput = T.buffer_decl([360000], "int16", data=PaddedInput_data)
+ PaddedInput = T.Buffer([360000], "int16", data=PaddedInput_data)
for i0_i1_fused, i2, i3 in T.grid(75, 75, 64):
PaddedInput[i0_i1_fused * 4800 + i2 * 64 + i3] = placeholder_7[i0_i1_fused * 4800 + i2 * 64 + i3]
for ax0_ax1_fused_ax2_fused in T.serial(0, 5625):
Conv2dOutput_data = T.allocate([64], "int32", "global")
- Conv2dOutput = T.buffer_decl([64], "int32", data=Conv2dOutput_data)
+ Conv2dOutput = T.Buffer([64], "int32", data=Conv2dOutput_data)
for ff in T.serial(0, 64):
Conv2dOutput[ff] = 0
for rc in T.serial(0, 64):
@@ -398,12 +398,12 @@ class ResnetStructurePlanned:
T_cast_7 = T.match_buffer(T_cast_6, [407], dtype="uint8")
global_workspace_5_buffer_var = T.match_buffer(global_workspace_5_var, [7920256], dtype="uint8", strides=[1], elem_offset=0, align=16)
# body
- PaddedInput_3_let = T.buffer_decl([360000], 'int16')
+ PaddedInput_3_let = T.Buffer([360000], 'int16')
with T.let(PaddedInput_3_let.data, T.address_of(global_workspace_5_buffer_var[6480000], dtype="handle")):
for i0_i1_fused_3, i2_3, i3_3 in T.grid(75, 75, 64):
PaddedInput_3_let[i0_i1_fused_3 * 4800 + i2_3 * 64 + i3_3] = placeholder_29[i0_i1_fused_3 * 4800 + i2_3 * 64 + i3_3]
for ax0_ax1_fused_ax2_fused_3 in T.serial(0, 5625):
- Conv2dOutput_3_let = T.buffer_decl([64], 'int32')
+ Conv2dOutput_3_let = T.Buffer([64], 'int32')
with T.let(Conv2dOutput_3_let.data, T.address_of(global_workspace_5_buffer_var[7200000], dtype="handle")):
for ax3_outer_2 in T.serial(0, 4):
for ff_3 in T.serial(0, 64):
@@ -421,12 +421,12 @@ class ResnetStructurePlanned:
T_add_1 = T.match_buffer(T_add, [407], dtype="int32")
global_workspace_4_buffer_var = T.match_buffer(global_workspace_4_var, [7920256], dtype="uint8", strides=[1], elem_offset=0, align=16)
# body
- PaddedInput_2_let = T.buffer_decl([360000], "int16")
+ PaddedInput_2_let = T.Buffer([360000], "int16")
with T.let(PaddedInput_2_let.data, T.address_of(global_workspace_4_buffer_var[7200000], dtype="handle")):
for i0_i1_fused_2, i2_2, i3_2 in T.grid(75, 75, 64):
PaddedInput_2_let[i0_i1_fused_2 * 4800 + i2_2 * 64 + i3_2] = placeholder_19[i0_i1_fused_2 * 4800 + i2_2 * 64 + i3_2]
for ax0_ax1_fused_ax2_fused_2 in T.serial(0, 5625):
- Conv2dOutput_2_let = T.buffer_decl([64], 'int32')
+ Conv2dOutput_2_let = T.Buffer([64], 'int32')
with T.let(Conv2dOutput_2_let.data, T.address_of(global_workspace_4_buffer_var[7920000], dtype="handle")):
for ax3_outer_1 in T.serial(0, 4):
for ff_2 in T.serial(0, 64):
@@ -444,12 +444,12 @@ class ResnetStructurePlanned:
T_cast_3 = T.match_buffer(T_cast_2, [215], dtype="int16")
global_workspace_2_buffer_var = T.match_buffer(global_workspace_2_var, [7920256], dtype="uint8", strides=[1], elem_offset=0, align=16)
# body
- PaddedInput_let = T.buffer_decl([360000], "int16")
+ PaddedInput_let = T.Buffer([360000], "int16")
with T.let(PaddedInput_let.data, T.address_of(global_workspace_2_buffer_var[7200000], dtype="handle")):
for i0_i1_fused, i2, i3 in T.grid(75, 75, 64):
PaddedInput_let[i0_i1_fused * 4800 + i2 * 64 + i3] = placeholder_7[i0_i1_fused * 4800 + i2 * 64 + i3]
for ax0_ax1_fused_ax2_fused in T.serial(0, 5625):
- Conv2dOutput_let = T.buffer_decl([64], "int32")
+ Conv2dOutput_let = T.Buffer([64], "int32")
with T.let(Conv2dOutput_let.data, T.address_of(global_workspace_2_buffer_var[7920000], dtype="handle")):
for ff in T.serial(0, 64):
Conv2dOutput_let[ff] = 0
@@ -466,12 +466,12 @@ class ResnetStructurePlanned:
T_cast_5 = T.match_buffer(T_cast_4, [215], dtype="int16")
global_workspace_3_buffer_var = T.match_buffer(global_workspace_3_var, [7920256], dtype="uint8", strides=[1], elem_offset=0, align=16)
# body
- PaddedInput_1_let = T.buffer_decl([379456], "int16")
+ PaddedInput_1_let = T.Buffer([379456], "int16")
with T.let(PaddedInput_1_let.data, T.address_of(global_workspace_3_buffer_var[0], dtype="handle")):
for i0_i1_fused_1, i2_1, i3_1 in T.grid(77, 77, 64):
PaddedInput_1_let[i0_i1_fused_1 * 4928 + i2_1 * 64 + i3_1] = T.if_then_else(1 <= i0_i1_fused_1 and i0_i1_fused_1 < 76 and 1 <= i2_1 and i2_1 < 76, placeholder_13[i0_i1_fused_1 * 4800 + i2_1 * 64 + i3_1 - 4864], T.int16(0), dtype="int16")
for ax0_ax1_fused_ax2_fused_1 in T.serial(0, 5625):
- Conv2dOutput_1_let = T.buffer_decl([64], "int32")
+ Conv2dOutput_1_let = T.Buffer([64], "int32")
with T.let(Conv2dOutput_1_let.data, T.address_of(global_workspace_3_buffer_var[7200000], dtype="handle")):
for ff_1 in T.serial(0, 64):
Conv2dOutput_1_let[ff_1] = 0
@@ -546,7 +546,7 @@ class TensorIntrinStructure:
)
)
- dense = T.buffer_decl([10], "int32", data=dense_data)
+ dense = T.Buffer([10], "int32", data=dense_data)
dense[0] = T.q_multiply_shift(dense[0], 1608879842, 31, -7, dtype="int32")
@T.prim_func
@@ -561,7 +561,7 @@ class TensorIntrinStructurePlanned:
global_workspace_1_buffer_var = T.match_buffer(
global_workspace_1_var, [40], dtype="uint8", strides=[1], elem_offset=0, align=16
)
- dense_let = T.buffer_decl([10], "int32")
+ dense_let = T.Buffer([10], "int32")
with T.let(dense_let.data, T.address_of(global_workspace_1_buffer_var[0], dtype="handle")):
T.evaluate(
T.call_extern(
diff --git a/tests/python/unittest/test_tvmscript_ir_builder_tir.py b/tests/python/unittest/test_tvmscript_ir_builder_tir.py
index 7d542c7bc7..85d2e808b3 100644
--- a/tests/python/unittest/test_tvmscript_ir_builder_tir.py
+++ b/tests/python/unittest/test_tvmscript_ir_builder_tir.py
@@ -53,9 +53,9 @@ def test_ir_builder_tir_primfunc_complete():
with T.prim_func():
T.arg("a", T.handle())
T.arg("b", T.var("int64"))
- T.arg("c", T.buffer_decl((128, 128), "float32"))
+ T.arg("c", T.Buffer((128, 128), "float32"))
d = T.arg("d", T.handle())
- e = T.arg("e", T.buffer_decl((1024,), "int8"))
+ e = T.arg("e", T.Buffer((1024,), "int8"))
T.func_attr({"key": "value"})
T.func_ret(tvm.ir.PrimType("int64"))
buffer_d = T.match_buffer(d, (64, 64), "int64")
@@ -120,10 +120,10 @@ def test_ir_builder_tir_block_base():
def test_ir_builder_tir_block_complete():
with IRBuilder() as ib:
a = T.var("int64", "a")
- b = T.buffer_decl((128, 128), "float32")
- c = T.buffer_decl((128, 128), "float32")
+ b = T.Buffer((128, 128), "float32")
+ c = T.Buffer((128, 128), "float32")
d = T.var("int32", "d")
- e = T.buffer_decl((128, 128), "float32")
+ e = T.Buffer((128, 128), "float32")
f = T.var("int32", "f")
with T.block("block"):
T.where(a > 1)
@@ -298,7 +298,7 @@ def test_ir_builder_tir_let():
def test_ir_builder_tir_realize():
- buffer_a = T.buffer_decl((128, 128), "float32")
+ buffer_a = T.Buffer((128, 128), "float32")
with IRBuilder() as ib:
with T.realize(buffer_a[0:128, 0:128], "test_storage_scope", True):
T.evaluate(0)
@@ -417,7 +417,7 @@ def test_ir_builder_tir_if_then_else():
def test_ir_builder_tir_buffer_store():
- buffer_a = T.buffer_decl((10, 10), "float32")
+ buffer_a = T.Buffer((10, 10), "float32")
i = T.var("int32", "x")
with IRBuilder() as ib:
T.buffer_store(buffer_a, 0.1, [0, i])
@@ -434,7 +434,7 @@ def test_ir_builder_tir_buffer_store():
def test_ir_builder_tir_prefetch():
with IRBuilder() as ib:
- buffer_a = T.buffer_decl((128, 128), "float32")
+ buffer_a = T.Buffer((128, 128), "float32")
T.prefetch(buffer_a, [])
# the prefetch generated by IRBuilder
@@ -469,7 +469,7 @@ def test_ir_builder_tir_decl_buffer():
ir_actual = ib.get()
# the expected decl_buffer
- buffer = T.buffer_decl((128, 128), "float32")
+ buffer = T.Buffer((128, 128), "float32")
ir_expected = tir.Allocate(
buffer.data,
"float32",
diff --git a/tests/python/unittest/test_tvmscript_printer_tir.py b/tests/python/unittest/test_tvmscript_printer_tir.py
index 71da86bff7..ec69c54396 100644
--- a/tests/python/unittest/test_tvmscript_printer_tir.py
+++ b/tests/python/unittest/test_tvmscript_printer_tir.py
@@ -166,7 +166,7 @@ def test_match_buffer_region():
_assert_print(
obj,
"""
-src = T.buffer_decl((128, 128))
+src = T.Buffer((128, 128))
tgt = T.match_buffer(src[64:128, 64:128], (64, 64))
""",
)
@@ -176,7 +176,7 @@ def test_buffer():
a = tir.decl_buffer((128, 128), "float16", name="A")
_assert_print(
a,
- """A = T.buffer_decl((128, 128), "float16")
+ """A = T.Buffer((128, 128), "float16")
A""",
)
@@ -193,7 +193,7 @@ def test_buffer_region():
_assert_print(
obj,
"""
-src = T.buffer_decl((128, 128))
+src = T.Buffer((128, 128))
src[64:128, 64:128]
""",
)
@@ -205,7 +205,7 @@ def test_buffer_load():
_assert_print(
obj,
"""
-A = T.buffer_decl((128, 128), "float16")
+A = T.Buffer((128, 128), "float16")
A[128, 128]
""",
)
@@ -219,7 +219,7 @@ def test_buffer_store():
_assert_print(
obj,
"""
-A = T.buffer_decl((128, 128), "float16")
+A = T.Buffer((128, 128), "float16")
A[128, 128] = A[128, 128] + T.float16(1)
""",
)
@@ -380,7 +380,7 @@ def test_prefetch():
_assert_print(
obj,
"""
-A = T.buffer_decl((128, 128), "float16")
+A = T.Buffer((128, 128), "float16")
T.prefetch(A, [T.Range(0, 64), T.Range(0, 64)])
""",
)
@@ -439,7 +439,7 @@ def test_buffer_realize():
_assert_print(
obj,
"""
-A = T.buffer_decl((128, 128))
+A = T.Buffer((128, 128))
with T.realize(A[0:128, 0:128], "test_storage_scope"):
T.evaluate(0)
""",
diff --git a/tests/python/unittest/test_tvmscript_roundtrip.py b/tests/python/unittest/test_tvmscript_roundtrip.py
index 0a6a2a2638..4300c4bbad 100644
--- a/tests/python/unittest/test_tvmscript_roundtrip.py
+++ b/tests/python/unittest/test_tvmscript_roundtrip.py
@@ -34,8 +34,8 @@ def opt_gemm_normalize():
# function attr dict
T.func_attr({"global_symbol": "mmult", "tir.noalias": True})
# buffer definition
- C_global = T.buffer_decl([1024, 1024], elem_offset=0, align=64, offset_factor=1)
- packedB = T.buffer_decl([32, 1024, 32], elem_offset=0, align=64, offset_factor=1)
+ C_global = T.Buffer([1024, 1024], elem_offset=0, align=64, offset_factor=1)
+ packedB = T.Buffer([32, 1024, 32], elem_offset=0, align=64, offset_factor=1)
A_1 = T.match_buffer(A, [1024, 1024], elem_offset=0, align=64, offset_factor=1)
B_1 = T.match_buffer(B, [1024, 1024], elem_offset=0, align=64, offset_factor=1)
C_1 = T.match_buffer(C, [1024, 1024], elem_offset=0, align=64, offset_factor=1)
@@ -95,15 +95,13 @@ def opt_gemm_lower():
C_1 = T.match_buffer(C, [16384], elem_offset=0, align=64, offset_factor=1)
# body
packedB_data = T.allocate([32768], "float32", "global")
- packedB = T.buffer_decl(
- shape=[32768], dtype="float32", scope="global", data=packedB_data
- )
+ packedB = T.Buffer(shape=[32768], dtype="float32", scope="global", data=packedB_data)
for x in T.parallel(0, 32):
for y in T.serial(0, 1024):
packedB[T.ramp(((x * 32768) + (y * 32)), 1, 32)] = B_1[y, T.ramp(x * 32, 1, 32)]
for x_outer in T.parallel(0, 32):
C_global_data = T.allocate([1024], "float32", "global")
- C_global = T.buffer_decl(
+ C_global = T.Buffer(
shape=[1024], dtype="float32", scope="global", data=C_global_data
)
for y_outer in T.serial(0, 32):
@@ -196,8 +194,8 @@ def opt_gemm_mod_host():
# buffer definition
buf_type_ids = T.match_buffer(arg_type_ids, [3], dtype="int32")
- packedB = T.buffer_decl([32768], dtype="float32")
- C_global = T.buffer_decl([1024], dtype="float32")
+ packedB = T.Buffer([32768], dtype="float32")
+ C_global = T.Buffer([1024], dtype="float32")
# var definition
# C_global = T.buffer_var("float32", "global")
# packedB = T.buffer_var("float32", "global")
@@ -212,29 +210,29 @@ def opt_gemm_mod_host():
A_data: T.Ptr[T.int32] = T.tvm_struct_get(arg0, 0, 1, dtype="handle")
T.attr(A_data, "storage_alignment", 128)
- A = T.buffer_decl([1024 * 1024], dtype="int32", data=A_data)
+ A = T.Buffer([1024 * 1024], dtype="int32", data=A_data)
buf0_shape_data: T.Ptr[T.int32] = T.tvm_struct_get(arg0, 0, 2, dtype="handle")
- buf0_shape = T.buffer_decl([2], dtype="int32", data=buf0_shape_data)
+ buf0_shape = T.Buffer([2], dtype="int32", data=buf0_shape_data)
buf0_strides_data: T.Ptr[T.int32] = T.tvm_struct_get(arg0, 0, 3, dtype="handle")
- buf0_strides = T.buffer_decl([2], dtype="int32", data=buf0_strides_data)
+ buf0_strides = T.Buffer([2], dtype="int32", data=buf0_strides_data)
dev_id: T.int32 = T.tvm_struct_get(arg0, 0, 9, dtype="int32")
B_data: T.Ptr[T.int32] = T.tvm_struct_get(arg1, 0, 1, dtype="handle")
T.attr(B_data, "storage_alignment", 128)
- B = T.buffer_decl([1024 * 1024], dtype="int32", data=B_data)
+ B = T.Buffer([1024 * 1024], dtype="int32", data=B_data)
buf1_shape_data: T.Ptr[T.int32] = T.tvm_struct_get(arg1, 0, 2, dtype="handle")
- buf1_shape = T.buffer_decl([2], dtype="int32", data=buf1_shape_data)
+ buf1_shape = T.Buffer([2], dtype="int32", data=buf1_shape_data)
buf1_strides_data: T.Ptr[T.int32] = T.tvm_struct_get(arg1, 0, 3, dtype="handle")
- buf1_strides = T.buffer_decl([2], dtype="int32", data=buf1_strides_data)
+ buf1_strides = T.Buffer([2], dtype="int32", data=buf1_strides_data)
C_data: T.Ptr[T.int32] = T.tvm_struct_get(arg2, 0, 1, dtype="handle")
T.attr(C_data, "storage_alignment", 128)
- C = T.buffer_decl([1024 * 1024], dtype="int32", data=C_data)
+ C = T.Buffer([1024 * 1024], dtype="int32", data=C_data)
buf2_shape_data: T.Ptr[T.int32] = T.tvm_struct_get(arg2, 0, 2, dtype="handle")
- buf2_shape = T.buffer_decl([2], dtype="int32", data=buf2_shape_data)
+ buf2_shape = T.Buffer([2], dtype="int32", data=buf2_shape_data)
buf2_strides_data: T.Ptr[T.int32] = T.tvm_struct_get(arg2, 0, 3, dtype="handle")
- buf2_strides = T.buffer_decl([2], dtype="int32", data=buf2_strides_data)
+ buf2_strides = T.Buffer([2], dtype="int32", data=buf2_strides_data)
assert (((arg0_code == 3) or (arg0_code == 13)) or (arg0_code == 7)) or (
arg0_code == 4
@@ -489,42 +487,34 @@ def opt_conv_tensorcore_normalize():
ty = T.env_thread("threadIdx.y")
tz = T.env_thread("threadIdx.z")
# buffer definition
- Apad_shared = T.buffer_decl(
+ Apad_shared = T.Buffer(
[16, 16, 16, 16, 16, 16], dtype="float16", elem_offset=0, align=64, offset_factor=1
)
- Apad_shared_wmma_matrix_a = T.buffer_decl(
+ Apad_shared_wmma_matrix_a = T.Buffer(
[16, 16, 16, 16, 16, 16], dtype="float16", elem_offset=0, align=64, offset_factor=1
)
- BA = T.buffer_decl(
- [16, 16], dtype="float16", scope="wmma.matrix_a", align=32, offset_factor=256
- )
- BB = T.buffer_decl(
- [16, 16], dtype="float16", scope="wmma.matrix_b", align=32, offset_factor=256
- )
- BC = T.buffer_decl([16, 16], scope="wmma.accumulator", align=32, offset_factor=256)
- Conv_wmma_accumulator = T.buffer_decl(
+ BA = T.Buffer([16, 16], dtype="float16", scope="wmma.matrix_a", align=32, offset_factor=256)
+ BB = T.Buffer([16, 16], dtype="float16", scope="wmma.matrix_b", align=32, offset_factor=256)
+ BC = T.Buffer([16, 16], scope="wmma.accumulator", align=32, offset_factor=256)
+ Conv_wmma_accumulator = T.Buffer(
[16, 14, 14, 32, 16, 16], elem_offset=0, align=64, offset_factor=1
)
- W_shared = T.buffer_decl(
+ W_shared = T.Buffer(
[3, 3, 16, 32, 16, 16], dtype="float16", elem_offset=0, align=64, offset_factor=1
)
- W_shared_wmma_matrix_b = T.buffer_decl(
+ W_shared_wmma_matrix_b = T.Buffer(
[3, 3, 16, 32, 16, 16], dtype="float16", elem_offset=0, align=64, offset_factor=1
)
- buffer = T.buffer_decl(
- [16, 16], dtype="float16", scope="shared", align=32, offset_factor=256
- )
- buffer_1 = T.buffer_decl(
+ buffer = T.Buffer([16, 16], dtype="float16", scope="shared", align=32, offset_factor=256)
+ buffer_1 = T.Buffer(
[16, 16], dtype="float16", scope="wmma.matrix_a", align=32, offset_factor=256
)
- buffer_2 = T.buffer_decl(
- [16, 16], dtype="float16", scope="shared", align=32, offset_factor=256
- )
- buffer_3 = T.buffer_decl(
+ buffer_2 = T.Buffer([16, 16], dtype="float16", scope="shared", align=32, offset_factor=256)
+ buffer_3 = T.Buffer(
[16, 16], dtype="float16", scope="wmma.matrix_b", align=32, offset_factor=256
)
- buffer_4 = T.buffer_decl([16, 16], scope="wmma.accumulator", align=32, offset_factor=256)
- buffer_5 = T.buffer_decl([16, 16], align=32, offset_factor=256)
+ buffer_4 = T.Buffer([16, 16], scope="wmma.accumulator", align=32, offset_factor=256)
+ buffer_5 = T.Buffer([16, 16], align=32, offset_factor=256)
A_1 = T.match_buffer(
A, [16, 14, 14, 16, 16, 16], dtype="float16", elem_offset=0, align=64, offset_factor=1
)
@@ -949,9 +939,9 @@ def opt_conv_tensorcore_lower():
# function attr dict
T.func_attr({"global_symbol": "default_function", "tir.noalias": True})
# body
- A_1 = T.buffer_decl([12845056], dtype="float16", data=A.data)
- W_1 = T.buffer_decl([1179648], dtype="float16", data=W.data)
- Conv_1 = T.buffer_decl([25690112], data=Conv.data)
+ A_1 = T.Buffer([12845056], dtype="float16", data=A.data)
+ W_1 = T.Buffer([1179648], dtype="float16", data=W.data)
+ Conv_1 = T.Buffer([25690112], data=Conv.data)
bx = T.env_thread("blockIdx.x")
by = T.env_thread("blockIdx.y")
bz = T.env_thread("blockIdx.z")
@@ -960,21 +950,21 @@ def opt_conv_tensorcore_lower():
tz = T.env_thread("threadIdx.z")
T.launch_thread(bz, 196)
Conv_wmma_accumulator_data = T.allocate([2048], "float32", "wmma.accumulator")
- Conv_wmma_accumulator = T.buffer_decl(
+ Conv_wmma_accumulator = T.Buffer(
shape=[2048], dtype="float32", scope="wmma.accumulator", data=Conv_wmma_accumulator_data
)
Apad_shared_data = T.allocate([12288], "float16", "shared")
- Apad_shared = T.buffer_decl(
+ Apad_shared = T.Buffer(
shape=[12288], dtype="float16", scope="shared", data=Apad_shared_data
)
W_shared_data = T.allocate([12288], "float16", "shared")
- W_shared = T.buffer_decl(shape=[12288], dtype="float16", scope="shared", data=W_shared_data)
+ W_shared = T.Buffer(shape=[12288], dtype="float16", scope="shared", data=W_shared_data)
Apad_shared_wmma_matrix_a_data = T.allocate([512], "float16", "wmma.matrix_a")
- Apad_shared_wmma_matrix_a = T.buffer_decl(
+ Apad_shared_wmma_matrix_a = T.Buffer(
shape=[512], dtype="float16", scope="wmma.matrix_a", data=Apad_shared_wmma_matrix_a_data
)
W_shared_wmma_matrix_b_data = T.allocate([1024], "float16", "wmma.matrix_b")
- W_shared_wmma_matrix_b = T.buffer_decl(
+ W_shared_wmma_matrix_b = T.Buffer(
shape=[1024], dtype="float16", scope="wmma.matrix_b", data=W_shared_wmma_matrix_b_data
)
T.launch_thread(bx, 2)
@@ -2253,7 +2243,7 @@ def opt_conv_tensorcore_mod_host():
)
# body
stack_tcode_data: T.Ptr[T.int32] = T.tvm_stack_alloca("arg_tcode", 10, dtype="handle")
- stack_tcode = T.buffer_decl([9], "int32", data=stack_tcode_data)
+ stack_tcode = T.Buffer([9], "int32", data=stack_tcode_data)
stack_value: T.handle = T.tvm_stack_alloca("arg_value", 10, dtype="handle")
assert num_args == 3, "default_function: num_args should be 3"
arg0: T.handle = T.tvm_struct_get(args, 0, 12, dtype="handle")
@@ -2266,25 +2256,25 @@ def opt_conv_tensorcore_mod_host():
A: T.handle = T.tvm_struct_get(arg0, 0, 1, dtype="handle")
T.attr(A, "storage_alignment", 128)
arg0_shape_data: T.Ptr[T.int64] = T.tvm_struct_get(arg0, 0, 2, dtype="handle")
- arg0_shape = T.buffer_decl([6], "int64", data=arg0_shape_data)
+ arg0_shape = T.Buffer([6], "int64", data=arg0_shape_data)
arg0_strides_data: T.Ptr[T.int64] = T.tvm_struct_get(arg0, 0, 3, dtype="handle")
- arg0_strides = T.buffer_decl([6], "int64", data=arg0_strides_data)
+ arg0_strides = T.Buffer([6], "int64", data=arg0_strides_data)
dev_id: T.int32 = T.tvm_struct_get(arg0, 0, 9, dtype="int32")
W: T.handle = T.tvm_struct_get(arg1, 0, 1, dtype="handle")
T.attr(W, "storage_alignment", 128)
arg1_shape_data: T.Ptr[T.int64] = T.tvm_struct_get(arg1, 0, 2, dtype="handle")
- arg1_shape = T.buffer_decl([6], "int64", data=arg1_shape_data)
+ arg1_shape = T.Buffer([6], "int64", data=arg1_shape_data)
arg1_strides_data: T.Ptr[T.int64] = T.tvm_struct_get(arg1, 0, 3, dtype="handle")
- arg1_strides = T.buffer_decl([6], "int64", data=arg1_strides_data)
+ arg1_strides = T.Buffer([6], "int64", data=arg1_strides_data)
Conv: T.handle = T.tvm_struct_get(arg2, 0, 1, dtype="handle")
T.attr(Conv, "storage_alignment", 128)
arg2_shape_data: T.Ptr[T.int64] = T.tvm_struct_get(arg2, 0, 2, dtype="handle")
- arg2_shape = T.buffer_decl([6], "int64", data=arg2_shape_data)
+ arg2_shape = T.Buffer([6], "int64", data=arg2_shape_data)
arg2_strides_data: T.Ptr[T.int64] = T.tvm_struct_get(arg2, 0, 3, dtype="handle")
- arg2_strides = T.buffer_decl([6], "int64", data=arg2_strides_data)
+ arg2_strides = T.Buffer([6], "int64", data=arg2_strides_data)
assert (((arg0_code == 3) or (arg0_code == 13)) or (arg0_code == 7)) or (
arg0_code == 4
@@ -2499,7 +2489,7 @@ def vthread_func():
T.launch_thread(i1, 2)
T.launch_thread(i2, 2)
B_data = T.allocate([16], "float32", "local")
- B = T.buffer_decl(shape=[16], dtype="float32", scope="local", data=B_data)
+ B = T.Buffer(shape=[16], dtype="float32", scope="local", data=B_data)
for j in range(16):
B[j] = A[i0 * 64 + i1 * 32 + i2 * 16 + j] + T.float32(1)
for j in range(16):
@@ -2813,12 +2803,12 @@ def module_const():
B = T.alloc_buffer((10), "int32")
K1_data = T.allocate_const([1, 1, 1, 1, 1, 1, 1, 1, 1, 1], "int32", [10])
- K1 = T.buffer_decl(shape=[10], dtype="int32", data=K1_data)
+ K1 = T.Buffer(shape=[10], dtype="int32", data=K1_data)
for x in T.serial(0, 10):
B[x] = A[x] + K1[x]
K2_data = T.allocate_const([1, 1, 1, 1, 1, 1, 1, 1, 1, 1], "int32", [10])
- K2 = T.buffer_decl(shape=[10], dtype="int32", data=K2_data)
+ K2 = T.Buffer(shape=[10], dtype="int32", data=K2_data)
for x in T.serial(0, 10):
B[x] = B[x] + K2[x]
@@ -2835,7 +2825,7 @@ def constant():
C = T.match_buffer(c, (10), "int32")
B = T.alloc_buffer((10), "int32")
K_data = T.allocate_const([1, 1, 1, 1, 1, 1, 1, 1, 1, 1], "int32", [10])
- K = T.buffer_decl(shape=[10], dtype="int32", data=K_data)
+ K = T.Buffer(shape=[10], dtype="int32", data=K_data)
for x in T.serial(0, 10):
B[x] = A[x] + K[x]
@@ -2980,7 +2970,7 @@ def primfunc_with_allocate_annotations():
T_cast_7 = T.match_buffer(T_cast_6, [200704], dtype="int16", elem_offset=0, align=64, offset_factor=1)
# body
tensor_2_data = T.allocate([200704], "uint8", "global", annotations={"attr1_key": "attr1_value"})
- tensor_2 = T.buffer_decl(shape=[200704], dtype="uint8", scope="global", data=tensor_2_data)
+ tensor_2 = T.Buffer(shape=[200704], dtype="uint8", scope="global", data=tensor_2_data)
for ax0_ax1_fused_4 in T.serial(0, 56):
for ax2_4 in T.serial(0, 56):
for ax3_init in T.serial(0, 64):
@@ -3007,7 +2997,7 @@ def comm_reducer_single_reduce_group():
for i in T.serial(0, 128):
T.launch_thread(threadIdx_x, 128)
reduce_temp0_data = T.allocate([1], "float32", "local")
- reduce_temp0 = T.buffer_decl(shape=[1], dtype="float32", scope="local", data=reduce_temp0_data)
+ reduce_temp0 = T.Buffer(shape=[1], dtype="float32", scope="local", data=reduce_temp0_data)
with T.attr(T.comm_reducer(lambda x, y: x + y, [T.float32(0)]), "reduce_scope", T.reinterpret(T.uint64(0), dtype="handle")):
T.evaluate(T.tvm_thread_allreduce(T.uint32(1), A[i * 128 + threadIdx_x], True, reduce_temp0.data, threadIdx_x, dtype="handle"))
@@ -3023,7 +3013,7 @@ def comm_reducer_multiple_reduce_groups():
for i in T.serial(0, 128):
T.launch_thread(threadIdx_x, 128)
reduce_temp0_data = T.allocate([1], "float32", "local")
- reduce_temp0 = T.buffer_decl(shape=[1], dtype="float32", scope="local", data=reduce_temp0_data)
+ reduce_temp0 = T.Buffer(shape=[1], dtype="float32", scope="local", data=reduce_temp0_data)
with T.attr(T.comm_reducer(lambda x0, x1, y0, y1: (T.Select((x1 >= y1), x0, y0), T.Select((x1 >= y1), x1, y1)), [T.int32(-1), T.min_value("float32")]), "reduce_scope", T.reinterpret(T.uint64(0), dtype="handle")):
T.evaluate(T.tvm_thread_allreduce(T.uint32(1), A[i * 128 + threadIdx_x], True, reduce_temp0.data, threadIdx_x, dtype="handle"))
@@ -3033,10 +3023,10 @@ def comm_reducer_multiple_reduce_groups():
def multiple_commreducer():
@T.prim_func
def multiple_commreducer() -> None:
- normal_reduce_temp0 = T.buffer_decl([1], dtype="float32", strides=[1], scope="local")
- normal_reduce_temp1 = T.buffer_decl([1], dtype="float32", strides=[1], scope="local")
- reduce_temp0 = T.buffer_decl([1], dtype="float32", strides=[1], scope="local")
- reduce_temp1 = T.buffer_decl([1], dtype="float32", strides=[1], scope="local")
+ normal_reduce_temp0 = T.Buffer([1], dtype="float32", strides=[1], scope="local")
+ normal_reduce_temp1 = T.Buffer([1], dtype="float32", strides=[1], scope="local")
+ reduce_temp0 = T.Buffer([1], dtype="float32", strides=[1], scope="local")
+ reduce_temp1 = T.Buffer([1], dtype="float32", strides=[1], scope="local")
for ax0_1 in T.thread_binding(0, 32, thread="threadIdx.x"):
with T.block("T_softmax_maxelem_cross_thread_reduction"):
T.attr(T.comm_reducer(lambda x, y: T.max(x, y), [T.min_value("float32")]), "reduce_scope", T.reinterpret(T.uint64(0), dtype="handle"))
@@ -3163,7 +3153,7 @@ def func_T_ptr_let_statement():
) -> None:
# The T.Ptr declaration in the parameter list should parse
# correctly, and should be usable as the data pointer in a buffer.
- arg_type_ids = T.buffer_decl([2], dtype="int32", data=arg_type_ids_handle)
+ arg_type_ids = T.Buffer([2], dtype="int32", data=arg_type_ids_handle)
arg0: T.handle = T.tvm_struct_get(args, 0, 12, dtype="handle")
arg1: T.handle = T.tvm_struct_get(args, 1, 12, dtype="handle")
@@ -3177,9 +3167,9 @@ def func_T_ptr_let_statement():
# this function. It should only be defined after the data pointer
# has been defined, and should not be hoisted into the header of
# the function as other buffer_decl statements can be.
- A = T.buffer_decl([1024], dtype="float32", data=A_data)
+ A = T.Buffer([1024], dtype="float32", data=A_data)
B_data: T.Ptr[T.float32] = T.tvm_struct_get(arg1, 0, 1, dtype="handle")
- B = T.buffer_decl([1024], dtype="float32", data=B_data)
+ B = T.Buffer([1024], dtype="float32", data=B_data)
B[0] = A[0]
@@ -3190,7 +3180,7 @@ def func_T_ptr_allocate():
@T.prim_func
def func_T_ptr_allocate() -> None:
A_data = T.allocate([1024], "float32", "global")
- A = T.buffer_decl(shape=[1024], dtype="float32", scope="global", data=A_data)
+ A = T.Buffer(shape=[1024], dtype="float32", scope="global", data=A_data)
A[0] = 0.0
return func_T_ptr_allocate
@@ -3282,9 +3272,9 @@ def pointer_type():
@T.prim_func
def func_with_ptr_type_annotations(x: T.Ptr[T.int32], y: T.Ptr[T.int32, "shared"]):
xx_data = T.allocate([16], "int32", "global")
- xx = T.buffer_decl(shape=[16], dtype="int32", scope="global", data=xx_data)
+ xx = T.Buffer(shape=[16], dtype="int32", scope="global", data=xx_data)
yy_data = T.allocate([16], "int32", "shared")
- yy = T.buffer_decl(shape=[16], dtype="int32", scope="shared", data=yy_data)
+ yy = T.Buffer(shape=[16], dtype="int32", scope="shared", data=yy_data)
a: T.Ptr[T.int32] = T.address_of(xx[0], dtype="handle")
b: T.Ptr[T.int32, "shared"] = T.address_of(yy[0], dtype="handle")
T.evaluate(T.call_extern("copy", a, b, dtype=""))
diff --git a/tests/python/unittest/test_tvmscript_syntax_sugar.py b/tests/python/unittest/test_tvmscript_syntax_sugar.py
index 02b18e7e7c..35f9e6c2e6 100644
--- a/tests/python/unittest/test_tvmscript_syntax_sugar.py
+++ b/tests/python/unittest/test_tvmscript_syntax_sugar.py
@@ -152,18 +152,6 @@ def test_match_buffer_1d():
assert_structural_equal(func_no_sugar, func_with_sugar)
-# match buffer failed case
-def test_match_buffer_no_kwargs_failed():
- with pytest.raises(ValueError) as e:
-
- @T.prim_func
- def elementwise_buffer_no_kwargs_failed(
- a: T.Buffer[(128, 128, 128, 128)],
- b: T.Buffer[(128, 128, 128, 128)],
- ) -> None:
- pass
-
-
# dynamic shape gemm
@T.prim_func
def gemm_dyn_shape(a: T.handle, b: T.handle, c: T.handle):