Posted to commits@tvm.apache.org by mo...@apache.org on 2021/09/14 21:22:01 UTC
[tvm] branch main updated: [Hexagon] Add contrib tests for blocked conv2d and maxpool2d (#8960)
This is an automated email from the ASF dual-hosted git repository.
moreau pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git
The following commit(s) were added to refs/heads/main by this push:
new b856d9e [Hexagon] Add contrib tests for blocked conv2d and maxpool2d (#8960)
b856d9e is described below
commit b856d9e1b357587f1bbb92b70606b9f6b551573d
Author: Chris Sullivan <cs...@octoml.ai>
AuthorDate: Tue Sep 14 14:21:47 2021 -0700
[Hexagon] Add contrib tests for blocked conv2d and maxpool2d (#8960)
* Add hexagon contrib tests for blocked conv2d and maxpool2d
* Restructure based on review comments
---
tests/python/contrib/test_hexagon/__init__.py | 18 +
tests/python/contrib/test_hexagon/conftest.py | 37 ++
.../python/contrib/test_hexagon/infrastructure.py | 88 ++++
.../contrib/test_hexagon/test_conv2d_blocked.py | 473 +++++++++++++++++++++
.../contrib/test_hexagon/test_maxpool2d_blocked.py | 155 +++++++
5 files changed, 771 insertions(+)
diff --git a/tests/python/contrib/test_hexagon/__init__.py b/tests/python/contrib/test_hexagon/__init__.py
new file mode 100644
index 0000000..58dc4cc
--- /dev/null
+++ b/tests/python/contrib/test_hexagon/__init__.py
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+""" Testing infrastructure for Hexagon """
diff --git a/tests/python/contrib/test_hexagon/conftest.py b/tests/python/contrib/test_hexagon/conftest.py
new file mode 100644
index 0000000..0329328
--- /dev/null
+++ b/tests/python/contrib/test_hexagon/conftest.py
@@ -0,0 +1,37 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+""" Hexagon testing fixtures used to deduce testing argument
+ values from testing parameters """
+
+import tvm
+from .infrastructure import get_packed_filter_layout
+
+
+@tvm.testing.fixture
+def shape_nhwc(batch, in_channel, in_size):
+ return (batch, in_size, in_size, in_channel)
+
+
+@tvm.testing.fixture
+def shape_oihw(out_channel, in_channel, kernel):
+ return (out_channel, in_channel, kernel, kernel)
+
+
+@tvm.testing.fixture
+def shape_oihw8i32o4i(out_channel, in_channel, kernel):
+ return get_packed_filter_layout(out_channel, in_channel, kernel, kernel)
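For reference, each tvm.testing.fixture above derives its value from the class-level tvm.testing.parameter definitions of the same names in the test classes further below. A worked example (not part of the commit) for the small conv2d configuration, batch=1, in_size=8, in_channel=64, out_channel=64, kernel=3:

    shape_nhwc = (1, 8, 8, 64)
    shape_oihw = (64, 64, 3, 3)
    # get_packed_filter_layout(64, 64, 3, 3) == (2, 2, 3, 3, 8, 32, 4)
    shape_oihw8i32o4i = (2, 2, 3, 3, 8, 32, 4)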
diff --git a/tests/python/contrib/test_hexagon/infrastructure.py b/tests/python/contrib/test_hexagon/infrastructure.py
new file mode 100644
index 0000000..193a863
--- /dev/null
+++ b/tests/python/contrib/test_hexagon/infrastructure.py
@@ -0,0 +1,88 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+""" Hexagon testing infrastructure """
+
+import tvm
+import numpy
+
+
+def ceildiv(o, d):
+ return tvm.tir.floordiv(o + d - 1, d)
+
+
+def get_packed_activation_layout(shape_nhwc, block_shape, packed_C=True):
+ assert len(shape_nhwc) == 4
+ shape = [shape_nhwc[0]]
+ off_h, off_w, off_c = block_shape
+ shape.append(ceildiv(shape_nhwc[1], off_h))
+ shape.append(ceildiv(shape_nhwc[2], off_w))
+ if packed_C:
+ shape.append(ceildiv(shape_nhwc[3], off_c))
+ shape.extend(block_shape)
+ else:
+ shape.extend([off_h, off_w, shape_nhwc[3]])
+ return shape
+
+
+def get_packed_filter_layout(out_channel, in_channel, kernel_h, kernel_w):
+ out_factor, in_first_factor, in_second_factor = 32, 32, 4
+ return (
+ int(ceildiv(out_channel, out_factor)),
+ int(ceildiv(in_channel, in_first_factor)),
+ kernel_h,
+ kernel_w,
+ in_first_factor // in_second_factor,
+ out_factor,
+ in_second_factor,
+ )
+
+
+def build_and_run(inputs, func, target, target_host, *args, **kwargs):
+ schedule, placeholders, binds = func(*args, **kwargs)
+
+ func = tvm.build(schedule, placeholders, target=target, target_host=target_host, binds=binds)
+ dev = tvm.device(target)
+ tensors = []
+ for tensor in inputs:
+ tensors.append(tvm.nd.array(tensor, dev))
+ tensors.append(
+ tvm.nd.array(
+ numpy.zeros([i.value for i in placeholders[-1].shape], dtype=placeholders[-1].dtype),
+ dev,
+ )
+ )
+ func(*tensors)
+
+ return tensors[-1].asnumpy()
+
+
+def get_block_shape():
+ return 8, 8, 32
+
+
+def get_conv2d_nhwc_shape(shape_nhwc, kernel_size, strides, padding, dilation, out_channels):
+ assert len(shape_nhwc) == 4
+ kernel = []
+ kernel.append((kernel_size[0] - 1) * dilation[0] + 1)
+ kernel.append((kernel_size[1] - 1) * dilation[1] + 1)
+ return (
+ shape_nhwc[0],
+ (shape_nhwc[1] - kernel[0] + padding[0] + padding[1]) // strides[0] + 1,
+ (shape_nhwc[2] - kernel[1] + padding[2] + padding[3]) // strides[1] + 1,
+ out_channels,
+ )
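To make the nhwc8h8w32c activation packing concrete, here is a minimal numpy sketch (not part of the commit) of the layout that get_packed_activation_layout describes. It assumes H, W, and C are already multiples of the 8x8x32 block shape; the TE code in the tests below handles ragged sizes by padding first:

    import numpy as np

    def pack_nhwc8h8w32c(x):
        # x is NHWC with H % 8 == 0, W % 8 == 0, C % 32 == 0.
        n, h, w, c = x.shape
        x = x.reshape(n, h // 8, 8, w // 8, 8, c // 32, 32)
        # Reorder to [N, H//8, W//8, C//32, 8h, 8w, 32c].
        return x.transpose(0, 1, 3, 5, 2, 4, 6)

    x = np.arange(1 * 8 * 8 * 32, dtype="float32").reshape(1, 8, 8, 32)
    packed = pack_nhwc8h8w32c(x)
    assert packed.shape == (1, 1, 1, 1, 8, 8, 32)
    assert packed[0, 0, 0, 0, 3, 5, 7] == x[0, 3, 5, 7]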
diff --git a/tests/python/contrib/test_hexagon/test_conv2d_blocked.py b/tests/python/contrib/test_hexagon/test_conv2d_blocked.py
new file mode 100644
index 0000000..e0b7fb2
--- /dev/null
+++ b/tests/python/contrib/test_hexagon/test_conv2d_blocked.py
@@ -0,0 +1,473 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import sys
+
+import tvm
+from tvm import te
+from tvm import topi
+from tvm.topi import testing
+from .infrastructure import (
+ build_and_run,
+ get_block_shape,
+ get_conv2d_nhwc_shape,
+ get_packed_filter_layout,
+ get_packed_activation_layout,
+)
+
+import numpy as np
+import pytest
+
+
+def conv2d_logical(
+ shape_nhwc,
+ shape_oihw,
+ kernel_size,
+ stride,
+ padding,
+ dtype,
+ storage_scope="global",
+):
+ """
+ Conv2d TE wherein both the input activation and the filter tensors
+ are defined with their logical NHWC/OIHW shapes, respectively.
+ The packed physical layouts for the activation and filter are:
+ Activation: nhwc8h8w32c
+ Filter: oihw8i32o4i
+ """
+ assert kernel_size == tuple(shape_oihw[2:])
+
+ block_shape = get_block_shape()
+ block_H, block_W, block_C = block_shape
+ shape = get_packed_activation_layout(shape_nhwc, block_shape)
+ logical_output_shape = get_conv2d_nhwc_shape(
+ shape_nhwc, kernel_size, stride, padding, [1, 1], shape_oihw[0]
+ )
+ output_shape = get_packed_activation_layout(logical_output_shape, block_shape)
+
+ N, H, W, C = shape_nhwc
+ X = te.placeholder(shape_nhwc, dtype=dtype)
+ # Combination of the padding required by the conv2d operator and the padding
+ # needed to round H and W up to a whole number of blocks. This padding should
+ # be inlined in the schedule so as to avoid copying the input.
+ pad_h = (block_H - ((H + padding[1]) % block_H)) % block_H
+ pad_w = (block_W - ((W + padding[3]) % block_W)) % block_W
+ X_pad = topi.nn.pad(X, [0, padding[0], padding[2], 0], [0, pad_h, pad_w, 0], pad_value=0)
+ # Calculate packed layout
+ X_packed = te.compute(
+ shape,
+ lambda n, ho, wo, co, hi, wi, ci: X_pad[
+ n, ho * block_H + hi, wo * block_W + wi, co * block_C + ci
+ ],
+ )
+
+ # Filter shape using KCRS (OIHW) notation
+ K, C, R, S = shape_oihw
+ filter_Ki, filter_Ci, filter_Cii = 32, 32, 4
+ shape_filter = get_packed_filter_layout(K, C, R, S)
+ filt = te.placeholder(shape_oihw, dtype=dtype)
+ # Channel padding to multiples of 32
+ pad_c = (filter_Ci - (C % filter_Ci)) % filter_Ci
+ pad_k = (filter_Ki - (K % filter_Ki)) % filter_Ki
+ filt_pad = topi.nn.pad(
+ filt, [0, 0, 0, 0], [pad_k, pad_c, 0, 0], pad_value=0, name="padded_filter"
+ )
+ filt_packed = te.compute(
+ shape_filter,
+ lambda ko, co, r, s, cio, ki, cii: filt_pad[
+ ko * filter_Ki + ki, co * filter_Ci + cio * filter_Cii + cii, r, s
+ ],
+ name="packed_filter",
+ )
+
+ rh = te.reduce_axis((0, kernel_size[0]), name="rh")
+ rw = te.reduce_axis((0, kernel_size[1]), name="rw")
+ rc = te.reduce_axis((0, C), name="rc")
+
+ def compute(n, ho, wo, ko, hi, wi, ki):
+ # Construct blockized strided conv2d height index
+ h = ho * block_H + hi
+ h_contig = h * stride[0] + rh
+ h_block_id = h_contig // block_H
+ h_block_offset = h_contig % block_H
+
+ # Construct blockized strided conv2d width index
+ w = wo * block_W + wi
+ w_contig = w * stride[1] + rw
+ w_block_id = w_contig // block_W
+ w_block_offset = w_contig % block_W
+
+ # Construct blockized conv2d channel index
+ c_block_id = rc // block_C
+ c_block_offset = rc % block_C
+
+ # Construct flat filter input channel indices
+ rco = rc // filter_Ci
+ rcio = (rc % filter_Ci) // filter_Cii
+ rcii = rc % filter_Cii
+
+ return te.sum(
+ X_packed[
+ n,
+ h_block_id,
+ w_block_id,
+ c_block_id,
+ h_block_offset,
+ w_block_offset,
+ c_block_offset,
+ ]
+ * filt_packed[ko, rco, rh, rw, rcio, ki, rcii],
+ axis=[rh, rw, rc],
+ )
+
+ Y = te.compute(output_shape, compute)
+ s = te.create_schedule(Y.op)
+
+ # Ensure the padding and array packing is performed inline
+ s[X_pad].compute_inline()
+ s[X_packed].compute_inline()
+
+ s[filt_pad].compute_inline()
+ s[filt_packed].compute_inline()
+
+ binds = {}
+ if storage_scope and storage_scope != "global":
+ with tvm.transform.PassContext():
+ Xb = tvm.tir.decl_buffer(shape, name="Xb", dtype=dtype, scope=storage_scope)
+ Yb = tvm.tir.decl_buffer(output_shape, name="Yb", dtype=dtype, scope=storage_scope)
+ binds = {X: Xb, Y: Yb}
+
+ return (s, [X, filt, Y], binds)
+
+
+def conv2d_packed_filter(
+ shape_nhwc,
+ shape_oihw8i32o4i,
+ kernel_size,
+ stride,
+ padding,
+ dtype,
+ storage_scope="global",
+):
+ """
+ Conv2d TE wherein the input activation is defined by its
+ logical NHWC shape, but the filter is provided in the
+ packed layout oihw8i32o4i. The physical packed layout used
+ for the activation is: nhwc8h8w32c
+ """
+ assert kernel_size == tuple(shape_oihw8i32o4i[2:4])
+
+ block_shape = get_block_shape()
+ block_H, block_W, block_C = block_shape
+ shape = get_packed_activation_layout(shape_nhwc, block_shape)
+ logical_output_shape = get_conv2d_nhwc_shape(
+ shape_nhwc,
+ kernel_size,
+ stride,
+ padding,
+ [1, 1],
+ shape_oihw8i32o4i[0] * shape_oihw8i32o4i[5],
+ )
+
+ output_shape = get_packed_activation_layout(logical_output_shape, block_shape)
+
+ N, H, W, C = shape_nhwc
+ X = te.placeholder(shape_nhwc, dtype=dtype)
+ # Combination of the padding required by the conv2d operator and the padding
+ # needed to round H and W up to a whole number of blocks. This padding should
+ # be inlined in the schedule so as to avoid copying the input.
+ pad_h = (block_H - ((H + padding[1]) % block_H)) % block_H
+ pad_w = (block_W - ((W + padding[3]) % block_W)) % block_W
+
+ X_pad = topi.nn.pad(X, [0, padding[0], padding[2], 0], [0, pad_h, pad_w, 0], pad_value=0)
+ # Calculate packed layout
+ packed_shape = get_packed_activation_layout(X_pad.shape, block_shape)
+
+ X_packed = te.compute(
+ packed_shape,
+ lambda n, ho, wo, co, hi, wi, ci: X_pad[
+ n, ho * block_H + hi, wo * block_W + wi, co * block_C + ci
+ ],
+ )
+
+ # Filter shape using KCRS (OIHW) notation
+ filter_Ki, filter_Ci, filter_Cii = 32, 32, 4
+ assert shape_oihw8i32o4i[-1] == filter_Cii
+ assert shape_oihw8i32o4i[-2] == filter_Ki
+ assert shape_oihw8i32o4i[-3] == filter_Ci // filter_Cii
+
+ filt_packed = te.placeholder(shape_oihw8i32o4i, dtype=dtype)
+
+ rh = te.reduce_axis((0, kernel_size[0]), name="rh")
+ rw = te.reduce_axis((0, kernel_size[1]), name="rw")
+ rc = te.reduce_axis((0, C), name="rc")
+
+ def compute(n, ho, wo, ko, hi, wi, ki):
+ # Construct blockized strided conv2d height index
+ h = ho * block_H + hi
+ h_contig = h * stride[0] + rh
+ h_block_id = h_contig // block_H
+ h_block_offset = h_contig % block_H
+
+ # Construct blockized strided conv2d width index
+ w = wo * block_W + wi
+ w_contig = w * stride[1] + rw
+ w_block_id = w_contig // block_W
+ w_block_offset = w_contig % block_W
+
+ # Construct blockized conv2d channel index
+ c_block_id = rc // block_C
+ c_block_offset = rc % block_C
+
+ # Construct flat filter input channel indices
+ rco = rc // filter_Ci
+ rcio = (rc % filter_Ci) // filter_Cii
+ rcii = rc % filter_Cii
+
+ return te.sum(
+ X_packed[
+ n,
+ h_block_id,
+ w_block_id,
+ c_block_id,
+ h_block_offset,
+ w_block_offset,
+ c_block_offset,
+ ]
+ * filt_packed[ko, rco, rh, rw, rcio, ki, rcii],
+ axis=[rh, rw, rc],
+ )
+
+ Y = te.compute(output_shape, compute)
+ s = te.create_schedule(Y.op)
+
+ # Ensure the padding and array packing is performed inline
+ s[X_pad].compute_inline()
+ s[X_packed].compute_inline()
+
+ # Perform scheduling
+ n, hid, wid, cid, hoff, woff, coff = s[Y].op.axis
+ fused = s[Y].fuse(wid, cid)
+ Xl = s.cache_read(X_packed, storage_scope, [Y])
+ Yl = s.cache_write(Y, storage_scope)
+
+ s[Yl].compute_at(s[Y], hid)
+ n, hid, fused, hoff, woff, coff = s[Yl].op.axis
+ s[Xl].compute_at(s[Yl], fused)
+
+ binds = {}
+ if storage_scope and storage_scope != "global":
+ with tvm.transform.PassContext():
+ Xb = tvm.tir.decl_buffer(shape, name="Xb", dtype=dtype, scope=storage_scope)
+ Yb = tvm.tir.decl_buffer(output_shape, name="Yb", dtype=dtype, scope=storage_scope)
+ binds = {X: Xb, Y: Yb}
+
+ return (s, [X, filt_packed, Y], binds)
+
+
+def conv2d_packed_filter_nhwhwc(
+ shape_nhwc,
+ shape_oihw8i32o4i,
+ kernel_size,
+ stride,
+ padding,
+ dtype,
+ storage_scope="global",
+):
+ """
+ Conv2d TE wherein the input activation is defined by its
+ logical NHWC shape, but the filter is provided in the
+ packed layout oihw8i32o4i. The physical packed layout used
+ for the activation is: nhw8h8wc
+
+ """
+ assert kernel_size == tuple(shape_oihw8i32o4i[2:4])
+
+ block_shape = get_block_shape()
+ block_H, block_W, _ = block_shape
+ shape = get_packed_activation_layout(shape_nhwc, block_shape, packed_C=False)
+ logical_output_shape = get_conv2d_nhwc_shape(
+ shape_nhwc,
+ kernel_size,
+ stride,
+ padding,
+ [1, 1],
+ shape_oihw8i32o4i[0] * shape_oihw8i32o4i[5],
+ )
+ output_shape = get_packed_activation_layout(logical_output_shape, block_shape, packed_C=False)
+
+ N, H, W, C = shape_nhwc
+ X = te.placeholder(shape_nhwc, dtype=dtype)
+ # Combination of the padding required by the conv2d operator and the padding
+ # needed to round H and W up to a whole number of blocks. This padding should
+ # be inlined in the schedule so as to avoid copying the input.
+ pad_h = (block_H - ((H + padding[1]) % block_H)) % block_H
+ pad_w = (block_W - ((W + padding[3]) % block_W)) % block_W
+ X_pad = topi.nn.pad(X, [0, padding[0], padding[2], 0], [0, pad_h, pad_w, 0], pad_value=0)
+ # Calculate packed layout
+ packed_shape = get_packed_activation_layout(X_pad.shape, block_shape, packed_C=False)
+ X_packed = te.compute(
+ packed_shape, lambda n, ho, wo, hi, wi, c: X_pad[n, ho * block_H + hi, wo * block_W + wi, c]
+ )
+
+ # Filter shape using KCRS (OIHW) notation
+ filter_Ki, filter_Ci, filter_Cii = 32, 32, 4
+ assert shape_oihw8i32o4i[-1] == filter_Cii
+ assert shape_oihw8i32o4i[-2] == filter_Ki
+ assert shape_oihw8i32o4i[-3] == filter_Ci // filter_Cii
+
+ filt_packed = te.placeholder(shape_oihw8i32o4i, dtype=dtype)
+
+ rh = te.reduce_axis((0, kernel_size[0]), name="rh")
+ rw = te.reduce_axis((0, kernel_size[1]), name="rw")
+ rc = te.reduce_axis((0, C), name="rc")
+
+ def compute(n, ho, wo, hi, wi, k):
+ # Construct blockized strided conv2d height index
+ h = ho * block_H + hi
+ h_contig = h * stride[0] + rh
+ h_block_id = h_contig // block_H
+ h_block_offset = h_contig % block_H
+
+ # Construct blockized strided conv2d width index
+ w = wo * block_W + wi
+ w_contig = w * stride[1] + rw
+ w_block_id = w_contig // block_W
+ w_block_offset = w_contig % block_W
+
+ # Construct flat filter input channel indices
+ rco = rc // filter_Ci
+ rcio = (rc % filter_Ci) // filter_Cii
+ rcii = rc % filter_Cii
+
+ # Construct split filter output channel index
+ ko = k // filter_Ki
+ ki = k % filter_Ki
+
+ return te.sum(
+ X_packed[n, h_block_id, w_block_id, h_block_offset, w_block_offset, rc]
+ * filt_packed[ko, rco, rh, rw, rcio, ki, rcii],
+ axis=[rh, rw, rc],
+ )
+
+ Y = te.compute(output_shape, compute)
+ s = te.create_schedule(Y.op)
+
+ # Ensure the padding and array packing is performed inline
+ s[X_pad].compute_inline()
+ s[X_packed].compute_inline()
+
+ n, ho, wo, hi, wi, k = s[Y].op.axis
+ rh, rw, rc = s[Y].op.reduce_axis
+
+ rco, rci = s[Y].split(rc, factor=32)
+ s[Y].reorder(n, rco, wo, ho, k, hi, wi)
+ Xl = s.cache_read(X_packed, storage_scope, [Y])
+ s[Xl].compute_at(s[Y], rco)
+
+ ko, ki = s[Y].split(k, factor=32)
+ s[Y].reorder(n, rco, wo, ho, ko, hi, wi, ki)
+ Fl = s.cache_read(filt_packed, storage_scope, [Y])
+ s[Fl].compute_at(s[Y], ko)
+
+ binds = {}
+ if storage_scope and storage_scope != "global":
+ with tvm.transform.PassContext():
+ Xb = tvm.tir.decl_buffer(shape, name="Xb", dtype=dtype, scope=storage_scope)
+ Yb = tvm.tir.decl_buffer(output_shape, name="Yb", dtype=dtype, scope=storage_scope)
+ binds = {X: Xb, Y: Yb}
+
+ return (s, [X, filt_packed, Y], binds)
+
+
+class BaseConv2d:
+ batch = tvm.testing.parameter(1)
+ in_size = tvm.testing.parameter(8, 56)
+ in_channel = tvm.testing.parameter(64)
+ out_channel = tvm.testing.parameter(64)
+ kernel = tvm.testing.parameter(3)
+ stride = tvm.testing.parameter(1)
+ pad = tvm.testing.parameter(1)
+ dtype = tvm.testing.parameter("float32")
+
+
+class TestConv2dLogical(BaseConv2d):
+ @tvm.testing.parametrize_targets("llvm")
+ def test_conv2d(self, shape_nhwc, shape_oihw, kernel, stride, pad, dtype, target):
+ inputs = [
+ np.random.uniform(0, 255, size=shape_nhwc).astype(dtype),
+ np.random.uniform(0, 255, size=shape_oihw).astype(dtype),
+ ]
+ np_filter = inputs[1].transpose(2, 3, 1, 0)
+ ref_output = testing.conv2d_nhwc_python(inputs[0], np_filter, stride, pad)
+ output = build_and_run(
+ inputs,
+ conv2d_logical,
+ target,
+ target,
+ shape_nhwc=shape_nhwc,
+ shape_oihw=shape_oihw,
+ kernel_size=(kernel, kernel),
+ stride=(stride, stride),
+ padding=(pad, pad, pad, pad),
+ dtype=dtype,
+ )
+ return output, ref_output
+
+
+class TestConv2dPackedFilter(BaseConv2d):
+ conv2d_impl = tvm.testing.parameter(conv2d_packed_filter, conv2d_packed_filter_nhwhwc)
+
+ @tvm.testing.parametrize_targets("llvm")
+ def test_conv2d(
+ self,
+ conv2d_impl,
+ shape_nhwc,
+ shape_oihw,
+ shape_oihw8i32o4i,
+ kernel,
+ stride,
+ pad,
+ dtype,
+ target,
+ ):
+ inputs = [
+ np.random.uniform(0, 255, size=shape_nhwc).astype(dtype),
+ np.random.uniform(0, 255, size=shape_oihw8i32o4i).astype(dtype),
+ ]
+ np_filter = (
+ inputs[1].transpose(0, 5, 1, 4, 6, 2, 3).reshape(shape_oihw).transpose(2, 3, 1, 0)
+ )
+ ref_output = testing.conv2d_nhwc_python(inputs[0], np_filter, stride, pad)
+ output = build_and_run(
+ inputs,
+ conv2d_impl,
+ target,
+ target,
+ shape_nhwc=shape_nhwc,
+ shape_oihw8i32o4i=shape_oihw8i32o4i,
+ kernel_size=(kernel, kernel),
+ stride=(stride, stride),
+ padding=(pad, pad, pad, pad),
+ dtype=dtype,
+ )
+ return output, ref_output
+
+
+if __name__ == "__main__":
+ sys.exit(pytest.main(sys.argv))
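The transpose chain in TestConv2dPackedFilter.test_conv2d above unpacks the oihw8i32o4i filter back to OIHW (and then to HWIO) before calling the NHWC reference implementation. A numpy sketch (not part of the commit) of the forward packing that it inverts, assuming K and C are multiples of 32:

    import numpy as np

    def pack_oihw8i32o4i(w):
        # w is OIHW (K, C, R, S) with K % 32 == 0 and C % 32 == 0.
        k, c, r, s = w.shape
        # Split K into (K//32, 32o) and C into (C//32, 8i, 4i), so a
        # flat input channel index is c = co * 32 + cio * 4 + cii.
        w = w.reshape(k // 32, 32, c // 32, 8, 4, r, s)
        # Reorder to [K//32, C//32, R, S, 8i, 32o, 4i].
        return w.transpose(0, 2, 5, 6, 3, 1, 4)

    w = np.random.uniform(size=(32, 32, 3, 3)).astype("float32")
    packed = pack_oihw8i32o4i(w)
    # The unpacking used in the test round-trips back to OIHW.
    unpacked = packed.transpose(0, 5, 1, 4, 6, 2, 3).reshape(32, 32, 3, 3)
    assert np.array_equal(unpacked, w)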
diff --git a/tests/python/contrib/test_hexagon/test_maxpool2d_blocked.py b/tests/python/contrib/test_hexagon/test_maxpool2d_blocked.py
new file mode 100644
index 0000000..67af8d8
--- /dev/null
+++ b/tests/python/contrib/test_hexagon/test_maxpool2d_blocked.py
@@ -0,0 +1,155 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import sys
+
+import tvm
+from tvm import te
+from tvm import topi
+from tvm.topi import testing
+from .infrastructure import (
+ build_and_run,
+ get_block_shape,
+ get_packed_activation_layout,
+)
+
+import numpy as np
+import pytest
+
+# Blocked layout: NHWC8h8w32c :: [N, H//8, W//8, C//32, 8h, 8w, 32c]
+def maxpool2d_logical(
+ shape_nhwc,
+ window_shape,
+ stride,
+ padding,
+ dtype,
+ storage_scope="global",
+):
+ """
+ Maxpool2d TE wherein the input activation is defined by its
+ logical NHWC shape. The packed physical layout for the
+ activation is nhwc8h8w32c.
+ """
+
+ block_shape = get_block_shape()
+ block_H, block_W, block_C = block_shape
+ shape = get_packed_activation_layout(shape_nhwc, block_shape)
+ logical_output_shape = (
+ shape_nhwc[0],
+ (shape_nhwc[1] - window_shape[0] + padding[0] + padding[1]) // stride[0] + 1,
+ (shape_nhwc[2] - window_shape[1] + padding[2] + padding[3]) // stride[1] + 1,
+ shape_nhwc[3],
+ )
+ output_shape = get_packed_activation_layout(logical_output_shape, block_shape)
+
+ N, H, W, C = shape_nhwc
+ X = te.placeholder(shape_nhwc, dtype=dtype)
+
+ # Combination of the padding required by the maxpool operator and the padding
+ # needed to round H and W up to a whole number of blocks. This padding should
+ # be inlined in the schedule so as to avoid copying the input.
+ pad_h = (block_H - ((H + padding[1]) % block_H)) % block_H
+ pad_w = (block_W - ((W + padding[3]) % block_W)) % block_W
+ X_pad = topi.nn.pad(X, [0, padding[0], padding[2], 0], [0, pad_h, pad_w, 0], pad_value=0)
+
+ # Calculate packed layout
+ X_packed = te.compute(
+ shape,
+ lambda n, ho, wo, co, hi, wi, ci: X_pad[
+ n, ho * block_H + hi, wo * block_W + wi, co * block_C + ci
+ ],
+ )
+
+ rh = te.reduce_axis((0, window_shape[0]), name="rh")
+ rw = te.reduce_axis((0, window_shape[1]), name="rw")
+
+ def compute(n, ho, wo, co, hi, wi, ci):
+ # Construct blockized strided maxpool height indices
+ h = ho * block_H + hi
+ h_contig = h * stride[0] + rh
+ h_block_id = h_contig // block_H
+ h_block_offset = h_contig % block_H
+
+ # Construct blockized strided maxpool width indices
+ w = wo * block_W + wi
+ w_contig = w * stride[1] + rw
+ w_block_id = w_contig // block_W
+ w_block_offset = w_contig % block_W
+
+ return te.max(
+ X_packed[n, h_block_id, w_block_id, co, h_block_offset, w_block_offset, ci],
+ axis=[rh, rw],
+ )
+
+ Y = te.compute(output_shape, compute)
+ s = te.create_schedule(Y.op)
+
+ # Ensure the padding and array packing is performed inline
+ s[X_pad].compute_inline()
+ s[X_packed].compute_inline()
+
+ binds = {}
+ if storage_scope and storage_scope != "global":
+ with tvm.transform.PassContext():
+ Xb = tvm.tir.decl_buffer(shape, name="Xb", dtype=dtype, scope=storage_scope)
+ Yb = tvm.tir.decl_buffer(output_shape, name="Yb", dtype=dtype, scope=storage_scope)
+ binds = {X: Xb, Y: Yb}
+
+ return (s, [X, Y], binds)
+
+
+class BaseMaxPooling:
+ batch = tvm.testing.parameter(1)
+ in_size = tvm.testing.parameter(8, 112)
+ in_channel = tvm.testing.parameter(64)
+ window_size = tvm.testing.parameter(3)
+ stride = tvm.testing.parameter(2)
+ pad = tvm.testing.parameter(1)
+ dtype = tvm.testing.parameter("float32")
+
+
+class TestMaxPooling(BaseMaxPooling):
+ @tvm.testing.parametrize_targets("llvm")
+ def test_maxpool(self, shape_nhwc, window_size, stride, pad, dtype, target):
+ inputs = [np.random.uniform(0, 255, size=shape_nhwc).astype(dtype)]
+ ref_output = testing.poolnd_python(
+ inputs[0],
+ (window_size, window_size),
+ strides=(stride, stride),
+ dilation=(1, 1),
+ padding_before=(pad, pad),
+ padding_after=(pad, pad),
+ pool_type="max",
+ )
+ output = build_and_run(
+ inputs,
+ maxpool2d_logical,
+ target,
+ target,
+ shape_nhwc,
+ window_shape=(window_size, window_size),
+ stride=(stride, stride),
+ padding=(pad, pad, pad, pad),
+ dtype=dtype,
+ )
+ return output, ref_output
+
+
+if __name__ == "__main__":
+ sys.exit(pytest.main(sys.argv))
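Both the conv2d and maxpool compute bodies above rely on the same index arithmetic: a blocked output coordinate is flattened back to a logical coordinate, the stride and window offset are applied, and the result is re-split into a block id plus an in-block offset of the packed input. A hand check in plain Python (not part of the commit), using the height dimension and the 8x8x32 block shape:

    BLOCK_H = 8

    def blocked_h_index(ho, hi, rh, stride_h):
        # Flatten the blocked output row, apply the stride and window
        # offset, then re-split on the packed (padded) input.
        h = ho * BLOCK_H + hi
        h_contig = h * stride_h + rh
        return h_contig // BLOCK_H, h_contig % BLOCK_H

    # Output row 9 (ho=1, hi=1) with stride 2 and window offset rh=1
    # reads padded-input row 19, which is block 2, offset 3.
    assert blocked_h_index(1, 1, 1, 2) == (2, 3)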