You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tvm.apache.org by GitBox <gi...@apache.org> on 2022/06/14 21:49:39 UTC
[GitHub] [tvm] cconvey opened a new pull request, #11720: [hexagon][testing] add max_pool2d benchmark
cconvey opened a new pull request, #11720:
URL: https://github.com/apache/tvm/pull/11720
- Add benchmarking framework for Hexagon maxpool-2d kernels,
and one (simple) kernel.
- Minor refactor hexagon benchmark utilities.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@tvm.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
[GitHub] [tvm] cconvey commented on pull request #11720: [hexagon][testing] add max_pool2d benchmark
Posted by GitBox <gi...@apache.org>.
cconvey commented on PR #11720:
URL: https://github.com/apache/tvm/pull/11720#issuecomment-1155745811
CC: @csullivan
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@tvm.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
[GitHub] [tvm] cconvey commented on a diff in pull request #11720: [hexagon][testing] add max_pool2d benchmark
Posted by GitBox <gi...@apache.org>.
cconvey commented on code in PR #11720:
URL: https://github.com/apache/tvm/pull/11720#discussion_r899294444
##########
tests/python/contrib/test_hexagon/benchmark_maxpool2d.py:
##########
@@ -0,0 +1,379 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+This module serves two purposes:
+ (1) Demonstrates how to write Python code that exercises various
+ Hexagon-related algorithms / features.
+
+ (2) Benchmark the resulting primfuncs.
+
+Current limitations:
+ - Input shapes are limited to NHWC --> NHWC_8h8w32c.
+
+ - Testing parameters (input shapes, dtypes, etc.) currently
+ support only one value for each parameter.
+
+ - H, W, C must be integer multiples of 8, 8, and 32,
+ respectively. I.e., partial blocks aren't currently
+ supported by this script.
+
+ - Requires that I/O tensors reside in "global.VTCM" memory,
+ rather than "global" memory.
+ This prevents benchmarking with I/O tensors that are too
+ large to fit into available VTCM.
+
+ - The script only develops one primfunc.
+ Future revisions to this script are expected to add more
+ primfuncs and demonstrate more coding strategies.
+"""
+
+import sys
+import pytest
+import numpy as np
+import copy
+import tempfile
+import os
+
+import tvm.testing
+from tvm import te, topi, tir
+from tvm.topi import testing
+from tvm.script import tir as T
+from tvm.tir import IndexMap
+from tvm.relay.backend import Executor, Runtime
+from tvm.contrib.hexagon.session import Session
+from typing import List
+
+from .infrastructure import allocate_hexagon_array
+from . import benchmark_util as bu
+
+
+def _ceil_div(numerator, denominator):
+ return (numerator + (denominator - 1)) // denominator
+
+
def _int8_nhwc_8h8w32c_map(n, h, w, c):
    """
    Map NHWC indices to the blocked 8h8w32c index list.

    The result holds the block (major) indices, then 'te.AXIS_SEPARATOR',
    then the within-block (minor) indices.
    """
    block_indices = [n, h // 8, w // 8, c // 32]
    within_block_indices = [h % 8, w % 8, c % 32]
    return block_indices + [te.AXIS_SEPARATOR] + within_block_indices
+
+
def _int8_nhwc_8h8w32c_shape(n, h, w, c) -> List[int]:
    """
    Return the 7-element blocked shape for an NHWC tensor of shape (n, h, w, c).

    H, W and C are each rounded up to a whole number of blocks of size
    8, 8 and 32 respectively; the block sizes form the trailing three entries.
    """
    block_sizes = (8, 8, 32)
    block_counts = [_ceil_div(extent, size) for extent, size in zip((h, w, c), block_sizes)]
    return [n, *block_counts, *block_sizes]
+
+
+def _int8_nhwc_8h8w32c_xform_immediate(arr_in: np.ndarray) -> np.ndarray:
+ """
+ Return a deep copy of 'arr_in', transformed from a NWHC to
+ NHWC-8h8wc32 shape. Any newly created array elements have value 0.
+ """
+ stage1 = copy.copy(arr_in)
+
+ (
+ n,
+ h,
+ w,
+ c,
+ ) = stage1.shape
+
+ (
+ h_minor,
+ w_minor,
+ c_minor,
+ ) = [8, 8, 32]
+
+ h_major = _ceil_div(h, h_minor)
+ w_major = _ceil_div(w, w_minor)
+ c_major = _ceil_div(c, c_minor)
+
+ # This handles cases where the dimensions of arr_in are not cleanly divided
+ # by the minor block size, i.e. [8, 8, 32].
+ #
+ # Any additional array elements that this creates will ahve value 0.
+ # We shouldn't actually care what value is used for those elements, because they
+ # shouldn't be treated as meaningful by any of our algorithms.
+ if (h % h_minor) or (w % w_minor) or (c % c_minor):
+ stage1.resize((n, h_major * h_minor, w_major * w_minor, c_major * c_minor), refcheck=False)
+
+ stage2 = stage1.reshape(n, h_major, h_minor, w_major, w_minor, c_major, c_minor)
+ stage3 = stage2.transpose(0, 1, 3, 5, 2, 4, 6)
+ return stage3
+
+
+def _create_test_input(shape, dtype: str) -> np.ndarray:
+ np_dtype = np.dtype(dtype)
+ min_value = np.iinfo(np_dtype).min
+ max_value = np.iinfo(np_dtype).max
+ return np.random.randint(low=min_value, high=max_value, size=tuple(shape), dtype=np.int8)
+
+
# Class-scoped setup / teardown shared by a group of related benchmark runs.
# See the `TestMaxPool2D` class for its intended usage.
@pytest.fixture(scope="class")
def benchmark_group(request):
    """
    Give the requesting test class a scratch directory and a results table,
    then write the table out as a CSV file once the class's tests are done.

    Before the tests run, sets these attributes on the test class:
      - working_dir: path of a newly created temporary directory.
      - benchmark_table: a new bu.BenchmarksTable.

    After the tests run, the class must have a 'csv_column_order' member; it
    is passed to the table's print_csv().  Fails the group via pytest.fail()
    when the table reports a failure.
    """
    results_dir = tempfile.mkdtemp()
    table = bu.BenchmarksTable()

    test_cls = request.cls
    test_cls.working_dir = results_dir
    test_cls.benchmark_table = table

    yield

    if not hasattr(test_cls, "csv_column_order"):
        raise Exception('Classes using this fixture must have a member named "csv_column_order"')

    tabular_output_filename = os.path.join(results_dir, "benchmark-results.csv")

    with open(tabular_output_filename, "w") as csv_file:
        table.print_csv(csv_file, test_cls.csv_column_order)

    # Make the results file's location easy to spot in the test output.
    banner = "*" * 80
    for line in ("", banner, f"BENCHMARK RESULTS FILE: {tabular_output_filename}", banner, ""):
        print(line)

    if table.has_fail() > 0:
        pytest.fail("At least one benchmark configuration failed", pytrace=False)
+
+
@pytest.mark.usefixtures("benchmark_group")
class TestMaxPool2D:
    """
    Benchmarks a max_pool2d primfunc on Hexagon over a grid of test parameters.

    Relies on the 'benchmark_group' fixture to provide 'self.working_dir' and
    'self.benchmark_table'.  Each parameter combination adds one row to the
    table: SUCCESS (with timings), FAIL, or SKIP.
    """

    # Column order for the CSV file written by the 'benchmark_group' fixture.
    csv_column_order = [
        # Identifies which TE-compute / TIRScript is used as the basis for the
        # benchmarked primfunc. Only needs to be meaningful to humans.
        "basic_kernel",
        # When applicable, indicates the particular variation of schedules
        # applied by the Python code. Decoding this may require looking at this
        # script's source code.
        "sched_type",
        # Values directly based on test parameters...
        "input_shape_4d",
        "block_shape",
        "DTYPE",
        "KERNEL",
        "STRIDE",
        "DILATION",
        "PADDING",
        "IO_TENSOR_MEM_SCOPE",
        # Reserved columns defined by the BenchmarksTable class.
        "row_status",
        "timings_min_usecs",
        "timings_max_usecs",
        "timings_median_usecs",
        "timings_mean_usecs",
        "timings_stddev_usecs",
        # For benchmarks that produce files on the host file system, this indicates
        # their location. Useful for post-mortem investigation of benchmark results.
        "host_files_dir_path",
        # Miscellaneous comments about the benchmark.
        "comments",
    ]

    DTYPE = tvm.testing.parameter("int8")

    # FIXME(cconvey): The script currently fails when H, W, or C is not an
    # integer multiple of 8, 8, or 32, respectively.
    N = tvm.testing.parameter(1)
    H = tvm.testing.parameter(*[x * 8 for x in [1, 4, 16]])
    W = tvm.testing.parameter(*[x * 8 for x in [1, 4, 16]])
    C = tvm.testing.parameter(*[x * 32 for x in [1, 2]])

    KERNEL = tvm.testing.parameter((1, 1), (3, 3))
    STRIDE = tvm.testing.parameter((1, 1))
    DILATION = tvm.testing.parameter((1, 1))
    PADDING = tvm.testing.parameter((0, 0, 0, 0))
    IO_TENSOR_MEM_SCOPE = tvm.testing.parameter("global.vtcm")

    @tvm.testing.requires_hexagon
    def test_maxpool2d_nhwc(
        self,
        N,
        H,
        W,
        C,
        DTYPE,
        KERNEL,
        STRIDE,
        DILATION,
        PADDING,
        IO_TENSOR_MEM_SCOPE,
        hexagon_session: Session,
    ):
        """
        Build, run, check and time one max_pool2d configuration, then record
        the outcome as a single row in 'self.benchmark_table'.
        """
        keys_dict = {
            "basic_kernel": "max_pool2d",
            "sched_type": 1,
            "input_shape_4d": [N, H, W, C],
            "block_shape": [8, 8, 32],
            "DTYPE": DTYPE,
            "KERNEL": KERNEL,
            "STRIDE": STRIDE,
            "DILATION": DILATION,
            "PADDING": PADDING,
            "IO_TENSOR_MEM_SCOPE": IO_TENSOR_MEM_SCOPE,
        }

        desc = bu.get_benchmark_decription(keys_dict)

        # Create the host-side directory for this benchmark run's files / logs...
        host_files_dir_name = bu.get_benchmark_id(keys_dict)
        host_files_dir_path = os.path.join(self.working_dir, host_files_dir_name)
        os.mkdir(host_files_dir_path)

        keys_dict["host_files_dir_path"] = host_files_dir_path

        log_file_path = os.path.join(host_files_dir_path, "out.txt")
        with open(log_file_path, "w") as log_file:
            print(f"CONFIGURATION: {desc}")
            log_file.write(f"CONFIGURATION: {desc}\n")

            try:
                input_tensor_shape_4d = [N, H, W, C]
                input_tensor_shape_7d = _int8_nhwc_8h8w32c_shape(N, H, W, C)

                data = te.placeholder(tuple(input_tensor_shape_4d), dtype=DTYPE)

                output = topi.nn.pool2d(
                    data, KERNEL, STRIDE, DILATION, PADDING, "max", layout="NHWC"
                )
                primfunc = te.create_prim_func([data, output])

                sch = tir.Schedule(primfunc, debug_mask="all")

                # Switch the input buffer to the blocked 8h8w32c layout.
                sch.transform_layout(
                    block="tensor", buffer="placeholder", index_map=_int8_nhwc_8h8w32c_map
                )

                target_hexagon = tvm.target.hexagon("v69", link_params=True)
                built_module = tvm.build(
                    sch.mod, target=tvm.target.Target(target_hexagon, host=target_hexagon)
                )

                # Save a local copy of the Hexagon object code (in the form of a .so file)
                # to allow post-mortem inspection.
                host_dso_binary_path = os.path.join(host_files_dir_path, "test_binary.so")
                built_module.save(host_dso_binary_path)
                print(f"SAVED BINARY TO HOST PATH: {host_dso_binary_path}")

                hexagon_mod = hexagon_session.load_module(built_module)

                # Generate the input tensor's data.
                # Note that we'll eventually need it in two different layouts:
                # (1) NHWC as an argument to testing.poolnd_python.
                # (2) NHWC_8h8w32c as an argument to our Hexagon primfunc.
                a_numpy_4d = _create_test_input(input_tensor_shape_4d, DTYPE)

                ref_output_4d = testing.poolnd_python(
                    a_numpy_4d.astype("int32"),
                    KERNEL,
                    STRIDE,
                    DILATION,
                    PADDING[0:2],
                    PADDING[2:],
                    pool_type="max",
                    dtype="int32",
                    layout="NHWC",
                ).astype(DTYPE)

                output_tensor_shape_4d = ref_output_4d.shape

                a_numpy_7d = _int8_nhwc_8h8w32c_xform_immediate(a_numpy_4d)

                a_hexagon_7d = allocate_hexagon_array(
                    hexagon_session.device,
                    tensor_shape=input_tensor_shape_7d,
                    axis_separators=[4],
                    dtype=DTYPE,
                    mem_scope=IO_TENSOR_MEM_SCOPE,
                )

                c_hexagon_4d = allocate_hexagon_array(
                    hexagon_session.device,
                    tensor_shape=output_tensor_shape_4d,
                    axis_separators=[],
                    dtype=DTYPE,
                    mem_scope=IO_TENSOR_MEM_SCOPE,
                )

                a_hexagon_7d.copyfrom(a_numpy_7d)

                if DTYPE == "int8":
                    rel_tolerance = 0
                    abs_tolerance = 0
                else:
                    assert False, f"Need to decide acceptable tolerances for DTYPE {DTYPE}"

                timer = hexagon_mod.time_evaluator(
                    "main", hexagon_session.device, number=10, repeat=1
                )
                timing_result = timer(a_hexagon_7d, c_hexagon_4d)

                # Check the output left behind by the timed runs against the
                # reference result.
                try:
                    tvm.testing.assert_allclose(
                        ref_output_4d, c_hexagon_4d.numpy(), rtol=rel_tolerance, atol=abs_tolerance
                    )
                except AssertionError as e:
                    raise bu.NumericalAccuracyException(str(e)) from e

            except bu.NumericalAccuracyException as e:
                print()
                print("FAIL: Numerical accuracy error. See log file.")

                log_file.write("\n")
                log_file.write(f"FAIL: {e}\n")

                self.benchmark_table.record_fail(
                    **keys_dict, comments="Numerical accuracy error. See log file."
                )

            except bu.UnsupportedException as e:
                print()
                print(f"SKIP: {e}")

                log_file.write("\n")
                log_file.write(f"SKIP: {e}\n")

                self.benchmark_table.record_skip(
                    **keys_dict, comments=f"Unsupported configuration: {e}"
                )

            else:
                # Record SUCCESS only when the run completed without a failure
                # or skip.  Doing this unconditionally would add a second row
                # for a configuration already recorded as FAIL / SKIP, or hit
                # an unbound 'timing_result' if the error came before timing.
                self.benchmark_table.record_success(timing_result, **keys_dict)
+
+
# Allow running this file directly as a script; pytest's exit status becomes
# the process exit status.
if __name__ == "__main__":
    exit_status = pytest.main(sys.argv)
    sys.exit(exit_status)
Review Comment:
Fixed now I believe.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@tvm.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
[GitHub] [tvm] cconvey commented on pull request #11720: [hexagon][testing] add max_pool2d benchmark
Posted by GitBox <gi...@apache.org>.
cconvey commented on PR #11720:
URL: https://github.com/apache/tvm/pull/11720#issuecomment-1155745345
Example of the current output:
```
$ cat /tmp/tmphut9v83z/benchmark-results.csv | column -s $'\t' -t -n | less -SN
1 basic_kernel sched_type N H W C DTYPE KERNEL STRIDE DILATION PADDING IO_TENSOR_MEM_SCOPE row_status timings_min_usecs timings_max_usecs timings_median_usecs timings_mean_usecs timings_st
2 max_pool2d 1 1 8 8 32 int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 197.200 197.200 197.200 197.200 0.000
3 max_pool2d 1 1 8 8 64 int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 531.300 531.300 531.300 531.300 0.000
4 max_pool2d 1 1 8 16 32 int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 459.500 459.500 459.500 459.500 0.000
5 max_pool2d 1 1 8 16 64 int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 1641.800 1641.800 1641.800 1641.800 0.000
6 max_pool2d 1 1 8 32 32 int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 979.700 979.700 979.700 979.700 0.000
7 max_pool2d 1 1 8 32 64 int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 3513.800 3513.800 3513.800 3513.800 0.000
8 max_pool2d 1 1 8 64 32 int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 2019.900 2019.900 2019.900 2019.900 0.000
9 max_pool2d 1 1 8 64 64 int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 7040.600 7040.600 7040.600 7040.600 0.000
10 max_pool2d 1 1 8 128 32 int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 4100.400 4100.400 4100.400 4100.400 0.000
11 max_pool2d 1 1 8 128 64 int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 13208.500 13208.500 13208.500 13208.500 0.000
12 max_pool2d 1 1 16 8 32 int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 455.400 455.400 455.400 455.400 0.000
13 max_pool2d 1 1 16 8 64 int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 1482.100 1482.100 1482.100 1482.100 0.000
14 max_pool2d 1 1 16 16 32 int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 1537.500 1537.500 1537.500 1537.500 0.000
15 max_pool2d 1 1 16 16 64 int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 3894.300 3894.300 3894.300 3894.300 0.000
16 max_pool2d 1 1 16 32 32 int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 3286.100 3286.100 3286.100 3286.100 0.000
17 max_pool2d 1 1 16 32 64 int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 7447.000 7447.000 7447.000 7447.000 0.000
18 max_pool2d 1 1 16 64 32 int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 6665.300 6665.300 6665.300 6665.300 0.000
19 max_pool2d 1 1 16 64 64 int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 15824.300 15824.300 15824.300 15824.300 0.000
20 max_pool2d 1 1 16 128 32 int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 12444.100 12444.100 12444.100 12444.100 0.000
21 max_pool2d 1 1 16 128 64 int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 32704.500 32704.500 32704.500 32704.500 0.000
22 max_pool2d 1 1 32 8 32 int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 971.800 971.800 971.800 971.800 0.000
23 max_pool2d 1 1 32 8 64 int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 3171.200 3171.200 3171.200 3171.200 0.000
24 max_pool2d 1 1 32 16 32 int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 3289.100 3289.100 3289.100 3289.100 0.000
25 max_pool2d 1 1 32 16 64 int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 8009.500 8009.500 8009.500 8009.500 0.000
26 max_pool2d 1 1 32 32 32 int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 6866.800 6866.800 6866.800 6866.800 0.000
27 max_pool2d 1 1 32 32 64 int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 15006.800 15006.800 15006.800 15006.800 0.000
28 max_pool2d 1 1 32 64 32 int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 13205.000 13205.000 13205.000 13205.000 0.000
29 max_pool2d 1 1 32 64 64 int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 36247.900 36247.900 36247.900 36247.900 0.000
30 max_pool2d 1 1 32 128 32 int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 28164.600 28164.600 28164.600 28164.600 0.000
31 max_pool2d 1 1 32 128 64 int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 67673.200 67673.200 67673.200 67673.200 0.000
32 max_pool2d 1 1 64 8 32 int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 2004.100 2004.100 2004.100 2004.100 0.000
33 max_pool2d 1 1 64 8 64 int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 6370.700 6370.700 6370.700 6370.700 0.000
34 max_pool2d 1 1 64 16 32 int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 6640.800 6640.800 6640.800 6640.800 0.000
35 max_pool2d 1 1 64 16 64 int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 15813.800 15813.800 15813.800 15813.800 0.000
36 max_pool2d 1 1 64 32 32 int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 13045.600 13045.600 13045.600 13045.600 0.000
37 max_pool2d 1 1 64 32 64 int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 33073.000 33073.000 33073.000 33073.000 0.000
38 max_pool2d 1 1 64 64 32 int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 28791.800 28791.800 28791.800 28791.800 0.000
39 max_pool2d 1 1 64 64 64 int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 76532.600 76532.600 76532.600 76532.600 0.000
40 max_pool2d 1 1 64 128 32 int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 59452.100 59452.100 59452.100 59452.100 0.000
41 max_pool2d 1 1 64 128 64 int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 155364.400 155364.400 155364.400 155364.400 0.000
42 max_pool2d 1 1 128 8 32 int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 4068.500 4068.500 4068.500 4068.500 0.000
43 max_pool2d 1 1 128 8 64 int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 12074.000 12074.000 12074.000 12074.000 0.000
44 max_pool2d 1 1 128 16 32 int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 12445.000 12445.000 12445.000 12445.000 0.000
45 max_pool2d 1 1 128 16 64 int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 34239.600 34239.600 34239.600 34239.600 0.000
46 max_pool2d 1 1 128 32 32 int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 27057.200 27057.200 27057.200 27057.200 0.000
47 max_pool2d 1 1 128 32 64 int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 69079.900 69079.900 69079.900 69079.900 0.000
48 max_pool2d 1 1 128 64 32 int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 63014.100 63014.100 63014.100 63014.100 0.000
49 max_pool2d 1 1 128 64 64 int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 135556.200 135556.200 135556.200 135556.200 0.000
50 max_pool2d 1 1 128 128 32 int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 131045.600 131045.600 131045.600 131045.600 0.000
51 max_pool2d 1 1 128 128 64 int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 298585.000 298585.000 298585.000 298585.000 0.000
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@tvm.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
[GitHub] [tvm] csullivan merged pull request #11720: [hexagon][testing] add max_pool2d benchmark
Posted by GitBox <gi...@apache.org>.
csullivan merged PR #11720:
URL: https://github.com/apache/tvm/pull/11720
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@tvm.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
[GitHub] [tvm] cconvey commented on pull request #11720: [hexagon][testing] add max_pool2d benchmark
Posted by GitBox <gi...@apache.org>.
cconvey commented on PR #11720:
URL: https://github.com/apache/tvm/pull/11720#issuecomment-1156664401
Updated benchmark output, due to a new revision of the PR code:
```
basic_kernel sched_type input_shape_4d block_shape DTYPE KERNEL STRIDE DILATION PADDING IO_TENSOR_MEM_SCOPE row_status timings_min_usecs timings_max_usecs timings_median_usecs timings_mean_usecs timings_stddev_usecs host_files_dir_path comments
max_pool2d 1 [1, 8, 8, 32] [8, 8, 32] int8 (1, 1) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 76.200 76.200 76.200 76.200 0.000 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_8_8_32-block_shape:8_8_32-DTYPE:int8-KERNEL:1_1-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm
max_pool2d 1 [1, 8, 8, 32] [8, 8, 32] int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 197.300 197.300 197.300 197.300 0.000 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_8_8_32-block_shape:8_8_32-DTYPE:int8-KERNEL:3_3-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm
max_pool2d 1 [1, 8, 8, 64] [8, 8, 32] int8 (1, 1) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 143.600 143.600 143.600 143.600 0.000 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_8_8_64-block_shape:8_8_32-DTYPE:int8-KERNEL:1_1-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm
max_pool2d 1 [1, 8, 8, 64] [8, 8, 32] int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 531.100 531.100 531.100 531.100 0.000 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_8_8_64-block_shape:8_8_32-DTYPE:int8-KERNEL:3_3-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm
max_pool2d 1 [1, 8, 32, 32] [8, 8, 32] int8 (1, 1) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 283.500 283.500 283.500 283.500 0.000 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_8_32_32-block_shape:8_8_32-DTYPE:int8-KERNEL:1_1-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm
max_pool2d 1 [1, 8, 32, 32] [8, 8, 32] int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 979.800 979.800 979.800 979.800 0.000 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_8_32_32-block_shape:8_8_32-DTYPE:int8-KERNEL:3_3-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm
max_pool2d 1 [1, 8, 32, 64] [8, 8, 32] int8 (1, 1) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 529.200 529.200 529.200 529.200 0.000 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_8_32_64-block_shape:8_8_32-DTYPE:int8-KERNEL:1_1-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm
max_pool2d 1 [1, 8, 32, 64] [8, 8, 32] int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 3513.700 3513.700 3513.700 3513.700 0.000 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_8_32_64-block_shape:8_8_32-DTYPE:int8-KERNEL:3_3-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm
max_pool2d 1 [1, 8, 128, 32] [8, 8, 32] int8 (1, 1) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 1122.700 1122.700 1122.700 1122.700 0.000 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_8_128_32-block_shape:8_8_32-DTYPE:int8-KERNEL:1_1-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm
max_pool2d 1 [1, 8, 128, 32] [8, 8, 32] int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 4100.300 4100.300 4100.300 4100.300 0.000 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_8_128_32-block_shape:8_8_32-DTYPE:int8-KERNEL:3_3-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm
max_pool2d 1 [1, 8, 128, 64] [8, 8, 32] int8 (1, 1) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 2105.000 2105.000 2105.000 2105.000 0.000 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_8_128_64-block_shape:8_8_32-DTYPE:int8-KERNEL:1_1-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm
max_pool2d 1 [1, 8, 128, 64] [8, 8, 32] int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 13259.000 13259.000 13259.000 13259.000 0.000 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_8_128_64-block_shape:8_8_32-DTYPE:int8-KERNEL:3_3-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm
max_pool2d 1 [1, 32, 8, 32] [8, 8, 32] int8 (1, 1) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 4.600 4.600 4.600 4.600 0.000 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_32_8_32-block_shape:8_8_32-DTYPE:int8-KERNEL:1_1-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm
max_pool2d 1 [1, 32, 8, 32] [8, 8, 32] int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 971.900 971.900 971.900 971.900 0.000 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_32_8_32-block_shape:8_8_32-DTYPE:int8-KERNEL:3_3-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm
max_pool2d 1 [1, 32, 8, 64] [8, 8, 32] int8 (1, 1) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 528.700 528.700 528.700 528.700 0.000 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_32_8_64-block_shape:8_8_32-DTYPE:int8-KERNEL:1_1-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm
max_pool2d 1 [1, 32, 8, 64] [8, 8, 32] int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 3171.100 3171.100 3171.100 3171.100 0.000 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_32_8_64-block_shape:8_8_32-DTYPE:int8-KERNEL:3_3-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm
max_pool2d 1 [1, 32, 32, 32] [8, 8, 32] int8 (1, 1) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 1107.800 1107.800 1107.800 1107.800 0.000 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_32_32_32-block_shape:8_8_32-DTYPE:int8-KERNEL:1_1-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm
max_pool2d 1 [1, 32, 32, 32] [8, 8, 32] int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 6864.800 6864.800 6864.800 6864.800 0.000 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_32_32_32-block_shape:8_8_32-DTYPE:int8-KERNEL:3_3-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm
max_pool2d 1 [1, 32, 32, 64] [8, 8, 32] int8 (1, 1) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 2108.700 2108.700 2108.700 2108.700 0.000 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_32_32_64-block_shape:8_8_32-DTYPE:int8-KERNEL:1_1-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm
max_pool2d 1 [1, 32, 32, 64] [8, 8, 32] int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 14937.400 14937.400 14937.400 14937.400 0.000 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_32_32_64-block_shape:8_8_32-DTYPE:int8-KERNEL:3_3-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm
max_pool2d 1 [1, 32, 128, 32] [8, 8, 32] int8 (1, 1) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 4418.500 4418.500 4418.500 4418.500 0.000 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_32_128_32-block_shape:8_8_32-DTYPE:int8-KERNEL:1_1-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm
max_pool2d 1 [1, 32, 128, 32] [8, 8, 32] int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 27957.400 27957.400 27957.400 27957.400 0.000 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_32_128_32-block_shape:8_8_32-DTYPE:int8-KERNEL:3_3-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm
max_pool2d 1 [1, 32, 128, 64] [8, 8, 32] int8 (1, 1) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 8104.600 8104.600 8104.600 8104.600 0.000 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_32_128_64-block_shape:8_8_32-DTYPE:int8-KERNEL:1_1-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm
max_pool2d 1 [1, 32, 128, 64] [8, 8, 32] int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 73345.100 73345.100 73345.100 73345.100 0.000 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_32_128_64-block_shape:8_8_32-DTYPE:int8-KERNEL:3_3-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm
max_pool2d 1 [1, 128, 8, 32] [8, 8, 32] int8 (1, 1) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 5.800 5.800 5.800 5.800 0.000 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_128_8_32-block_shape:8_8_32-DTYPE:int8-KERNEL:1_1-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm
max_pool2d 1 [1, 128, 8, 32] [8, 8, 32] int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 4068.300 4068.300 4068.300 4068.300 0.000 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_128_8_32-block_shape:8_8_32-DTYPE:int8-KERNEL:3_3-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm
max_pool2d 1 [1, 128, 8, 64] [8, 8, 32] int8 (1, 1) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 2103.200 2103.200 2103.200 2103.200 0.000 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_128_8_64-block_shape:8_8_32-DTYPE:int8-KERNEL:1_1-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm
max_pool2d 1 [1, 128, 8, 64] [8, 8, 32] int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 12057.600 12057.600 12057.600 12057.600 0.000 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_128_8_64-block_shape:8_8_32-DTYPE:int8-KERNEL:3_3-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm
max_pool2d 1 [1, 128, 32, 32] [8, 8, 32] int8 (1, 1) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 4419.300 4419.300 4419.300 4419.300 0.000 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_128_32_32-block_shape:8_8_32-DTYPE:int8-KERNEL:1_1-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm
max_pool2d 1 [1, 128, 32, 32] [8, 8, 32] int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 28135.200 28135.200 28135.200 28135.200 0.000 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_128_32_32-block_shape:8_8_32-DTYPE:int8-KERNEL:3_3-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm
max_pool2d 1 [1, 128, 32, 64] [8, 8, 32] int8 (1, 1) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 7944.200 7944.200 7944.200 7944.200 0.000 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_128_32_64-block_shape:8_8_32-DTYPE:int8-KERNEL:1_1-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm
max_pool2d 1 [1, 128, 32, 64] [8, 8, 32] int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 60932.900 60932.900 60932.900 60932.900 0.000 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_128_32_64-block_shape:8_8_32-DTYPE:int8-KERNEL:3_3-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm
max_pool2d 1 [1, 128, 128, 32] [8, 8, 32] int8 (1, 1) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 16996.200 16996.200 16996.200 16996.200 0.000 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_128_128_32-block_shape:8_8_32-DTYPE:int8-KERNEL:1_1-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm
max_pool2d 1 [1, 128, 128, 32] [8, 8, 32] int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 126598.000 126598.000 126598.000 126598.000 0.000 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_128_128_32-block_shape:8_8_32-DTYPE:int8-KERNEL:3_3-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm
max_pool2d 1 [1, 128, 128, 64] [8, 8, 32] int8 (1, 1) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 32163.600 32163.600 32163.600 32163.600 0.000 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_128_128_64-block_shape:8_8_32-DTYPE:int8-KERNEL:1_1-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm
max_pool2d 1 [1, 128, 128, 64] [8, 8, 32] int8 (3, 3) (1, 1) (1, 1) (0, 0, 0, 0) global.vtcm SUCCESS 312788.700 312788.700 312788.700 312788.700 0.000 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_128_128_64-block_shape:8_8_32-DTYPE:int8-KERNEL:3_3-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@tvm.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
[GitHub] [tvm] mehrdadh commented on a diff in pull request #11720: [hexagon][testing] add max_pool2d benchmark
Posted by GitBox <gi...@apache.org>.
mehrdadh commented on code in PR #11720:
URL: https://github.com/apache/tvm/pull/11720#discussion_r898271843
##########
tests/python/contrib/test_hexagon/benchmark_maxpool2d.py:
##########
@@ -0,0 +1,379 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+This module serves two purposes:
+ (1) Demonstrates how to write Python code that exercises various
+ Hexagon-related algorithms / features.
+
+ (2) Benchmark the resulting primfuncs.
+
+Current limitations:
+ - Input shapes are limited to NHWC --> NHWC_8h8w32c.
+
+ - Testing parameters (input shapes, dtypes, etc.) currently
+ support only one value for each parameter.
+
+ - H, W, C must be integer multiples of 8, 8, and 32,
+ respectively. I.e., partial blocks aren't currently
+ supported by this script.
+
+ - Requires that I/O tensors reside in "global.vtcm" memory,
+ rather than "global" memory.
+ This prevents benchmarking with I/O tensors that are too
+ large to fit into the available VTCM.
+
+ - The script only develops one primfunc.
+ Future revisions to this script are expected to add more
+ primfuncs and demonstrate more coding strategies.
+"""
+
+import sys
+import pytest
+import numpy as np
+import copy
+import tempfile
+import os
+
+import tvm.testing
+from tvm import te, topi, tir
+from tvm.topi import testing
+from tvm.script import tir as T
+from tvm.tir import IndexMap
+from tvm.relay.backend import Executor, Runtime
+from tvm.contrib.hexagon.session import Session
+from typing import List
+
+from .infrastructure import allocate_hexagon_array
+from . import benchmark_util as bu
+
+
+def _ceil_div(numerator, denominator):
+ return (numerator + (denominator - 1)) // denominator
+
+
+def _int8_nhwc_8h8w32c_map(n, h, w, c):
+ return [
+ n,
+ h // 8,
+ w // 8,
+ c // 32,
+ te.AXIS_SEPARATOR,
+ h % 8,
+ w % 8,
+ c % 32,
+ ]
+
+
+def _int8_nhwc_8h8w32c_shape(n, h, w, c) -> List[int]:
+ return [
+ n,
+ _ceil_div(h, 8),
+ _ceil_div(w, 8),
+ _ceil_div(c, 32),
+ 8,
+ 8,
+ 32,
+ ]
+
+
+def _int8_nhwc_8h8w32c_xform_immediate(arr_in: np.ndarray) -> np.ndarray:
+ """
+ Return a deep copy of 'arr_in', transformed from an NHWC to
+ NHWC-8h8w32c shape. Any newly created array elements have value 0.
+ """
+ stage1 = copy.copy(arr_in)
+
+ (
+ n,
+ h,
+ w,
+ c,
+ ) = stage1.shape
+
+ (
+ h_minor,
+ w_minor,
+ c_minor,
+ ) = [8, 8, 32]
+
+ h_major = _ceil_div(h, h_minor)
+ w_major = _ceil_div(w, w_minor)
+ c_major = _ceil_div(c, c_minor)
+
+ # This handles cases where the dimensions of arr_in are not cleanly divided
+ # by the minor block size, i.e. [8, 8, 32].
+ #
+ # Any additional array elements that this creates will have value 0.
+ # We shouldn't actually care what value is used for those elements, because they
+ # shouldn't be treated as meaningful by any of our algorithms.
+ if (h % h_minor) or (w % w_minor) or (c % c_minor):
+ stage1.resize((n, h_major * h_minor, w_major * w_minor, c_major * c_minor), refcheck=False)
+
+ stage2 = stage1.reshape(n, h_major, h_minor, w_major, w_minor, c_major, c_minor)
+ stage3 = stage2.transpose(0, 1, 3, 5, 2, 4, 6)
+ return stage3
+
+
+def _create_test_input(shape, dtype: str) -> np.ndarray:
+ np_dtype = np.dtype(dtype)
+ min_value = np.iinfo(np_dtype).min
+ max_value = np.iinfo(np_dtype).max
+ return np.random.randint(low=min_value, high=max_value, size=tuple(shape), dtype=np.int8)
+
+
+# This fixture provides some initialization / finalization logic for groups of related
+# benchmark runs.
+# See the `TestMaxPool2D` class for its intended usage.
+@pytest.fixture(scope="class")
+def benchmark_group(request):
+ working_dir = tempfile.mkdtemp()
+ bt = bu.BenchmarksTable()
+
+ request.cls.working_dir = working_dir
+ request.cls.benchmark_table = bt
+
+ yield
+
+ tabular_output_filename = os.path.join(working_dir, "benchmark-results.csv")
+
+ if not hasattr(request.cls, "csv_column_order"):
+ raise Exception('Classes using this fixture must have a member named "csv_column_order"')
+
+ with open(tabular_output_filename, "w") as csv_file:
+ bt.print_csv(csv_file, request.cls.csv_column_order)
+
+ print()
+ print("*" * 80)
+ print(f"BENCHMARK RESULTS FILE: {tabular_output_filename}")
+ print("*" * 80)
+ print()
+
+ if bt.has_fail() > 0:
+ pytest.fail("At least one benchmark configuration failed", pytrace=False)
+
+
+@pytest.mark.usefixtures("benchmark_group")
+class TestMaxPool2D:
+ csv_column_order = [
+ # Identifies which TE-compute / TIRScript is used as the basis for the
+ # benchmarked primfunc. Only needs to be meaningful to humans.
+ "basic_kernel",
+ # When applicable, indicates the particular variation of schedules
+ # applied by the Python code. Decoding this may require looking at this
+ # script's source code.
+ "sched_type",
+ # Values directly based on test parameters...
+ "input_shape_4d",
+ "block_shape",
+ "DTYPE",
+ "KERNEL",
+ "STRIDE",
+ "DILATION",
+ "PADDING",
+ "IO_TENSOR_MEM_SCOPE",
+ # Reserved columns defined by the BenchmarksTable class.
+ "row_status",
+ "timings_min_usecs",
+ "timings_max_usecs",
+ "timings_median_usecs",
+ "timings_mean_usecs",
+ "timings_stddev_usecs",
+ # For benchmarks that produce files on the host file system, this indicates
+ # their location. Useful for post-mortem investigation of benchmark results.
+ "host_files_dir_path",
+ # Miscellaneous comments about the benchmark.
+ "comments",
+ ]
+
+ DTYPE = tvm.testing.parameter("int8")
+
+ # FIXME(cconvey): The script currently fails when H, W, or C is not an
+ # integer multiple of 8, 8, or 32, respectively.
+ N = tvm.testing.parameter(1)
+ H = tvm.testing.parameter(*[x * 8 for x in [1, 4, 16]])
+ W = tvm.testing.parameter(*[x * 8 for x in [1, 4, 16]])
+ C = tvm.testing.parameter(*[x * 32 for x in [1, 2]])
+
+ KERNEL = tvm.testing.parameter((1, 1), (3, 3))
+ STRIDE = tvm.testing.parameter((1, 1))
+ DILATION = tvm.testing.parameter((1, 1))
+ PADDING = tvm.testing.parameter((0, 0, 0, 0))
+ IO_TENSOR_MEM_SCOPE = tvm.testing.parameter("global.vtcm")
+
+ @tvm.testing.requires_hexagon
+ def test_maxpool2d_nhwc(
+ self,
+ N,
+ H,
+ W,
+ C,
+ DTYPE,
+ KERNEL,
+ STRIDE,
+ DILATION,
+ PADDING,
+ IO_TENSOR_MEM_SCOPE,
+ hexagon_session: Session,
+ ):
+ keys_dict = {
+ "basic_kernel": "max_pool2d",
+ "sched_type": 1,
+ "input_shape_4d": [N, H, W, C],
+ "block_shape": [8, 8, 32],
+ "DTYPE": DTYPE,
+ "KERNEL": KERNEL,
+ "STRIDE": STRIDE,
+ "DILATION": DILATION,
+ "PADDING": PADDING,
+ "IO_TENSOR_MEM_SCOPE": IO_TENSOR_MEM_SCOPE,
+ }
+
+ desc = bu.get_benchmark_decription(keys_dict)
+
+ # Create the host-side directory for this benchmark run's files / logs...
+ host_files_dir_name = bu.get_benchmark_id(keys_dict)
+ host_files_dir_path = os.path.join(self.working_dir, host_files_dir_name)
+ os.mkdir(host_files_dir_path)
+
+ keys_dict["host_files_dir_path"] = host_files_dir_path
+
+ log_file_path = os.path.join(host_files_dir_path, "out.txt")
+ with open(log_file_path, "w") as log_file:
+ print(f"CONFIGURATION: {desc}")
+ log_file.write(f"CONFIGURATION: {desc}\n")
+
+ try:
+ input_tensor_shape_4d = [N, H, W, C]
+ input_tensor_shape_7d = _int8_nhwc_8h8w32c_shape(N, H, W, C)
+
+ data = te.placeholder(tuple(input_tensor_shape_4d), dtype=DTYPE)
+
+ output = topi.nn.pool2d(
+ data, KERNEL, STRIDE, DILATION, PADDING, "max", layout="NHWC"
+ )
+ primfunc = te.create_prim_func([data, output])
+
+ sch = tir.Schedule(primfunc, debug_mask="all")
+
+ sch.transform_layout(
+ block="tensor", buffer="placeholder", index_map=_int8_nhwc_8h8w32c_map
+ )
+
+ target_hexagon = tvm.target.hexagon("v69", link_params=True)
+ # func = tvm.build(sch.mod, target=tvm.target.Target(target_hexagon, host=target_hexagon))
+ built_module = tvm.build(
+ sch.mod, target=tvm.target.Target(target_hexagon, host=target_hexagon)
+ )
+
+ # Save a local copy of the Hexagon object code (in the form of a .so file)
+ # to allow post-mortem inspection.
+ host_dso_binary_path = os.path.join(host_files_dir_path, "test_binary.so")
+ built_module.save(host_dso_binary_path)
+ print(f"SAVED BINARY TO HOST PATH: {host_dso_binary_path}")
+
+ hexagon_mod = hexagon_session.load_module(built_module)
+
+ # Generate the input tensor's data.
+ # Note that we'll eventually need it in two different layouts:
+ # (1) NHWC as an argument to testing.poolnd_python.
+ # (2) NHWC_8h8w32c as an argument to our Hexagon primfunc.
+ # a_numpy_4d = np.random.randint(low=-128, high=127, size=input_tensor_shape_4d, dtype=np.int8)
+ a_numpy_4d = _create_test_input(input_tensor_shape_4d, DTYPE)
+
+ ref_output_4d = testing.poolnd_python(
+ a_numpy_4d.astype("int32"),
+ KERNEL,
+ STRIDE,
+ DILATION,
+ PADDING[0:2],
+ PADDING[2:],
+ pool_type="max",
+ dtype="int32",
+ layout="NHWC",
+ ).astype(DTYPE)
+
+ output_tensor_shape_4d = ref_output_4d.shape
+
+ a_numpy_7d = _int8_nhwc_8h8w32c_xform_immediate(a_numpy_4d)
+
+ a_hexagon_7d = allocate_hexagon_array(
+ hexagon_session.device,
+ tensor_shape=input_tensor_shape_7d,
+ axis_separators=[4],
+ dtype=DTYPE,
+ mem_scope=IO_TENSOR_MEM_SCOPE,
+ )
+
+ c_hexagon_4d = allocate_hexagon_array(
+ hexagon_session.device,
+ tensor_shape=output_tensor_shape_4d,
+ axis_separators=[],
+ dtype=DTYPE,
+ mem_scope=IO_TENSOR_MEM_SCOPE,
+ )
+
+ a_hexagon_7d.copyfrom(a_numpy_7d)
+
+ if DTYPE == "int8":
+ rel_tolerance = 0
+ abs_tolerance = 0
+ else:
+ assert False, f"Need to decide acceptable tolerances for DTYPE {DTYPE}"
+
+ # hexagon_mod(a_hexagon_7d, c_hexagon_4d)
+ # tvm.testing.assert_allclose(ref_output_4d, c_hexagon_4d.numpy(), rtol=rel_tolerance, atol=abs_tolerance)
+
+ timer = hexagon_mod.time_evaluator(
+ "main", hexagon_session.device, number=10, repeat=1
+ )
+ timing_result = timer(a_hexagon_7d, c_hexagon_4d)
+
+ try:
+ tvm.testing.assert_allclose(
+ ref_output_4d, c_hexagon_4d.numpy(), rtol=rel_tolerance, atol=abs_tolerance
+ )
+ except AssertionError as e:
+ raise bu.NumericalAccuracyException(str(e))
+
+ except bu.NumericalAccuracyException as e:
+ print()
+ print(f"FAIL: Numerical accuracy error. See log file.")
+
+ log_file.write("\n")
+ log_file.write(f"FAIL: {e}\n")
+
+ self.benchmark_table.record_fail(
+ **keys_dict, comments=f"Numerical accuracy error. See log file."
+ )
+
+ except bu.UnsupportedException as e:
+ print()
+ print(f"SKIP: {e}")
+
+ log_file.write("\n")
+ log_file.write(f"SKIP: {e}\n")
+
+ self.benchmark_table.record_skip(
+ **keys_dict, comments=f"Unsupported configuration: {e}"
+ )
+
+ self.benchmark_table.record_success(timing_result, **keys_dict)
+
+
+if __name__ == "__main__":
+ sys.exit(pytest.main(sys.argv))
Review Comment:
Change `sys.exit(pytest.main(sys.argv))` to `tvm.testing.main()`.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@tvm.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org