Posted to commits@tvm.apache.org by GitBox <gi...@apache.org> on 2022/06/14 21:49:39 UTC

[GitHub] [tvm] cconvey opened a new pull request, #11720: [hexagon][testing] add max_pool2d benchmark

cconvey opened a new pull request, #11720:
URL: https://github.com/apache/tvm/pull/11720

   - Add a benchmarking framework for Hexagon max_pool2d kernels,
     and one (simple) kernel.
   
   - Minor refactoring of the Hexagon benchmark utilities.




[GitHub] [tvm] cconvey commented on pull request #11720: [hexagon][testing] add max_pool2d benchmark

Posted by GitBox <gi...@apache.org>.
cconvey commented on PR #11720:
URL: https://github.com/apache/tvm/pull/11720#issuecomment-1155745811

   CC: @csullivan 




[GitHub] [tvm] cconvey commented on a diff in pull request #11720: [hexagon][testing] add max_pool2d benchmark

Posted by GitBox <gi...@apache.org>.
cconvey commented on code in PR #11720:
URL: https://github.com/apache/tvm/pull/11720#discussion_r899294444


##########
tests/python/contrib/test_hexagon/benchmark_maxpool2d.py:
##########
@@ -0,0 +1,379 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+This module serves two purposes:
+    (1) Demonstrates how to write Python code that exercises various
+        Hexagon-related algorithms / features.
+
+    (2) Benchmarks the resulting primfuncs.
+
+Current limitations:
+    - Input shapes are limited to NHWC --> NHWC_8h8w32c.
+
+    - Testing parameters (input shapes, dtypes, etc.) currently
+      support only one value for each parameter.
+
+    - H, W, C must be integer multiples of 8, 8, and 32,
+      respectively.  I.e., partial blocks aren't currently
+      supported by this script.
+
+    - Requires that I/O tensors reside in "global.vtcm" memory,
+      rather than "global" memory.
+      This prevents benchmarking with I/O tensors that are too
+      large to fit into available VTCM.
+
+    - The script only develops one primfunc.
+      Future revisions to this script are expected to add more
+      primfuncs and demonstrate more coding strategies.
+"""
+
+import sys
+import pytest
+import numpy as np
+import copy
+import tempfile
+import os
+
+import tvm.testing
+from tvm import te, topi, tir
+from tvm.topi import testing
+from tvm.script import tir as T
+from tvm.tir import IndexMap
+from tvm.relay.backend import Executor, Runtime
+from tvm.contrib.hexagon.session import Session
+from typing import List
+
+from .infrastructure import allocate_hexagon_array
+from . import benchmark_util as bu
+
+
+def _ceil_div(numerator, denominator):
+    return (numerator + (denominator - 1)) // denominator
+
+
+def _int8_nhwc_8h8w32c_map(n, h, w, c):
+    return [
+        n,
+        h // 8,
+        w // 8,
+        c // 32,
+        te.AXIS_SEPARATOR,
+        h % 8,
+        w % 8,
+        c % 32,
+    ]
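+
+# For illustration: under the 8h8w32c map above, the logical index
+# (n, h, w, c) = (0, 10, 3, 40) maps to
+# (0, 1, 0, 1, <te.AXIS_SEPARATOR>, 2, 3, 8).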
+
+
+def _int8_nhwc_8h8w32c_shape(n, h, w, c) -> List[int]:
+    return [
+        n,
+        _ceil_div(h, 8),
+        _ceil_div(w, 8),
+        _ceil_div(c, 32),
+        8,
+        8,
+        32,
+    ]
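+
+# For illustration: a 4-D shape (1, 64, 64, 32) maps to the 7-D physical
+# shape [1, 8, 8, 1, 8, 8, 32].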
+
+
+def _int8_nhwc_8h8w32c_xform_immediate(arr_in: np.ndarray) -> np.ndarray:
+    """
+    Return a deep copy of 'arr_in', transformed from an NHWC to an
+    NHWC-8h8w32c shape.  Any newly created array elements have value 0.
+    """
+    stage1 = copy.copy(arr_in)
+
+    (
+        n,
+        h,
+        w,
+        c,
+    ) = stage1.shape
+
+    (
+        h_minor,
+        w_minor,
+        c_minor,
+    ) = [8, 8, 32]
+
+    h_major = _ceil_div(h, h_minor)
+    w_major = _ceil_div(w, w_minor)
+    c_major = _ceil_div(c, c_minor)
+
+    # This handles cases where the dimensions of arr_in are not evenly divisible
+    # by the minor block size, i.e. [8, 8, 32].
+    #
+    # Any additional array elements that this creates will have value 0.
+    # We shouldn't actually care what value is used for those elements, because they
+    # shouldn't be treated as meaningful by any of our algorithms.
+    #
+    # Note: np.ndarray.resize pads the *flattened* buffer with zeros, which would
+    # scramble the per-axis layout; np.pad appends zeros along each axis instead.
+    if (h % h_minor) or (w % w_minor) or (c % c_minor):
+        stage1 = np.pad(
+            stage1,
+            (
+                (0, 0),
+                (0, h_major * h_minor - h),
+                (0, w_major * w_minor - w),
+                (0, c_major * c_minor - c),
+            ),
+            mode="constant",
+            constant_values=0,
+        )
+
+    stage2 = stage1.reshape(n, h_major, h_minor, w_major, w_minor, c_major, c_minor)
+    stage3 = stage2.transpose(0, 1, 3, 5, 2, 4, 6)
+    return stage3
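+
+# For illustration: an input of shape (1, 64, 64, 32) is reshaped to
+# (1, 8, 8, 8, 8, 1, 32), then transposed to (1, 8, 8, 1, 8, 8, 32),
+# matching _int8_nhwc_8h8w32c_shape above.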
+
+
+def _create_test_input(shape, dtype: str) -> np.ndarray:
+    np_dtype = np.dtype(dtype)
+    min_value = np.iinfo(np_dtype).min
+    max_value = np.iinfo(np_dtype).max
+    # np.random.randint's 'high' bound is exclusive; max_value + 1 makes the
+    # dtype's full range reachable.
+    return np.random.randint(low=min_value, high=max_value + 1, size=tuple(shape), dtype=np_dtype)
+
+
+# This fixture provides some initialization / finalization logic for groups of related
+# benchmark runs.
+# See the `TestMaxPool2D` class for its intended usage.
+@pytest.fixture(scope="class")
+def benchmark_group(request):
+    working_dir = tempfile.mkdtemp()
+    bt = bu.BenchmarksTable()
+
+    request.cls.working_dir = working_dir
+    request.cls.benchmark_table = bt
+
+    yield
+
+    tabular_output_filename = os.path.join(working_dir, "benchmark-results.csv")
+
+    if not hasattr(request.cls, "csv_column_order"):
+        raise Exception('Classes using this fixture must have a member named "csv_column_order"')
+
+    with open(tabular_output_filename, "w") as csv_file:
+        bt.print_csv(csv_file, request.cls.csv_column_order)
+
+    print()
+    print("*" * 80)
+    print(f"BENCHMARK RESULTS FILE: {tabular_output_filename}")
+    print("*" * 80)
+    print()
+
+    if bt.has_fail() > 0:
+        pytest.fail("At least one benchmark configuration failed", pytrace=False)
+
+
+@pytest.mark.usefixtures("benchmark_group")
+class TestMaxPool2D:
+    csv_column_order = [
+        # Identifies which TE-compute / TIRScript is used as the basis for the
+        # benchmarked primfunc. Only needs to be meaningful to humans.
+        "basic_kernel",
+        # When applicable, indicates the particular variation of schedules
+        # applied by the Python code. Decoding this may require looking at this
+        # script's source code.
+        "sched_type",
+        # Values directly based on test parameters...
+        "input_shape_4d",
+        "block_shape",
+        "DTYPE",
+        "KERNEL",
+        "STRIDE",
+        "DILATION",
+        "PADDING",
+        "IO_TENSOR_MEM_SCOPE",
+        # Reserved columns defined by the BenchmarksTable class.
+        "row_status",
+        "timings_min_usecs",
+        "timings_max_usecs",
+        "timings_median_usecs",
+        "timings_mean_usecs",
+        "timings_stddev_usecs",
+        # For benchmarks that produce files on the host file system, this indicates
+        # their location. Useful for post-mortem investigation of benchmark results.
+        "host_files_dir_path",
+        # Miscellaneous comments about the benchmark.
+        "comments",
+    ]
+
+    DTYPE = tvm.testing.parameter("int8")
+
+    # FIXME(cconvey): The script currently fails when H, W, or C is not an
+    # integer multiple of 8, 8, or 32, respectively.
+    N = tvm.testing.parameter(1)
+    H = tvm.testing.parameter(*[x * 8 for x in [1, 4, 16]])
+    W = tvm.testing.parameter(*[x * 8 for x in [1, 4, 16]])
+    C = tvm.testing.parameter(*[x * 32 for x in [1, 2]])
+
+    KERNEL = tvm.testing.parameter((1, 1), (3, 3))
+    STRIDE = tvm.testing.parameter((1, 1))
+    DILATION = tvm.testing.parameter((1, 1))
+    PADDING = tvm.testing.parameter((0, 0, 0, 0))
+    IO_TENSOR_MEM_SCOPE = tvm.testing.parameter("global.vtcm")
+
+    @tvm.testing.requires_hexagon
+    def test_maxpool2d_nhwc(
+        self,
+        N,
+        H,
+        W,
+        C,
+        DTYPE,
+        KERNEL,
+        STRIDE,
+        DILATION,
+        PADDING,
+        IO_TENSOR_MEM_SCOPE,
+        hexagon_session: Session,
+    ):
+        keys_dict = {
+            "basic_kernel": "max_pool2d",
+            "sched_type": 1,
+            "input_shape_4d": [N, H, W, C],
+            "block_shape": [8, 8, 32],
+            "DTYPE": DTYPE,
+            "KERNEL": KERNEL,
+            "STRIDE": STRIDE,
+            "DILATION": DILATION,
+            "PADDING": PADDING,
+            "IO_TENSOR_MEM_SCOPE": IO_TENSOR_MEM_SCOPE,
+        }
+
+        desc = bu.get_benchmark_decription(keys_dict)
+
+        # Create the host-side directory for this benchmark run's files / logs...
+        host_files_dir_name = bu.get_benchmark_id(keys_dict)
+        host_files_dir_path = os.path.join(self.working_dir, host_files_dir_name)
+        os.mkdir(host_files_dir_path)
+
+        keys_dict["host_files_dir_path"] = host_files_dir_path
+
+        log_file_path = os.path.join(host_files_dir_path, "out.txt")
+        with open(log_file_path, "w") as log_file:
+            print(f"CONFIGURATION: {desc}")
+            log_file.write(f"CONFIGURATION: {desc}\n")
+
+            try:
+                input_tensor_shape_4d = [N, H, W, C]
+                input_tensor_shape_7d = _int8_nhwc_8h8w32c_shape(N, H, W, C)
+
+                data = te.placeholder(tuple(input_tensor_shape_4d), dtype=DTYPE)
+
+                output = topi.nn.pool2d(
+                    data, KERNEL, STRIDE, DILATION, PADDING, "max", layout="NHWC"
+                )
+                primfunc = te.create_prim_func([data, output])
+
+                sch = tir.Schedule(primfunc, debug_mask="all")
+
+                sch.transform_layout(
+                    block="tensor", buffer="placeholder", index_map=_int8_nhwc_8h8w32c_map
+                )
+
+                target_hexagon = tvm.target.hexagon("v69", link_params=True)
+                built_module = tvm.build(
+                    sch.mod, target=tvm.target.Target(target_hexagon, host=target_hexagon)
+                )
+
+                # Save a local copy of the Hexagon object code (in the form of a .so file)
+                # to allow post-mortem inspection.
+                host_dso_binary_path = os.path.join(host_files_dir_path, "test_binary.so")
+                built_module.save(host_dso_binary_path)
+                print(f"SAVED BINARY TO HOST PATH: {host_dso_binary_path}")
+
+                hexagon_mod = hexagon_session.load_module(built_module)
+
+                # Generate the input tensor's data.
+                # Note that we'll eventually need it in two different layouts:
+                # (1) NHWC as an argument to testing.poolnd_python.
+                # (2) NHWC_8h8w32c as an argument to our Hexagon primfunc.
+                a_numpy_4d = _create_test_input(input_tensor_shape_4d, DTYPE)
+
+                ref_output_4d = testing.poolnd_python(
+                    a_numpy_4d.astype("int32"),
+                    KERNEL,
+                    STRIDE,
+                    DILATION,
+                    PADDING[0:2],
+                    PADDING[2:],
+                    pool_type="max",
+                    dtype="int32",
+                    layout="NHWC",
+                ).astype(DTYPE)
+
+                output_tensor_shape_4d = ref_output_4d.shape
+
+                a_numpy_7d = _int8_nhwc_8h8w32c_xform_immediate(a_numpy_4d)
+
+                a_hexagon_7d = allocate_hexagon_array(
+                    hexagon_session.device,
+                    tensor_shape=input_tensor_shape_7d,
+                    axis_separators=[4],
+                    dtype=DTYPE,
+                    mem_scope=IO_TENSOR_MEM_SCOPE,
+                )
+
+                c_hexagon_4d = allocate_hexagon_array(
+                    hexagon_session.device,
+                    tensor_shape=output_tensor_shape_4d,
+                    axis_separators=[],
+                    dtype=DTYPE,
+                    mem_scope=IO_TENSOR_MEM_SCOPE,
+                )
+
+                a_hexagon_7d.copyfrom(a_numpy_7d)
+
+                if DTYPE == "int8":
+                    rel_tolerance = 0
+                    abs_tolerance = 0
+                else:
+                    assert False, f"Need to decide acceptable tolerances for DTYPE {DTYPE}"
+
+                timer = hexagon_mod.time_evaluator(
+                    "main", hexagon_session.device, number=10, repeat=1
+                )
+                timing_result = timer(a_hexagon_7d, c_hexagon_4d)
+
+                try:
+                    tvm.testing.assert_allclose(
+                        ref_output_4d, c_hexagon_4d.numpy(), rtol=rel_tolerance, atol=abs_tolerance
+                    )
+                except AssertionError as e:
+                    raise bu.NumericalAccuracyException(str(e))
+
+            except bu.NumericalAccuracyException as e:
+                print()
+                print("FAIL: Numerical accuracy error. See log file.")
+
+                log_file.write("\n")
+                log_file.write(f"FAIL: {e}\n")
+
+                self.benchmark_table.record_fail(
+                    **keys_dict, comments="Numerical accuracy error. See log file."
+                )
+                # Return early: this run must not also be recorded as a success below.
+                return
+
+            except bu.UnsupportedException as e:
+                print()
+                print(f"SKIP: {e}")
+
+                log_file.write("\n")
+                log_file.write(f"SKIP: {e}\n")
+
+                self.benchmark_table.record_skip(
+                    **keys_dict, comments=f"Unsupported configuration: {e}"
+                )
+                return
+
+            self.benchmark_table.record_success(timing_result, **keys_dict)
+
+
+if __name__ == "__main__":
+    sys.exit(pytest.main(sys.argv))

Review Comment:
   Fixed now I believe.





[GitHub] [tvm] cconvey commented on pull request #11720: [hexagon][testing] add max_pool2d benchmark

Posted by GitBox <gi...@apache.org>.
cconvey commented on PR #11720:
URL: https://github.com/apache/tvm/pull/11720#issuecomment-1155745345

   Example of the current output:
   ```
   $ cat /tmp/tmphut9v83z/benchmark-results.csv | column -s $'\t' -t -n | less -SN
         1 basic_kernel  sched_type  N  H    W    C   DTYPE  KERNEL  STRIDE  DILATION  PADDING       IO_TENSOR_MEM_SCOPE  row_status  timings_min_usecs  timings_max_usecs  timings_median_usecs  timings_mean_usecs  timings_st
         2 max_pool2d    1           1  8    8    32  int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     197.200            197.200            197.200               197.200             0.000
         3 max_pool2d    1           1  8    8    64  int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     531.300            531.300            531.300               531.300             0.000
         4 max_pool2d    1           1  8    16   32  int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     459.500            459.500            459.500               459.500             0.000
         5 max_pool2d    1           1  8    16   64  int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     1641.800           1641.800           1641.800              1641.800            0.000
         6 max_pool2d    1           1  8    32   32  int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     979.700            979.700            979.700               979.700             0.000
         7 max_pool2d    1           1  8    32   64  int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     3513.800           3513.800           3513.800              3513.800            0.000
         8 max_pool2d    1           1  8    64   32  int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     2019.900           2019.900           2019.900              2019.900            0.000
         9 max_pool2d    1           1  8    64   64  int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     7040.600           7040.600           7040.600              7040.600            0.000
        10 max_pool2d    1           1  8    128  32  int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     4100.400           4100.400           4100.400              4100.400            0.000
        11 max_pool2d    1           1  8    128  64  int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     13208.500          13208.500          13208.500             13208.500           0.000
        12 max_pool2d    1           1  16   8    32  int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     455.400            455.400            455.400               455.400             0.000
        13 max_pool2d    1           1  16   8    64  int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     1482.100           1482.100           1482.100              1482.100            0.000
        14 max_pool2d    1           1  16   16   32  int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     1537.500           1537.500           1537.500              1537.500            0.000
        15 max_pool2d    1           1  16   16   64  int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     3894.300           3894.300           3894.300              3894.300            0.000
        16 max_pool2d    1           1  16   32   32  int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     3286.100           3286.100           3286.100              3286.100            0.000
        17 max_pool2d    1           1  16   32   64  int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     7447.000           7447.000           7447.000              7447.000            0.000
        18 max_pool2d    1           1  16   64   32  int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     6665.300           6665.300           6665.300              6665.300            0.000
        19 max_pool2d    1           1  16   64   64  int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     15824.300          15824.300          15824.300             15824.300           0.000
        20 max_pool2d    1           1  16   128  32  int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     12444.100          12444.100          12444.100             12444.100           0.000
        21 max_pool2d    1           1  16   128  64  int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     32704.500          32704.500          32704.500             32704.500           0.000
        22 max_pool2d    1           1  32   8    32  int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     971.800            971.800            971.800               971.800             0.000
        23 max_pool2d    1           1  32   8    64  int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     3171.200           3171.200           3171.200              3171.200            0.000
        24 max_pool2d    1           1  32   16   32  int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     3289.100           3289.100           3289.100              3289.100            0.000
        25 max_pool2d    1           1  32   16   64  int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     8009.500           8009.500           8009.500              8009.500            0.000
        26 max_pool2d    1           1  32   32   32  int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     6866.800           6866.800           6866.800              6866.800            0.000
        27 max_pool2d    1           1  32   32   64  int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     15006.800          15006.800          15006.800             15006.800           0.000
        28 max_pool2d    1           1  32   64   32  int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     13205.000          13205.000          13205.000             13205.000           0.000
        29 max_pool2d    1           1  32   64   64  int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     36247.900          36247.900          36247.900             36247.900           0.000
        30 max_pool2d    1           1  32   128  32  int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     28164.600          28164.600          28164.600             28164.600           0.000
        31 max_pool2d    1           1  32   128  64  int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     67673.200          67673.200          67673.200             67673.200           0.000
        32 max_pool2d    1           1  64   8    32  int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     2004.100           2004.100           2004.100              2004.100            0.000
        33 max_pool2d    1           1  64   8    64  int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     6370.700           6370.700           6370.700              6370.700            0.000
        34 max_pool2d    1           1  64   16   32  int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     6640.800           6640.800           6640.800              6640.800            0.000
        35 max_pool2d    1           1  64   16   64  int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     15813.800          15813.800          15813.800             15813.800           0.000
        36 max_pool2d    1           1  64   32   32  int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     13045.600          13045.600          13045.600             13045.600           0.000
        37 max_pool2d    1           1  64   32   64  int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     33073.000          33073.000          33073.000             33073.000           0.000
        38 max_pool2d    1           1  64   64   32  int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     28791.800          28791.800          28791.800             28791.800           0.000
        39 max_pool2d    1           1  64   64   64  int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     76532.600          76532.600          76532.600             76532.600           0.000
        40 max_pool2d    1           1  64   128  32  int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     59452.100          59452.100          59452.100             59452.100           0.000
        41 max_pool2d    1           1  64   128  64  int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     155364.400         155364.400         155364.400            155364.400          0.000
        42 max_pool2d    1           1  128  8    32  int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     4068.500           4068.500           4068.500              4068.500            0.000
        43 max_pool2d    1           1  128  8    64  int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     12074.000          12074.000          12074.000             12074.000           0.000
        44 max_pool2d    1           1  128  16   32  int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     12445.000          12445.000          12445.000             12445.000           0.000
        45 max_pool2d    1           1  128  16   64  int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     34239.600          34239.600          34239.600             34239.600           0.000
        46 max_pool2d    1           1  128  32   32  int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     27057.200          27057.200          27057.200             27057.200           0.000
        47 max_pool2d    1           1  128  32   64  int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     69079.900          69079.900          69079.900             69079.900           0.000
        48 max_pool2d    1           1  128  64   32  int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     63014.100          63014.100          63014.100             63014.100           0.000
        49 max_pool2d    1           1  128  64   64  int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     135556.200         135556.200         135556.200            135556.200          0.000
        50 max_pool2d    1           1  128  128  32  int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     131045.600         131045.600         131045.600            131045.600          0.000
        51 max_pool2d    1           1  128  128  64  int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     298585.000         298585.000         298585.000            298585.000          0.000
   ```
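   
   The results table itself is tab-separated (hence `column -s $'\t'` in the command above). As a quick sketch, assuming pandas is available, the file can also be loaded for further analysis:
   ```
   import pandas as pd
   
   # Path taken from the run above; substitute your own temp directory.
   df = pd.read_csv("/tmp/tmphut9v83z/benchmark-results.csv", sep="\t")
   
   # Column names match the header row shown above.
   print(df[["H", "W", "C", "KERNEL", "timings_mean_usecs"]].head())
   ```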




[GitHub] [tvm] csullivan merged pull request #11720: [hexagon][testing] add max_pool2d benchmark

Posted by GitBox <gi...@apache.org>.
csullivan merged PR #11720:
URL: https://github.com/apache/tvm/pull/11720




[GitHub] [tvm] cconvey commented on pull request #11720: [hexagon][testing] add max_pool2d benchmark

Posted by GitBox <gi...@apache.org>.
cconvey commented on PR #11720:
URL: https://github.com/apache/tvm/pull/11720#issuecomment-1156664401

   Updated benchmark output, reflecting a new revision of the PR code:
   ```
   basic_kernel  sched_type  input_shape_4d     block_shape  DTYPE  KERNEL  STRIDE  DILATION  PADDING       IO_TENSOR_MEM_SCOPE  row_status  timings_min_usecs  timings_max_usecs  timings_median_usecs  timings_mean_usecs  timings_stddev_usecs  host_files_dir_path                                                                                                                                                                                 comments
   max_pool2d    1           [1, 8, 8, 32]      [8, 8, 32]   int8   (1, 1)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     76.200             76.200             76.200                76.200              0.000                 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_8_8_32-block_shape:8_8_32-DTYPE:int8-KERNEL:1_1-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm      
   max_pool2d    1           [1, 8, 8, 32]      [8, 8, 32]   int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     197.300            197.300            197.300               197.300             0.000                 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_8_8_32-block_shape:8_8_32-DTYPE:int8-KERNEL:3_3-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm      
   max_pool2d    1           [1, 8, 8, 64]      [8, 8, 32]   int8   (1, 1)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     143.600            143.600            143.600               143.600             0.000                 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_8_8_64-block_shape:8_8_32-DTYPE:int8-KERNEL:1_1-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm      
   max_pool2d    1           [1, 8, 8, 64]      [8, 8, 32]   int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     531.100            531.100            531.100               531.100             0.000                 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_8_8_64-block_shape:8_8_32-DTYPE:int8-KERNEL:3_3-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm      
   max_pool2d    1           [1, 8, 32, 32]     [8, 8, 32]   int8   (1, 1)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     283.500            283.500            283.500               283.500             0.000                 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_8_32_32-block_shape:8_8_32-DTYPE:int8-KERNEL:1_1-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm     
   max_pool2d    1           [1, 8, 32, 32]     [8, 8, 32]   int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     979.800            979.800            979.800               979.800             0.000                 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_8_32_32-block_shape:8_8_32-DTYPE:int8-KERNEL:3_3-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm     
   max_pool2d    1           [1, 8, 32, 64]     [8, 8, 32]   int8   (1, 1)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     529.200            529.200            529.200               529.200             0.000                 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_8_32_64-block_shape:8_8_32-DTYPE:int8-KERNEL:1_1-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm     
   max_pool2d    1           [1, 8, 32, 64]     [8, 8, 32]   int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     3513.700           3513.700           3513.700              3513.700            0.000                 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_8_32_64-block_shape:8_8_32-DTYPE:int8-KERNEL:3_3-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm     
   max_pool2d    1           [1, 8, 128, 32]    [8, 8, 32]   int8   (1, 1)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     1122.700           1122.700           1122.700              1122.700            0.000                 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_8_128_32-block_shape:8_8_32-DTYPE:int8-KERNEL:1_1-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm    
   max_pool2d    1           [1, 8, 128, 32]    [8, 8, 32]   int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     4100.300           4100.300           4100.300              4100.300            0.000                 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_8_128_32-block_shape:8_8_32-DTYPE:int8-KERNEL:3_3-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm    
   max_pool2d    1           [1, 8, 128, 64]    [8, 8, 32]   int8   (1, 1)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     2105.000           2105.000           2105.000              2105.000            0.000                 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_8_128_64-block_shape:8_8_32-DTYPE:int8-KERNEL:1_1-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm    
   max_pool2d    1           [1, 8, 128, 64]    [8, 8, 32]   int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     13259.000          13259.000          13259.000             13259.000           0.000                 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_8_128_64-block_shape:8_8_32-DTYPE:int8-KERNEL:3_3-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm    
   max_pool2d    1           [1, 32, 8, 32]     [8, 8, 32]   int8   (1, 1)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     4.600              4.600              4.600                 4.600               0.000                 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_32_8_32-block_shape:8_8_32-DTYPE:int8-KERNEL:1_1-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm     
   max_pool2d    1           [1, 32, 8, 32]     [8, 8, 32]   int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     971.900            971.900            971.900               971.900             0.000                 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_32_8_32-block_shape:8_8_32-DTYPE:int8-KERNEL:3_3-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm     
   max_pool2d    1           [1, 32, 8, 64]     [8, 8, 32]   int8   (1, 1)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     528.700            528.700            528.700               528.700             0.000                 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_32_8_64-block_shape:8_8_32-DTYPE:int8-KERNEL:1_1-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm     
   max_pool2d    1           [1, 32, 8, 64]     [8, 8, 32]   int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     3171.100           3171.100           3171.100              3171.100            0.000                 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_32_8_64-block_shape:8_8_32-DTYPE:int8-KERNEL:3_3-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm     
   max_pool2d    1           [1, 32, 32, 32]    [8, 8, 32]   int8   (1, 1)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     1107.800           1107.800           1107.800              1107.800            0.000                 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_32_32_32-block_shape:8_8_32-DTYPE:int8-KERNEL:1_1-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm    
   max_pool2d    1           [1, 32, 32, 32]    [8, 8, 32]   int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     6864.800           6864.800           6864.800              6864.800            0.000                 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_32_32_32-block_shape:8_8_32-DTYPE:int8-KERNEL:3_3-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm    
   max_pool2d    1           [1, 32, 32, 64]    [8, 8, 32]   int8   (1, 1)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     2108.700           2108.700           2108.700              2108.700            0.000                 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_32_32_64-block_shape:8_8_32-DTYPE:int8-KERNEL:1_1-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm    
   max_pool2d    1           [1, 32, 32, 64]    [8, 8, 32]   int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     14937.400          14937.400          14937.400             14937.400           0.000                 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_32_32_64-block_shape:8_8_32-DTYPE:int8-KERNEL:3_3-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm    
   max_pool2d    1           [1, 32, 128, 32]   [8, 8, 32]   int8   (1, 1)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     4418.500           4418.500           4418.500              4418.500            0.000                 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_32_128_32-block_shape:8_8_32-DTYPE:int8-KERNEL:1_1-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm   
   max_pool2d    1           [1, 32, 128, 32]   [8, 8, 32]   int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     27957.400          27957.400          27957.400             27957.400           0.000                 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_32_128_32-block_shape:8_8_32-DTYPE:int8-KERNEL:3_3-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm   
   max_pool2d    1           [1, 32, 128, 64]   [8, 8, 32]   int8   (1, 1)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     8104.600           8104.600           8104.600              8104.600            0.000                 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_32_128_64-block_shape:8_8_32-DTYPE:int8-KERNEL:1_1-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm   
   max_pool2d    1           [1, 32, 128, 64]   [8, 8, 32]   int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     73345.100          73345.100          73345.100             73345.100           0.000                 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_32_128_64-block_shape:8_8_32-DTYPE:int8-KERNEL:3_3-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm   
   max_pool2d    1           [1, 128, 8, 32]    [8, 8, 32]   int8   (1, 1)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     5.800              5.800              5.800                 5.800               0.000                 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_128_8_32-block_shape:8_8_32-DTYPE:int8-KERNEL:1_1-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm    
   max_pool2d    1           [1, 128, 8, 32]    [8, 8, 32]   int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     4068.300           4068.300           4068.300              4068.300            0.000                 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_128_8_32-block_shape:8_8_32-DTYPE:int8-KERNEL:3_3-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm    
   max_pool2d    1           [1, 128, 8, 64]    [8, 8, 32]   int8   (1, 1)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     2103.200           2103.200           2103.200              2103.200            0.000                 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_128_8_64-block_shape:8_8_32-DTYPE:int8-KERNEL:1_1-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm    
   max_pool2d    1           [1, 128, 8, 64]    [8, 8, 32]   int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     12057.600          12057.600          12057.600             12057.600           0.000                 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_128_8_64-block_shape:8_8_32-DTYPE:int8-KERNEL:3_3-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm    
   max_pool2d    1           [1, 128, 32, 32]   [8, 8, 32]   int8   (1, 1)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     4419.300           4419.300           4419.300              4419.300            0.000                 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_128_32_32-block_shape:8_8_32-DTYPE:int8-KERNEL:1_1-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm   
   max_pool2d    1           [1, 128, 32, 32]   [8, 8, 32]   int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     28135.200          28135.200          28135.200             28135.200           0.000                 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_128_32_32-block_shape:8_8_32-DTYPE:int8-KERNEL:3_3-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm   
   max_pool2d    1           [1, 128, 32, 64]   [8, 8, 32]   int8   (1, 1)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     7944.200           7944.200           7944.200              7944.200            0.000                 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_128_32_64-block_shape:8_8_32-DTYPE:int8-KERNEL:1_1-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm   
   max_pool2d    1           [1, 128, 32, 64]   [8, 8, 32]   int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     60932.900          60932.900          60932.900             60932.900           0.000                 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_128_32_64-block_shape:8_8_32-DTYPE:int8-KERNEL:3_3-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm   
   max_pool2d    1           [1, 128, 128, 32]  [8, 8, 32]   int8   (1, 1)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     16996.200          16996.200          16996.200             16996.200           0.000                 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_128_128_32-block_shape:8_8_32-DTYPE:int8-KERNEL:1_1-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm  
   max_pool2d    1           [1, 128, 128, 32]  [8, 8, 32]   int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     126598.000         126598.000         126598.000            126598.000          0.000                 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_128_128_32-block_shape:8_8_32-DTYPE:int8-KERNEL:3_3-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm  
   max_pool2d    1           [1, 128, 128, 64]  [8, 8, 32]   int8   (1, 1)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     32163.600          32163.600          32163.600             32163.600           0.000                 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_128_128_64-block_shape:8_8_32-DTYPE:int8-KERNEL:1_1-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm  
   max_pool2d    1           [1, 128, 128, 64]  [8, 8, 32]   int8   (3, 3)  (1, 1)  (1, 1)    (0, 0, 0, 0)  global.vtcm          SUCCESS     312788.700         312788.700         312788.700            312788.700          0.000                 /tmp/tmpszyuisxa/basic_kernel:max_pool2d-sched_type:1-input_shape_4d:1_128_128_64-block_shape:8_8_32-DTYPE:int8-KERNEL:3_3-STRIDE:1_1-DILATION:1_1-PADDING:0_0_0_0-IO_TENSOR_MEM_SCOPE:global.vtcm  
   ```




[GitHub] [tvm] mehrdadh commented on a diff in pull request #11720: [hexagon][testing] add max_pool2d benchmark

Posted by GitBox <gi...@apache.org>.
mehrdadh commented on code in PR #11720:
URL: https://github.com/apache/tvm/pull/11720#discussion_r898271843


##########
tests/python/contrib/test_hexagon/benchmark_maxpool2d.py:
##########
@@ -0,0 +1,379 @@
[... diff context identical to the file quoted in the review comment above ...]
+if __name__ == "__main__":
+    sys.exit(pytest.main(sys.argv))

Review Comment:
   change this to `tvm.testing.main()`
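   
   For reference, a sketch of the suggested entry point (`tvm.testing.main()` wraps a pytest invocation of the calling test file):
   ```
   if __name__ == "__main__":
       tvm.testing.main()
   ```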



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@tvm.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org