Posted to commits@tvm.apache.org by GitBox <gi...@apache.org> on 2021/01/18 10:40:21 UTC

[GitHub] [tvm] leandron commented on a change in pull request #7299: Introduce Apple BNNS backend

leandron commented on a change in pull request #7299:
URL: https://github.com/apache/tvm/pull/7299#discussion_r559460487



##########
File path: python/tvm/relay/op/contrib/bnns.py
##########
@@ -0,0 +1,247 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=invalid-name, unused-argument
+"""BNNS library supported operators.
+Is a part of Accelerate framework on macOS/iOS platforms. Apple provide several APIs
+to handle tensor processing. Particularly:
+ * BNNS (basic neural )
+ * vDSP (1D and 2D tensor processing)
+ * BLAS (gemm provide)
+
+# There are two ways to register a function for an op to indicate if it is
+# supported by BNNS.
+
+# - The first and simplest way is to use the helper so that
+# users only need to provide the operator name and a boolean value to indicate if
+# it is supported. For example:
+#
+#     .. code-block:: python
+#
+#       add = _register_external_op_helper("add")
+#       add = _register_external_op_helper("add", True)
+#       add = _register_external_op_helper("add", False)
+#
+# - The other way is to implement the check function yourself, inspecting the
+# attributes of the op to decide if it should be offloaded to BNNS.
+"""
+import math
+import tvm.ir
+from ...dataflow_pattern import wildcard, is_op, is_expr, is_constant
+from .register import register_pattern_table, get_pattern_table
+
+from tvm.relay import transform
+from tvm.relay.expr import const
+from tvm.relay.build_module import bind_params_by_name
+
+def partition_for_bnns(mod, params=None):
+    """Partition the graph greedily offloading supported
+    operators to BNNS.
+
+    Parameters
+    ----------
+    mod : Module
+        The module to run passes on.
+    params : Optional[Dict[str, NDArray]]
+        Constant input parameters.
+
+    Returns
+    -------
+    ret : Module
+        The annotated and partitioned module.
+    """
+    if params:
+        mod["main"] = bind_params_by_name(mod["main"], params)
+
+    seq = tvm.transform.Sequential(
+        [
+            transform.InferType(),
+            transform.FoldConstant(),
+            transform.FoldScaleAxis(),
+            transform.DynamicToStatic(),
+            transform.AlterOpLayout(),
+            # TODO(apeskov): WA. The AlterOpLayout call leads to constant shape
+            #   transformations: some expand_dims ops may appear after constants,
+            #   which breaks BNNS fusing. So we have to call FoldConstant right
+            #   before the BNNS composite passes.
+            transform.FoldConstant(),
+            transform.MergeComposite(get_pattern_table("bnns")),
+            transform.AnnotateTarget("bnns"),
+            #   If you don't need per-layer performance statistics you can
+            #   uncomment the next line.
+            # transform.MergeCompilerRegions(),
+            transform.PartitionGraph(),
+        ]
+    )
+
+    return seq(mod)
+
+
+def _register_external_op_helper(op_name, supported=True):
+    """The helper function to indicate that a given operator can be supported
+    by BNNS.
+
+    Parameters
+    ----------
+    op_name : str
+        The name of the operator that will be registered.
+
+    Returns
+    -------
+    f : callable
+        A function that returns whether the operator is supported by BNNS.
+    """
+
+    @tvm.ir.register_op_attr(op_name, "target.bnns")
+    def _func_wrapper(expr):
+        return supported
+
+    return _func_wrapper
+
+_register_external_op_helper("nn.batch_matmul")
+
+
+# TODO [apeskov]:
+#   1. enlarge the list of supported types
+#   2. clarify the meaning of the "" value
+def dtype_is_supported(dtype):
+    return dtype == "float32" or dtype == ""

Review comment:
    ```suggestion
    return dtype in ("", "float32")
    ```
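    A membership test also extends more naturally once additional dtypes are enabled. A possible follow-up sketch (the constant name is illustrative, not something in this PR):

    ```python
    # Hypothetical follow-up: keep the supported dtypes in one place.
    BNNS_SUPPORTED_DTYPES = ("", "float32")  # "" means the dtype is left unspecified

    def dtype_is_supported(dtype):
        """Return True if BNNS can consume tensors of the given dtype."""
        return dtype in BNNS_SUPPORTED_DTYPES
    ```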

##########
File path: python/tvm/relay/op/contrib/bnns.py
##########
@@ -0,0 +1,247 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=invalid-name, unused-argument
+"""BNNS library supported operators.
+Is a part of Accelerate framework on macOS/iOS platforms. Apple provide several APIs
+to handle tensor processing. Particularly:
+ * BNNS (basic neural )
+ * vDSP (1D and 2D tensor processing)
+ * BLAS (gemm provide)
+
+# There are two ways to register a function for an op to indicate if it is
+# supported by BNNS.
+
+# - The first and simplest way is to use the helper so that
+# users only need to provide the operator name and a boolean value to indicate if
+# it is supported. For example:
+#
+#     .. code-block:: python
+#
+#       add = _register_external_op_helper("add")
+#       add = _register_external_op_helper("add", True)
+#       add = _register_external_op_helper("add", False)
+#
+# - The other way is to implement the check function yourself, inspecting the
+# attributes of the op to decide if it should be offloaded to BNNS.
+"""
+import math
+import tvm.ir
+from ...dataflow_pattern import wildcard, is_op, is_expr, is_constant
+from .register import register_pattern_table, get_pattern_table
+
+from tvm.relay import transform
+from tvm.relay.expr import const
+from tvm.relay.build_module import bind_params_by_name
+
+def partition_for_bnns(mod, params=None):
+    """Partition the graph greedily offloading supported
+    operators to BNNS.
+
+    Parameters
+    ----------
+    mod : Module
+        The module to run passes on.
+    params : Optional[Dict[str, NDArray]]
+        Constant input parameters.
+
+    Returns
+    -------
+    ret : Module
+        The annotated and partitioned module.
+    """
+    if params:
+        mod["main"] = bind_params_by_name(mod["main"], params)
+
+    seq = tvm.transform.Sequential(
+        [
+            transform.InferType(),
+            transform.FoldConstant(),
+            transform.FoldScaleAxis(),
+            transform.DynamicToStatic(),
+            transform.AlterOpLayout(),
+            # TODO(apeskov): WA. The AlterOpLayout call leads to constant shape
+            #   transformations: some expand_dims ops may appear after constants,
+            #   which breaks BNNS fusing. So we have to call FoldConstant right
+            #   before the BNNS composite passes.
+            transform.FoldConstant(),
+            transform.MergeComposite(get_pattern_table("bnns")),
+            transform.AnnotateTarget("bnns"),
+            #   If you don't need per-layer performance statistics you can
+            #   uncomment the next line.
+            # transform.MergeCompilerRegions(),
+            transform.PartitionGraph(),
+        ]
+    )
+
+    return seq(mod)
+
+
+def _register_external_op_helper(op_name, supported=True):
+    """The helper function to indicate that a given operator can be supported
+    by BNNS.
+
+    Parameters
+    ----------
+    op_name : str
+        The name of the operator that will be registered.

Review comment:
    nit: the `supported` parameter is missing from the docstring.
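    For example, a sketch of the completed section, assuming the numpy style used elsewhere is kept (exact wording up to you):

    ```
    Parameters
    ----------
    op_name : str
        The name of the operator that will be registered.
    supported : bool, optional
        Whether the operator is supported by BNNS. Defaults to True.
    ```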

##########
File path: tests/python/contrib/test_bnns/infrastructure.py
##########
@@ -0,0 +1,309 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from itertools import zip_longest, combinations
+import json
+import os
+import warnings
+
+import numpy as np
+
+import tvm
+from tvm import relay
+from tvm import rpc
+from tvm.contrib import graph_runtime
+from tvm.relay.op.contrib.bnns import partition_for_bnns
+from tvm.contrib import utils
+from tvm.autotvm.measure import request_remote
+
+
+class Device:
+    """
+    Common device configuration for python tests.
+
+    Check tests/python/contrib/test_bnns/ for the presence of a test_config.json file.
+    This file can be used to override the default configuration here, which will attempt to
+    run the BNNS runtime tests locally if the runtime is available. Changing the configuration
+    will allow these runtime tests to be offloaded to a remote device via a tracker, for example.
+
+    Notes
+    -----
+        The test configuration will be loaded once when the class is created. If the
+        configuration changes between tests, any changes will not be picked up.
+
+    Parameters
+    ----------
+    device : RPCSession
+        Allows tests to connect to and use remote device.
+
+    Attributes
+    ----------
+    connection_type : str
+        Details the type of RPC connection to use. Options:
+        local - Use the local device,
+        tracker - Connect to a tracker to request a remote device,
+        remote - Connect to a remote device directly.
+    host : str
+        Specify IP address or hostname of remote target.
+    port : int
+        Specify port number of remote target.
+    target : str
+        The compilation target.
+    device_key : str
+        The device key of the remote target. Use when connecting to a remote device via a tracker.
+    cross_compile : str
+        Specify path to cross compiler to use when connecting a remote device from a non-arm platform.
+    """
+
+    connection_type = "local"
+    host = "localhost"
+    port = 9090
+    target = "llvm"
+    device_key = ""
+    cross_compile = ""
+
+    def __init__(self):
+        """Keep remote device for lifetime of object."""
+        self.device = self._get_remote()
+
+    @classmethod
+    def _get_remote(cls):
+        """Get a remote (or local) device to use for testing."""
+        if cls.connection_type == "tracker":
+            device = request_remote(cls.device_key, cls.host, cls.port, timeout=1000)
+        elif cls.connection_type == "remote":
+            device = rpc.connect(cls.host, cls.port)
+        elif cls.connection_type == "local":
+            device = rpc.LocalSession()
+        else:
+            raise ValueError(
+                "connection_type in test_config.json should be one of: " "local, tracker, remote."
+            )
+
+        return device
+
+    @classmethod
+    def load(cls, file_name):
+        """Load test config
+
+        Load the test configuration by looking for file_name relative
+        to the test_bnns directory.
+        """
+        location = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
+        config_file = os.path.join(location, file_name)
+        if not os.path.exists(config_file):
+            warnings.warn(
+                "Config file doesn't exist, resuming tests with default config."
+            )
+            return
+        with open(config_file, mode="r") as config:
+            test_config = json.load(config)
+
+        cls.connection_type = test_config["connection_type"]
+        cls.host = test_config["host"]
+        cls.port = test_config["port"]
+        cls.target = test_config["target"]
+        cls.device_key = test_config.get("device_key") or ""
+        cls.cross_compile = test_config.get("cross_compile") or ""
+
+
+Device.target = "llvm"
+
+
+def skip_runtime_test():
+    """Skip test if it requires the runtime and it's not present."""
+    # BNNS codegen not present.
+    if not tvm.get_global_func("relay.ext.bnns", True):
+        print("Skip because BNNS codegen is not available.")
+        return True
+    return False
+
+
+def skip_codegen_test():
+    """Skip test if it requires the BNNS codegen and it's not present."""
+    if not tvm.get_global_func("relay.ext.bnns", True):
+        print("Skip because BNNS codegen is not available.")
+        return True
+    return False
+
+
+def build_module(mod, target, params=None, enable_bnns=True, tvm_ops=0):
+    """Build module with option to build for BNNS."""
+    if isinstance(mod, tvm.relay.expr.Call):
+        mod = tvm.IRModule.from_expr(mod)
+    with tvm.transform.PassContext(opt_level=3):
+        if enable_bnns:
+            mod = partition_for_bnns(mod)
+        relay.backend.compile_engine.get().clear()
+        return relay.build(mod, target=target, target_host=target, params=params)
+
+
+def build_and_run(
+    mod,
+    inputs,
+    outputs,
+    params,
+    device,
+    enable_bnns=True,
+    no_runs=1,
+    tvm_ops=0,
+    config=None,
+):
+    """Build and run the relay module."""
+    if config is None:
+        config = {}
+
+    try:
+        lib = build_module(mod, device.target, params, enable_bnns, tvm_ops)
+    except Exception as e:
+        err_msg = "The module could not be built.\n"
+        if config:
+            err_msg += f"The test failed with the following parameters: {config}\n"
+        err_msg += str(e)
+        raise Exception(err_msg)
+
+    lib = update_lib(lib, device.device, device.cross_compile)
+    gen_module = graph_runtime.GraphModule(lib["default"](device.device.cpu(0)))
+    gen_module.set_input(**inputs)
+    out = []
+    for _ in range(no_runs):
+        gen_module.run()
+        out.append([gen_module.get_output(i) for i in range(outputs)])
+    return out
+
+
+def update_lib(lib, device, cross_compile):
+    """Export the library to the remote/local device."""
+    lib_name = "mod.so"
+    temp = utils.tempdir()
+    lib_path = temp.relpath(lib_name)
+    if cross_compile:
+        lib.export_library(lib_path, cc=cross_compile)
+    else:
+        lib.export_library(lib_path)
+    device.upload(lib_path)
+    lib = device.load_module(lib_name)
+    return lib
+
+
+def extract_bnns_modules(module):
+    """Get the BNNS module(s) from llvm module."""
+    return list(
+        filter(lambda mod: mod.type_key == "bnns_json", module.get_lib().imported_modules)
+    )
+
+
+def verify(answers, atol, rtol, verify_saturation=False, config=None):
+    """Compare the array of answers. Each entry is a list of outputs."""
+    if config is None:
+        config = {}
+
+    if len(answers) < 2:
+        raise RuntimeError(f"No results to compare: expected at least two, found {len(answers)}")
+    for answer in zip_longest(*answers):
+        for outs in combinations(answer, 2):
+            try:
+                if verify_saturation:
+                    assert (
+                        np.count_nonzero(outs[0].asnumpy() == 255) < 0.25 * outs[0].asnumpy().size
+                    ), "Output is saturated: {}".format(outs[0])
+                    assert (
+                        np.count_nonzero(outs[0].asnumpy() == 0) < 0.25 * outs[0].asnumpy().size
+                    ), "Output is saturated: {}".format(outs[0])
+                tvm.testing.assert_allclose(
+                    outs[0].asnumpy(), outs[1].asnumpy(), rtol=rtol, atol=atol
+                )
+            except AssertionError as e:
+                err_msg = "Results not within the acceptable tolerance.\n"
+                if config:
+                    err_msg += f"The test failed with the following parameters: {config}\n"
+                err_msg += str(e)
+                raise AssertionError(err_msg)
+
+
+def verify_codegen(
+    module,
+    known_good_codegen,
+    num_bnns_modules,
+    tvm_ops=0,
+    target=Device.target,
+):
+    """Check BNNS codegen against a known good output."""
+    module = build_module(module, target, tvm_ops=tvm_ops)
+    bnns_modules = extract_bnns_modules(module)
+
+    assert len(bnns_modules) == num_bnns_modules, (
+        f"The number of BNNS modules produced ({len(bnns_modules)}) does not "
+        f"match the expected value ({num_bnns_modules})."
+    )
+
+    for mod in bnns_modules:
+        source = mod.get_source("json")
+        codegen = json.loads(source)["nodes"]
+        # remove input and const names as these cannot be predetermined
+        for node in codegen:
+            if node["op"] in ("input", "const"):
+                node["name"] = ""
+        codegen_str = json.dumps(codegen, sort_keys=True, indent=2)
+        known_good_codegen_str = json.dumps(known_good_codegen, sort_keys=True, indent=2)
+
+        assert codegen_str == known_good_codegen_str, (
+            f"The JSON produced by codegen does not match the expected result. \n"
+            f"Actual={codegen_str} \n"
+            f"Expected={known_good_codegen_str}"
+        )
+
+
+def generate_trials(space, r_factor=3):
+    """Generates a series of trials.
+
+    This algorithm generates a series of non-deterministic trials given a
+    space of options to test. A trial is generated by pulling a value from
+    each option in the space. On some occasions the values are shuffled to
+    ensure a different trial on each r_factor iteration. The algorithm ensures
+    that each value from an option is used at least once. The total number of
+    trials is r_factor multiplied by the number of values in the largest
+    option.
+
+    Parameters
+    ----------
+    space: List[List[Any]]
+        A list of different options with varying values to test.
+    r_factor: (optional) int
+        The repeat factor.
+

Review comment:
    There is a mix of numpy-style docstrings and other formats (in other Python sources in this same PR as well). I think it would be better to make them consistent, given that this is quite a big chunk of work.

    Once a style is picked, please check the other source files in this PR.
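    For reference, a minimal numpy-style skeleton (wording is illustrative only), using `partition_for_bnns` from this PR as an example:

    ```python
    def partition_for_bnns(mod, params=None):
        """Partition the graph, greedily offloading supported operators to BNNS.

        Parameters
        ----------
        mod : tvm.IRModule
            The module to run passes on.
        params : Optional[Dict[str, tvm.nd.NDArray]]
            Constant input parameters.

        Returns
        -------
        mod : tvm.IRModule
            The annotated and partitioned module.
        """
    ```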

##########
File path: python/tvm/relay/op/contrib/bnns.py
##########
@@ -0,0 +1,247 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=invalid-name, unused-argument
+"""BNNS library supported operators.
+Is a part of Accelerate framework on macOS/iOS platforms. Apple provide several APIs
+to handle tensor processing. Particularly:
+ * BNNS (basic neural )
+ * vDSP (1D and 2D tensor processing)
+ * BLAS (gemm provide)
+
+# There are two ways to register a function for an op to indicate if it is
+# supported by BNNS.
+
+# - The first and simplest way is to use the helper so that
+# users only need to provide the operator name and a boolean value to indicate if
+# it is supported. For example:
+#
+#     .. code-block:: python
+#
+#       add = _register_external_op_helper("add")
+#       add = _register_external_op_helper("add", True)
+#       add = _register_external_op_helper("add", False)
+#
+# - The other way is to implement the check function yourself, inspecting the
+# attributes of the op to decide if it should be offloaded to BNNS.
+"""
+import math
+import tvm.ir
+from ...dataflow_pattern import wildcard, is_op, is_expr, is_constant
+from .register import register_pattern_table, get_pattern_table
+
+from tvm.relay import transform
+from tvm.relay.expr import const
+from tvm.relay.build_module import bind_params_by_name
+
+def partition_for_bnns(mod, params=None):
+    """Partition the graph greedily offloading supported
+    operators to BNNS.
+
+    Parameters
+    ----------
+    mod : Module
+        The module to run passes on.
+    params : Optional[Dict[str, NDArray]]
+        Constant input parameters.
+
+    Returns
+    -------
+    ret : Module
+        The annotated and partitioned module.
+    """
+    if params:
+        mod["main"] = bind_params_by_name(mod["main"], params)
+
+    seq = tvm.transform.Sequential(
+        [
+            transform.InferType(),
+            transform.FoldConstant(),
+            transform.FoldScaleAxis(),
+            transform.DynamicToStatic(),
+            transform.AlterOpLayout(),
+            # TODO(apeskov): WA. The AlterOpLayout call leads to constant shape
+            #   transformations: some expand_dims ops may appear after constants,
+            #   which breaks BNNS fusing. So we have to call FoldConstant right
+            #   before the BNNS composite passes.
+            transform.FoldConstant(),
+            transform.MergeComposite(get_pattern_table("bnns")),
+            transform.AnnotateTarget("bnns"),
+            #   If you don't need per-layer performance statistics you can
+            #   uncomment the next line.
+            # transform.MergeCompilerRegions(),
+            transform.PartitionGraph(),
+        ]
+    )
+
+    return seq(mod)
+
+
+def _register_external_op_helper(op_name, supported=True):
+    """The helper function to indicate that a given operator can be supported
+    by BNNS.
+
+    Parameters
+    ----------
+    op_name : str
+        The name of the operator that will be registered.
+
+    Returns
+    -------
+    f : callable
+        A function that returns whether the operator is supported by BNNS.
+    """
+
+    @tvm.ir.register_op_attr(op_name, "target.bnns")
+    def _func_wrapper(expr):
+        return supported
+
+    return _func_wrapper
+
+_register_external_op_helper("nn.batch_matmul")
+
+
+# TODO [apeskov]:
+#   1. enlarge the list of supported types
+#   2. clarify the meaning of the "" value
+def dtype_is_supported(dtype):
+    return dtype == "float32" or dtype == ""
+
+
+@tvm.ir.register_op_attr("nn.conv2d", "target.bnns")
+def conv2d_check(expr):
+    """Check if the conv2d can be executed in BNNS"""
+    attrs, args = expr.attrs, expr.args
+    data_typ = args[0].checked_type
+    if len(data_typ.shape) != 4 or data_typ.dtype != "float32":
+        return False
+    if not isinstance(args[1], tvm.relay.expr.Constant):
+        return False
+    kernel_typ = args[1].checked_type
+    if len(kernel_typ.shape) != 4 or kernel_typ.dtype != "float32":
+        return False
+    if attrs.data_layout != "NCHW":
+        return False
+    if not dtype_is_supported(attrs.out_dtype):
+        return False
+    return True
+
+
+def bias_check(expr):
+    """Check is bias added through the correct dimension"""
+    attrs, args = expr.attrs, expr.args
+    if not isinstance(args[1], tvm.relay.expr.Constant):
+        return False
+    if expr.op.name == "nn.bias_add":
+        return attrs.axis == 1
+    elif expr.op.name == "add":
+        b_shape = args[1].checked_type.shape
+        if len(b_shape) == 4:
+            return bool(b_shape[0] == 1 and b_shape[2] == 1 and b_shape[3] == 1)
+        elif len(b_shape) == 3:
+            return bool(b_shape[1] == 1 and b_shape[2] == 1)

Review comment:
       Why do we need these `bool()` calls here after the expression?
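    If they are needed (presumably because `checked_type.shape` holds `tvm.tir` expressions, so `==` does not necessarily yield a plain Python bool), a short code comment would help. A possible refactor under that assumption, where `_dims_are_unit` is a hypothetical helper that is not part of this PR:

    ```python
    def _dims_are_unit(shape, axes):
        """Return True if each of the given axes of a static shape equals 1."""
        # int() converts tir.IntImm shape values to plain Python ints.
        return all(int(shape[i]) == 1 for i in axes)

    # 4D bias: return _dims_are_unit(b_shape, (0, 2, 3))
    # 3D bias: return _dims_are_unit(b_shape, (1, 2))
    ```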




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org