You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tvm.apache.org by tq...@apache.org on 2020/11/23 13:59:11 UTC

[incubator-tvm] branch main updated: [µTVM] Remove binutils module, no longer needed after microTVM refactor. (#6947)

This is an automated email from the ASF dual-hosted git repository.

tqchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-tvm.git


The following commit(s) were added to refs/heads/main by this push:
     new 1541c76  [µTVM] Remove binutils module, no longer needed after microTVM refactor. (#6947)
1541c76 is described below

commit 1541c76b0f6e962b3d002097171702e9b687982f
Author: Andrew Reusch <ar...@octoml.ai>
AuthorDate: Mon Nov 23 05:58:53 2020 -0800

    [µTVM] Remove binutils module, no longer needed after microTVM refactor. (#6947)
---
 python/tvm/contrib/binutils.py        | 320 ----------------------------------
 python/tvm/micro/compiler.py          |  32 +++-
 tests/python/contrib/test_binutils.py | 167 ------------------
 3 files changed, 27 insertions(+), 492 deletions(-)

diff --git a/python/tvm/contrib/binutils.py b/python/tvm/contrib/binutils.py
deleted file mode 100644
index 646362a..0000000
--- a/python/tvm/contrib/binutils.py
+++ /dev/null
@@ -1,320 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-"""Utilities for binary file manipulation"""
-import os
-import subprocess
-import tvm._ffi
-from . import utils
-
-# TODO does this file still belong in `contrib`. is it too µTVM-specific?
-
-# TODO shouldn't need so many `ALIGN` directives
-RELOCATION_LD_SCRIPT_TEMPLATE = """
-/* linker symbol for use in UTVMInit */
-_utvm_stack_pointer_init = 0x{stack_pointer_init:x};
-
-SECTIONS
-{{
-  . = 0x{text_start:x};
-  . = ALIGN({word_size});
-  .text :
-  {{
-    . = ALIGN({word_size});
-    KEEP(*(.text))
-    KEEP(*(.text*))
-    . = ALIGN({word_size});
-  }}
-
-  . = 0x{rodata_start:x};
-  . = ALIGN({word_size});
-  .rodata :
-  {{
-    . = ALIGN({word_size});
-    KEEP(*(.rodata))
-    KEEP(*(.rodata*))
-    . = ALIGN({word_size});
-  }}
-
-  . = 0x{data_start:x};
-  . = ALIGN({word_size});
-  .data :
-  {{
-    . = ALIGN({word_size});
-    KEEP(*(.data))
-    KEEP(*(.data*))
-    . = ALIGN({word_size});
-  }}
-
-  . = 0x{bss_start:x};
-  . = ALIGN({word_size});
-  .bss :
-  {{
-    . = ALIGN({word_size});
-    KEEP(*(.bss))
-    KEEP(*(.bss*))
-    . = ALIGN({word_size});
-  }}
-}}
-"""
-
-
-def run_cmd(cmd):
-    """Runs `cmd` in a subprocess and awaits its completion.
-
-    Parameters
-    ----------
-    cmd : List[str]
-        list of command-line arguments
-
-    Returns
-    -------
-    output : str
-        resulting stdout capture from the subprocess
-    """
-    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
-    (output, _) = proc.communicate()
-    output = output.decode("utf-8")
-    if proc.returncode != 0:
-        cmd_str = " ".join(cmd)
-        msg = f'error while running command "{cmd_str}":\n{output}'
-        raise RuntimeError(msg)
-    return output
-
-
-@tvm._ffi.register_func("tvm_callback_get_section_size")
-def tvm_callback_get_section_size(binary_path, section_name, toolchain_prefix):
-    """Finds size of the section in the binary.
-    Assumes `size` shell command exists (typically works only on Linux machines)
-
-    Parameters
-    ----------
-    binary_path : str
-        path of the binary file
-
-    section_name : str
-        name of section
-
-    toolchain_prefix : str
-        prefix for binary names in target compiler toolchain
-
-    Returns
-    -------
-    size : integer
-        size of the section in bytes
-    """
-    if not os.path.isfile(binary_path):
-        raise RuntimeError('no such file "{}"'.format(binary_path))
-    # We use the "-A" flag here to get the ".rodata" section's size, which is
-    # not included by default.
-    size_output = run_cmd(["{}size".format(toolchain_prefix), "-A", binary_path])
-
-    # TODO(weberlo): Refactor this method and `*relocate_binary` so they are
-    # both aware of [".bss", ".sbss", ".sdata"] being relocated to ".bss".
-    section_mapping = {
-        ".text": [".text"],
-        ".rodata": [".rodata"],
-        ".data": [".data", ".sdata"],
-        ".bss": [".bss", ".sbss"],
-    }
-    sections_to_sum = section_mapping["." + section_name]
-    section_size = 0
-    # Skip the first two header lines in the `size` output.
-    for line in size_output.split("\n")[2:]:
-        tokens = list(filter(lambda s: len(s) != 0, line.split(" ")))
-        if len(tokens) != 3:
-            continue
-        entry_name = tokens[0]
-        entry_size = int(tokens[1])
-        for section in sections_to_sum:
-            if entry_name.startswith(section):
-                section_size += entry_size
-                break
-
-    # NOTE: in the past, section_size has been wrong on x86. it may be
-    # inconsistent. TODO: maybe stop relying on `*size` to give us the size and
-    # instead read the section with `*objcopy` and count the bytes.
-    # NOTE(areusch): I think the problem is due to alignment ops in the linker.
-    # Since this is going away in the impending switch to on-device runtime,
-    # add a constant to hopefully absorb these relocations.
-    if section_size > 0:
-        section_size += 64
-
-    return section_size
-
-
-@tvm._ffi.register_func("tvm_callback_relocate_binary")
-def tvm_callback_relocate_binary(
-    binary_path,
-    word_size,
-    text_start,
-    rodata_start,
-    data_start,
-    bss_start,
-    stack_end,
-    toolchain_prefix,
-):
-    """Relocates sections in the binary to new addresses
-
-    Parameters
-    ----------
-    binary_path : str
-        path of the binary file
-
-    word_size : int
-        word size on the target machine
-
-    text_start : int
-        text section address
-
-    rodata_start : int
-        rodata section address
-
-    data_start : int
-        data section address
-
-    bss_start : int
-        bss section address
-
-    stack_end : int
-        stack section end address
-
-    toolchain_prefix : str
-        prefix for binary names in target compiler toolchain
-
-    Returns
-    -------
-    rel_bin : bytearray
-        the relocated binary
-    """
-    assert text_start < rodata_start < data_start < bss_start < stack_end
-    stack_pointer_init = stack_end - word_size
-    ld_script_contents = ""
-    # TODO(weberlo): There should be a better way to configure this for different archs.
-    # TODO is this line even necessary?
-    if "riscv" in toolchain_prefix:
-        ld_script_contents += 'OUTPUT_ARCH( "riscv" )\n\n'
-    ld_script_contents += RELOCATION_LD_SCRIPT_TEMPLATE.format(
-        word_size=word_size,
-        text_start=text_start,
-        rodata_start=rodata_start,
-        data_start=data_start,
-        bss_start=bss_start,
-        stack_pointer_init=stack_pointer_init,
-    )
-
-    tmp_dir = utils.tempdir()
-    rel_obj_path = tmp_dir.relpath("relocated.obj")
-    rel_ld_script_path = tmp_dir.relpath("relocate.lds")
-    with open(rel_ld_script_path, "w") as f:
-        f.write(ld_script_contents)
-    run_cmd(
-        ["{}ld".format(toolchain_prefix), binary_path, "-T", rel_ld_script_path, "-o", rel_obj_path]
-    )
-
-    with open(rel_obj_path, "rb") as f:
-        rel_bin = bytearray(f.read())
-
-    gdb_init_dir = os.environ.get("MICRO_GDB_INIT_DIR")
-    if gdb_init_dir is not None:
-        gdb_init_path = f"{gdb_init_dir}/.gdbinit"
-        with open(gdb_init_path, "r") as f:
-            gdbinit_contents = f.read().split("\n")
-        new_contents = []
-        for line in gdbinit_contents:
-            new_contents.append(line)
-            if line.startswith("target"):
-                new_contents.append(f"add-symbol-file {rel_obj_path}")
-        with open(gdb_init_path, "w") as f:
-            f.write("\n".join(new_contents))
-
-    return rel_bin
-
-
-@tvm._ffi.register_func("tvm_callback_read_binary_section")
-def tvm_callback_read_binary_section(binary, section, toolchain_prefix):
-    """Returns the contents of the specified section in the binary byte array
-
-    Parameters
-    ----------
-    binary : bytearray
-        contents of the binary
-
-    section : str
-        type of section
-
-    toolchain_prefix : str
-        prefix for binary names in target compiler toolchain
-
-    Returns
-    -------
-    section_bin : bytearray
-        contents of the read section
-    """
-    tmp_dir = utils.tempdir()
-    tmp_bin = tmp_dir.relpath("temp.bin")
-    tmp_section = tmp_dir.relpath("tmp_section.bin")
-    with open(tmp_bin, "wb") as out_file:
-        out_file.write(bytes(binary))
-    run_cmd(
-        [
-            "{}objcopy".format(toolchain_prefix),
-            "--dump-section",
-            ".{}={}".format(section, tmp_section),
-            tmp_bin,
-        ]
-    )
-    if os.path.isfile(tmp_section):
-        # Get section content if it exists.
-        with open(tmp_section, "rb") as f:
-            section_bin = bytearray(f.read())
-    else:
-        # Return empty bytearray if the section does not exist.
-        section_bin = bytearray("", "utf-8")
-    return section_bin
-
-
-@tvm._ffi.register_func("tvm_callback_get_symbol_map")
-def tvm_callback_get_symbol_map(binary, toolchain_prefix):
-    """Obtains a map of symbols to addresses in the passed binary
-
-    Parameters
-    ----------
-    binary : bytearray
-        contents of the binary
-
-    toolchain_prefix : str
-        prefix for binary names in target compiler toolchain
-
-    Returns
-    -------
-    map_str : str
-        map of defined symbols to addresses, encoded as a series of
-        alternating newline-separated keys and values
-    """
-    tmp_dir = utils.tempdir()
-    tmp_obj = tmp_dir.relpath("tmp_obj.bin")
-    with open(tmp_obj, "wb") as out_file:
-        out_file.write(bytes(binary))
-    nm_output = run_cmd(["{}nm".format(toolchain_prefix), "-C", "--defined-only", tmp_obj])
-    nm_output = nm_output.splitlines()
-    map_str = ""
-    for line in nm_output:
-        line = line.split()
-        map_str += line[2] + "\n"
-        map_str += line[0] + "\n"
-    return map_str
diff --git a/python/tvm/micro/compiler.py b/python/tvm/micro/compiler.py
index 069f600..a265f2a 100644
--- a/python/tvm/micro/compiler.py
+++ b/python/tvm/micro/compiler.py
@@ -21,8 +21,8 @@ import abc
 import glob
 import os
 import re
+import subprocess
 
-from tvm.contrib import binutils
 import tvm.target
 from . import build
 from . import class_factory
@@ -30,6 +30,28 @@ from . import debugger
 from . import transport
 
 
+def run_cmd(cmd):
+    """Runs `cmd` in a subprocess; raises RuntimeError if it exits non-zero.
+
+    Parameters
+    ----------
+    cmd : List[str]
+        list of command-line arguments
+
+    Returns
+    -------
+    output : str
+        captured stdout of the subprocess (stderr is merged into it)
+    """
+    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+    (output, _) = proc.communicate()
+    output = output.decode("utf-8")
+    if proc.returncode != 0:
+        cmd_str = " ".join(cmd)
+        raise RuntimeError(f'error while running command "{cmd_str}":\n{output}')
+    return output
+
+
 class DetectTargetError(Exception):
     """Raised when no target comment was detected in the sources given."""
 
@@ -232,13 +254,13 @@ class DefaultCompiler(Compiler):
 
             output_filename = f"{src_base}.o"
             output_abspath = os.path.join(output, output_filename)
-            binutils.run_cmd(args + ["-c", "-o", output_abspath, src])
+            run_cmd(args + ["-c", "-o", output_abspath, src])
             outputs.append(output_abspath)
 
         output_filename = f"{os.path.basename(output)}.a"
         output_abspath = os.path.join(output, output_filename)
-        binutils.run_cmd([prefix + "ar", "-r", output_abspath] + outputs)
-        binutils.run_cmd([prefix + "ranlib", output_abspath])
+        run_cmd([prefix + "ar", "-r", output_abspath] + outputs)
+        run_cmd([prefix + "ranlib", output_abspath])
 
         return tvm.micro.MicroLibrary(output, [output_filename])
 
@@ -273,7 +295,7 @@ class DefaultCompiler(Compiler):
             for lib_name in obj.library_files:
                 args.append(obj.abspath(lib_name))
 
-        binutils.run_cmd(args)
+        run_cmd(args)
         return tvm.micro.MicroBinary(output, output_filename, [])
 
     @property
diff --git a/tests/python/contrib/test_binutils.py b/tests/python/contrib/test_binutils.py
deleted file mode 100644
index f0aa2d1..0000000
--- a/tests/python/contrib/test_binutils.py
+++ /dev/null
@@ -1,167 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""Test various utilities for interaction with compiled binaries.
-
-Specifically, we test the following capabilities:
-  - querying the size of a binary section
-  - relocating sections within a binary to new addresses
-  - reading the contents of a binary section
-  - querying the address of a symbol in the binary
-"""
-
-import tvm
-from tvm import te
-import subprocess
-from tvm.contrib import utils
-from tvm.contrib import cc
-from tvm.contrib.binutils import *
-
-TOOLCHAIN_PREFIX = ""
-
-
-def make_binary():
-    prog = "int a = 7; \
-            int main() { \
-                int b = 5; \
-                return 0; \
-            }"
-    tmp_dir = utils.tempdir()
-    tmp_source = tmp_dir.relpath("source.c")
-    tmp_obj = tmp_dir.relpath("obj.obj")
-    with open(tmp_source, "w") as f:
-        f.write(prog)
-    cc.create_executable(tmp_obj, tmp_source, [], cc="{}gcc".format(TOOLCHAIN_PREFIX))
-    prog_bin = bytearray(open(tmp_obj, "rb").read())
-    return prog_bin
-
-
-def test_tvm_callback_get_section_size(binary=None):
-    if binary is None:
-        binary = make_binary()
-    tmp_dir = utils.tempdir()
-    tmp_bin = tmp_dir.relpath("obj.bin")
-    with open(tmp_bin, "wb") as f:
-        f.write(binary)
-
-    def verify():
-        print(
-            "Text section size: %d"
-            % tvm_callback_get_section_size(tmp_bin, "text", TOOLCHAIN_PREFIX)
-        )
-        print(
-            "Data section size: %d"
-            % tvm_callback_get_section_size(tmp_bin, "data", TOOLCHAIN_PREFIX)
-        )
-        print(
-            "Bss section size: %d" % tvm_callback_get_section_size(tmp_bin, "bss", TOOLCHAIN_PREFIX)
-        )
-        print()
-
-    verify()
-
-
-def test_tvm_callback_relocate_binary():
-    binary = make_binary()
-    tmp_dir = utils.tempdir()
-    tmp_bin = tmp_dir.relpath("obj.bin")
-    with open(tmp_bin, "wb") as f:
-        f.write(binary)
-
-    def verify():
-        word_size = 8
-        text_loc = 0x0
-        rodata_loc = 0x10000
-        data_loc = 0x20000
-        bss_loc = 0x30000
-        stack_end = 0x50000
-        rel_bin = tvm_callback_relocate_binary(
-            tmp_bin, word_size, text_loc, rodata_loc, data_loc, bss_loc, stack_end, TOOLCHAIN_PREFIX
-        )
-        print("Relocated binary section sizes")
-        test_tvm_callback_get_section_size(binary=rel_bin)
-        relf = tmp_dir.relpath("rel.bin")
-        with open(relf, "wb") as f:
-            f.write(rel_bin)
-        nm_proc = subprocess.Popen(
-            ["nm", "-C", "--defined-only", relf], stdout=subprocess.PIPE, stderr=subprocess.STDOUT
-        )
-        (out, _) = nm_proc.communicate()
-        symbol_entries = out.decode("utf-8").split("\n")
-        for entry in symbol_entries:
-            if len(entry) == 0:
-                continue
-            sym_loc, section, sym_name = entry.split(" ")
-            sym_loc = int(sym_loc, 16)
-            if section == "T":  # text
-                assert sym_loc >= text_loc and sym_loc < data_loc
-            elif section == "D":  # data
-                assert sym_loc >= data_loc and sym_loc < bss_loc
-            elif section == "B":  # bss
-                assert sym_loc >= bss_loc
-
-    verify()
-
-
-def test_tvm_callback_read_binary_section():
-    binary = make_binary()
-
-    def verify():
-        text_bin = tvm_callback_read_binary_section(binary, "text", TOOLCHAIN_PREFIX)
-        data_bin = tvm_callback_read_binary_section(binary, "data", TOOLCHAIN_PREFIX)
-        bss_bin = tvm_callback_read_binary_section(binary, "bss", TOOLCHAIN_PREFIX)
-        print("Read text section part of binary? %r" % (text_bin in binary))
-        print("Read data section part of binary? %r" % (data_bin in binary))
-        print("Read bss section part of binary? %r" % (bss_bin in binary))
-        print()
-
-    verify()
-
-
-def test_tvm_callback_get_symbol_map():
-    binary = make_binary()
-    tmp_dir = utils.tempdir()
-    tmp_bin = tmp_dir.relpath("obj.bin")
-    with open(tmp_bin, "wb") as f:
-        f.write(binary)
-
-    def verify():
-        word_size = 8
-        text_loc = 0x0
-        rodata_loc = 0x10000
-        data_loc = 0x20000
-        bss_loc = 0x30000
-        stack_end = 0x50000
-        rel_bin = tvm_callback_relocate_binary(
-            tmp_bin, word_size, text_loc, rodata_loc, data_loc, bss_loc, stack_end, TOOLCHAIN_PREFIX
-        )
-        symbol_map = tvm_callback_get_symbol_map(rel_bin, TOOLCHAIN_PREFIX)
-        symbols = set()
-        for i, line in enumerate(symbol_map.split("\n")):
-            # Every other line is the value the symbol maps to.
-            if i % 2 == 0:
-                symbols.add(line)
-        assert "a" in symbols
-        assert "main" in symbols
-
-    verify()
-
-
-if __name__ == "__main__":
-    test_tvm_callback_get_section_size()
-    test_tvm_callback_relocate_binary()
-    test_tvm_callback_read_binary_section()
-    test_tvm_callback_get_symbol_map()