You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tvm.apache.org by tq...@apache.org on 2020/11/23 13:59:11 UTC
[incubator-tvm] branch main updated: [µTVM] Remove binutils module, no longer needed after microTVM refactor. (#6947)
This is an automated email from the ASF dual-hosted git repository.
tqchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-tvm.git
The following commit(s) were added to refs/heads/main by this push:
new 1541c76 [µTVM] Remove binutils module, no longer needed after microTVM refactor. (#6947)
1541c76 is described below
commit 1541c76b0f6e962b3d002097171702e9b687982f
Author: Andrew Reusch <ar...@octoml.ai>
AuthorDate: Mon Nov 23 05:58:53 2020 -0800
[µTVM] Remove binutils module, no longer needed after microTVM refactor. (#6947)
---
python/tvm/contrib/binutils.py | 320 ----------------------------------
python/tvm/micro/compiler.py | 32 +++-
tests/python/contrib/test_binutils.py | 167 ------------------
3 files changed, 27 insertions(+), 492 deletions(-)
diff --git a/python/tvm/contrib/binutils.py b/python/tvm/contrib/binutils.py
deleted file mode 100644
index 646362a..0000000
--- a/python/tvm/contrib/binutils.py
+++ /dev/null
@@ -1,320 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-"""Utilities for binary file manipulation"""
-import os
-import subprocess
-import tvm._ffi
-from . import utils
-
-# TODO does this file still belong in `contrib`. is it too µTVM-specific?
-
-# TODO shouldn't need so many `ALIGN` directives
-RELOCATION_LD_SCRIPT_TEMPLATE = """
-/* linker symbol for use in UTVMInit */
-_utvm_stack_pointer_init = 0x{stack_pointer_init:x};
-
-SECTIONS
-{{
- . = 0x{text_start:x};
- . = ALIGN({word_size});
- .text :
- {{
- . = ALIGN({word_size});
- KEEP(*(.text))
- KEEP(*(.text*))
- . = ALIGN({word_size});
- }}
-
- . = 0x{rodata_start:x};
- . = ALIGN({word_size});
- .rodata :
- {{
- . = ALIGN({word_size});
- KEEP(*(.rodata))
- KEEP(*(.rodata*))
- . = ALIGN({word_size});
- }}
-
- . = 0x{data_start:x};
- . = ALIGN({word_size});
- .data :
- {{
- . = ALIGN({word_size});
- KEEP(*(.data))
- KEEP(*(.data*))
- . = ALIGN({word_size});
- }}
-
- . = 0x{bss_start:x};
- . = ALIGN({word_size});
- .bss :
- {{
- . = ALIGN({word_size});
- KEEP(*(.bss))
- KEEP(*(.bss*))
- . = ALIGN({word_size});
- }}
-}}
-"""
-
-
-def run_cmd(cmd):
- """Runs `cmd` in a subprocess and awaits its completion.
-
- Parameters
- ----------
- cmd : List[str]
- list of command-line arguments
-
- Returns
- -------
- output : str
- resulting stdout capture from the subprocess
- """
- proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
- (output, _) = proc.communicate()
- output = output.decode("utf-8")
- if proc.returncode != 0:
- cmd_str = " ".join(cmd)
- msg = f'error while running command "{cmd_str}":\n{output}'
- raise RuntimeError(msg)
- return output
-
-
-@tvm._ffi.register_func("tvm_callback_get_section_size")
-def tvm_callback_get_section_size(binary_path, section_name, toolchain_prefix):
- """Finds size of the section in the binary.
- Assumes `size` shell command exists (typically works only on Linux machines)
-
- Parameters
- ----------
- binary_path : str
- path of the binary file
-
- section_name : str
- name of section
-
- toolchain_prefix : str
- prefix for binary names in target compiler toolchain
-
- Returns
- -------
- size : integer
- size of the section in bytes
- """
- if not os.path.isfile(binary_path):
- raise RuntimeError('no such file "{}"'.format(binary_path))
- # We use the "-A" flag here to get the ".rodata" section's size, which is
- # not included by default.
- size_output = run_cmd(["{}size".format(toolchain_prefix), "-A", binary_path])
-
- # TODO(weberlo): Refactor this method and `*relocate_binary` so they are
- # both aware of [".bss", ".sbss", ".sdata"] being relocated to ".bss".
- section_mapping = {
- ".text": [".text"],
- ".rodata": [".rodata"],
- ".data": [".data", ".sdata"],
- ".bss": [".bss", ".sbss"],
- }
- sections_to_sum = section_mapping["." + section_name]
- section_size = 0
- # Skip the first two header lines in the `size` output.
- for line in size_output.split("\n")[2:]:
- tokens = list(filter(lambda s: len(s) != 0, line.split(" ")))
- if len(tokens) != 3:
- continue
- entry_name = tokens[0]
- entry_size = int(tokens[1])
- for section in sections_to_sum:
- if entry_name.startswith(section):
- section_size += entry_size
- break
-
- # NOTE: in the past, section_size has been wrong on x86. it may be
- # inconsistent. TODO: maybe stop relying on `*size` to give us the size and
- # instead read the section with `*objcopy` and count the bytes.
- # NOTE(areusch): I think the problem is due to alignment ops in the linker.
- # Since this is going away in the impending switch to on-device runtime,
- # add a constant to hopefully absorb these relocations.
- if section_size > 0:
- section_size += 64
-
- return section_size
-
-
-@tvm._ffi.register_func("tvm_callback_relocate_binary")
-def tvm_callback_relocate_binary(
- binary_path,
- word_size,
- text_start,
- rodata_start,
- data_start,
- bss_start,
- stack_end,
- toolchain_prefix,
-):
- """Relocates sections in the binary to new addresses
-
- Parameters
- ----------
- binary_path : str
- path of the binary file
-
- word_size : int
- word size on the target machine
-
- text_start : int
- text section address
-
- rodata_start : int
- rodata section address
-
- data_start : int
- data section address
-
- bss_start : int
- bss section address
-
- stack_end : int
- stack section end address
-
- toolchain_prefix : str
- prefix for binary names in target compiler toolchain
-
- Returns
- -------
- rel_bin : bytearray
- the relocated binary
- """
- assert text_start < rodata_start < data_start < bss_start < stack_end
- stack_pointer_init = stack_end - word_size
- ld_script_contents = ""
- # TODO(weberlo): There should be a better way to configure this for different archs.
- # TODO is this line even necessary?
- if "riscv" in toolchain_prefix:
- ld_script_contents += 'OUTPUT_ARCH( "riscv" )\n\n'
- ld_script_contents += RELOCATION_LD_SCRIPT_TEMPLATE.format(
- word_size=word_size,
- text_start=text_start,
- rodata_start=rodata_start,
- data_start=data_start,
- bss_start=bss_start,
- stack_pointer_init=stack_pointer_init,
- )
-
- tmp_dir = utils.tempdir()
- rel_obj_path = tmp_dir.relpath("relocated.obj")
- rel_ld_script_path = tmp_dir.relpath("relocate.lds")
- with open(rel_ld_script_path, "w") as f:
- f.write(ld_script_contents)
- run_cmd(
- ["{}ld".format(toolchain_prefix), binary_path, "-T", rel_ld_script_path, "-o", rel_obj_path]
- )
-
- with open(rel_obj_path, "rb") as f:
- rel_bin = bytearray(f.read())
-
- gdb_init_dir = os.environ.get("MICRO_GDB_INIT_DIR")
- if gdb_init_dir is not None:
- gdb_init_path = f"{gdb_init_dir}/.gdbinit"
- with open(gdb_init_path, "r") as f:
- gdbinit_contents = f.read().split("\n")
- new_contents = []
- for line in gdbinit_contents:
- new_contents.append(line)
- if line.startswith("target"):
- new_contents.append(f"add-symbol-file {rel_obj_path}")
- with open(gdb_init_path, "w") as f:
- f.write("\n".join(new_contents))
-
- return rel_bin
-
-
-@tvm._ffi.register_func("tvm_callback_read_binary_section")
-def tvm_callback_read_binary_section(binary, section, toolchain_prefix):
- """Returns the contents of the specified section in the binary byte array
-
- Parameters
- ----------
- binary : bytearray
- contents of the binary
-
- section : str
- type of section
-
- toolchain_prefix : str
- prefix for binary names in target compiler toolchain
-
- Returns
- -------
- section_bin : bytearray
- contents of the read section
- """
- tmp_dir = utils.tempdir()
- tmp_bin = tmp_dir.relpath("temp.bin")
- tmp_section = tmp_dir.relpath("tmp_section.bin")
- with open(tmp_bin, "wb") as out_file:
- out_file.write(bytes(binary))
- run_cmd(
- [
- "{}objcopy".format(toolchain_prefix),
- "--dump-section",
- ".{}={}".format(section, tmp_section),
- tmp_bin,
- ]
- )
- if os.path.isfile(tmp_section):
- # Get section content if it exists.
- with open(tmp_section, "rb") as f:
- section_bin = bytearray(f.read())
- else:
- # Return empty bytearray if the section does not exist.
- section_bin = bytearray("", "utf-8")
- return section_bin
-
-
-@tvm._ffi.register_func("tvm_callback_get_symbol_map")
-def tvm_callback_get_symbol_map(binary, toolchain_prefix):
- """Obtains a map of symbols to addresses in the passed binary
-
- Parameters
- ----------
- binary : bytearray
- contents of the binary
-
- toolchain_prefix : str
- prefix for binary names in target compiler toolchain
-
- Returns
- -------
- map_str : str
- map of defined symbols to addresses, encoded as a series of
- alternating newline-separated keys and values
- """
- tmp_dir = utils.tempdir()
- tmp_obj = tmp_dir.relpath("tmp_obj.bin")
- with open(tmp_obj, "wb") as out_file:
- out_file.write(bytes(binary))
- nm_output = run_cmd(["{}nm".format(toolchain_prefix), "-C", "--defined-only", tmp_obj])
- nm_output = nm_output.splitlines()
- map_str = ""
- for line in nm_output:
- line = line.split()
- map_str += line[2] + "\n"
- map_str += line[0] + "\n"
- return map_str
diff --git a/python/tvm/micro/compiler.py b/python/tvm/micro/compiler.py
index 069f600..a265f2a 100644
--- a/python/tvm/micro/compiler.py
+++ b/python/tvm/micro/compiler.py
@@ -21,8 +21,8 @@ import abc
import glob
import os
import re
+import subprocess
-from tvm.contrib import binutils
import tvm.target
from . import build
from . import class_factory
@@ -30,6 +30,28 @@ from . import debugger
from . import transport
+def run_cmd(cmd):
+ """Runs `cmd` in a subprocess and awaits its completion.
+
+ Parameters
+ ----------
+ cmd : List[str]
+ list of command-line arguments
+
+ Raises
+ ------
+ RuntimeError
+ if the command exits with a non-zero status; the message carries its combined stdout/stderr. Nothing is returned on success.
+ """
+ proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+ (output, _) = proc.communicate()
+ output = output.decode("utf-8")
+ if proc.returncode != 0:
+ cmd_str = " ".join(cmd)
+ msg = f'error while running command "{cmd_str}":\n{output}'
+ raise RuntimeError(msg)
+
+
class DetectTargetError(Exception):
"""Raised when no target comment was detected in the sources given."""
@@ -232,13 +254,13 @@ class DefaultCompiler(Compiler):
output_filename = f"{src_base}.o"
output_abspath = os.path.join(output, output_filename)
- binutils.run_cmd(args + ["-c", "-o", output_abspath, src])
+ run_cmd(args + ["-c", "-o", output_abspath, src])
outputs.append(output_abspath)
output_filename = f"{os.path.basename(output)}.a"
output_abspath = os.path.join(output, output_filename)
- binutils.run_cmd([prefix + "ar", "-r", output_abspath] + outputs)
- binutils.run_cmd([prefix + "ranlib", output_abspath])
+ run_cmd([prefix + "ar", "-r", output_abspath] + outputs)
+ run_cmd([prefix + "ranlib", output_abspath])
return tvm.micro.MicroLibrary(output, [output_filename])
@@ -273,7 +295,7 @@ class DefaultCompiler(Compiler):
for lib_name in obj.library_files:
args.append(obj.abspath(lib_name))
- binutils.run_cmd(args)
+ run_cmd(args)
return tvm.micro.MicroBinary(output, output_filename, [])
@property
diff --git a/tests/python/contrib/test_binutils.py b/tests/python/contrib/test_binutils.py
deleted file mode 100644
index f0aa2d1..0000000
--- a/tests/python/contrib/test_binutils.py
+++ /dev/null
@@ -1,167 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""Test various utilities for interaction with compiled binaries.
-
-Specifically, we test the following capabilities:
- - querying the size of a binary section
- - relocating sections within a binary to new addresses
- - reading the contents of a binary section
- - querying the address of a symbol in the binary
-"""
-
-import tvm
-from tvm import te
-import subprocess
-from tvm.contrib import utils
-from tvm.contrib import cc
-from tvm.contrib.binutils import *
-
-TOOLCHAIN_PREFIX = ""
-
-
-def make_binary():
- prog = "int a = 7; \
- int main() { \
- int b = 5; \
- return 0; \
- }"
- tmp_dir = utils.tempdir()
- tmp_source = tmp_dir.relpath("source.c")
- tmp_obj = tmp_dir.relpath("obj.obj")
- with open(tmp_source, "w") as f:
- f.write(prog)
- cc.create_executable(tmp_obj, tmp_source, [], cc="{}gcc".format(TOOLCHAIN_PREFIX))
- prog_bin = bytearray(open(tmp_obj, "rb").read())
- return prog_bin
-
-
-def test_tvm_callback_get_section_size(binary=None):
- if binary is None:
- binary = make_binary()
- tmp_dir = utils.tempdir()
- tmp_bin = tmp_dir.relpath("obj.bin")
- with open(tmp_bin, "wb") as f:
- f.write(binary)
-
- def verify():
- print(
- "Text section size: %d"
- % tvm_callback_get_section_size(tmp_bin, "text", TOOLCHAIN_PREFIX)
- )
- print(
- "Data section size: %d"
- % tvm_callback_get_section_size(tmp_bin, "data", TOOLCHAIN_PREFIX)
- )
- print(
- "Bss section size: %d" % tvm_callback_get_section_size(tmp_bin, "bss", TOOLCHAIN_PREFIX)
- )
- print()
-
- verify()
-
-
-def test_tvm_callback_relocate_binary():
- binary = make_binary()
- tmp_dir = utils.tempdir()
- tmp_bin = tmp_dir.relpath("obj.bin")
- with open(tmp_bin, "wb") as f:
- f.write(binary)
-
- def verify():
- word_size = 8
- text_loc = 0x0
- rodata_loc = 0x10000
- data_loc = 0x20000
- bss_loc = 0x30000
- stack_end = 0x50000
- rel_bin = tvm_callback_relocate_binary(
- tmp_bin, word_size, text_loc, rodata_loc, data_loc, bss_loc, stack_end, TOOLCHAIN_PREFIX
- )
- print("Relocated binary section sizes")
- test_tvm_callback_get_section_size(binary=rel_bin)
- relf = tmp_dir.relpath("rel.bin")
- with open(relf, "wb") as f:
- f.write(rel_bin)
- nm_proc = subprocess.Popen(
- ["nm", "-C", "--defined-only", relf], stdout=subprocess.PIPE, stderr=subprocess.STDOUT
- )
- (out, _) = nm_proc.communicate()
- symbol_entries = out.decode("utf-8").split("\n")
- for entry in symbol_entries:
- if len(entry) == 0:
- continue
- sym_loc, section, sym_name = entry.split(" ")
- sym_loc = int(sym_loc, 16)
- if section == "T": # text
- assert sym_loc >= text_loc and sym_loc < data_loc
- elif section == "D": # data
- assert sym_loc >= data_loc and sym_loc < bss_loc
- elif section == "B": # bss
- assert sym_loc >= bss_loc
-
- verify()
-
-
-def test_tvm_callback_read_binary_section():
- binary = make_binary()
-
- def verify():
- text_bin = tvm_callback_read_binary_section(binary, "text", TOOLCHAIN_PREFIX)
- data_bin = tvm_callback_read_binary_section(binary, "data", TOOLCHAIN_PREFIX)
- bss_bin = tvm_callback_read_binary_section(binary, "bss", TOOLCHAIN_PREFIX)
- print("Read text section part of binary? %r" % (text_bin in binary))
- print("Read data section part of binary? %r" % (data_bin in binary))
- print("Read bss section part of binary? %r" % (bss_bin in binary))
- print()
-
- verify()
-
-
-def test_tvm_callback_get_symbol_map():
- binary = make_binary()
- tmp_dir = utils.tempdir()
- tmp_bin = tmp_dir.relpath("obj.bin")
- with open(tmp_bin, "wb") as f:
- f.write(binary)
-
- def verify():
- word_size = 8
- text_loc = 0x0
- rodata_loc = 0x10000
- data_loc = 0x20000
- bss_loc = 0x30000
- stack_end = 0x50000
- rel_bin = tvm_callback_relocate_binary(
- tmp_bin, word_size, text_loc, rodata_loc, data_loc, bss_loc, stack_end, TOOLCHAIN_PREFIX
- )
- symbol_map = tvm_callback_get_symbol_map(rel_bin, TOOLCHAIN_PREFIX)
- symbols = set()
- for i, line in enumerate(symbol_map.split("\n")):
- # Every other line is the value the symbol maps to.
- if i % 2 == 0:
- symbols.add(line)
- assert "a" in symbols
- assert "main" in symbols
-
- verify()
-
-
-if __name__ == "__main__":
- test_tvm_callback_get_section_size()
- test_tvm_callback_relocate_binary()
- test_tvm_callback_read_binary_section()
- test_tvm_callback_get_symbol_map()