[Auto-TVM] How to Auto tune the model on iOS device

Hi Experts,

I need to auto-tune an ONNX model on an iOS device. I went through some of the AutoTVM tutorials and I am able to tune the model for the CUDA (NVIDIA GPU) target, but I have not been able to auto-tune it on an iOS device (there is no tutorial for iOS devices).
1. Is it possible to auto-tune a model on an iOS device?
If yes, could you please help me with how to tune the model on an iOS device?

Thank you.

Hi @kazum - Thank you for the previous suggestions, I am also looking at how to use autotvm to tune a model on iOS.

Below is a modified version of 'tutorials/autotvm/' that is based on your previous comment suggestion of adding a build_func, but something isn't working quite right yet.

Tuning tasks are stuck at 0 GFLOPS and the tuning trials time out.

> [Task  1/12]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/100) | 0.00 s

If I skip tuning (by commenting out the `tune_tasks(tasks, **tuning_opt)` call), the script successfully builds and runs the untuned model and reports an inference result.

Any idea what step might be missing here?

Thank you!

0. Assumption: you have a single macOS based host running the rpc proxy, tracker and xcode, with local network IP:

1. Setup environment variables:

         export TVM_IOS_CODESIGN='Apple Development: <> (<SIGNINGCODE>)'
         export TVM_IOS_RPC_ROOT=${TVM_HOME}/apps/ios_rpc
         export TVM_IOS_RPC_PROXY_HOST=
         #export TVM_IOS_RPC_DESTINATION='platform=iOS Simulator,id=<simulator id>'
         export TVM_IOS_RPC_DESTINATION='platform=iOS,id=<ios device id>'

2. Start the tracker

        python3 -m tvm.exec.rpc_tracker --host=<HOST_IP> --port=9190 --no-fork
        INFO:RPCTracker:bind to

3. Start the rpc proxy and point it to the tracker

        python3 -m tvm.exec.rpc_proxy --host <HOST_IP> --tracker <HOST_IP>:9190 --no-fork
        INFO:root:RPCProxy: client port bind to

4. Run tuning:

        cd ${TVM_HOME}/apps/ios_rpc
        python3 tests/



Auto-tuning a convolutional network for iPhone CPU


import os
import numpy as np
import tvm
from tvm import te
from tvm import autotvm
from tvm import relay
import tvm.relay.testing
from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner
from tvm.contrib.util import tempdir
import tvm.contrib.graph_runtime as runtime
from tvm.contrib import xcode

# Define network
# --------------
# First we need to define the network in relay frontend API.
# We can load some pre-defined network from :code:`relay.testing`.
# We can also load models from MXNet, ONNX and TensorFlow.

def get_network(name, batch_size):
    """Get the Relay module, parameters and tensor shapes of a named network.

    Parameters
    ----------
    name : str
        One of ``'resnet-<n>'``, ``'vgg-<n>'``, ``'mobilenet'``,
        ``'squeezenet_v1.1'``, ``'inception_v3'`` or ``'mxnet'``.  For the
        resnet/vgg families the layer count is parsed from the text after
        the first ``'-'``.
    batch_size : int
        Batch dimension used for the input and output shapes.

    Returns
    -------
    (mod, params, input_shape, output_shape)
        The Relay module, its parameters, and the input/output shapes.

    Raises
    ------
    ValueError
        If ``name`` does not match any supported network.

    Notes
    -----
    The element type is read from the module-level ``dtype`` variable.
    """
    input_shape = (batch_size, 3, 224, 224)
    output_shape = (batch_size, 1000)

    if "resnet" in name:
        n_layer = int(name.split('-')[1])
        mod, params = relay.testing.resnet.get_workload(num_layers=n_layer, batch_size=batch_size, dtype=dtype)
    elif "vgg" in name:
        n_layer = int(name.split('-')[1])
        mod, params = relay.testing.vgg.get_workload(num_layers=n_layer, batch_size=batch_size, dtype=dtype)
    elif name == 'mobilenet':
        mod, params = relay.testing.mobilenet.get_workload(batch_size=batch_size, dtype=dtype)
    elif name == 'squeezenet_v1.1':
        mod, params = relay.testing.squeezenet.get_workload(batch_size=batch_size, version='1.1', dtype=dtype)
    elif name == 'inception_v3':
        # Inception v3 takes 299x299 images; keep the batch dimension in sync
        # with batch_size instead of hard-coding 1.
        input_shape = (batch_size, 3, 299, 299)
        mod, params = relay.testing.inception_v3.get_workload(batch_size=batch_size, dtype=dtype)
    elif name == 'mxnet':
        # an example for mxnet model
        # Imported lazily so mxnet is only required when this branch is used.
        from mxnet.gluon.model_zoo.vision import get_model
        block = get_model('resnet18_v1', pretrained=True)
        mod, params = relay.frontend.from_mxnet(block, shape={'data': input_shape}, dtype=dtype)
        # Append a softmax to the imported network's output.
        net = mod["main"]
        net = relay.Function(net.params, relay.nn.softmax(net.body), None, net.type_params, net.attrs)
        mod = tvm.IRModule.from_expr(net)
    else:
        raise ValueError("Unsupported network: " + name)

    return mod, params, input_shape, output_shape

# Start RPC Tracker
# ------------------
# python3 -m tvm.exec.rpc_tracker --host= --port=9190 --no-fork
#  - Autotvm will use the tracker to orchestrate tuning test runs.
# Start RPC Proxy
# python3 -m tvm.exec.rpc_proxy --host --tracker --no-fork

# Set Tuning Options
# ------------------


# Set to be address of tvm proxy.
proxy_host = os.environ["TVM_IOS_RPC_PROXY_HOST"]
# Set your destination via env variable.

# Should be in the format "platform=iOS,id=<the test device uuid>"
destination = os.environ["TVM_IOS_RPC_DESTINATION"]

device_key = 'iphone'
proxy_port = 9090

arch = "arm64"
sdk = "iphoneos"
target = "llvm -mtriple=%s-apple-darwin" % arch
target_host = "llvm -mtriple=%s-apple-darwin" % arch

network = 'resnet-18'
log_file = "%s.%s.log" % (device_key, network)
dtype = 'float32'

autotvm.measure.measure_methods.check_remote = lambda *args: True

def fcompile(*args):
    """Create a dylib with Xcode, then start the iOS test RPC with it.

    All positional arguments are forwarded to ``xcode.create_dylib`` together
    with the module-level ``arch`` and ``sdk``.  The first positional argument
    is then passed as the only entry of ``libs`` to ``xcode.popen_test_rpc``,
    using the module-level proxy host/port, device key and destination.
    Intended to be used as a custom autotvm build function.
    """
    xcode.create_dylib(*args, arch=arch, sdk=sdk)
    dylib_path = args[0]
    xcode.popen_test_rpc(
        proxy_host,
        proxy_port,
        device_key,
        destination=destination,
        libs=[dylib_path],
    )

fcompile.output_format = "dylib"

tuning_option = {
    'log_filename': log_file,
    'tuner': 'random',
    'early_stopping': None,
    'n_trial': 100,

    'measure_option': autotvm.measure_option(
            device_key, host='', port=9190,
            number=20, repeat=3, timeout=60, min_repeat_ms=150)

# Begin Tuning
# ------------

def tune_tasks(tasks,
               measure_option,
               tuner='xgb',
               n_trial=1000,
               early_stopping=None,
               log_filename='tuning.log',
               use_transfer_learning=True):
    """Tune each task in turn and write the best records to ``log_filename``.

    Parameters
    ----------
    tasks : list
        Tuning tasks; they are processed in reverse order.
    measure_option : dict
        Measurement options forwarded to each tuner's ``tune`` call.
    tuner : str
        One of ``'xgb'``, ``'xgb-rank'``, ``'xgb_knob'``, ``'ga'``,
        ``'random'`` or ``'gridsearch'``.
    n_trial : int
        Upper bound on trials per task; capped at the task's config space size.
    early_stopping : int or None
        Forwarded to each tuner's ``tune`` call.
    log_filename : str
        Destination for the best records; ``log_filename + ".tmp"`` collects
        the raw records while tuning.
    use_transfer_learning : bool
        When true, each tuner is seeded with the records already written to
        the temporary log by earlier tasks.

    Raises
    ------
    ValueError
        If ``tuner`` is not one of the supported names.
    """
    # NOTE(review): the default values above follow the upstream AutoTVM
    # tutorial this script was derived from -- confirm against the original.

    # create tmp log file
    tmp_log_file = log_filename + ".tmp"
    if os.path.exists(tmp_log_file):
        # Start from a clean temporary log so stale records are not reused.
        os.remove(tmp_log_file)

    for i, tsk in enumerate(reversed(tasks)):
        prefix = "[Task %2d/%2d] " % (i+1, len(tasks))

        # create tuner
        if tuner == 'xgb' or tuner == 'xgb-rank':
            tuner_obj = XGBTuner(tsk, loss_type='rank')
        elif tuner == 'xgb_knob':
            tuner_obj = XGBTuner(tsk, loss_type='rank', feature_type='knob')
        elif tuner == 'ga':
            tuner_obj = GATuner(tsk, pop_size=50)
        elif tuner == 'random':
            tuner_obj = RandomTuner(tsk)
        elif tuner == 'gridsearch':
            tuner_obj = GridSearchTuner(tsk)
        else:
            raise ValueError("Invalid tuner: " + tuner)

        if use_transfer_learning:
            if os.path.isfile(tmp_log_file):
                tuner_obj.load_history(autotvm.record.load_from_file(tmp_log_file))

        # do tuning
        tsk_trial = min(n_trial, len(tsk.config_space))
        tuner_obj.tune(n_trial=tsk_trial,
                       early_stopping=early_stopping,
                       measure_option=measure_option,
                       callbacks=[
                           autotvm.callback.progress_bar(tsk_trial, prefix=prefix),
                           autotvm.callback.log_to_file(tmp_log_file)
                       ])

    # pick best records to a cache file
    autotvm.record.pick_best(tmp_log_file, log_filename)

# Finally, we launch tuning jobs and evaluate the end-to-end performance.

def tune_and_evaluate(tuning_opt):
    # extract workloads from relay program
    print("Extract tasks...")
    mod, params, input_shape, _ = get_network(network, batch_size=1)
    tasks = autotvm.task.extract_from_program(mod["main"], target=target,

    # run tuning tasks
    tune_tasks(tasks, **tuning_opt)

    # compile kernels with history best records
    with autotvm.apply_history_best(log_file):
        with tvm.transform.PassContext(opt_level=3):
            graph, lib, params =
                mod, target=target, params=params)

        # export library
        path_dso = "tuned_deploy.dylib"
        lib.export_library(path_dso, xcode.create_dylib, arch=arch, sdk=sdk)

        # Evaluate inference cost on tuned lib
        xcode.popen_test_rpc(proxy_host, proxy_port, device_key, destination=destination, libs=[path_dso])

        remote = autotvm.measure.request_remote(device_key, '', 9190,

        # Upload not needed for ios because dylib is built into app
        # remote.upload(path_dso)

        rlib = remote.load_module(path_dso)

        ctx = remote.cpu(0)
        module = runtime.create(graph, rlib, ctx)
        data_tvm = tvm.nd.array((np.random.uniform(size=input_shape)).astype(dtype))
        module.set_input('data', data_tvm)

        # evaluate
        print("Evaluate inference time cost...")
        ftimer = module.module.time_evaluator("run", ctx, number=3, repeat=20)
        prof_res = np.array(ftimer().results) * 1000  # convert to millisecond
        print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
              (np.mean(prof_res), np.std(prof_res)))

# We do not run the tuning in our webpage server since it takes too long.
# Uncomment the following line to run it by yourself.
if __name__ == '__main__':
    if os.path.exists("rpc_config.txt"):

# Sample Output
# -------------

[quote="Dileep, post:7, topic:7681"]
i set TRACKER_IP as a IP address of the remote device ( ip address of the iOS device ), am i right ?
[/quote]

No, the tracker should run on the host machine. I think you also need to start the tracker with the following command:
```
python -m tvm.exec.rpc_tracker --host= --port=9190
```

Hi @kazum , thank you for the response.

* I ran rpc_proxy on the host machine with below command
> python -m tvm.exec.rpc_proxy --host [HOST_IP] --tracker [TRACKER_IP]:9190

HOST_IP = IP address of host machine , TRACKER_IP = IP address of remote device (iOS device IP)

* Tried to connect the iOS device to this RPC via rpc_proxy app (Opened the app, set  **Address** (ip address of host machine),  **Port** (9090) and the key set to be “iphone”).

* Able to connect the RPC proxy , but while connecting the proxy app i got the below error in host terminal

        INFO:root:Handler ready TCPSocketProxy:
        INFO:root:Lost tracker connection: [Errno 61] Connection refused, try reconnect in 2 sec
I set TRACKER_IP to the IP address of the remote device (the iOS device's IP address) - is that right?
I also tried switching to a different network, but the issue still exists. Can you please help me fix it?


@Dileep Sorry for the late response. I've tried auto tuning on iOS device before and needed some hacks to make it work.

- Pass a customized build function to LocalBuilder to compile your model with Xcode.
- Modify autotvm.measure.measure_methods.check_remote to make it return True always. It is necessary because, with the iOS RPC workflow, the devices are not visible from tuner before compiling.
- Run rpc_proxy on the host machine so that iOS device can connect to the tracker.
  python -m tvm.exec.rpc_proxy --host [HOST_IP] --tracker [TRACKER_IP]:9190

Here is a part of code.  Hope this would be helpful.
autotvm.measure.measure_methods.check_remote = lambda *args: True

def fcompile(*args):
    from tvm.contrib import xcode
    xcode.create_dylib(*args, arch=arch, sdk=sdk)
    path = args[0]
    xcode.popen_test_rpc(proxy_host, proxy_port, key,

fcompile.output_format = "dylib"

tuning_option = {
    'log_filename': log_file,
    'tuner': 'random',
    'early_stopping': None,

    'measure_option': autotvm.measure_option(
            key, host='', port=9190,
            number=20, repeat=3, timeout=60, min_repeat_ms=150)

@kazum, @tqchen any suggestion on the above issue.


Hi @kazum, @tqchen Thank you for the response.
I followed the above link and was able to connect the iOS device to the RPC proxy. However, according to some of the tutorials, the RPC tracker has to be started before tuning a model. I followed the same steps to connect the iOS device to the RPC tracker, but I have not been able to get it to connect.

If I need to tune the model on an iOS device, do I need to start the RPC tracker before tuning the model?
If yes, how do I connect the iOS device to the RPC tracker?

@kazum any tutorials or scripts are available , how to auto tune the model on iOS device.


cc @kazum who might have some experience. iOS requires a special RPC , you can find some instructions here

Hi Team,

Any suggestion on the above issue .


Hi @Dileep - 

There were a couple of problems I was running into:
1. The xcode build process initiated by the  popen_test_rpc() was being terminated prematurely when run in the fcompile build_func. 
2. The temp dylib filename with full path was not accessible from the ios rpc runner. As a hack to test, I removed the tmp_dir and just created the dylib in the local run directory.
3. remote.upload() should not be called because the dylib is built into the app

Following the code in my previous post, making the following changes should be all you need to get started. I'm going to look at a real solution that's not just a quick hack, but this may get you up and running.

def fcompile(*args):
    """Log the first argument, then create the dylib with Xcode.

    All positional arguments are forwarded to ``xcode.create_dylib`` together
    with the module-level ``arch`` and ``sdk``.  Unlike the earlier version,
    this one does not start the iOS RPC itself.
    """
    print("\nCalling fcompile. args[0] = %s" % args[0])
    xcode.create_dylib(*args, arch=arch, sdk=sdk)

diff --git a/python/tvm/autotvm/measure/ b/python/tvm/autotvm/measure/
index 9cef674d3..784184e61 100644
--- a/python/tvm/autotvm/measure/
+++ b/python/tvm/autotvm/measure/
@@ -426,8 +426,9 @@ class _WrappedBuildFunc():
         tic = time.time()
-            filename = os.path.join(tmp_dir, "tmp_func_%0x.%s" % (
-                getrandbits(64), self.build_func.output_format))
+            # filename = os.path.join(tmp_dir, "tmp_func_%0x.%s" % (
+            #     getrandbits(64), self.build_func.output_format))
+            filename = "tmp_func_%0x.%s" % (getrandbits(64), self.build_func.output_format)
             # TODO(tvm-team) consider linline _build_func_common
             func, arg_info = _build_func_common(measure_input, **kwargs)
             func.export_library(filename, self.build_func)
@@ -485,6 +486,8 @@ def run_through_rpc(measure_input, build_result,
     errno = MeasureErrorNo.NO_ERROR
         # upload built module
+        from tvm.contrib import xcode
+        xcode.popen_test_rpc(os.environ["TVM_IOS_RPC_PROXY_HOST"], 9090, "iphone", destination=os.environ["TVM_IOS_RPC_DESTINATION"], libs=[os.path.split(build_result.filename)[1]])
         remote = request_remote(*remote_args)
         # Program the FPGA every single time when targeting VTA
         if hasattr(, 'device_name') and \
@@ -493,7 +496,7 @@ def run_through_rpc(measure_input, build_result,
             from vta import program_fpga, reconfig_runtime
             program_fpga(remote, None)
-        remote.upload(build_result.filename)
+        # remote.upload(build_result.filename)
         func = remote.load_module(os.path.split(build_result.filename)[1])
         ctx = remote.context(str(, 0)


Hi @kazum,

I enabled AutoTVM's debug logging, and tuning the model on the iOS device is failing. Could you please take a look at the debug logs below?

    Extract tasks...
    Get devices for measurement successfully!
    [15:27:22] /Users/Dileep/LatestTVM/31_08/tvm/src/runtime/metal/ Intializing Metal device 0, name=Intel Iris Pro Graphics
    No: 1	GFLOPS: 0.00/0.00	result: MeasureResult(costs=('Cannot request iphone after 5 retry, last_error:Traceback (most recent call last):\n  [bt] (4) 5   libtvm.dylib                        0x0000000113c86f26 TVMFuncCall + 70\n  [bt] (3) 4   libtvm.dylib                        0x0000000113cdf3b0 std::__1::__function::__func<tvm::runtime::$_0, std::__1::allocator<tvm::runtime::$_0>, void (tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*)>::operator()(tvm::runtime::TVMArgs&&, tvm::runtime::TVMRetValue*&&) + 336\n  [bt] (2) 3   libtvm.dylib                        0x0000000113cde753 tvm::runtime::RPCClientConnect(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, int, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, tvm::runtime::TVMArgs) + 99\n  [bt] (1) 2   libtvm.dylib                        0x0000000113cdd206 tvm::runtime::RPCConnect(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, int, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >, tvm::runtime::TVMArgs) + 390\n  [bt] (0) 1   libtvm.dylib                        0x0000000112f92641 dmlc::LogMessageFatal::~LogMessageFatal() + 113\n  File "/Users/Dileep/LatestTVM/31_08/tvm/src/runtime/rpc/", line 73\nTVMError: Check failed: sock.Connect(addr): Connect to failed',), error_no=7, all_cost=60, timestamp=1599127077.6432252)	[('tile_f', [-1, 1, 128, 1]), ('tile_y', [-1, 7, 1, 1]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 1]), ('tile_ry', [-1, 1]), ('tile_rx', [-1, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 0)],None,65389
    [15:28:01] /Users/Dileep/LatestTVM/31_08/tvm/src/runtime/metal/ Intializing Metal device 0, name=Intel Iris Pro Graphics

I always see the above logs while tuning the model. Can you please suggest what is going wrong in the tuning?

Hi @jacobpostman, did you get any solution for the above issue.


Hi @kazum, thank you for your suggestion. I'm able to start the RPC tracker.

I also tried mostly the same approach as @jacobpostman, but while tuning the sample model I got the logs below.

    [Task  1/16]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (15/100) | 600.01 s[19:07:37] /Users/Dileep/LatestTVM/22_08/tvm/src/runtime/metal/ Intializing Metal device 0, name=Intel Iris Pro Graphics
    [Task  1/16]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (16/100) | 640.05 s[19:08:18] /Users/Dileep/LatestTVM/22_08/tvm/src/runtime/metal/ Intializing Metal device 0, name=Intel Iris Pro Graphics
    [Task  1/16]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (17/100) | 680.11 s[19:08:57] /Users/Dileep/LatestTVM/22_08/tvm/src/runtime/metal/ Intializing Metal device 0, name=Intel Iris Pro Graphics
    [Task  1/16]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (18/100) | 720.17 s[19:09:38] /Users/Dileep/LatestTVM/22_08/tvm/src/runtime/metal/ Intializing Metal device 0, name=Intel Iris Pro Graphics
    [Task  1/16]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (19/100) | 760.24 s[19:10:18] /Users/Dileep/LatestTVM/22_08/tvm/src/runtime/metal/ Intializing Metal device 0, name=Intel Iris Pro Graphics

I always see "0.00/ 0.00 GFLOPS" while tuning the model. Is the model being tuned properly, or am I missing something?

here is the part of code 

    target = 'metal'
    proxy_port = 9090
    key = "iphone"
    arch = "arm64"
    sdk = "iphoneos"
    target_host = "llvm -mtriple=%s-apple-darwin" % arch

    def compile_metal(src):
        """Pass ``src`` to ``xcode.compile_metal`` with the module-level ``sdk`` and return its result."""
        compiled = xcode.compile_metal(src, sdk=sdk)
        return compiled

    #### TUNING OPTION ####
    network = 'resnet-18'
    log_file = "%s.log" % network
    dtype = 'float32'

    autotvm.measure.measure_methods.check_remote = lambda *args: True

    def fcompile(*args):
        from tvm.contrib import xcode
        xcode.create_dylib(*args, arch=arch, sdk=sdk)
        path = args[0]
        xcode.popen_test_rpc(proxy_host, proxy_port, key,

    fcompile.output_format = "dylib"

    tuning_option = {
        'log_filename': log_file,
        'tuner': 'xgb',
        'early_stopping': None,

        'measure_option': autotvm.measure_option(
                key, host='', port=9190,
                number=20, repeat=3, timeout=60, min_repeat_ms=150)
    def tune_tasks(tasks,
        # create tmp log file
        tmp_log_file = log_filename + ".tmp"
        if os.path.exists(tmp_log_file):

        for i, tsk in enumerate(reversed(tasks)):
            prefix = "[Task %2d/%2d] " %(i+1, len(tasks))

            # create tuner
            if tuner == 'xgb' or tuner == 'xgb-rank':
                tuner_obj = XGBTuner(tsk, loss_type='rank')
            elif tuner == 'ga':
                tuner_obj = GATuner(tsk, pop_size=100)
            elif tuner == 'random':
                tuner_obj = RandomTuner(tsk)
            elif tuner == 'gridsearch':
                tuner_obj = GridSearchTuner(tsk)
                raise ValueError("Invalid tuner: " + tuner)

            if use_transfer_learning:
                if os.path.isfile(tmp_log_file):

            # do tuning
            tsk_trial = min(n_trial, len(tsk.config_space))
                               autotvm.callback.progress_bar(tsk_trial, prefix=prefix),

        # pick best records to a cache file
        autotvm.record.pick_best(tmp_log_file, log_filename)

    # Finally, we launch tuning jobs and evaluate the end-to-end performance.

    def tune_and_evaluate(tuning_opt):
        # extract workloads from relay program
        print("Extract tasks...")
        mod, params, input_shape, out_shape = get_network(network, batch_size=1)

        tasks = autotvm.task.extract_from_program(mod["main"], target=target,

        # run tuning tasks
        tune_tasks(tasks, **tuning_opt)

Please let me know if anything i'm missing on the above code.


