You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tvm.apache.org by GitBox <gi...@apache.org> on 2020/01/08 00:10:30 UTC

[GitHub] [incubator-tvm] jwfromm commented on a change in pull request #4497: [WIP] [Relay] Add a PyTorch to Relay Parser

jwfromm commented on a change in pull request #4497: [WIP] [Relay] Add a PyTorch to Relay Parser
URL: https://github.com/apache/incubator-tvm/pull/4497#discussion_r364012051
 
 

 ##########
 File path: tests/python/frontend/pytorch/test_forward.py
 ##########
 @@ -0,0 +1,608 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=import-self, invalid-name, unused-argument
+"""Unit tests for various models and operators"""
+from time import time
+import os
+import sys
+from tempfile import TemporaryDirectory
+from scipy.stats import t as tdistr
+import numpy as np
+import torch
+import tvm
+import torchvision
+import single_op
+
+
+from tvm import relay
+from tvm.contrib import graph_runtime
+#from tvm.relay.testing.config import ctx_list
+
+# Raise the interpreter's recursion limit well above the default.
+# NOTE(review): presumably needed when tracing/converting deep models - confirm.
+sys.setrecursionlimit(10000)
+
+# Compilation target and device passed to relay.build() and
+# graph_runtime.create() in verify_model().
+TARGET = 'llvm'
+CTX = tvm.cpu()
+# Not referenced anywhere in the visible part of this file.
+EXT_ACCEL = None
+
+# Benchmark bookkeeping: filled in by verify_model() and read back by
+# print_results(). Each map is keyed by model name and stores one value per
+# measurement round.
+model_names = []
+baseline_latencies_map = {}
+compiled_latencies_map = {}
+speedups_map = {}
+
+# Number of latency-measurement rounds verify_model() runs per model;
+# verify_model() skips benchmarking when this is not greater than zero.
+test_repeats = 1
+
+def _vectorize(ten):
+    """Return ``ten`` reshaped to a single dimension."""
+    flat_shape = (-1,)
+    return ten.reshape(flat_shape)
+
+def atol(tru, est):
+    """Return the largest element-wise absolute difference.
+
+    Both inputs are reshaped to one dimension and compared pair by pair;
+    ``zip`` stops at the shorter of the two.
+    """
+    flat_tru = tru.reshape(-1)
+    flat_est = est.reshape(-1)
+    return max(abs(t_val - e_val) for t_val, e_val in zip(flat_tru, flat_est))
+
+def rtol(tru, est):
+    """Return the largest element-wise relative difference.
+
+    Both inputs are reshaped to one dimension and compared pair by pair. Each
+    pair contributes ``|tru - est| / min(|tru|, |est|)``, with two special
+    cases so that zeros do not produce NaN:
+
+    * a pair of equal values contributes 0 (this covers 0 vs 0, where the
+      plain quotient would be 0/0);
+    * a non-zero difference over a zero denominator contributes infinity.
+    """
+    def _rtol_elt(tru, est):
+        diff = abs(tru - est)
+        if diff == 0:
+            # Identical values: a 0/0 here would yield NaN and poison max().
+            return 0.0
+        denom = min(abs(tru), abs(est))
+        if denom == 0:
+            return float('inf')
+        return diff / denom
+    tru = tru.reshape(-1)
+    est = est.reshape(-1)
+    return max(_rtol_elt(x, y) for x, y in zip(tru, est))
+
+def assert_shapes_match(tru, est):
+    """Raise ``AssertionError`` unless ``tru`` and ``est`` have equal shapes."""
+    expected, actual = tru.shape, est.shape
+    if expected == actual:
+        return
+    raise AssertionError(
+        "Output shapes {} and {} don't match".format(expected, actual))
+
+def load_single_op(model_name):
+    """Build the ``single_op`` model named ``model_name`` and a sample input.
+
+    The attribute ``model_name`` is looked up on the ``single_op`` module and
+    called with no arguments; the result is cast to float and put in eval
+    mode. The sample input is a uniformly random float tensor of shape
+    ``[1, 3, 224, 224]``.
+    """
+    factory = getattr(single_op, model_name)
+    model = factory().float().eval()
+    example = torch.rand([1, 3, 224, 224]).float()
+    return model, example
+
+def load_torchvision(model_name):
+    """Build a pretrained Torchvision model in eval mode and a sample input.
+
+    Models whose name starts with ``'inception'`` get a 299x299 input and
+    0.5 mean/std; every other model gets a 224x224 input and the
+    per-channel mean/std constants listed below. The sample input is drawn
+    from a standard normal distribution and then shifted and scaled per
+    channel in place.
+    """
+    is_inception = model_name.startswith('inception')
+    side = 299 if is_inception else 224
+    if is_inception:
+        mean, std = [0.5, 0.5, 0.5], [0.5, 0.5, 0.5]
+    else:
+        mean, std = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]
+    # The input is drawn before the model is constructed, as in the original
+    # ordering of these two steps.
+    input_data = torch.randn([1, 3, side, side]).float()
+    for channel, (shift, scale) in enumerate(zip(mean, std)):
+        input_data[:, channel] -= shift
+        input_data[:, channel] /= scale
+    factory = getattr(torchvision.models, model_name)
+    model = factory(pretrained=True).float().eval()
+    return model, input_data
+
+def load_pretrainedmodels(model_name):
+    """Build a pretrainedmodels.pytorch model in eval mode and a sample input.
+
+    The sample input has shape ``[1, *model.input_size]``, is drawn uniformly
+    from ``[0, 256)`` and is then normalised per channel with the model's own
+    ``mean`` and ``std`` attributes (first three channels only).
+    """
+    # https://github.com/Cadene/pretrained-models.pytorch
+    import pretrainedmodels
+    factory = getattr(pretrainedmodels, model_name)
+    model = factory().float().eval()
+    batch_shape = [1, *model.input_size]
+    input_data = torch.rand(batch_shape).float() * 256
+    for channel in range(3):
+        input_data[:, channel] -= model.mean[channel]
+        input_data[:, channel] /= model.std[channel]
+    return model, input_data
+
+def load_model(model_name):
+    """Given a model name, return a model as well as an example input.
+
+    Lookup order: the local ``single_op`` module, then ``torchvision.models``,
+    then the optional ``pretrainedmodels`` package.
+
+    Raises
+    ------
+    ModuleNotFoundError
+        If the name is not found in the first two sources and
+        ``pretrainedmodels`` is not installed.
+    RuntimeError
+        If no source provides a model with this name.
+    """
+    if hasattr(single_op, model_name):
+        return load_single_op(model_name)
+    if hasattr(torchvision.models, model_name):
+        return load_torchvision(model_name)
+    try:
+        # Imported here because the package is optional and is not imported
+        # at module scope; without this import the name lookup below would
+        # fail with NameError instead of the intended install hint.
+        import pretrainedmodels
+    except ModuleNotFoundError as err:
+        raise ModuleNotFoundError('Please install pretrainedmodels.pytorch') from err
+    if hasattr(pretrainedmodels, model_name):
+        return load_pretrainedmodels(model_name)
+    raise RuntimeError('Model not supported')
+
+
+def confidence_interval(mean, stdev, count, alpha=.01):
+    """Return ``(lower, upper)`` bounds of a Student-t confidence interval.
+
+    The interval is centred on ``mean`` with half-width
+    ``t * stdev / sqrt(count)``, where ``t`` is the ``1 - alpha / 2`` quantile
+    of the t distribution with ``count - 1`` degrees of freedom. Confidence
+    is ``1 - alpha`` (99% by default).
+    """
+    critical = tdistr.ppf(1 - alpha / 2, count - 1)
+    half_width = critical * stdev / np.sqrt(count)
+    return mean - half_width, mean + half_width
+
+def measure_latency(model, input_shapes, output_shapes, thresh, dryruns=40):
+    """Estimate the latency of ``model`` from repeated timed runs.
+
+    ``model`` is either a ``torch.nn.Module`` (called directly on random
+    inputs, on CUDA when available) or an object exposing ``set_input``,
+    ``run`` and ``get_output`` (fed random float32 ``tvm.nd.array`` inputs
+    named ``input0``, ``input1``, ...).
+
+    Parameters
+    ----------
+    model : the model to time, see above.
+    input_shapes : sequence of shapes, one per model input.
+    output_shapes : sequence of shapes, one per output fetched in the
+        non-torch path; unused for torch modules.
+    thresh : stop once the half-width of the 99% confidence interval of the
+        mean drops below this value. Latencies are differences of ``time()``
+        readings, so this is in seconds.
+    dryruns : iterations whose count is below this value are timed but not
+        recorded (warm-up).
+
+    Returns
+    -------
+    The centre of the final confidence interval.
+
+    Note: the loop has no iteration cap; it runs until the threshold is met.
+    """
+    is_torch = isinstance(model, torch.nn.Module)
+    use_cuda = is_torch and torch.cuda.is_available()
+    if use_cuda:
+        # Moving the module is loop-invariant, so do it once up front.
+        model = model.cuda()
+    latencies = []
+    count = 0
+    while True:
+        if is_torch:
+            input_data = [torch.rand(shape).float() for shape in input_shapes]
+            if use_cuda:
+                input_data = [tensor.cuda() for tensor in input_data]
+            t_start = time()
+            model(*input_data)
+            t_end = time()
+            # The sample is recorded below, after the warm-up check, exactly
+            # like the non-torch path. Recording it here as well would count
+            # warm-up runs and duplicate every later sample.
+        else:
+            input_data = {}
+            for i, shape in enumerate(input_shapes):
+                name = 'input' + str(i)
+                arr = np.random.random(shape).astype('float32')
+                input_data[name] = tvm.nd.array(arr)
+            t_start = time()
+            model.set_input(**input_data)
+            model.run()
+            for i, shape in enumerate(output_shapes):
+                arr = np.zeros(shape).astype('float32')
+                model.get_output(i, tvm.nd.array(arr))
+            t_end = time()
+        count += 1
+        if count < dryruns:
+            continue
+        latencies.append(t_end - t_start)
+        sample_size = len(latencies)
+        if sample_size > dryruns:
+            mean = np.mean(latencies)
+            stdev = np.std(latencies)
+            lower, upper = confidence_interval(mean, stdev, sample_size)
+            est = (upper + lower) / 2
+            err = (upper - lower) / 2
+            if err < thresh:
+                return est
+
+def _clear_torch_model_cache():
+    """Delete every non-hidden entry under ``~/.torch/models``.
+
+    Python-level equivalent of ``rm -rf ~/.torch/models/*``: failures are
+    ignored, and a missing directory is not an error.
+    """
+    import glob
+    import shutil
+    cache_dir = os.path.expanduser('~/.torch/models')
+    for entry in glob.glob(os.path.join(cache_dir, '*')):
+        if os.path.isdir(entry) and not os.path.islink(entry):
+            shutil.rmtree(entry, ignore_errors=True)
+        else:
+            try:
+                os.remove(entry)
+            except OSError:
+                # Best-effort cleanup, mirroring the -f flag of rm.
+                pass
+
+def verify_model(model_name):
+    """Assert that the output of a compiled model matches with that of its
+    baseline.
+
+    Steps:
+
+    1. Load the model and an example input with ``load_model`` and run the
+       PyTorch baseline (on CUDA when available).
+    2. Trace the model with ``torch.jit.trace``, convert the trace with
+       ``relay.frontend.from_pytorch``, build it for ``TARGET`` and run it
+       on ``CTX`` with the same input.
+    3. Compare every output for shape and for closeness
+       (``rtol=1e-3, atol=1e-3``).
+    4. When ``test_repeats > 0``, measure compiled and baseline latency that
+       many times, record the results in the module-level maps and print
+       them.
+    5. Empty the ``~/.torch/models`` cache directory.
+    """
+    baseline_model, baseline_input = load_model(model_name)
+    if torch.cuda.is_available():
+        baseline_model = baseline_model.cuda()
+        baseline_input = baseline_input.cuda()
+    baseline_outputs = baseline_model(baseline_input)
+    if isinstance(baseline_outputs, tuple):
+        baseline_outputs = tuple(out.detach().cpu().numpy() for out in baseline_outputs)
+    else:
+        baseline_outputs = (baseline_outputs.detach().float().cpu().numpy(),)
+    output_shapes = [out.shape for out in baseline_outputs]
+    dtype = 'float32'
+    input_name = 'input0'
+    input_shapes = {input_name: list(baseline_input.shape)}
+    trace = torch.jit.trace(baseline_model, baseline_input).float().eval()
+    if torch.cuda.is_available():
+        trace = trace.cuda()
+    else:
+        trace = trace.cpu()
+    with TemporaryDirectory() as tmp:
+        # NOTE(review): the saved file is never read back; from_pytorch
+        # receives the in-memory trace. Kept as in the original.
+        path = os.path.join(tmp, 'model.pth')
+        torch.jit.save(trace, path)
+        mod, params = relay.frontend.from_pytorch(trace, input_shapes)
+
+    compiled_input = {input_name: tvm.nd.array(baseline_input.cpu().numpy())}
+
+    with relay.build_config(opt_level=3):
+        relay_graph, relay_lib, relay_params = relay.build(mod, target=TARGET, params=params)
+        relay_model = graph_runtime.create(relay_graph, relay_lib, CTX)
+        relay_model.set_input(**relay_params)
+        relay_model.set_input(**compiled_input)
+        relay_model.run()
+
+    for i, baseline_output in enumerate(baseline_outputs):
+        output_shape = baseline_output.shape
+        compiled_output = relay_model.get_output(
+            i, tvm.nd.array(np.zeros(output_shape).astype(dtype), CTX)).asnumpy()
+
+        assert_shapes_match(baseline_output, compiled_output)
+        tvm.testing.assert_allclose(baseline_output, compiled_output,
+                                    rtol=1e-3, atol=1e-3)
+
+    if test_repeats > 0:
+        units = 1e3  # seconds -> milliseconds
+        thresh_ms = int(1e-2 * units)
+        # measure_latency() compares against latencies in seconds, so the
+        # millisecond threshold reported below has to be converted back.
+        thresh_s = thresh_ms / units
+        shape_list = list(input_shapes.values())
+
+        compiled_latencies = []
+        baseline_latencies = []
+        speedups = []
+
+        for _ in range(test_repeats):
+            print("computing compiled latency")
+            compiled_latency = measure_latency(relay_model, shape_list,
+                                               output_shapes, thresh_s) * units
+            print(f'Compiled latency is {compiled_latency:.3f} +/- {thresh_ms:d} ms.')
+            print("computing baseline latency")
+            baseline_latency = measure_latency(baseline_model, shape_list,
+                                               output_shapes, thresh_s) * units
+
+            print(f'Baseline latency is {baseline_latency:.3f} +/- {thresh_ms:d} ms.')
+
+            speedup = baseline_latency/compiled_latency
+            print(f'Relative speedup is {speedup:.3f}')
+
+            compiled_latencies.append(compiled_latency)
+            baseline_latencies.append(baseline_latency)
+            speedups.append(speedup)
+
+        baseline_latencies_map[model_name] = baseline_latencies
+        compiled_latencies_map[model_name] = compiled_latencies
+        speedups_map[model_name] = speedups
+        model_names.append(model_name)
+
+        print_results()
+
+    _clear_torch_model_cache()
+
+def print_results():
+    """Print the raw result maps, then per-model latencies and averages.
+
+    For every name in ``model_names`` this prints the compiled latency,
+    baseline latency and speedup of each of the ``test_repeats`` rounds,
+    followed by their averages over ``test_repeats``.
+    """
+    for table in (baseline_latencies_map, compiled_latencies_map, speedups_map):
+        print(table)
+
+    # Tolerance shown next to every latency, in milliseconds.
+    thresh = int(1e-2 * 1e3)
+
+    for name in model_names:
+        print("For model name "+name)
+        compiled_runs = compiled_latencies_map[name]
+        baseline_runs = baseline_latencies_map[name]
+        speedup_runs = speedups_map[name]
+
+        compiled_sum = baseline_sum = speedup_sum = 0.0
+        for run in range(test_repeats):
+            compiled = compiled_runs[run]
+            baseline = baseline_runs[run]
+            speedup = speedup_runs[run]
+            print(f'Compiled latency is {compiled:.3f} +/- {thresh:d} ms.')
+            print(f'Baseline latency is {baseline:.3f} +/- {thresh:d} ms.')
+            print(f'Relative speedup is {speedup:.3f}')
+
+            compiled_sum += compiled
+            baseline_sum += baseline
+            speedup_sum += speedup
+
+        print(f'Average compiled latency is {compiled_sum/test_repeats:.3f} +/- {thresh:d} ms.')
+        print(f'Average baseline latency is {baseline_sum/test_repeats:.3f} +/- {thresh:d} ms.')
+        print(f'Average relative speedup is {speedup_sum/test_repeats:.3f}')
+
+# Test Functions
+def test_add1():
+    """Check the compiled ``Add1`` model against its PyTorch baseline."""
+    verify_model(model_name='Add1')
 
 Review comment:
   I wonder if it's because PyTorch is trying to keep around all the models being built. You might be able to create a model, run a test, then use `del model` to force its cleanup.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services