You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tvm.apache.org by li...@apache.org on 2019/11/27 03:25:11 UTC

[incubator-tvm] branch v0.6 updated: [VTA][HotFix] Relay->VTA quantization fix (#4433)

This is an automated email from the ASF dual-hosted git repository.

liuyizhi pushed a commit to branch v0.6
in repository https://gitbox.apache.org/repos/asf/incubator-tvm.git


The following commit(s) were added to refs/heads/v0.6 by this push:
     new c6f8c23  [VTA][HotFix] Relay->VTA quantization fix (#4433)
c6f8c23 is described below

commit c6f8c23c349f3ef8bacceaf3203f7cc08e6529de
Author: Thierry Moreau <mo...@uw.edu>
AuthorDate: Tue Nov 26 19:21:56 2019 -0800

    [VTA][HotFix] Relay->VTA quantization fix (#4433)
    
    * fix the Relay -> VTA quantization flow
    
    * set opt_level to 3 during quantization so that batch norm is folded
---
 vta/scripts/tune_resnet.py                     |  8 +++++---
 vta/tutorials/autotvm/tune_relay_vta.py        | 10 ++++++----
 vta/tutorials/frontend/deploy_vision_on_vta.py | 26 ++++++++++++++------------
 3 files changed, 25 insertions(+), 19 deletions(-)

diff --git a/vta/scripts/tune_resnet.py b/vta/scripts/tune_resnet.py
index 00fe1e8..18aee09 100644
--- a/vta/scripts/tune_resnet.py
+++ b/vta/scripts/tune_resnet.py
@@ -125,9 +125,11 @@ def compile_network(opt, env, target):
     dtype_dict.update({k: str(v.dtype) for k, v in params.items()})
 
     # Perform quantization in Relay
-    with relay.quantize.qconfig(global_scale=8.0,
-                                skip_conv_layers=[0]):
-        relay_prog = relay.quantize.quantize(mod["main"], params=params)
+    # Note: We set opt_level to 3 in order to fold batch norm
+    with relay.build_config(opt_level=3):
+        with relay.quantize.qconfig(global_scale=8.0,
+                                    skip_conv_layers=[0]):
+            relay_prog = relay.quantize.quantize(mod["main"], params=params)
 
     # Perform graph packing and constant folding for VTA target
     if target.device_name == "vta":
diff --git a/vta/tutorials/autotvm/tune_relay_vta.py b/vta/tutorials/autotvm/tune_relay_vta.py
index 97dd742..a9ab6d7 100644
--- a/vta/tutorials/autotvm/tune_relay_vta.py
+++ b/vta/tutorials/autotvm/tune_relay_vta.py
@@ -89,15 +89,17 @@ def compile_network(env, target, model, start_pack, stop_pack):
     dtype_dict.update({k: str(v.dtype) for k, v in params.items()})
 
     # Perform quantization in Relay
-    with relay.quantize.qconfig(global_scale=8.0,
-                                skip_conv_layers=[0]):
-        relay_prog = relay.quantize.quantize(mod["main"], params=params)
+    # Note: We set opt_level to 3 in order to fold batch norm
+    with relay.build_config(opt_level=3):
+        with relay.quantize.qconfig(global_scale=8.0,
+                                    skip_conv_layers=[0]):
+            mod = relay.quantize.quantize(mod, params=params)
 
     # Perform graph packing and constant folding for VTA target
     if target.device_name == "vta":
         assert env.BLOCK_IN == env.BLOCK_OUT
         relay_prog = graph_pack(
-            relay_prog,
+            mod["main"],
             env.BATCH,
             env.BLOCK_OUT,
             env.WGT_WIDTH,
diff --git a/vta/tutorials/frontend/deploy_vision_on_vta.py b/vta/tutorials/frontend/deploy_vision_on_vta.py
index a508fc4..a316986 100644
--- a/vta/tutorials/frontend/deploy_vision_on_vta.py
+++ b/vta/tutorials/frontend/deploy_vision_on_vta.py
@@ -168,18 +168,20 @@ with autotvm.tophub.context(target):
 
     if target.device_name == "vta":
         # Perform quantization in Relay
-        with relay.quantize.qconfig(global_scale=8.0,
-                                    skip_conv_layers=[0]):
-            relay_prog = relay.quantize.quantize(mod["main"], params=params)
-        # Perform graph packing and constant folding for VTA target
-        assert env.BLOCK_IN == env.BLOCK_OUT
-        relay_prog = graph_pack(
-            relay_prog,
-            env.BATCH,
-            env.BLOCK_OUT,
-            env.WGT_WIDTH,
-            start_name=pack_dict[model][0],
-            stop_name=pack_dict[model][1])
+        # Note: We set opt_level to 3 in order to fold batch norm
+        with relay.build_config(opt_level=3):
+            with relay.quantize.qconfig(global_scale=8.0,
+                                        skip_conv_layers=[0]):
+                mod = relay.quantize.quantize(mod, params=params)
+            # Perform graph packing and constant folding for VTA target
+            assert env.BLOCK_IN == env.BLOCK_OUT
+            relay_prog = graph_pack(
+                mod["main"],
+                env.BATCH,
+                env.BLOCK_OUT,
+                env.WGT_WIDTH,
+                start_name=pack_dict[model][0],
+                stop_name=pack_dict[model][1])
     else:
         relay_prog = mod["main"]