Posted to commits@tvm.apache.org by ma...@apache.org on 2022/05/23 10:14:06 UTC

[tvm] branch main updated: [Tests] Replace the Relay interpreter with the VM in the op tests (#11386)

This is an automated email from the ASF dual-hosted git repository.

masahi pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git


The following commit(s) were added to refs/heads/main by this push:
     new df632baa78 [Tests] Replace the Relay interpreter with the VM in the op tests (#11386)
df632baa78 is described below

commit df632baa78a4f550759d62fbc252039bfd9a64c3
Author: Florin Blanaru <fl...@gmail.com>
AuthorDate: Mon May 23 11:14:00 2022 +0100

    [Tests] Replace the Relay interpreter with the VM in the op tests (#11386)
---
 python/tvm/relay/testing/__init__.py              |   9 +-
 tests/python/relay/dyn/test_dynamic_op_level10.py |  54 ++---
 tests/python/relay/dyn/test_dynamic_op_level2.py  |  46 +++--
 tests/python/relay/dyn/test_dynamic_op_level3.py  |  71 ++++---
 tests/python/relay/dyn/test_dynamic_op_level5.py  |  15 +-
 tests/python/relay/dyn/test_dynamic_op_level6.py  |  27 +--
 tests/python/relay/test_op_grad_level1.py         |  38 ++--
 tests/python/relay/test_op_grad_level10.py        |  37 +++-
 tests/python/relay/test_op_grad_level2.py         | 151 +++++++++-----
 tests/python/relay/test_op_grad_level3.py         |  74 +++----
 tests/python/relay/test_op_grad_level4.py         |  47 +++--
 tests/python/relay/test_op_level1.py              |  39 ++--
 tests/python/relay/test_op_level10.py             | 236 ++++++++++++----------
 tests/python/relay/test_op_level2.py              |  32 +--
 tests/python/relay/test_op_level3.py              |  69 +++----
 tests/python/relay/test_op_level4.py              |  48 +++--
 tests/python/relay/test_op_level5.py              | 166 ++++++---------
 tests/python/relay/test_op_level6.py              |  55 +++--
 18 files changed, 645 insertions(+), 569 deletions(-)
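
For context, the change applies one pattern throughout the tests below: the hand-written inner loops over ["vm", "debug"] are removed, and a module-level tvm.testing.parameter fixture supplies the executor kind, so pytest parameterizes each test over the debug interpreter and the VM. A minimal sketch of that pattern follows; the test name test_example_op and the relay.abs body are placeholders for illustration only and are not part of this commit.

    import numpy as np
    import tvm
    import tvm.testing
    from tvm import relay

    # Module-level fixture: pytest runs each test once per executor kind.
    executor_kind = tvm.testing.parameter("debug", "vm")


    @tvm.testing.uses_gpu
    def test_example_op(executor_kind):  # placeholder test, for illustration only
        x = relay.var("x", relay.TensorType((2, 3), "float32"))
        func = relay.Function([x], relay.abs(x))
        x_data = np.random.uniform(-1, 1, size=(2, 3)).astype("float32")
        ref_res = np.abs(x_data)
        for target, dev in tvm.testing.enabled_targets():
            mod = tvm.ir.IRModule.from_expr(func)
            # The executor kind now comes from the fixture instead of an inner loop.
            op_res = relay.create_executor(
                executor_kind, mod=mod, device=dev, target=target
            ).evaluate()(x_data)
            tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5)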

diff --git a/python/tvm/relay/testing/__init__.py b/python/tvm/relay/testing/__init__.py
index 9097125110..2399a474de 100644
--- a/python/tvm/relay/testing/__init__.py
+++ b/python/tvm/relay/testing/__init__.py
@@ -82,6 +82,7 @@ def check_grad(
     mean=0,
     mode="higher_order",
     target_devices=None,
+    executor_kind="debug",
 ):
     """Perform numerical gradient checking given a relay function.
 
@@ -146,8 +147,12 @@ def check_grad(
     for target, dev in target_devices:
         # Eval the backward and forward functions
         # TODO(mbs): Evaluate a pair of functions so can share preparation between them.
-        bwd_func_compiled = relay.create_executor(device=dev, target=target).evaluate(bwd_func)
-        fwd_func_compiled = relay.create_executor(device=dev, target=target).evaluate(fwd_func)
+        bwd_func_compiled = relay.create_executor(
+            executor_kind, device=dev, target=target
+        ).evaluate(bwd_func)
+        fwd_func_compiled = relay.create_executor(
+            executor_kind, device=dev, target=target
+        ).evaluate(fwd_func)
 
         # Get analytic gradients.
         _, grads = bwd_func_compiled(*inputs)
diff --git a/tests/python/relay/dyn/test_dynamic_op_level10.py b/tests/python/relay/dyn/test_dynamic_op_level10.py
index d34b80303b..5a31977b45 100644
--- a/tests/python/relay/dyn/test_dynamic_op_level10.py
+++ b/tests/python/relay/dyn/test_dynamic_op_level10.py
@@ -27,9 +27,11 @@ import tvm.topi.testing
 import random
 import tvm.testing
 
+executor_kind = tvm.testing.parameter("debug", "vm")
+
 
 @tvm.testing.uses_gpu
-def test_broadcast_to():
+def test_broadcast_to(executor_kind):
     def verify_more_dynamic_broadcast_to(x_shape, out_shape):
         rank = len(out_shape)
         dtype = "float32"
@@ -45,12 +47,13 @@ def test_broadcast_to():
         x = np.random.uniform(size=np.prod(x_shape)).astype(dtype)
         ref_res = np.broadcast_to(np.reshape(x, x_shape), out_shape)
         for target, dev in tvm.testing.enabled_targets():
-            for kind in ["vm", "debug"]:
-                mod = tvm.ir.IRModule.from_expr(func)
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate(
-                    func
-                )(x, np.array(x_shape).astype(shape_type), np.array(out_shape).astype(shape_type))
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5)
+            mod = tvm.ir.IRModule.from_expr(func)
+            op_res = relay.create_executor(
+                executor_kind, mod=mod, device=dev, target=target
+            ).evaluate(func)(
+                x, np.array(x_shape).astype(shape_type), np.array(out_shape).astype(shape_type)
+            )
+            tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5)
 
     verify_more_dynamic_broadcast_to((4, 3), (3, 4, 3))
 
@@ -70,12 +73,11 @@ def test_broadcast_to():
         x = np.random.uniform(size=x_shape).astype(dtype)
         ref_res = np.broadcast_to(x, out_shape)
         for target, dev in tvm.testing.enabled_targets():
-            for kind in ["vm", "debug"]:
-                mod = tvm.ir.IRModule.from_expr(func)
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate(
-                    func
-                )(x, np.array(out_shape).astype(shape_type))
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5)
+            mod = tvm.ir.IRModule.from_expr(func)
+            op_res = relay.create_executor(
+                executor_kind, mod=mod, device=dev, target=target
+            ).evaluate(func)(x, np.array(out_shape).astype(shape_type))
+            tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5)
 
     verify_broadcast_to((1,), (1, 1, 1))
     verify_broadcast_to((1, 1), (4, 1, 1))
@@ -83,7 +85,7 @@ def test_broadcast_to():
 
 
 @tvm.testing.uses_gpu
-def test_dyn_broadcast_to():
+def test_dyn_broadcast_to(executor_kind):
     dtype = "uint8"
     rank = 3
     shape_type = "int64"
@@ -101,16 +103,15 @@ def test_dyn_broadcast_to():
     dyn_shape = (1,) * rank
     ref_res = np.broadcast_to(x, dyn_shape)
     for target, dev in tvm.testing.enabled_targets():
-        for kind in ["vm", "debug"]:
-            mod = tvm.ir.IRModule.from_expr(func)
-            op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate(func)(
-                x, np.array(dyn_shape).astype(shape_type)
-            )
-            tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5)
+        mod = tvm.ir.IRModule.from_expr(func)
+        op_res = relay.create_executor(executor_kind, mod=mod, device=dev, target=target).evaluate(
+            func
+        )(x, np.array(dyn_shape).astype(shape_type))
+        tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5)
 
 
 @tvm.testing.uses_gpu
-def test_dyn_one_hot():
+def test_dyn_one_hot(executor_kind):
     def _get_oshape(indices_shape, depth, axis):
         oshape = []
         true_axis = len(indices_shape) if axis == -1 else axis
@@ -135,12 +136,11 @@ def test_dyn_one_hot():
         indices_np = np.random.randint(0, depth, size=indices_shape).astype("int32")
         out_np = tvm.topi.testing.one_hot(indices_np, on_value, off_value, depth, axis, dtype)
         for target, dev in tvm.testing.enabled_targets():
-            for kind in ["vm", "debug"]:
-                mod = tvm.ir.IRModule.from_expr(func)
-                out_relay = relay.create_executor(
-                    kind, mod=mod, device=dev, target=target
-                ).evaluate()(indices_np, np.array(depth).astype("int32"))
-                tvm.testing.assert_allclose(out_relay.numpy(), out_np)
+            mod = tvm.ir.IRModule.from_expr(func)
+            out_relay = relay.create_executor(
+                executor_kind, mod=mod, device=dev, target=target
+            ).evaluate()(indices_np, np.array(depth).astype("int32"))
+            tvm.testing.assert_allclose(out_relay.numpy(), out_np)
 
     _verify((3,), 3, 1, 0, -1, "int32")
     _verify((3,), 3, 1.0, 0.0, -1, "float32")
diff --git a/tests/python/relay/dyn/test_dynamic_op_level2.py b/tests/python/relay/dyn/test_dynamic_op_level2.py
index fd7ab70028..a017762ce3 100644
--- a/tests/python/relay/dyn/test_dynamic_op_level2.py
+++ b/tests/python/relay/dyn/test_dynamic_op_level2.py
@@ -27,9 +27,11 @@ from test_dynamic_op_level3 import verify_func
 import tvm.topi.testing
 from tvm.relay.testing import run_infer_type
 
+executor_kind = tvm.testing.parameter("debug", "vm")
+
 
 @tvm.testing.uses_gpu
-def test_dyn_upsampling_run():
+def test_dyn_upsampling_run(executor_kind):
     def verify_upsampling(dshape, scale_h, scale_w, layout, method, align_corners=False):
 
         if layout == "NCHW":
@@ -58,12 +60,13 @@ def test_dyn_upsampling_run():
         func = relay.Function([x, scale_h_var, scale_w_var], z)
 
         for target, dev in tvm.testing.enabled_targets():
-            for kind in ["vm", "debug"]:
-                mod = tvm.ir.IRModule.from_expr(func)
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    x_data, np.array(scale_h).astype("float32"), np.array(scale_w).astype("float32")
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-4, atol=1e-6)
+            mod = tvm.ir.IRModule.from_expr(func)
+            op_res = relay.create_executor(
+                executor_kind, mod=mod, device=dev, target=target
+            ).evaluate()(
+                x_data, np.array(scale_h).astype("float32"), np.array(scale_w).astype("float32")
+            )
+            tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-4, atol=1e-6)
 
     verify_upsampling((1, 16, 32, 32), 3, 2.0, "NCHW", "nearest_neighbor")
     verify_upsampling((1, 16, 32, 32), 5, 2.0, "NCHW", "bilinear", True)
@@ -85,7 +88,7 @@ def test_dyn_upsampling_infer_type_const():
 
 
 @tvm.testing.uses_gpu
-def test_dyn_upsampling3d_run():
+def test_dyn_upsampling3d_run(executor_kind):
     def verify_upsampling3d(
         dshape, scale_d, scale_h, scale_w, layout, method, coord_trans="asymmetric"
     ):
@@ -124,15 +127,16 @@ def test_dyn_upsampling3d_run():
         func = relay.Function([x, scale_d_var, scale_h_var, scale_w_var], z)
 
         for target, dev in enabled_targets():
-            for kind in ["vm", "debug"]:
-                mod = tvm.ir.IRModule.from_expr(func)
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    x_data,
-                    np.array(scale_d).astype("float32"),
-                    np.array(scale_h).astype("float32"),
-                    np.array(scale_w).astype("float32"),
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-4, atol=1e-6)
+            mod = tvm.ir.IRModule.from_expr(func)
+            op_res = relay.create_executor(
+                executor_kind, mod=mod, device=dev, target=target
+            ).evaluate()(
+                x_data,
+                np.array(scale_d).astype("float32"),
+                np.array(scale_h).astype("float32"),
+                np.array(scale_w).astype("float32"),
+            )
+            tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-4, atol=1e-6)
 
     verify_upsampling3d((1, 1, 1, 1, 1), 2, 3, 4, "NCDHW", "nearest_neighbor")
     verify_upsampling3d((1, 8, 16, 16, 16), 2.0, 3.0, 4.0, "NCDHW", "nearest_neighbor")
@@ -163,7 +167,7 @@ def test_dyn_upsampling3d_infer_type_const():
 
 
 @tvm.testing.uses_gpu
-def test_dyn_pad():
+def test_dyn_pad(executor_kind):
     def verify_pad(dshape, pad_width, pad_val, dtype):
         x = relay.var("x", relay.TensorType(dshape, dtype))
         ndim = len(dshape)
@@ -178,7 +182,9 @@ def test_dyn_pad():
         ref_res = np.pad(data, pad_width, "constant", constant_values=(((pad_val,) * 2),) * ndim)
         pad_width = np.array(pad_width).astype("int64")
 
-        verify_func(func, [data, pad_width, np.array(pad_val).astype(dtype)], ref_res)
+        verify_func(
+            executor_kind, func, [data, pad_width, np.array(pad_val).astype(dtype)], ref_res
+        )
 
     def verify_pad_default_fill(dshape, pad_width, dtype):
         x = relay.var("x", relay.TensorType(dshape, dtype))
@@ -193,7 +199,7 @@ def test_dyn_pad():
         ref_res = np.pad(data, pad_width)
         pad_width = np.array(pad_width).astype("int64")
 
-        verify_func(func, [data, pad_width], ref_res)
+        verify_func(executor_kind, func, [data, pad_width], ref_res)
 
     verify_pad((4, 10, 7, 7), ((1, 1), (2, 2), (3, 3), (4, 4)), 2.0, "int32")
     verify_pad((2, 7), ((1, 4), (2, 2)), 4.0, "float64")
diff --git a/tests/python/relay/dyn/test_dynamic_op_level3.py b/tests/python/relay/dyn/test_dynamic_op_level3.py
index 0456401e8a..0e68cd7246 100644
--- a/tests/python/relay/dyn/test_dynamic_op_level3.py
+++ b/tests/python/relay/dyn/test_dynamic_op_level3.py
@@ -23,24 +23,25 @@ import tvm.testing
 from tvm import relay, te
 from tvm.relay.testing import check_grad, run_infer_type
 
+executor_kind = tvm.testing.parameter("debug", "vm")
 
-def verify_func(func, data, ref_res, target_device=tvm.testing.enabled_targets()):
+
+def verify_func(executor_kind, func, data, ref_res, target_device=tvm.testing.enabled_targets()):
     assert isinstance(data, list)
     for target, dev in target_device:
-        for kind in ["vm", "debug"]:
-            mod = tvm.ir.IRModule.from_expr(func)
-            op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                *data
-            )
-            if isinstance(op_res, tvm.runtime.container.ADT):
-                assert len(op_res) == len(
-                    ref_res
-                ), "Outputs from TVM and Python implementation must be equal "
-                for op_result, ref_result in zip(op_res, ref_res):
-                    tvm.testing.assert_allclose(op_result.numpy(), ref_result, rtol=1e-5)
-            else:
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5)
-            relay.backend.te_compiler.get().clear()
+        mod = tvm.ir.IRModule.from_expr(func)
+        op_res = relay.create_executor(
+            executor_kind, mod=mod, device=dev, target=target
+        ).evaluate()(*data)
+        if isinstance(op_res, tvm.runtime.container.ADT):
+            assert len(op_res) == len(
+                ref_res
+            ), "Outputs from TVM and Python implementation must be equal "
+            for op_result, ref_result in zip(op_res, ref_res):
+                tvm.testing.assert_allclose(op_result.numpy(), ref_result, rtol=1e-5)
+        else:
+            tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5)
+        relay.backend.te_compiler.get().clear()
 
 
 def check_on_vm(target, dev, args, expected_result, mod):
@@ -53,7 +54,7 @@ def check_on_vm(target, dev, args, expected_result, mod):
 
 
 @tvm.testing.uses_gpu
-def test_dyn_reshape():
+def test_dyn_reshape(executor_kind):
     def verify_reshape(shape, newshape, oshape):
         x = relay.var("x", relay.TensorType(shape, "float32"))
         y = relay.var("y", relay.TensorType((len(newshape),), "int64"))
@@ -69,7 +70,7 @@ def test_dyn_reshape():
             test_inputs=[x_data],
             eps=1e-3,
         )
-        verify_func(func, [x_data, np.array(newshape).astype("int64")], ref_res)
+        verify_func(executor_kind, func, [x_data, np.array(newshape).astype("int64")], ref_res)
 
     verify_reshape((2, 3, 4), (8, 3), (8, 3))
     verify_reshape((4, 7), (2, 7, 2), (2, 7, 2))
@@ -83,7 +84,7 @@ def test_dyn_reshape():
 
 
 @tvm.testing.uses_gpu
-def test_dyn_shape_reshape():
+def test_dyn_shape_reshape(executor_kind):
     def verify_reshape(shape, newshape, oshape):
         x = relay.var("x", relay.TensorType(shape, "float32"))
         y = relay.var("y", relay.TensorType(newshape, "float32"))
@@ -94,13 +95,13 @@ def test_dyn_shape_reshape():
         y_data = np.random.uniform(low=-1, high=1, size=newshape).astype("float32")
         ref_res = np.reshape(x_data, oshape)
         check_grad(run_infer_type(func), inputs=[x_data, y_data], eps=1e-3)
-        verify_func(func, [x_data, y_data], ref_res)
+        verify_func(executor_kind, func, [x_data, y_data], ref_res)
 
     verify_reshape((2, 3, 4), (8, 3), (8, 3))
     verify_reshape((4, 7), (2, 7, 2), (2, 7, 2))
 
 
-def test_squeeze():
+def test_squeeze(executor_kind):
     def verify_squeeze(shape, dtype, axis):
         x = relay.var("x", relay.TensorType(shape, dtype))
         assert axis is not None
@@ -110,14 +111,14 @@ def test_squeeze():
         func = relay.Function([x, axis], squeeze)
         x_data = np.random.random_sample(shape).astype(dtype)
         ref_res = np.squeeze(x_data, axis=np_axis)
-        verify_func(func, [x_data, np.array(np_axis).astype("int64")], ref_res)
+        verify_func(executor_kind, func, [x_data, np.array(np_axis).astype("int64")], ref_res)
 
     verify_squeeze((1, 3, 1), "float32", [0])
     verify_squeeze((1, 2, 1, 2, 1), "float32", [0, 2])
 
 
 @tvm.testing.uses_gpu
-def test_dyn_expand_dims():
+def test_dyn_expand_dims(executor_kind):
     def verify_expand_dims(
         dshape, dtype, oshape, axis, num_newaxis, target_device=tvm.testing.enabled_targets()
     ):
@@ -130,7 +131,7 @@ def test_dyn_expand_dims():
         data_np = np.random.uniform(size=dshape).astype(dtype)
         axis_np = np.array(axis).astype("int64")
         ref_res = data_np.reshape(oshape)
-        verify_func(func, [data_np, axis_np], ref_res, target_device=target_device)
+        verify_func(executor_kind, func, [data_np, axis_np], ref_res, target_device=target_device)
 
     for dtype in ["float16", "float32"]:
         verify_expand_dims((2, 2), dtype, (2, 2, 1), 2, 1)
@@ -146,7 +147,7 @@ def test_dyn_expand_dims():
 
 
 @tvm.testing.uses_gpu
-def test_dyn_tile():
+def test_dyn_tile(executor_kind):
     def verify_tile(dshape, reps):
         x = relay.var("x", relay.TensorType(dshape, "float32"))
         r = relay.var("reps", relay.TensorType((len(reps),), "float32"))
@@ -156,7 +157,7 @@ def test_dyn_tile():
         x_data = np.random.uniform(low=-1, high=1, size=dshape).astype("float32")
         ref_res = np.tile(x_data, reps=reps)
         reps_data = np.array(reps).astype("float32")
-        verify_func(func, [x_data, np.array(reps).astype("float32")], ref_res)
+        verify_func(executor_kind, func, [x_data, np.array(reps).astype("float32")], ref_res)
 
     verify_tile((2, 3, 4), (3, 2, 1))
     verify_tile((2, 3, 4), (1, 2))
@@ -164,7 +165,7 @@ def test_dyn_tile():
 
 
 @tvm.testing.uses_gpu
-def test_dyn_zeros_ones():
+def test_dyn_zeros_ones(executor_kind):
     def verify_zeros_ones(shape, dtype):
         for op, ref in [(relay.zeros, np.zeros), (relay.ones, np.ones)]:
             rank = len(shape)
@@ -175,14 +176,16 @@ def test_dyn_zeros_ones():
 
             func = relay.Function([dyn_shape], y)
             ref_res = ref(shape, dtype)
-            verify_func(func, [np.array(shape).astype("int64")], ref_res.astype("int64"))
+            verify_func(
+                executor_kind, func, [np.array(shape).astype("int64")], ref_res.astype("int64")
+            )
 
     verify_zeros_ones((1, 3), "int64")
     verify_zeros_ones((8, 9, 1, 2), "float32")
 
 
 @tvm.testing.uses_gpu
-def test_dyn_full():
+def test_dyn_full(executor_kind):
     def verify_full(fill_value, src_shape, dtype):
         x = relay.var("x", relay.scalar_type(dtype))
         rank = len(src_shape)
@@ -192,7 +195,10 @@ def test_dyn_full():
         ref_res = np.full(src_shape, fill_value).astype(dtype)
 
         verify_func(
-            func, [np.array(fill_value).astype(dtype), np.array(src_shape).astype("int64")], ref_res
+            executor_kind,
+            func,
+            [np.array(fill_value).astype(dtype), np.array(src_shape).astype("int64")],
+            ref_res,
         )
 
     verify_full(4, (1, 3, 4, 4), "int32")
@@ -201,7 +207,7 @@ def test_dyn_full():
 
 
 @tvm.testing.uses_gpu
-def test_dyn_sparse_to_dense():
+def test_dyn_sparse_to_dense(executor_kind):
     def verify_sparse_to_dense(sparse_indices, sparse_values, default_value, output_shape, xpected):
         sparse_indices_data = np.array(sparse_indices)
         sparse_values_data = np.array(sparse_values)
@@ -242,7 +248,7 @@ def test_dyn_sparse_to_dense():
                 output_shape_data,
             ]
 
-        verify_func(func, arguments, xpected)
+        verify_func(executor_kind, func, arguments, xpected)
 
     verify_sparse_to_dense(1, 3, 0, [5], [0, 3, 0, 0, 0])  # scalar
     verify_sparse_to_dense([0, 1, 4], [3, 3, 3], 0, [5], [3, 3, 0, 0, 3])  # vector
@@ -301,7 +307,7 @@ def test_dyn_sparse_to_dense():
 @pytest.mark.parametrize("dtype", [np.int64, np.int32])
 @pytest.mark.parametrize("use_dyn", [True, False])
 def test_sparse_fill_empty_rows(
-    sparse_indices, sparse_values, dense_shape, default_value, dtype, use_dyn
+    sparse_indices, sparse_values, dense_shape, default_value, dtype, use_dyn, executor_kind
 ):
     def ref_sparse_fill_empty_rows(
         sparse_indices: np.ndarray,
@@ -404,6 +410,7 @@ def test_sparse_fill_empty_rows(
         assert empty_row_indicator_infer_type.checked_type.dtype == "bool"
 
         verify_func(
+            executor_kind,
             func,
             [sparse_indices_np, sparse_values_np, dense_shape_np, default_value_np],
             ref_res,
diff --git a/tests/python/relay/dyn/test_dynamic_op_level5.py b/tests/python/relay/dyn/test_dynamic_op_level5.py
index 2eeeb1d828..58234929c7 100644
--- a/tests/python/relay/dyn/test_dynamic_op_level5.py
+++ b/tests/python/relay/dyn/test_dynamic_op_level5.py
@@ -26,6 +26,8 @@ from tvm.relay.testing import run_infer_type
 import tvm.topi.testing
 import tvm.testing
 
+executor_kind = tvm.testing.parameter("debug", "vm")
+
 
 def test_resize2d_infer_type():
     n, c, h, w = te.size_var("n"), te.size_var("c"), te.size_var("h"), te.size_var("w")
@@ -37,7 +39,7 @@ def test_resize2d_infer_type():
 
 
 @tvm.testing.uses_gpu
-def test_resize2d():
+def test_resize2d(executor_kind):
     def verify_resize2d(dshape, scale, method, layout):
         if layout == "NHWC":
             size = (dshape[1] * scale, dshape[2] * scale)
@@ -62,12 +64,11 @@ def test_resize2d():
         )
 
         for target, dev in tvm.testing.enabled_targets():
-            for kind in ["vm", "debug"]:
-                mod = tvm.ir.IRModule.from_expr(func)
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    x_data, size
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-4, atol=1e-6)
+            mod = tvm.ir.IRModule.from_expr(func)
+            op_res = relay.create_executor(
+                executor_kind, mod=mod, device=dev, target=target
+            ).evaluate()(x_data, size)
+            tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-4, atol=1e-6)
 
     for method in ["linear", "nearest_neighbor"]:
         for layout in ["NCHW", "NHWC"]:
diff --git a/tests/python/relay/dyn/test_dynamic_op_level6.py b/tests/python/relay/dyn/test_dynamic_op_level6.py
index 530c402b29..ebf9c36263 100644
--- a/tests/python/relay/dyn/test_dynamic_op_level6.py
+++ b/tests/python/relay/dyn/test_dynamic_op_level6.py
@@ -22,9 +22,11 @@ from tvm import te
 from tvm import relay
 import tvm.testing
 
+executor_kind = tvm.testing.parameter("debug", "vm")
+
 
 @tvm.testing.uses_gpu
-def test_dynamic_topk():
+def test_dynamic_topk(executor_kind):
     def verify_topk(k, axis, ret_type, is_ascend, dtype):
         shape = (20, 100)
         x = relay.var("x", relay.TensorType(shape, "float32"))
@@ -53,18 +55,17 @@ def test_dynamic_topk():
         np_indices = np_indices.astype(dtype)
 
         for target, dev in tvm.testing.enabled_targets():
-            for kind in ["vm", "debug"]:
-                mod = tvm.ir.IRModule.from_expr(func)
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    np_data, np.array([k]).astype("float32")
-                )
-                if ret_type == "both":
-                    tvm.testing.assert_allclose(op_res[0].numpy(), np_values)
-                    tvm.testing.assert_allclose(op_res[1].numpy(), np_indices)
-                elif ret_type == "values":
-                    tvm.testing.assert_allclose(op_res.numpy(), np_values)
-                else:
-                    tvm.testing.assert_allclose(op_res.numpy(), np_indices)
+            mod = tvm.ir.IRModule.from_expr(func)
+            op_res = relay.create_executor(
+                executor_kind, mod=mod, device=dev, target=target
+            ).evaluate()(np_data, np.array([k]).astype("float32"))
+            if ret_type == "both":
+                tvm.testing.assert_allclose(op_res[0].numpy(), np_values)
+                tvm.testing.assert_allclose(op_res[1].numpy(), np_indices)
+            elif ret_type == "values":
+                tvm.testing.assert_allclose(op_res.numpy(), np_values)
+            else:
+                tvm.testing.assert_allclose(op_res.numpy(), np_indices)
 
     np.random.seed(0)
     for k in [0, 1, 5]:
diff --git a/tests/python/relay/test_op_grad_level1.py b/tests/python/relay/test_op_grad_level1.py
index a31191a42c..cb94f297cf 100644
--- a/tests/python/relay/test_op_grad_level1.py
+++ b/tests/python/relay/test_op_grad_level1.py
@@ -26,6 +26,8 @@ from tvm import te, relay
 from tvm.relay.testing import check_grad, run_infer_type
 from tvm.relay.transform import gradient
 
+executor_kind = tvm.testing.parameter("debug")
+
 
 def sigmoid(x):
     one = np.ones_like(x)
@@ -67,7 +69,7 @@ class TestUnaryOp:
     dtype = tvm.testing.parameter("float32", "float64")
     shape = tvm.testing.parameter((10, 4))
 
-    def test_op(self, target, dev, relay_op, ref_func, shape, dtype):
+    def test_op(self, target, dev, executor_kind, relay_op, ref_func, shape, dtype):
 
         target = tvm.target.Target(target)
         if target.kind.name == "vulkan":
@@ -125,9 +127,9 @@ class TestUnaryOp:
         grad_in = np.random.rand(*shape).astype(dtype)
         ref_grad_out = ref_func(data_in, grad_in)
 
-        op_res, (op_grad, _) = relay.create_executor(device=dev, target=target).evaluate(bwd_func)(
-            data_in, grad_in
-        )
+        op_res, (op_grad, _) = relay.create_executor(
+            executor_kind, device=dev, target=target
+        ).evaluate(bwd_func)(data_in, grad_in)
         np.testing.assert_allclose(op_grad.numpy(), ref_grad_out, rtol=0.01)
 
 
@@ -143,7 +145,7 @@ class TestBinaryOp:
     dtype = tvm.testing.parameter("float32", "float64")
     shape = tvm.testing.parameter((5, 10, 5))
 
-    def test_binary_op(self, target, dev, relay_op, ref_func, shape, dtype):
+    def test_binary_op(self, target, dev, executor_kind, relay_op, ref_func, shape, dtype):
         t = relay.TensorType(shape, dtype=dtype)
         x = relay.var("x", t)
         y = relay.var("y", t)
@@ -156,31 +158,31 @@ class TestBinaryOp:
         fwd_func = run_infer_type(fwd_func)
         bwd_func = run_infer_type(gradient(fwd_func))
 
-        op_res, (op_grad0, op_grad1) = relay.create_executor(device=dev, target=target).evaluate(
-            bwd_func
-        )(x_data, y_data)
+        op_res, (op_grad0, op_grad1) = relay.create_executor(
+            executor_kind, device=dev, target=target
+        ).evaluate(bwd_func)(x_data, y_data)
         np.testing.assert_allclose(op_grad0.numpy(), ref_grad0, rtol=0.01)
         np.testing.assert_allclose(op_grad1.numpy(), ref_grad1, rtol=0.01)
 
 
-def test_softmax_grad(target, dev):
+def test_softmax_grad(executor_kind, target, dev):
     target = tvm.target.Target(target)
     if target.kind.name == "vulkan":
         pytest.xfail("Known failure on vulkan")
 
     data = relay.var("data", relay.TensorType((1, 16), "float64"))
     fwd_func = relay.Function([data], relay.nn.softmax(data))
-    check_grad(fwd_func, scale=1, target_devices=[(target, dev)])
+    check_grad(fwd_func, scale=1, target_devices=[(target, dev)], executor_kind=executor_kind)
 
 
-def test_log_softmax_grad(target, dev):
+def test_log_softmax_grad(executor_kind, target, dev):
     target = tvm.target.Target(target)
     if target.kind.name == "vulkan":
         pytest.xfail("Known failure on vulkan")
 
     data = relay.var("data", relay.TensorType((2, 16), "float64"))
     fwd_func = relay.Function([data], relay.nn.log_softmax(data))
-    check_grad(fwd_func, scale=1, target_devices=[(target, dev)])
+    check_grad(fwd_func, scale=1, target_devices=[(target, dev)], executor_kind=executor_kind)
 
 
 class TestBiasAddGrad:
@@ -191,25 +193,25 @@ class TestBiasAddGrad:
         ((4, 8), (8,), 1),
     )
 
-    def test_bias_add(self, target, dev, d_shape, b_shape, axis):
+    def test_bias_add(self, executor_kind, target, dev, d_shape, b_shape, axis):
         data = relay.var("data", relay.TensorType(d_shape, "float32"))
         bias = relay.var("bias", relay.TensorType(b_shape, "float32"))
         fwd_func = relay.Function([data, bias], relay.nn.bias_add(data, bias, axis=axis))
-        check_grad(fwd_func, target_devices=[(target, dev)])
+        check_grad(fwd_func, target_devices=[(target, dev)], executor_kind=executor_kind)
 
 
-def test_expand_dims_grad(target, dev):
+def test_expand_dims_grad(executor_kind, target, dev):
     data = relay.var("data", shape=(2, 3), dtype="float64")
     fwd_func = relay.Function([data], relay.expand_dims(data, axis=1, num_newaxis=2))
-    check_grad(fwd_func, target_devices=[(target, dev)])
+    check_grad(fwd_func, target_devices=[(target, dev)], executor_kind=executor_kind)
 
 
-def test_concatenate_grad(target, dev):
+def test_concatenate_grad(executor_kind, target, dev):
     x = relay.var("x", shape=(2, 2, 5))
     y = relay.var("y", shape=(2, 1, 5))
     z = relay.var("z", shape=(2, 4, 5))
     fwd_func = relay.Function([x, y, z], relay.concatenate([x, y, z], axis=1))
-    check_grad(fwd_func, target_devices=[(target, dev)])
+    check_grad(fwd_func, target_devices=[(target, dev)], executor_kind=executor_kind)
 
 
 if __name__ == "__main__":
diff --git a/tests/python/relay/test_op_grad_level10.py b/tests/python/relay/test_op_grad_level10.py
index 4c2c9082e0..6b2531a4a1 100644
--- a/tests/python/relay/test_op_grad_level10.py
+++ b/tests/python/relay/test_op_grad_level10.py
@@ -28,9 +28,10 @@ from tvm.relay.testing import check_grad
 
 index_dtype = tvm.testing.parameter("int32", "int64")
 val_dtype = tvm.testing.parameter("float32", "float64")
+executor_kind = tvm.testing.parameter("debug")
 
 
-def test_cross_entropy_grad(target, dev, val_dtype):
+def test_cross_entropy_grad(executor_kind, target, dev, val_dtype):
     target = tvm.target.Target(target)
     if target.kind.name == "vulkan" and val_dtype == "float64":
         # GLSL.std.450's Log implementation only takes 16/32-bit floats.
@@ -44,10 +45,11 @@ def test_cross_entropy_grad(target, dev, val_dtype):
         scale=0.1,
         mean=1,
         target_devices=[(target, dev)],
+        executor_kind=executor_kind,
     )
 
 
-def test_cross_entropy_with_logits_grad(target, dev, val_dtype):
+def test_cross_entropy_with_logits_grad(executor_kind, target, dev, val_dtype):
     x = relay.var("x", shape=(2, 5), dtype=val_dtype)
     y = relay.var("y", shape=(2, 5), dtype=val_dtype)
     check_grad(
@@ -56,13 +58,16 @@ def test_cross_entropy_with_logits_grad(target, dev, val_dtype):
         scale=0.1,
         mean=1,
         target_devices=[(target, dev)],
+        executor_kind=executor_kind,
     )
 
 
-def test_checkpoint(target, dev):
+def test_checkpoint(executor_kind, target, dev):
     inputs = [relay.var("x{}".format(i), shape=(1,)) for i in range(4)]
     output = relay.multiply(relay.add(inputs[0], inputs[1]), relay.add(inputs[2], inputs[3]))
-    check_grad(relay.Function(inputs, relay.annotation.checkpoint(output)))
+    check_grad(
+        relay.Function(inputs, relay.annotation.checkpoint(output)), executor_kind=executor_kind
+    )
 
     scope = relay.ScopeBuilder()
     out_tuple = scope.let(
@@ -76,7 +81,11 @@ def test_checkpoint(target, dev):
         )
     )
     out_single = scope.get()
-    check_grad(relay.Function(inputs, out_single), target_devices=[(target, dev)])
+    check_grad(
+        relay.Function(inputs, out_single),
+        target_devices=[(target, dev)],
+        executor_kind=executor_kind,
+    )
 
 
 class TestBatchMatmulGrad:
@@ -87,7 +96,9 @@ class TestBatchMatmulGrad:
         ((2, 5, 3), (2, 4, 5), True, True),
     )
 
-    def test_batch_matmul_grad(self, target, dev, a_shape, b_shape, transpose_a, transpose_b):
+    def test_batch_matmul_grad(
+        self, executor_kind, target, dev, a_shape, b_shape, transpose_a, transpose_b
+    ):
         tensor_a = relay.var("tensor_a", relay.TensorType(a_shape, "float32"))
         tensor_b = relay.var("tensor_b", relay.TensorType(b_shape, "float32"))
         check_grad(
@@ -98,18 +109,20 @@ class TestBatchMatmulGrad:
                 ),
             ),
             target_devices=[(target, dev)],
+            executor_kind=executor_kind,
         )
 
 
-def test_reverse_reshape_grad(target, dev):
+def test_reverse_reshape_grad(executor_kind, target, dev):
     x = relay.var("x", shape=(3, 4, 5), dtype="float64")
     check_grad(
         relay.Function([x], relay.op.reverse_reshape(x, (-1, 0))),
         target_devices=[(target, dev)],
+        executor_kind=executor_kind,
     )
 
 
-def test_one_hot_grad(target, dev, index_dtype, val_dtype):
+def test_one_hot_grad(executor_kind, target, dev, index_dtype, val_dtype):
     indices_shape = (3, 4)
     depth = 5
     axis = -1
@@ -127,7 +140,13 @@ def test_one_hot_grad(target, dev, index_dtype, val_dtype):
     y = relay.one_hot(indices, on_val, off_val, depth, axis, val_dtype)
     f = relay.Function([indices, on_val, off_val], y)
 
-    check_grad(f, inputs=inputs, test_inputs=test_inputs, target_devices=[(target, dev)])
+    check_grad(
+        f,
+        inputs=inputs,
+        test_inputs=test_inputs,
+        target_devices=[(target, dev)],
+        executor_kind=executor_kind,
+    )
 
 
 if __name__ == "__main__":
diff --git a/tests/python/relay/test_op_grad_level2.py b/tests/python/relay/test_op_grad_level2.py
index fcdcfe6acc..820f724bfc 100644
--- a/tests/python/relay/test_op_grad_level2.py
+++ b/tests/python/relay/test_op_grad_level2.py
@@ -25,8 +25,10 @@ from tvm.relay.testing import check_grad, run_infer_type, run_opt_pass
 from tvm.relay.transform import gradient
 import tvm.testing
 
+executor_kind = tvm.testing.parameter("debug")
 
-def verify_max_pool2d_grad(x_shape, pool_size, strides, padding, ceil_mode):
+
+def verify_max_pool2d_grad(executor_kind, x_shape, pool_size, strides, padding, ceil_mode):
     x = relay.var("x", relay.TensorType(x_shape, "float32"))
     y = tvm.relay.nn.max_pool2d(
         x, pool_size=pool_size, strides=strides, padding=padding, ceil_mode=ceil_mode
@@ -51,24 +53,41 @@ def verify_max_pool2d_grad(x_shape, pool_size, strides, padding, ceil_mode):
     )
 
     for target, dev in tvm.testing.enabled_targets():
-        op_res, (op_grad,) = relay.create_executor(device=dev, target=target).evaluate(bwd_func)(
-            data
-        )
+        op_res, (op_grad,) = relay.create_executor(
+            executor_kind, device=dev, target=target
+        ).evaluate(bwd_func)(data)
         np.testing.assert_allclose(op_grad.numpy(), ref_grad, rtol=0.01)
 
 
 @tvm.testing.uses_gpu
-def test_max_pool2d_grad():
+def test_max_pool2d_grad(executor_kind):
     verify_max_pool2d_grad(
-        (1, 4, 16, 16), pool_size=(2, 2), strides=(2, 2), padding=(0, 0), ceil_mode=False
+        executor_kind,
+        (1, 4, 16, 16),
+        pool_size=(2, 2),
+        strides=(2, 2),
+        padding=(0, 0),
+        ceil_mode=False,
     )
     verify_max_pool2d_grad(
-        (1, 4, 16, 16), pool_size=(1, 1), strides=(1, 1), padding=(1, 1), ceil_mode=False
+        executor_kind,
+        (1, 4, 16, 16),
+        pool_size=(1, 1),
+        strides=(1, 1),
+        padding=(1, 1),
+        ceil_mode=False,
     )
 
 
 def verify_avg_pool2d_grad(
-    x_shape, pool_size, strides, padding, ceil_mode, count_include_pad, dtype="float32"
+    x_shape,
+    pool_size,
+    strides,
+    padding,
+    ceil_mode,
+    count_include_pad,
+    executor_kind,
+    dtype="float32",
 ):
 
     for shape_dtype in ["int32", "int64"]:
@@ -101,14 +120,14 @@ def verify_avg_pool2d_grad(
         )
 
         for target, dev in tvm.testing.enabled_targets():
-            op_res, (op_grad,) = relay.create_executor(device=dev, target=target).evaluate(
-                bwd_func
-            )(data)
+            op_res, (op_grad,) = relay.create_executor(
+                executor_kind, device=dev, target=target
+            ).evaluate(bwd_func)(data)
             np.testing.assert_allclose(op_grad.numpy(), ref_grad, rtol=0.01)
 
 
 @tvm.testing.uses_gpu
-def test_avg_pool2d_grad():
+def test_avg_pool2d_grad(executor_kind):
     verify_avg_pool2d_grad(
         (1, 4, 16, 16),
         pool_size=(2, 2),
@@ -116,6 +135,7 @@ def test_avg_pool2d_grad():
         padding=(0, 0),
         ceil_mode=False,
         count_include_pad=True,
+        executor_kind=executor_kind,
     )
     verify_avg_pool2d_grad(
         (1, 4, 16, 16),
@@ -124,6 +144,7 @@ def test_avg_pool2d_grad():
         padding=(1, 1),
         ceil_mode=False,
         count_include_pad=False,
+        executor_kind=executor_kind,
     )
     verify_avg_pool2d_grad(
         (1, 4, 16, 16),
@@ -132,11 +153,12 @@ def test_avg_pool2d_grad():
         padding=(1, 1),
         ceil_mode=False,
         count_include_pad=False,
+        executor_kind=executor_kind,
         dtype="int32",
     )
 
 
-def verify_global_avg_pool2d_grad(x_shape):
+def verify_global_avg_pool2d_grad(executor_kind, x_shape):
     x = relay.var("x", relay.TensorType(x_shape, "float32"))
     y = tvm.relay.nn.global_avg_pool2d(x)
 
@@ -158,19 +180,21 @@ def verify_global_avg_pool2d_grad(x_shape):
     )
 
     for target, dev in tvm.testing.enabled_targets():
-        op_res, (op_grad,) = relay.create_executor(device=dev, target=target).evaluate(bwd_func)(
-            data
-        )
+        op_res, (op_grad,) = relay.create_executor(
+            executor_kind, device=dev, target=target
+        ).evaluate(bwd_func)(data)
         np.testing.assert_allclose(op_grad.numpy(), ref_grad, rtol=0.01)
 
 
 @tvm.testing.uses_gpu
-def test_global_avg_pool2d_grad():
-    verify_global_avg_pool2d_grad((1, 4, 16, 16))
-    verify_global_avg_pool2d_grad((1, 8, 8, 24))
+def test_global_avg_pool2d_grad(executor_kind):
+    verify_global_avg_pool2d_grad(executor_kind, (1, 4, 16, 16))
+    verify_global_avg_pool2d_grad(executor_kind, (1, 8, 8, 24))
 
 
-def verify_conv2d_grad(dshape, wshape, strides, padding, dilation, groups=1, mode="higher_order"):
+def verify_conv2d_grad(
+    dshape, wshape, strides, padding, dilation, groups=1, mode="higher_order", executor_kind="vm"
+):
     dtype = "float32"
     data = relay.var("data", shape=dshape, dtype=dtype)
     weight = relay.var("weight", shape=wshape, dtype=dtype)
@@ -184,59 +208,73 @@ def verify_conv2d_grad(dshape, wshape, strides, padding, dilation, groups=1, mod
         out_dtype=dtype,
     )
     fwd_func = relay.Function([data, weight], conv)
-    check_grad(fwd_func, mode=mode)
+    check_grad(fwd_func, mode=mode, executor_kind=executor_kind)
 
 
 @tvm.testing.uses_gpu
-def test_conv2d_grad():
-    verify_conv2d_grad((1, 4, 16, 16), (16, 4, 3, 3), [1, 1], [1, 1], [1, 1])
-    verify_conv2d_grad((1, 4, 16, 16), (16, 4, 1, 1), [1, 1], [0, 0], [1, 1])
-    verify_conv2d_grad((1, 4, 16, 16), (16, 4, 1, 1), [2, 2], [0, 0], [1, 1])
-    verify_conv2d_grad((1, 4, 16, 16), (16, 4, 3, 3), [1, 1], [1, 1], [1, 1], mode="first_order")
+def test_conv2d_grad(executor_kind):
+    verify_conv2d_grad(
+        (1, 4, 16, 16), (16, 4, 3, 3), [1, 1], [1, 1], [1, 1], executor_kind=executor_kind
+    )
+    verify_conv2d_grad(
+        (1, 4, 16, 16), (16, 4, 1, 1), [1, 1], [0, 0], [1, 1], executor_kind=executor_kind
+    )
+    verify_conv2d_grad(
+        (1, 4, 16, 16), (16, 4, 1, 1), [2, 2], [0, 0], [1, 1], executor_kind=executor_kind
+    )
+    verify_conv2d_grad(
+        (1, 4, 16, 16),
+        (16, 4, 3, 3),
+        [1, 1],
+        [1, 1],
+        [1, 1],
+        mode="first_order",
+        executor_kind=executor_kind,
+    )
 
 
-def verify_dense_grad(d_shape, w_shape):
+def verify_dense_grad(d_shape, w_shape, executor_kind):
     data = relay.var("data", relay.TensorType(d_shape, "float32"))
     weight = relay.var("weight", relay.TensorType(w_shape, "float32"))
     fwd_func = relay.Function([data, weight], relay.nn.dense(data, weight))
-    check_grad(fwd_func)
+    check_grad(fwd_func, executor_kind=executor_kind)
 
 
-def test_dense_grad():
-    verify_dense_grad((1, 8), (16, 8))
-    verify_dense_grad((1, 4), (3, 4))
-    verify_dense_grad((5, 4), (3, 4))
+def test_dense_grad(executor_kind):
+    verify_dense_grad((1, 8), (16, 8), executor_kind)
+    verify_dense_grad((1, 4), (3, 4), executor_kind)
+    verify_dense_grad((5, 4), (3, 4), executor_kind)
 
 
-def verify_matmul_grad(a_shape, b_shape, transpose_a, transpose_b):
+def verify_matmul_grad(a_shape, b_shape, transpose_a, transpose_b, executor_kind):
     tensor_a = relay.var("tensor_a", relay.TensorType(a_shape, "float32"))
     tensor_b = relay.var("tensor_b", relay.TensorType(b_shape, "float32"))
     fwd_func = relay.Function(
         [tensor_a, tensor_b],
         relay.nn.matmul(tensor_a, tensor_b, transpose_a=transpose_a, transpose_b=transpose_b),
     )
-    check_grad(fwd_func)
+    check_grad(fwd_func, executor_kind=executor_kind)
 
 
-def test_matmul_grad():
-    verify_matmul_grad((1, 8), (8, 16), False, False)
-    verify_matmul_grad((4, 1), (4, 3), True, False)
-    verify_matmul_grad((4, 5), (3, 4), True, True)
+def test_matmul_grad(executor_kind):
+    verify_matmul_grad((1, 8), (8, 16), False, False, executor_kind)
+    verify_matmul_grad((4, 1), (4, 3), True, False, executor_kind)
+    verify_matmul_grad((4, 5), (3, 4), True, True, executor_kind)
 
 
-def verify_batch_flatten_grad(d_shape):
+def verify_batch_flatten_grad(d_shape, executor_kind):
     data = relay.var("data", relay.TensorType(d_shape, "float32"))
     fwd_func = relay.Function([data], relay.nn.batch_flatten(data))
-    check_grad(fwd_func)
+    check_grad(fwd_func, executor_kind=executor_kind)
 
 
-def test_batch_flatten_grad():
-    verify_batch_flatten_grad((1, 2, 3, 4))
-    verify_batch_flatten_grad((1, 8))
+def test_batch_flatten_grad(executor_kind):
+    verify_batch_flatten_grad((1, 2, 3, 4), executor_kind)
+    verify_batch_flatten_grad((1, 8), executor_kind)
 
 
 def verify_conv2d_backward_weight(
-    dy_shape, x_shape, kernel_size, stride, padding, groups=1, out_channels=None
+    executor_kind, dy_shape, x_shape, kernel_size, stride, padding, groups=1, out_channels=None
 ):
     dtype = "float32"
     dy = relay.var("dy", shape=dy_shape, dtype=dtype)
@@ -265,7 +303,11 @@ def verify_conv2d_backward_weight(
         dy_np = np.random.randn(*dy_shape).astype(dtype)
         x_np = np.random.randn(*x_shape).astype(dtype)
 
-        dw_np = relay.create_executor(device=dev, target=target).evaluate(dw)(dy_np, x_np).numpy()
+        dw_np = (
+            relay.create_executor(executor_kind, device=dev, target=target)
+            .evaluate(dw)(dy_np, x_np)
+            .numpy()
+        )
         ref_dw_np = tvm.topi.testing.conv2d_backward_weight_python(
             dy_np, x_np, kernel_size, stride, padding, groups=groups, channels=out_channels
         )
@@ -273,11 +315,22 @@ def verify_conv2d_backward_weight(
         np.testing.assert_allclose(dw_np, ref_dw_np, rtol=1e-4, atol=1e-4)
 
 
-def test_conv2d_backward_weight():
-    verify_conv2d_backward_weight((2, 8, 32, 32), (2, 4, 32, 32), (3, 3), (1, 1), (1, 1))
-    verify_conv2d_backward_weight((2, 16, 15, 15), (2, 3, 32, 32), (3, 3), (2, 2), (0, 0))
+def test_conv2d_backward_weight(executor_kind):
+    verify_conv2d_backward_weight(
+        executor_kind, (2, 8, 32, 32), (2, 4, 32, 32), (3, 3), (1, 1), (1, 1)
+    )
+    verify_conv2d_backward_weight(
+        executor_kind, (2, 16, 15, 15), (2, 3, 32, 32), (3, 3), (2, 2), (0, 0)
+    )
     verify_conv2d_backward_weight(
-        (1, 16, 32, 32), (1, 16, 32, 32), (3, 3), (1, 1), (1, 1), groups=16, out_channels=16
+        executor_kind,
+        (1, 16, 32, 32),
+        (1, 16, 32, 32),
+        (3, 3),
+        (1, 1),
+        (1, 1),
+        groups=16,
+        out_channels=16,
     )
 
 
diff --git a/tests/python/relay/test_op_grad_level3.py b/tests/python/relay/test_op_grad_level3.py
index 30d849853d..89b8199b9e 100644
--- a/tests/python/relay/test_op_grad_level3.py
+++ b/tests/python/relay/test_op_grad_level3.py
@@ -24,9 +24,11 @@ from tvm.relay.testing import check_grad, run_infer_type, run_opt_pass, _np_rand
 from tvm.relay.transform import gradient
 import tvm.testing
 
+executor_kind = tvm.testing.parameter("debug")
+
 
 @tvm.testing.uses_gpu
-def test_clip():
+def test_clip(executor_kind):
     for dtype in ("float32", "float64"):
         ref = lambda x: np.where(
             x > 10.0, np.zeros_like(x), np.where(x < 1.0, np.zeros_like(x), np.ones_like(x))
@@ -41,49 +43,49 @@ def test_clip():
         bwd_func = run_infer_type(gradient(fwd_func))
 
         for target, dev in tvm.testing.enabled_targets():
-            op_res, (op_grad,) = relay.create_executor(device=dev, target=target).evaluate(
-                bwd_func
-            )(data)
+            op_res, (op_grad,) = relay.create_executor(
+                executor_kind, device=dev, target=target
+            ).evaluate(bwd_func)(data)
             np.testing.assert_allclose(op_grad.numpy(), ref_grad, rtol=0.01)
 
 
-def verify_transpose_grad(d_shape, axes=None):
+def verify_transpose_grad(d_shape, axes=None, executor_kind="vm"):
     data = relay.var("data", relay.TensorType(d_shape, "float32"))
     fwd_func = relay.Function([data], relay.transpose(data, axes=axes))
-    check_grad(fwd_func)
+    check_grad(fwd_func, executor_kind=executor_kind)
 
 
-def test_transpose_grad():
-    verify_transpose_grad((1, 2, 3, 4))
-    verify_transpose_grad((1, 2, 3, 4), axes=(0, 2, 3, 1))
+def test_transpose_grad(executor_kind):
+    verify_transpose_grad((1, 2, 3, 4), executor_kind=executor_kind)
+    verify_transpose_grad((1, 2, 3, 4), axes=(0, 2, 3, 1), executor_kind=executor_kind)
 
 
-def test_negative_grad():
+def test_negative_grad(executor_kind):
     data = relay.var("data", relay.TensorType((10, 4), "float32"))
     fwd_func = relay.Function([data], relay.negative(data))
-    check_grad(fwd_func)
+    check_grad(fwd_func, executor_kind=executor_kind)
 
 
-def test_cast_grad():
+def test_cast_grad(executor_kind):
     data = relay.var("data", relay.TensorType((10, 4), "float32"))
     fwd_func = relay.Function([data], relay.cast(data, "float64"))
-    check_grad(fwd_func)
+    check_grad(fwd_func, executor_kind=executor_kind)
 
 
-def test_cast_like_grad():
+def test_cast_like_grad(executor_kind):
     data = relay.var("data", shape=(10, 4), dtype="float32")
     like = relay.var("like", shape=(1,), dtype="float64")
     fwd_func = relay.Function([data, like], relay.cast_like(data, like))
-    check_grad(fwd_func)
+    check_grad(fwd_func, executor_kind=executor_kind)
 
 
-def test_copy_grad():
+def test_copy_grad(executor_kind):
     data = relay.var("data", relay.TensorType((10, 4), "float64"))
     fwd_func = relay.Function([data], relay.copy(data))
-    check_grad(fwd_func)
+    check_grad(fwd_func, executor_kind=executor_kind)
 
 
-def test_take_grad():
+def test_take_grad(executor_kind):
     data_dtype = relay.TensorType((3, 4, 5), "float64")
     data = relay.var("data", data_dtype)
     indices = relay.var("indices", relay.TensorType((relay.Any(),), "int32"))
@@ -92,28 +94,28 @@ def test_take_grad():
 
     # take on axis
     fwd_func = relay.Function([data, indices], relay.take(data, indices, axis=1))
-    check_grad(fwd_func, inputs=inputs, test_inputs=test_inputs)
+    check_grad(fwd_func, inputs=inputs, test_inputs=test_inputs, executor_kind=executor_kind)
 
     # take on flattened
     fwd_func = relay.Function([data, indices], relay.take(data, indices, axis=None))
-    check_grad(fwd_func, inputs=inputs, test_inputs=test_inputs)
+    check_grad(fwd_func, inputs=inputs, test_inputs=test_inputs, executor_kind=executor_kind)
 
 
-def test_stack_grad():
+def test_stack_grad(executor_kind):
     args = [relay.var(c, shape=(2, 3, 4), dtype="float64") for c in "xyz"]
     fwd_func = relay.Function(args, relay.stack(args, axis=0))
-    check_grad(fwd_func)
+    check_grad(fwd_func, executor_kind=executor_kind)
 
 
-def test_squeeze_grad():
+def test_squeeze_grad(executor_kind):
     data = relay.var("data", shape=(2, 1, 1, 3, 4, 1), dtype="float64")
     fwd_func = relay.Function([data], relay.squeeze(data))
     fwd_func_subset = relay.Function([data], relay.squeeze(data, axis=[1, -1]))
-    check_grad(fwd_func)
-    check_grad(fwd_func_subset)
+    check_grad(fwd_func, executor_kind=executor_kind)
+    check_grad(fwd_func_subset, executor_kind=executor_kind)
 
 
-def test_arange_grad():
+def test_arange_grad(executor_kind):
     # TODO: testing arange numerically is strange because two-sided approx can
     #       produce different output shapes
     dtype = "float64"
@@ -122,23 +124,25 @@ def test_arange_grad():
     step = relay.var("step", relay.TensorType((), dtype))
     values = [np.array(v, dtype=dtype) for v in [2.5, 9.5, 1.8]]
     fwd_func = relay.Function([start, stop, step], relay.arange(start, stop, step, dtype))
-    check_grad(fwd_func, inputs=values)
+    check_grad(fwd_func, inputs=values, executor_kind=executor_kind)
 
 
-def test_gather_nd_grad():
+def test_gather_nd_grad(executor_kind):
     data = relay.var("data", relay.TensorType((2, 3), "float64"))
     indices = relay.var("indices", relay.TensorType((2, 4), "int64"))
     fwd = relay.Function([data, indices], relay.gather_nd(data, indices))
     data_np = np.random.rand(2, 3).astype("float64")
     indices_np = np.array([[0, 1, 1, 0], [0, 1, 0, 0]], dtype="int64")
-    check_grad(fwd, inputs=[data_np, indices_np], test_inputs=[data_np])
+    check_grad(
+        fwd, inputs=[data_np, indices_np], test_inputs=[data_np], executor_kind=executor_kind
+    )
 
 
-def test_reshape_like_grad():
+def test_reshape_like_grad(executor_kind):
     data = relay.var("data", shape=(2, 3, 4), dtype="float32")
     shape_like = relay.var("shape_like", shape=(6, 2, 2), dtype="float32")
     fwd_func = relay.Function([data, shape_like], relay.reshape_like(data, shape_like))
-    check_grad(fwd_func)
+    check_grad(fwd_func, executor_kind=executor_kind)
 
 
 def test_zeros_ones_grad_const_ints():
@@ -172,7 +176,7 @@ def test_zeros_ones_grad_const_expr():
         tvm.ir.assert_structural_equal(bwd_func.ret_type, expected_ty_dyn)
 
 
-def test_zeros_ones_grad_dynamic():
+def test_zeros_ones_grad_dynamic(executor_kind):
     rank = np.random.randint(low=1, high=5, dtype="int32")
     dyn_shape = np.random.randint(low=1, high=4, size=(rank,), dtype="int32")
     shape_data = relay.var("shape_data", shape=(rank,), dtype="int32")
@@ -182,9 +186,9 @@ def test_zeros_ones_grad_dynamic():
         bwd_func = run_infer_type(gradient(run_infer_type(fwd_func)))
 
         for target, dev in tvm.testing.enabled_targets():
-            res, (grad,) = relay.create_executor(device=dev, target=target).evaluate(bwd_func)(
-                dyn_shape
-            )
+            res, (grad,) = relay.create_executor(executor_kind, device=dev, target=target).evaluate(
+                bwd_func
+            )(dyn_shape)
             tvm.testing.assert_allclose(res.numpy(), op_ref(dyn_shape, dtype="float32"))
             tvm.testing.assert_allclose(grad.numpy(), np.zeros((rank,), dtype="int32"))
 
diff --git a/tests/python/relay/test_op_grad_level4.py b/tests/python/relay/test_op_grad_level4.py
index 17d30cacac..9ed2ef2627 100644
--- a/tests/python/relay/test_op_grad_level4.py
+++ b/tests/python/relay/test_op_grad_level4.py
@@ -16,43 +16,46 @@
 # under the License.
 import pytest
 import numpy as np
+import tvm.testing
 from tvm import relay
 from tvm.relay.testing import check_grad, _np_randn_from_type
 
+executor_kind = tvm.testing.parameter("debug")
 
-def verify_reduction_grad(red_fn, d_shape, axis=None, keepdims=False, exclude=False):
+
+def verify_reduction_grad(executor_kind, red_fn, d_shape, axis=None, keepdims=False, exclude=False):
     data = relay.var("data", relay.TensorType(d_shape, "float32"))
     fwd_func = relay.Function([data], red_fn(data, axis=axis, keepdims=keepdims, exclude=exclude))
-    check_grad(fwd_func)
+    check_grad(fwd_func, executor_kind=executor_kind)
 
 
-def test_reduction_grad():
+def test_reduction_grad(executor_kind):
     def _unbiased_variance(x, axis=None, keepdims=False, exclude=False):
         return relay.variance(x, axis=axis, keepdims=keepdims, exclude=exclude, unbiased=True)
 
     for op in (relay.sum, relay.variance, _unbiased_variance, relay.mean):
-        verify_reduction_grad(op, (4, 2))
-        verify_reduction_grad(op, (4, 2), axis=-1, keepdims=True)
-        verify_reduction_grad(op, (4, 2, 1), axis=(1, 2), exclude=True)
-        verify_reduction_grad(op, (4, 2, 1), axis=1)
+        verify_reduction_grad(executor_kind, op, (4, 2))
+        verify_reduction_grad(executor_kind, op, (4, 2), axis=-1, keepdims=True)
+        verify_reduction_grad(executor_kind, op, (4, 2, 1), axis=(1, 2), exclude=True)
+        verify_reduction_grad(executor_kind, op, (4, 2, 1), axis=1)
 
 
-def verify_max_grad(d_shape, axis=None, keepdims=False, exclude=False):
+def verify_max_grad(executor_kind, d_shape, axis=None, keepdims=False, exclude=False):
     data = relay.var("data", relay.TensorType(d_shape, "float32"))
     fwd_func = relay.Function(
         [data], relay.max(data, axis=axis, keepdims=keepdims, exclude=exclude)
     )
-    check_grad(fwd_func, scale=1e-3)
+    check_grad(fwd_func, scale=1e-3, executor_kind=executor_kind)
 
 
-def test_max_grad():
-    verify_max_grad((10, 10), axis=None)
-    verify_max_grad((10, 10), axis=-1)
-    verify_max_grad((6, 3, 2), axis=(1, 2), keepdims=True)
-    verify_max_grad((5, 4, 3), axis=(0, 2), exclude=True)
+def test_max_grad(executor_kind):
+    verify_max_grad(executor_kind, (10, 10), axis=None)
+    verify_max_grad(executor_kind, (10, 10), axis=-1)
+    verify_max_grad(executor_kind, (6, 3, 2), axis=(1, 2), keepdims=True)
+    verify_max_grad(executor_kind, (5, 4, 3), axis=(0, 2), exclude=True)
 
 
-def test_where_grad():
+def test_where_grad(executor_kind):
     cond_type = relay.TensorType((2, 3, 4), "int32")
     lhs_type = relay.TensorType((1, 3, 4), "float32")
     rhs_type = relay.TensorType((2, 1, 4), "float32")
@@ -66,10 +69,10 @@ def test_where_grad():
     lhs = relay.var("lhs", type_annotation=lhs_type)
     rhs = relay.var("rhs", type_annotation=rhs_type)
     fwd_func = relay.Function([cond, lhs, rhs], relay.where(cond, lhs, rhs))
-    check_grad(fwd_func, inputs=inputs, test_inputs=inputs[1:])
+    check_grad(fwd_func, inputs=inputs, test_inputs=inputs[1:], executor_kind=executor_kind)
 
 
-def test_less_equal_grad():
+def test_less_equal_grad(executor_kind):
     x_type = relay.TensorType((2, 3, 4), "float32")
     y_type = relay.TensorType((3, 1), "float32")
     # We need to generate inputs far apart to get correct numerical gradients
@@ -83,10 +86,10 @@ def test_less_equal_grad():
     x = relay.var("x", type_annotation=x_type)
     y = relay.var("y", type_annotation=y_type)
     fwd_func = relay.Function([x, y], relay.less_equal(x, y))
-    check_grad(fwd_func, inputs=inputs, test_inputs=inputs, eps=1e-6)
+    check_grad(fwd_func, inputs=inputs, test_inputs=inputs, eps=1e-6, executor_kind=executor_kind)
 
 
-def test_not_equal_grad():
+def test_not_equal_grad(executor_kind):
     x_type = relay.TensorType((2, 3, 4), "float32")
     y_type = relay.TensorType((3, 1), "float32")
     # We need to generate inputs far apart to get correct numerical gradients
@@ -100,17 +103,17 @@ def test_not_equal_grad():
     x = relay.var("x", type_annotation=x_type)
     y = relay.var("y", type_annotation=y_type)
     fwd_func = relay.Function([x, y], relay.not_equal(x, y))
-    check_grad(fwd_func, inputs=inputs, test_inputs=inputs, eps=1e-6)
+    check_grad(fwd_func, inputs=inputs, test_inputs=inputs, eps=1e-6, executor_kind=executor_kind)
 
 
-def test_strided_slice_grad():
+def test_strided_slice_grad(executor_kind):
     def check(sh, dtype, begin, end, strides, slice_mode):
         x = relay.var("x", shape=sh, dtype=dtype)
         f = relay.Function(
             [x],
             relay.strided_slice(x, begin=begin, end=end, strides=strides, slice_mode=slice_mode),
         )
-        check_grad(f)
+        check_grad(f, executor_kind=executor_kind)
 
     check((2, 3, 4), "float32", (0, 1, 0), (-1, -1, 1), (1, 1, 1), "size")
     check((2, 3, 4), "float32", (0, 1, 0), (2, 3, 1), (1, 1, 1), "end")
diff --git a/tests/python/relay/test_op_level1.py b/tests/python/relay/test_op_level1.py
index d4238f81e0..1b72e5ce51 100644
--- a/tests/python/relay/test_op_level1.py
+++ b/tests/python/relay/test_op_level1.py
@@ -26,6 +26,8 @@ import tvm.topi.testing
 from tvm.contrib.nvcc import have_fp16
 import tvm.testing
 
+executor_kind = tvm.testing.parameter("graph", "vm")
+
 
 def sigmoid(x):
     one = np.ones_like(x)
@@ -286,7 +288,7 @@ def test_log_softmax():
 
 
 @tvm.testing.uses_gpu
-def test_concatenate():
+def test_concatenate(executor_kind):
     for dtype in ["float16", "float32"]:
         n, t, d = te.size_var("n"), te.size_var("t"), 100
         x = relay.var("x", shape=(n, t, d))
@@ -336,17 +338,13 @@ def test_concatenate():
                 and not have_fp16(tvm.cuda(0).compute_version)
             ):
                 continue
-            op_res1 = relay.create_executor("graph", device=dev, target=target).evaluate(func)(
-                x_data, y_data, t_data
-            )
-            tvm.testing.assert_allclose(op_res1.numpy(), ref_res, rtol=0.01)
-            op_res2 = relay.create_executor("debug", device=dev, target=target).evaluate(func)(
+            op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)(
                 x_data, y_data, t_data
             )
-            tvm.testing.assert_allclose(op_res2.numpy(), ref_res, rtol=0.01)
+            tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=0.01)
 
 
-def test_dropout():
+def test_dropout(executor_kind):
     for dtype in ["float16", "float32"]:
         n, t, d = te.size_var("n"), te.size_var("t"), te.size_var("d")
         input_ty = relay.TensorType((n, t, d), dtype)
@@ -361,9 +359,8 @@ def test_dropout():
     y = relay.nn.dropout(x, rate=0.5)
     func = relay.Function([], y)
     for target, dev in tvm.testing.enabled_targets():
-        for backend in ["debug", "graph"]:
-            op_res = relay.create_executor("debug", device=dev, target=target).evaluate(func)()
-            tvm.testing.assert_allclose(op_res.numpy(), in_np, rtol=0.01)
+        op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)()
+        tvm.testing.assert_allclose(op_res.numpy(), in_np, rtol=0.01)
 
 
 def test_batch_norm():
@@ -490,7 +487,7 @@ def test_matmul_type_check():
 
 
 @tvm.testing.uses_gpu
-def test_matmul():
+def test_matmul(executor_kind):
     for dtype in ["float16", "float32"]:
         # Matmul accuracy for float16 is poor
         if dtype == "float16":
@@ -529,14 +526,10 @@ def test_matmul():
         ref_res = np.dot(x_data.transpose(), w_data)
 
         for target, dev in tvm.testing.enabled_targets():
-            op_res1 = relay.create_executor("graph", device=dev, target=target).evaluate(func)(
+            op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)(
                 x_data, w_data
             )
-            tvm.testing.assert_allclose(op_res1.numpy(), ref_res, rtol=1e-5)
-            op_res2 = relay.create_executor("debug", device=dev, target=target).evaluate(func)(
-                x_data, w_data
-            )
-            tvm.testing.assert_allclose(op_res2.numpy(), ref_res, rtol=1e-5)
+            tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5)
 
 
 @pytest.mark.xfail
@@ -552,7 +545,7 @@ def test_dense_type_check():
 
 
 @tvm.testing.uses_gpu
-def test_dense():
+def test_dense(executor_kind):
     for dtype in ["float16", "float32"]:
         # Dense accuracy for float16 is poor
         if dtype == "float16":
@@ -591,14 +584,10 @@ def test_dense():
         ref_res = np.dot(x_data, w_data.T)
 
         for target, dev in tvm.testing.enabled_targets():
-            op_res1 = relay.create_executor("graph", device=dev, target=target).evaluate(func)(
-                x_data, w_data
-            )
-            tvm.testing.assert_allclose(op_res1.numpy(), ref_res, rtol=1e-5)
-            op_res2 = relay.create_executor("debug", device=dev, target=target).evaluate(func)(
+            op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)(
                 x_data, w_data
             )
-            tvm.testing.assert_allclose(op_res2.numpy(), ref_res, rtol=1e-5)
+            tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5)
 
 
 @tvm.testing.uses_gpu
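
Each test file touched by this commit gains a module-level executor_kind = tvm.testing.parameter("graph", "vm"), as in the hunk above. tvm.testing.parameter declares a pytest fixture, so any test function that accepts an executor_kind argument runs once per value, and the old inner loops over ["graph", "debug"] collapse into a single relay.create_executor(executor_kind, ...) call. A hedged sketch of the pattern in isolation; the add-one function below is illustrative and not part of the commit.

    import numpy as np
    import tvm
    import tvm.testing
    from tvm import relay

    executor_kind = tvm.testing.parameter("graph", "vm")

    def test_add_one(executor_kind):
        x = relay.var("x", shape=(3,), dtype="float32")
        func = relay.Function([x], relay.add(x, relay.const(1.0)))
        x_np = np.arange(3, dtype="float32")
        for target, dev in tvm.testing.enabled_targets():
            op_res = relay.create_executor(
                executor_kind, device=dev, target=target
            ).evaluate(func)(x_np)
            tvm.testing.assert_allclose(op_res.numpy(), x_np + 1.0, rtol=1e-5)
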
diff --git a/tests/python/relay/test_op_level10.py b/tests/python/relay/test_op_level10.py
index 8ee5adbb31..7e0b8ad89f 100644
--- a/tests/python/relay/test_op_level10.py
+++ b/tests/python/relay/test_op_level10.py
@@ -27,9 +27,11 @@ from tvm import relay, te, topi
 from tvm.relay import transform
 from tvm.relay.testing import run_infer_type
 
+executor_kind = tvm.testing.parameter("graph", "vm")
+
 
 @tvm.testing.uses_gpu
-def test_checkpoint():
+def test_checkpoint(executor_kind):
     dtype = "float32"
     xs = [relay.var("x{}".format(i), dtype) for i in range(4)]
     f = relay.multiply(relay.add(xs[0], xs[1]), relay.add(xs[2], xs[3]))
@@ -41,12 +43,11 @@ def test_checkpoint():
 
     inputs = [np.random.uniform() for _ in range(len(xs))]
     for target, dev in tvm.testing.enabled_targets():
-        for kind in ["graph", "debug"]:
-            f_res = relay.create_executor(kind, device=dev, target=target).evaluate(f)(*inputs)
-            f_checkpoint_res = relay.create_executor(kind, device=dev, target=target).evaluate(
-                f_checkpoint
-            )(*inputs)
-            tvm.testing.assert_allclose(f_res.numpy(), f_checkpoint_res.numpy(), 0, 0)
+        f_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(f)(*inputs)
+        f_checkpoint_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(
+            f_checkpoint
+        )(*inputs)
+        tvm.testing.assert_allclose(f_res.numpy(), f_checkpoint_res.numpy(), 0, 0)
 
 
 def test_checkpoint_alpha_equal():
@@ -171,7 +172,7 @@ def test_checkpoint_alpha_equal_tuple():
 
 
 @tvm.testing.uses_gpu
-def test_collapse_sum_like():
+def test_collapse_sum_like(executor_kind):
     shape = (3, 4, 5, 6)
     shape_like = (4, 5, 6)
     dtype = "float32"
@@ -186,13 +187,14 @@ def test_collapse_sum_like():
     y = np.random.uniform(size=shape_like).astype(dtype)
     ref_res = np.sum(x, 0)
     for target, dev in tvm.testing.enabled_targets():
-        for kind in ["graph", "debug"]:
-            op_res = relay.create_executor(kind, device=dev, target=target).evaluate(func)(x, y)
-            tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5)
+        op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)(
+            x, y
+        )
+        tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5)
 
 
 @tvm.testing.uses_gpu
-def test_collapse_sum_to():
+def test_collapse_sum_to(executor_kind):
     shape = (3, 4, 5, 6)
     shape_to = (4, 5, 6)
     dtype = "float32"
@@ -205,13 +207,12 @@ def test_collapse_sum_to():
     x = np.random.uniform(size=shape).astype(dtype)
     ref_res = np.sum(x, 0)
     for target, dev in tvm.testing.enabled_targets():
-        for kind in ["graph", "debug"]:
-            op_res = relay.create_executor(kind, device=dev, target=target).evaluate(func)(x)
-            tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5)
+        op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)(x)
+        tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5)
 
 
 @tvm.testing.uses_gpu
-def test_broadcast_to():
+def test_broadcast_to(executor_kind):
     shape = (4, 1, 6)
     shape_like = (3, 4, 5, 6)
     dtype = "float32"
@@ -224,13 +225,12 @@ def test_broadcast_to():
     x = np.random.uniform(size=shape).astype(dtype)
     ref_res = np.broadcast_to(x, shape_like)
     for target, dev in tvm.testing.enabled_targets():
-        for kind in ["graph", "debug"]:
-            op_res = relay.create_executor(kind, device=dev, target=target).evaluate(func)(x)
-            tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5)
+        op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)(x)
+        tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5)
 
 
 @tvm.testing.uses_gpu
-def test_broadcast_to_const_shape_int64():
+def test_broadcast_to_const_shape_int64(executor_kind):
     shape_like = relay.const(np.array([1, 5]), dtype="int64")
     x = relay.var("x", shape=(1,), dtype="int64")
     z = relay.broadcast_to(x, shape=shape_like)
@@ -241,13 +241,12 @@ def test_broadcast_to_const_shape_int64():
     x = np.random.randint(10, size=(1,), dtype="int64")
     ref_res = np.broadcast_to(x, (5,))
     for target, dev in tvm.testing.enabled_targets():
-        for kind in ["graph", "debug"]:
-            op_res = relay.create_executor(kind, device=dev, target=target).evaluate(f)(x)
-            tvm.testing.assert_allclose(op_res.numpy(), ref_res)
+        op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(f)(x)
+        tvm.testing.assert_allclose(op_res.numpy(), ref_res)
 
 
 @tvm.testing.uses_gpu
-def test_broadcast_to_like():
+def test_broadcast_to_like(executor_kind):
     shape = (4, 1, 6)
     shape_like = (3, 4, 5, 6)
     dtype = "float32"
@@ -264,9 +263,10 @@ def test_broadcast_to_like():
     ref_res = np.broadcast_to(x, shape_like)
 
     for target, dev in tvm.testing.enabled_targets():
-        for kind in ["graph", "debug"]:
-            op_res = relay.create_executor(kind, device=dev, target=target).evaluate(func)(x, y)
-            tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5)
+        op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)(
+            x, y
+        )
+        tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5)
 
 
 def np_slice_like(np_data, np_shape_like, axis=None):
@@ -288,7 +288,7 @@ def np_slice_like(np_data, np_shape_like, axis=None):
     return np_result
 
 
-def verify_slice_like(data, slice_like, axes, output, dtype="float32"):
+def verify_slice_like(executor_kind, data, slice_like, axes, output, dtype="float32"):
     x = relay.var("data", relay.TensorType(data, dtype))
     y = relay.var("slice_like", relay.TensorType(slice_like, dtype))
     z = relay.slice_like(x, y, axes)
@@ -308,31 +308,46 @@ def verify_slice_like(data, slice_like, axes, output, dtype="float32"):
     ref_res = np_slice_like(x_data, y_data, axes)
 
     for target, dev in tvm.testing.enabled_targets():
-        for kind in ["graph", "debug"]:
-            op_res = relay.create_executor(kind, device=dev, target=target).evaluate(func)(
-                x_data, y_data
-            )
-            tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5)
+        op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)(
+            x_data, y_data
+        )
+        tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5)
 
 
 @tvm.testing.uses_gpu
-def test_slice_like():
+def test_slice_like(executor_kind):
     d1, d2, d3, d4 = te.var("d1"), te.var("d2"), te.var("d3"), te.var("d4")
-    verify_slice_like(data=(d1, d2, d3), slice_like=(1, 2, 3), axes=None, output=(1, 2, 3))
-    verify_slice_like(data=(1, 2, 3), slice_like=(d1, d2, d3), axes=None, output=(d1, d2, d3))
-    verify_slice_like(data=(d2, d3, d4), slice_like=(d1, d2, d3), axes=(1, 2), output=(d2, d2, d3))
-    verify_slice_like(data=(3, 4, 5), slice_like=(1, 2, 3), axes=None, output=(1, 2, 3))
-    verify_slice_like(data=(3, 4, 5), slice_like=(1, 2), axes=None, output=(1, 2, 5))
-    verify_slice_like(data=(3, 4, 5), slice_like=(1, 2, 3), axes=(1, 2), output=(3, 2, 3))
-    verify_slice_like(data=(3, 4, 5), slice_like=(1, 2, 3), axes=(-1, -3), output=(1, 4, 3))
     verify_slice_like(
-        data=(1, 3, 224, 224), slice_like=(1, 3, 112, 112), axes=(2, 3), output=(1, 3, 112, 112)
+        executor_kind, data=(d1, d2, d3), slice_like=(1, 2, 3), axes=None, output=(1, 2, 3)
+    )
+    verify_slice_like(
+        executor_kind, data=(1, 2, 3), slice_like=(d1, d2, d3), axes=None, output=(d1, d2, d3)
+    )
+    verify_slice_like(
+        executor_kind, data=(d2, d3, d4), slice_like=(d1, d2, d3), axes=(1, 2), output=(d2, d2, d3)
+    )
+    verify_slice_like(
+        executor_kind, data=(3, 4, 5), slice_like=(1, 2, 3), axes=None, output=(1, 2, 3)
+    )
+    verify_slice_like(executor_kind, data=(3, 4, 5), slice_like=(1, 2), axes=None, output=(1, 2, 5))
+    verify_slice_like(
+        executor_kind, data=(3, 4, 5), slice_like=(1, 2, 3), axes=(1, 2), output=(3, 2, 3)
+    )
+    verify_slice_like(
+        executor_kind, data=(3, 4, 5), slice_like=(1, 2, 3), axes=(-1, -3), output=(1, 4, 3)
+    )
+    verify_slice_like(
+        executor_kind,
+        data=(1, 3, 224, 224),
+        slice_like=(1, 3, 112, 112),
+        axes=(2, 3),
+        output=(1, 3, 112, 112),
     )
 
 
 @tvm.testing.uses_gpu
-def test_reverse_reshape():
-    def verify_reverse_reshape(shape, newshape, oshape):
+def test_reverse_reshape(executor_kind):
+    def verify_reverse_reshape(executor_kind, shape, newshape, oshape):
         x = relay.var("x", relay.TensorType(shape, "float32"))
         z = relay.reverse_reshape(x, newshape=newshape)
         zz = run_infer_type(z)
@@ -343,21 +358,20 @@ def test_reverse_reshape():
         x_data = np.random.uniform(low=-1, high=1, size=shape).astype("float32")
         ref_res = np.reshape(x_data, oshape)
         for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, device=dev, target=target).evaluate(func)(
-                    x_data
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5)
+            op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)(
+                x_data
+            )
+            tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5)
 
-    verify_reverse_reshape((2, 3, 4), (4, 0, 2), (4, 3, 2))
-    verify_reverse_reshape((2, 3, 4), (2, 0, 0), (2, 3, 4))
-    verify_reverse_reshape((2, 3, 4), (0, -1), (3, 8))
-    verify_reverse_reshape((2, 3, 4), (-1, 0), (6, 4))
-    verify_reverse_reshape((2, 3, 4), (0, -3), (2, 12))
+    verify_reverse_reshape(executor_kind, (2, 3, 4), (4, 0, 2), (4, 3, 2))
+    verify_reverse_reshape(executor_kind, (2, 3, 4), (2, 0, 0), (2, 3, 4))
+    verify_reverse_reshape(executor_kind, (2, 3, 4), (0, -1), (3, 8))
+    verify_reverse_reshape(executor_kind, (2, 3, 4), (-1, 0), (6, 4))
+    verify_reverse_reshape(executor_kind, (2, 3, 4), (0, -3), (2, 12))
 
 
 def verify_batch_matmul_with_inputs(
-    x, y, x_np, y_np, out_shape, dtype="float32", trans_x=False, trans_y=True
+    executor_kind, x, y, x_np, y_np, out_shape, dtype="float32", trans_x=False, trans_y=True
 ):
     z = relay.nn.batch_matmul(x, y, transpose_a=trans_x, transpose_b=trans_y)
     zz = run_infer_type(z)
@@ -368,26 +382,29 @@ def verify_batch_matmul_with_inputs(
     z_np = tvm.topi.testing.batch_matmul(x_np, y_np, trans_x=trans_x, trans_y=trans_y)
 
     for target, dev in tvm.testing.enabled_targets():
-        for kind in ["graph", "debug"]:
-            if len(input_vars) == 2:
-                z = relay.create_executor(kind, device=dev, target=target).evaluate(func)(
-                    x_np, y_np
-                )
-            else:
-                z = relay.create_executor(kind, device=dev, target=target).evaluate(func)(x_np)
-            tvm.testing.assert_allclose(z.numpy(), z_np, rtol=1e-5, atol=1e-5)
+        if len(input_vars) == 2:
+            z = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)(
+                x_np, y_np
+            )
+        else:
+            z = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)(x_np)
+        tvm.testing.assert_allclose(z.numpy(), z_np, rtol=1e-5, atol=1e-5)
 
 
-def verify_batch_matmul(x_shape, y_shape, out_shape, dtype="float32", trans_x=False, trans_y=True):
+def verify_batch_matmul(
+    executor_kind, x_shape, y_shape, out_shape, dtype="float32", trans_x=False, trans_y=True
+):
     x = relay.var("x", relay.TensorType(x_shape, dtype))
     y = relay.var("y", relay.TensorType(y_shape, dtype))
     x_np = np.random.uniform(size=x_shape).astype(dtype)
     y_np = np.random.uniform(size=y_shape).astype(dtype)
-    verify_batch_matmul_with_inputs(x, y, x_np, y_np, out_shape, dtype, trans_x, trans_y)
+    verify_batch_matmul_with_inputs(
+        executor_kind, x, y, x_np, y_np, out_shape, dtype, trans_x, trans_y
+    )
 
 
 @tvm.testing.uses_gpu
-def test_batch_matmul():
+def test_batch_matmul(executor_kind):
     b, m, n, k = te.size_var("b"), te.size_var("m"), te.size_var("n"), te.size_var("k")
     x = relay.var("x", relay.TensorType((b, m, k), "float32"))
     y = relay.var("y", relay.TensorType((b, n, k), "float32"))
@@ -395,17 +412,31 @@ def test_batch_matmul():
     zz = run_infer_type(z)
     assert zz.checked_type == relay.TensorType((b, m, n), "float32")
 
-    verify_batch_matmul((1, 16, 32), (1, 16, 32), (1, 16, 16), trans_x=False, trans_y=True)
-    verify_batch_matmul((5, 16, 32), (5, 16, 32), (5, 16, 16), trans_x=False, trans_y=True)
-    verify_batch_matmul((5, 16, 32), (5, 20, 32), (5, 16, 20), trans_x=False, trans_y=True)
-    verify_batch_matmul((30, 16, 32), (30, 20, 32), (30, 16, 20), trans_x=False, trans_y=True)
-    verify_batch_matmul((1, 32, 16), (1, 16, 32), (1, 16, 16), trans_x=True, trans_y=True)
-    verify_batch_matmul((5, 16, 32), (5, 32, 16), (5, 16, 16), trans_x=False, trans_y=False)
-    verify_batch_matmul((5, 32, 16), (5, 32, 20), (5, 16, 20), trans_x=True, trans_y=False)
+    verify_batch_matmul(
+        executor_kind, (1, 16, 32), (1, 16, 32), (1, 16, 16), trans_x=False, trans_y=True
+    )
+    verify_batch_matmul(
+        executor_kind, (5, 16, 32), (5, 16, 32), (5, 16, 16), trans_x=False, trans_y=True
+    )
+    verify_batch_matmul(
+        executor_kind, (5, 16, 32), (5, 20, 32), (5, 16, 20), trans_x=False, trans_y=True
+    )
+    verify_batch_matmul(
+        executor_kind, (30, 16, 32), (30, 20, 32), (30, 16, 20), trans_x=False, trans_y=True
+    )
+    verify_batch_matmul(
+        executor_kind, (1, 32, 16), (1, 16, 32), (1, 16, 16), trans_x=True, trans_y=True
+    )
+    verify_batch_matmul(
+        executor_kind, (5, 16, 32), (5, 32, 16), (5, 16, 16), trans_x=False, trans_y=False
+    )
+    verify_batch_matmul(
+        executor_kind, (5, 32, 16), (5, 32, 20), (5, 16, 20), trans_x=True, trans_y=False
+    )
 
     x_np = np.random.randn(10, 27, 64).astype("float32")
     x = relay.var("x", shape=x_np.shape)
-    verify_batch_matmul_with_inputs(x, x, x_np, x_np, (10, 27, 27))
+    verify_batch_matmul_with_inputs(executor_kind, x, x, x_np, x_np, (10, 27, 27))
 
 
 @pytest.mark.skip("Requires cascadelake")
@@ -492,13 +523,13 @@ def test_shape_of():
     for target, dev in tvm.testing.enabled_targets():
         # Because this op gets optimized away by the constant folding pass when
         # using the graph executor, we only test with the VM here.
-        for kind in ["debug"]:
+        for kind in ["vm"]:
             op_res = relay.create_executor(kind, device=dev, target=target).evaluate(func)(x_data)
             tvm.testing.assert_allclose(op_res.numpy(), np.array(shape).astype("int32"))
 
 
 @tvm.testing.uses_gpu
-def test_ndarray_size():
+def test_ndarray_size(executor_kind):
     def verify_ndarray_size(shape):
         x = relay.var("x", shape=shape)
         func = relay.Function([x], relay.op.ndarray_size(x))
@@ -507,11 +538,10 @@ def test_ndarray_size():
         x_data = np.random.uniform(size=shape).astype("float32")
         ref_res = np.size(x_data)
         for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, device=dev, target=target).evaluate(func)(
-                    x_data
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res)
+            op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)(
+                x_data
+            )
+            tvm.testing.assert_allclose(op_res.numpy(), ref_res)
 
     verify_ndarray_size((2, 3, 5))
     verify_ndarray_size((2, 3, 5, 7))
@@ -573,7 +603,7 @@ def test_adaptive_pool():
 
 
 @tvm.testing.uses_gpu
-def test_sequence_mask():
+def test_sequence_mask(executor_kind):
     def _verify(data_shape, mask_value, axis, dtype, itype):
         max_length = data_shape[axis]
         nbatch = data_shape[1 - axis]
@@ -588,11 +618,10 @@ def test_sequence_mask():
         gt_out_np = tvm.topi.testing.sequence_mask(data_np, valid_length_np, mask_value, axis)
 
         for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                out_relay = relay.create_executor(kind, device=dev, target=target).evaluate(func)(
-                    data_np, valid_length_np
-                )
-                tvm.testing.assert_allclose(out_relay.numpy(), gt_out_np)
+            out_relay = relay.create_executor(executor_kind, device=dev, target=target).evaluate(
+                func
+            )(data_np, valid_length_np)
+            tvm.testing.assert_allclose(out_relay.numpy(), gt_out_np)
 
     _verify((5, 10), 0.0, 1, "float32", "int32")
     _verify((2, 3, 5, 3), 0.0, 0, "float32", "int64")
@@ -600,7 +629,7 @@ def test_sequence_mask():
 
 
 @tvm.testing.uses_gpu
-def test_one_hot():
+def test_one_hot(executor_kind):
     def _get_oshape(indices_shape, depth, axis):
         oshape = []
         true_axis = len(indices_shape) if axis == -1 else axis
@@ -629,11 +658,10 @@ def test_one_hot():
         out_np = tvm.topi.testing.one_hot(indices_np, on_value, off_value, depth, axis, dtype)
 
         for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                out_relay = relay.create_executor(kind, device=dev, target=target).evaluate(func)(
-                    indices_np
-                )
-                tvm.testing.assert_allclose(out_relay.numpy(), out_np)
+            out_relay = relay.create_executor(executor_kind, device=dev, target=target).evaluate(
+                func
+            )(indices_np)
+            tvm.testing.assert_allclose(out_relay.numpy(), out_np)
 
     _verify((3,), 3, 1, 0, -1, "int32")
     _verify((3,), 3, 1.0, 0.0, -1, "float32")
@@ -644,7 +672,7 @@ def test_one_hot():
 
 
 @tvm.testing.uses_gpu
-def test_matrix_set_diag():
+def test_matrix_set_diag(executor_kind):
     def _verify(input_shape, diagonal_shape, dtype, k=0, align="RIGHT_LEFT"):
         input = relay.var("input", relay.TensorType(input_shape, dtype))
         diagonal = relay.var("diagonal", relay.TensorType(diagonal_shape, dtype))
@@ -660,11 +688,10 @@ def test_matrix_set_diag():
         out_np = tvm.topi.testing.matrix_set_diag(input_np, diagonal_np, k, align)
 
         for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                out_relay = relay.create_executor(kind, device=dev, target=target).evaluate(func)(
-                    input_np, diagonal_np
-                )
-                tvm.testing.assert_allclose(out_relay.numpy(), out_np)
+            out_relay = relay.create_executor(executor_kind, device=dev, target=target).evaluate(
+                func
+            )(input_np, diagonal_np)
+            tvm.testing.assert_allclose(out_relay.numpy(), out_np)
 
     _verify((2, 2), (2,), "float32")
     _verify((4, 3, 3), (4, 3), "int32")
@@ -675,7 +702,7 @@ def test_matrix_set_diag():
 
 
 @tvm.testing.parametrize_targets
-def test_nll_loss(dev, target):
+def test_nll_loss(executor_kind, dev, target):
     def _get_oshape(target_shape, reduction):
         if reduction == "none":
             return target_shape
@@ -702,11 +729,10 @@ def test_nll_loss(dev, target):
             predictions_np, targets_np, weights_np, reduction, ignore_index
         )
 
-        for kind in ["graph", "debug"]:
-            out_relay = relay.create_executor(kind, device=dev, target=target).evaluate(func)(
-                predictions_np, targets_np, weights_np
-            )
-            tvm.testing.assert_allclose(out_relay.numpy(), out_np, rtol=1e-6, atol=1e-6)
+        out_relay = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)(
+            predictions_np, targets_np, weights_np
+        )
+        tvm.testing.assert_allclose(out_relay.numpy(), out_np, rtol=1e-6, atol=1e-6)
 
     _verify((10, 5))
     _verify((10, 5, 2, 2))
diff --git a/tests/python/relay/test_op_level2.py b/tests/python/relay/test_op_level2.py
index c644890bbc..726ee578da 100644
--- a/tests/python/relay/test_op_level2.py
+++ b/tests/python/relay/test_op_level2.py
@@ -30,6 +30,8 @@ from tvm.relay import transform
 from tvm.relay.testing import run_infer_type
 from tvm.topi.cuda.conv3d_winograd import _infer_tile_size
 
+executor_kind = tvm.testing.parameter("graph", "vm")
+
 
 @tvm.testing.uses_gpu
 def test_conv1d_infer_type():
@@ -1301,7 +1303,7 @@ def test_avg_pool2d_no_count_pad():
 
 
 @tvm.testing.uses_gpu
-def test_flatten_infer_type():
+def test_flatten_infer_type(executor_kind):
     d1, d2, d3, d4 = te.size_var("d1"), te.size_var("d2"), te.size_var("d3"), te.size_var("d4")
     x = relay.var("x", relay.TensorType((d1, d2, d3, d4), "float32"))
     y = relay.nn.batch_flatten(x)
@@ -1330,10 +1332,10 @@ def test_flatten_infer_type():
     ref_res = x_data.flatten().reshape(o_shape)
 
     for target, dev in tvm.testing.enabled_targets():
-        op_res1 = relay.create_executor("graph", device=dev, target=target).evaluate(func)(x_data)
-        tvm.testing.assert_allclose(op_res1.numpy(), ref_res, rtol=1e-5)
-        op_res2 = relay.create_executor("debug", device=dev, target=target).evaluate(func)(x_data)
-        tvm.testing.assert_allclose(op_res2.numpy(), ref_res, rtol=1e-5)
+        op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)(
+            x_data
+        )
+        tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5)
 
 
 @tvm.testing.uses_gpu
@@ -1438,7 +1440,7 @@ def test_pad_run_dynamic_pad_value():
 
 @tvm.testing.uses_gpu
 @pytest.mark.parametrize("dtype", ["float32", "float16"])
-def test_lrn(dtype):
+def test_lrn(executor_kind, dtype):
     n, c, h, w = te.size_var("n"), te.size_var("c"), te.size_var("h"), te.size_var("w")
     x = relay.var("x", shape=(n, c, h, w), dtype=dtype)
     y = relay.nn.lrn(x, size=10, axis=2, bias=0.5, alpha=0.00001, beta=0.75)
@@ -1461,14 +1463,14 @@ def test_lrn(dtype):
     ref_res = tvm.topi.testing.lrn_python(x_data, size, axis, bias, alpha, beta)
 
     for target, dev in tvm.testing.enabled_targets():
-        op_res1 = relay.create_executor("graph", device=dev, target=target).evaluate(func)(x_data)
-        tvm.testing.assert_allclose(op_res1.numpy(), ref_res, rtol=1e-5)
-        op_res2 = relay.create_executor("debug", device=dev, target=target).evaluate(func)(x_data)
-        tvm.testing.assert_allclose(op_res2.numpy(), ref_res, rtol=1e-5)
+        op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)(
+            x_data
+        )
+        tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5)
 
 
 @tvm.testing.uses_gpu
-def test_l2_normalize():
+def test_l2_normalize(executor_kind):
     n, c, h, w = te.size_var("n"), te.size_var("c"), te.size_var("h"), te.size_var("w")
     x = relay.var("x", shape=(n, c, h, w))
     y = relay.nn.l2_normalize(x, eps=0.001, axis=[1])
@@ -1489,10 +1491,10 @@ def test_l2_normalize():
     ref_res = tvm.topi.testing.l2_normalize_python(x_data, eps, axis)
 
     for target, dev in tvm.testing.enabled_targets():
-        op_res1 = relay.create_executor("graph", device=dev, target=target).evaluate(func)(x_data)
-        tvm.testing.assert_allclose(op_res1.numpy(), ref_res, rtol=1e-5)
-        op_res2 = relay.create_executor("debug", device=dev, target=target).evaluate(func)(x_data)
-        tvm.testing.assert_allclose(op_res2.numpy(), ref_res, rtol=1e-5)
+        op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)(
+            x_data
+        )
+        tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5)
 
 
 def batch_flatten(data):
diff --git a/tests/python/relay/test_op_level3.py b/tests/python/relay/test_op_level3.py
index ef4b45ade9..9d27839c47 100644
--- a/tests/python/relay/test_op_level3.py
+++ b/tests/python/relay/test_op_level3.py
@@ -30,7 +30,7 @@ from tvm.relay.testing import check_grad, run_infer_type
 
 from utils import ref_funcs
 
-executor_kind = tvm.testing.parameter("graph", "debug")
+executor_kind = tvm.testing.parameter("graph", "vm")
 
 
 class TestZerosOnes:
@@ -644,7 +644,7 @@ def test_full_like_infer_type():
     assert yy.checked_type == relay.TensorType((n, c, h, w), "float32")
 
 
-def test_infer_type_leaky_relu(target, dev):
+def test_infer_type_leaky_relu(target, dev, executor_kind):
     n, c, h, w = te.size_var("n"), te.size_var("c"), te.size_var("h"), te.size_var("w")
     x = relay.var("x", relay.TensorType((n, c, h, w), "float32"))
     y = relay.nn.leaky_relu(x, alpha=0.1)
@@ -663,10 +663,8 @@ def test_infer_type_leaky_relu(target, dev):
     x_data = np.random.uniform(low=-1, high=1, size=shape).astype(dtype)
     ref_res = np.where(x_data > 0, x_data, x_data * 0.1)
 
-    op_res1 = relay.create_executor("graph", device=dev, target=target).evaluate(func)(x_data)
-    tvm.testing.assert_allclose(op_res1.numpy(), ref_res, rtol=1e-5)
-    op_res2 = relay.create_executor("debug", device=dev, target=target).evaluate(func)(x_data)
-    tvm.testing.assert_allclose(op_res2.numpy(), ref_res, rtol=1e-5)
+    op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)(x_data)
+    tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5)
 
 
 class TestInferTypePrelu:
@@ -684,7 +682,7 @@ class TestInferTypePrelu:
         ((1, 2, 2, 3), None, 3, (1, 2, 2, 3)),
     )
 
-    def test_infer_type_prelu(self, target, dev, data, alpha, axis, output, dtype):
+    def test_infer_type_prelu(self, target, dev, executor_kind, data, alpha, axis, output, dtype):
         x = relay.var("data", relay.TensorType(data, dtype))
         if alpha:
             y = relay.var("alpha", relay.TensorType(alpha, dtype))
@@ -712,14 +710,10 @@ class TestInferTypePrelu:
         else:
             ref_res = (x_data < 0) * (x_data * a_data.reshape(1, 1, 3)) + (x_data >= 0) * x_data
 
-        op_res1 = relay.create_executor("graph", device=dev, target=target).evaluate(func)(
-            x_data, a_data
-        )
-        tvm.testing.assert_allclose(op_res1.numpy(), ref_res, rtol=1e-5)
-        op_res2 = relay.create_executor("debug", device=dev, target=target).evaluate(func)(
+        op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)(
             x_data, a_data
         )
-        tvm.testing.assert_allclose(op_res2.numpy(), ref_res, rtol=1e-5)
+        tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5)
 
 
 class TestArange:
@@ -1051,7 +1045,7 @@ class TestDynamicScatter:
         ((16, 16, 4, 5), (16, 16, 4, 5), 3),
     )
 
-    @pytest.mark.parametrize("executor_kind", ["vm", "debug"])
+    @pytest.mark.parametrize("executor_kind", ["vm"])
     def test_dynamic_scatter(self, target, dev, executor_kind, dshape, ishape, axis):
         d = relay.var("d", relay.TensorType([relay.Any() for i in range(len(dshape))], "float32"))
         i = relay.var("i", relay.TensorType([relay.Any() for i in range(len(ishape))], "int64"))
@@ -2033,31 +2027,30 @@ def test_unique(target, dev):
         x_data = np.random.randint(50, size=n).astype(dtype)
 
         if is_dyn:
-            backends = ["vm", "debug"]
+            backend = "vm"
         else:
-            backends = ["graph", "debug"]
-
-        for kind in backends:
-            mod = tvm.ir.IRModule.from_expr(func)
-            tvm_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                x_data
-            )  # unique, indices, inverse_indices, num_unique, (counts)
-            np_res = calc_numpy_unique(
-                x_data, is_sorted
-            )  # unique, indices, inverse_indices, num_unique, counts
-            num_unique = np_res[3][0]
-
-            # num_unique
-            assert num_unique == tvm_res[3].numpy()[0]
-            # unique
-            tvm.testing.assert_allclose(tvm_res[0].numpy()[:num_unique], np_res[0], rtol=1e-5)
-            # indices
-            tvm.testing.assert_allclose(tvm_res[1].numpy()[:num_unique], np_res[1], rtol=1e-5)
-            # inverse_indices
-            tvm.testing.assert_allclose(tvm_res[2].numpy(), np_res[2], rtol=1e-5)
-            # counts
-            if return_counts:
-                tvm.testing.assert_allclose(tvm_res[4].numpy()[:num_unique], np_res[4], rtol=1e-5)
+            backend = "graph"
+
+        mod = tvm.ir.IRModule.from_expr(func)
+        tvm_res = relay.create_executor(backend, mod=mod, device=dev, target=target).evaluate()(
+            x_data
+        )  # unique, indices, inverse_indices, num_unique, (counts)
+        np_res = calc_numpy_unique(
+            x_data, is_sorted
+        )  # unique, indices, inverse_indices, num_unique, counts
+        num_unique = np_res[3][0]
+
+        # num_unique
+        assert num_unique == tvm_res[3].numpy()[0]
+        # unique
+        tvm.testing.assert_allclose(tvm_res[0].numpy()[:num_unique], np_res[0], rtol=1e-5)
+        # indices
+        tvm.testing.assert_allclose(tvm_res[1].numpy()[:num_unique], np_res[1], rtol=1e-5)
+        # inverse_indices
+        tvm.testing.assert_allclose(tvm_res[2].numpy(), np_res[2], rtol=1e-5)
+        # counts
+        if return_counts:
+            tvm.testing.assert_allclose(tvm_res[4].numpy()[:num_unique], np_res[4], rtol=1e-5)
 
     for dtype in ["int32", "int64"]:
         for i in range(8):
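
Several hunks keep a backend switch instead of the fixture: test_unique above, and test_sort / test_argsort in test_op_level6.py further down, pick the graph executor for statically shaped inputs but must use the VM when the inputs are dynamically shaped with relay.Any(), since the graph executor needs all shapes at compile time. A hedged sketch of that dispatch, using an illustrative relu function rather than the operators exercised by the commit:

    import numpy as np
    import tvm
    import tvm.testing
    from tvm import relay

    def run_maybe_dynamic(x_np, is_dyn, target="llvm"):
        shape = [relay.Any()] * x_np.ndim if is_dyn else x_np.shape
        x = relay.var("x", relay.TensorType(shape, "float32"))
        func = relay.Function([x], relay.nn.relu(x))
        mod = tvm.ir.IRModule.from_expr(func)
        # Only the VM can run functions whose shapes are not fully static.
        backend = "vm" if is_dyn else "graph"
        dev = tvm.device(target, 0)
        return relay.create_executor(
            backend, mod=mod, device=dev, target=target
        ).evaluate()(x_np)

    # Both paths should agree on the same data.
    data = np.random.uniform(-1, 1, (2, 3)).astype("float32")
    tvm.testing.assert_allclose(
        run_maybe_dynamic(data, is_dyn=False).numpy(),
        run_maybe_dynamic(data, is_dyn=True).numpy(),
    )
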
diff --git a/tests/python/relay/test_op_level4.py b/tests/python/relay/test_op_level4.py
index b9bbef9515..e46832d570 100644
--- a/tests/python/relay/test_op_level4.py
+++ b/tests/python/relay/test_op_level4.py
@@ -26,7 +26,7 @@ from tvm import relay, te
 from tvm.relay import transform
 from tvm.relay.testing import run_infer_type
 
-executor_kind = tvm.testing.parameter("graph", "debug")
+executor_kind = tvm.testing.parameter("graph", "vm")
 
 
 @tvm.testing.uses_gpu
@@ -153,14 +153,13 @@ def test_binary_int_broadcast_2():
 
 
 @tvm.testing.uses_gpu
-def test_where():
+def test_where(executor_kind):
     def run(func, inputs, ref_res):
         for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, device=dev, target=target).evaluate(func)(
-                    *inputs
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5)
+            op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)(
+                *inputs
+            )
+            tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5)
 
     def verify(x_np, y_np, cond_np):
         ref_res = np.where(cond_np, x_np, y_np)
@@ -398,7 +397,7 @@ def test_argmin_argmax_get_last_elements():
                 assert op_res.numpy().item() == ans
 
 
-def verify_mean_var_std(funcs, shape, axis, keepdims):
+def verify_mean_var_std(executor_kind, funcs, shape, axis, keepdims):
     test_func = funcs[0]
     ref_func = funcs[1]
     dtype = "float32"
@@ -411,27 +410,26 @@ def verify_mean_var_std(funcs, shape, axis, keepdims):
     ref_res = ref_func(x_data, axis=axis, dtype=dtype, keepdims=keepdims)
 
     for target, dev in tvm.testing.enabled_targets():
-        op_res1 = relay.create_executor("graph", device=dev, target=target).evaluate(func)(x_data)
-        tvm.testing.assert_allclose(op_res1[0].numpy(), ref_mean, rtol=1e-5)
-        tvm.testing.assert_allclose(op_res1[1].numpy(), ref_res, rtol=1e-5)
-        op_res2 = relay.create_executor("debug", device=dev, target=target).evaluate(func)(x_data)
-        tvm.testing.assert_allclose(op_res2[0].numpy(), ref_mean, rtol=1e-5)
-        tvm.testing.assert_allclose(op_res2[1].numpy(), ref_res, rtol=1e-5)
+        op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)(
+            x_data
+        )
+        tvm.testing.assert_allclose(op_res[0].numpy(), ref_mean, rtol=1e-5)
+        tvm.testing.assert_allclose(op_res[1].numpy(), ref_res, rtol=1e-5)
 
 
 @tvm.testing.uses_gpu
-def test_mean_var_std():
+def test_mean_var_std(executor_kind):
     for func in [[relay.mean_variance, np.var], [relay.mean_std, np.std]]:
-        verify_mean_var_std(func, (2, 3, 4), 1, True)
-        verify_mean_var_std(func, (2, 3, 4), (1,), True)
-        verify_mean_var_std(func, (2, 3, 4), -1, True)
-        verify_mean_var_std(func, (2, 3, 4), (0, 1, 2), False)
-        verify_mean_var_std(func, (4, 4, 3), None, False)
-        verify_mean_var_std(func, (4, 4, 3), (0, 2), False)
-        verify_mean_var_std(func, (128, 24, 128), (0, 1), False)
-        verify_mean_var_std(func, (128, 24, 128), (0, 2), False)
-        verify_mean_var_std(func, (128, 24, 128), (0, 1), True)
-        verify_mean_var_std(func, (128, 24, 128), (0, 2), True)
+        verify_mean_var_std(executor_kind, func, (2, 3, 4), 1, True)
+        verify_mean_var_std(executor_kind, func, (2, 3, 4), (1,), True)
+        verify_mean_var_std(executor_kind, func, (2, 3, 4), -1, True)
+        verify_mean_var_std(executor_kind, func, (2, 3, 4), (0, 1, 2), False)
+        verify_mean_var_std(executor_kind, func, (4, 4, 3), None, False)
+        verify_mean_var_std(executor_kind, func, (4, 4, 3), (0, 2), False)
+        verify_mean_var_std(executor_kind, func, (128, 24, 128), (0, 1), False)
+        verify_mean_var_std(executor_kind, func, (128, 24, 128), (0, 2), False)
+        verify_mean_var_std(executor_kind, func, (128, 24, 128), (0, 1), True)
+        verify_mean_var_std(executor_kind, func, (128, 24, 128), (0, 2), True)
 
 
 @tvm.testing.uses_gpu
diff --git a/tests/python/relay/test_op_level5.py b/tests/python/relay/test_op_level5.py
index 10cd914157..af9c08409c 100644
--- a/tests/python/relay/test_op_level5.py
+++ b/tests/python/relay/test_op_level5.py
@@ -29,7 +29,7 @@ import tvm.topi.testing
 from tvm import relay, te
 from tvm.relay.testing import run_infer_type
 
-executor_kind = tvm.testing.parameter("graph", "debug")
+executor_kind = tvm.testing.parameter("graph", "vm")
 
 
 def test_resize1d_infer_type():
@@ -279,7 +279,7 @@ class TestCropAndResize:
 
 
 @tvm.testing.uses_gpu
-def test_multibox_prior():
+def test_multibox_prior(executor_kind):
     def get_ref_result(
         dshape, sizes=(1.0,), ratios=(1.0,), steps=(-1.0, -1.0), offsets=(0.5, 0.5), clip=True
     ):
@@ -358,10 +358,10 @@ def test_multibox_prior():
         func = relay.Function([x], z)
         func = run_infer_type(func)
         for target, dev in tvm.testing.enabled_targets():
-            op_res1 = relay.create_executor("graph", device=dev, target=target).evaluate(func)(data)
-            tvm.testing.assert_allclose(op_res1.numpy(), ref_res, rtol=1e-5)
-            op_res2 = relay.create_executor("debug", device=dev, target=target).evaluate(func)(data)
-            tvm.testing.assert_allclose(op_res2.numpy(), ref_res, rtol=1e-5)
+            op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)(
+                data
+            )
+            tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5)
 
     sizes = (0.3, 1.5, 0.7)
     ratios = (1.3, 2.4)
@@ -415,7 +415,7 @@ def test_get_valid_counts():
         func = relay.Function([x], z.astuple())
         func = run_infer_type(func)
         for target, dev in tvm.testing.enabled_targets():
-            out = relay.create_executor("debug", device=dev, target=target).evaluate(func)(np_data)
+            out = relay.create_executor("vm", device=dev, target=target).evaluate(func)(np_data)
 
             tvm.testing.assert_allclose(out[0].numpy(), np_out1, rtol=1e-3, atol=1e-04)
             tvm.testing.assert_allclose(out[1].numpy(), np_out2, rtol=1e-3, atol=1e-04)
@@ -428,7 +428,7 @@ def test_get_valid_counts():
 
 
 @tvm.testing.uses_gpu
-def test_non_max_suppression():
+def test_non_max_suppression(executor_kind):
     def verify_nms(
         x0_data,
         x1_data,
@@ -486,22 +486,14 @@ def test_non_max_suppression():
         func_indices = relay.Function([x0, x1, x2, x3], z_indices)
         func_indices = run_infer_type(func_indices)
         for target, dev in tvm.testing.enabled_targets():
-            op_res1 = relay.create_executor("graph", device=dev, target=target).evaluate(func)(
-                x0_data, x1_data, x2_data, x3_data
-            )
-            tvm.testing.assert_allclose(op_res1.numpy(), ref_res, rtol=1e-5)
-            op_res2 = relay.create_executor("debug", device=dev, target=target).evaluate(func)(
+            op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)(
                 x0_data, x1_data, x2_data, x3_data
             )
-            tvm.testing.assert_allclose(op_res2.numpy(), ref_res, rtol=1e-5)
-            op_indices_res1 = relay.create_executor("graph", device=dev, target=target).evaluate(
-                func_indices
-            )(x0_data, x1_data, x2_data, x3_data)
-            tvm.testing.assert_allclose(op_indices_res1[0].numpy(), ref_indices_res, rtol=1e-5)
-            op_indices_res2 = relay.create_executor("debug", device=dev, target=target).evaluate(
-                func_indices
-            )(x0_data, x1_data, x2_data, x3_data)
-            tvm.testing.assert_allclose(op_indices_res2[0].numpy(), ref_indices_res, rtol=1e-5)
+            tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5)
+            op_indices_res = relay.create_executor(
+                executor_kind, device=dev, target=target
+            ).evaluate(func_indices)(x0_data, x1_data, x2_data, x3_data)
+            tvm.testing.assert_allclose(op_indices_res[0].numpy(), ref_indices_res, rtol=1e-5)
 
     np_data = np.array(
         [
@@ -633,7 +625,7 @@ def test_non_max_suppression():
 
 
 @tvm.testing.uses_gpu
-def test_multibox_transform_loc():
+def test_multibox_transform_loc(executor_kind):
     def test_default_value():
         num_anchors = 3
         num_classes = 3
@@ -683,14 +675,10 @@ def test_multibox_transform_loc():
         func = relay.Function([cls_prob, loc_pred, anchors], nms)
         func = run_infer_type(func)
         for target, dev in tvm.testing.enabled_targets():
-            op_res1 = relay.create_executor("graph", device=dev, target=target).evaluate(func)(
-                np_cls_prob, np_loc_preds, np_anchors
-            )
-            tvm.testing.assert_allclose(op_res1.numpy(), expected_np_out, rtol=1e-5)
-            op_res2 = relay.create_executor("debug", device=dev, target=target).evaluate(func)(
+            op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)(
                 np_cls_prob, np_loc_preds, np_anchors
             )
-            tvm.testing.assert_allclose(op_res2.numpy(), expected_np_out, rtol=1e-5)
+            tvm.testing.assert_allclose(op_res.numpy(), expected_np_out, rtol=1e-5)
 
     def test_threshold():
         num_anchors = 5
@@ -727,7 +715,7 @@ def test_multibox_transform_loc():
 
 
 @tvm.testing.uses_gpu
-def test_roi_align():
+def test_roi_align(executor_kind):
     def verify_roi_align(
         data_shape,
         rois_shape,
@@ -778,14 +766,10 @@ def test_roi_align():
             mode=mode,
         )
         for target, dev in tvm.testing.enabled_targets():
-            op_res1 = relay.create_executor("graph", device=dev, target=target).evaluate(func)(
+            op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)(
                 np_data, np_rois
             )
-            tvm.testing.assert_allclose(op_res1.numpy(), ref_res, atol=1e-6, rtol=1e-3)
-            op_res2 = relay.create_executor("debug", device=dev, target=target).evaluate(func)(
-                np_data, np_rois
-            )
-            tvm.testing.assert_allclose(op_res2.numpy(), ref_res, atol=1e-6, rtol=1e-3)
+            tvm.testing.assert_allclose(op_res.numpy(), ref_res, atol=1e-6, rtol=1e-3)
 
     def verify_roi_align_nchw(
         data_shape, rois_shape, pooled_size, spatial_scale, sample_ratio, mode
@@ -848,7 +832,7 @@ def test_roi_align():
 
 
 @tvm.testing.uses_gpu
-def test_roi_pool():
+def test_roi_pool(executor_kind):
     def verify_roi_pool(data_shape, rois_shape, pooled_size, spatial_scale):
         data = relay.var("data", relay.ty.TensorType(data_shape, "float32"))
         rois = relay.var("rois", relay.ty.TensorType(rois_shape, "float32"))
@@ -875,21 +859,17 @@ def test_roi_pool():
             np_data, np_rois, pooled_size=pooled_size, spatial_scale=spatial_scale
         )
         for target, dev in tvm.testing.enabled_targets():
-            op_res1 = relay.create_executor("graph", device=dev, target=target).evaluate(func)(
-                np_data, np_rois
-            )
-            tvm.testing.assert_allclose(op_res1.numpy(), ref_res, rtol=1e-4)
-            op_res2 = relay.create_executor("debug", device=dev, target=target).evaluate(func)(
+            op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)(
                 np_data, np_rois
             )
-            tvm.testing.assert_allclose(op_res2.numpy(), ref_res, rtol=1e-4)
+            tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-4)
 
     verify_roi_pool((1, 4, 16, 16), (32, 5), pooled_size=7, spatial_scale=1.0)
     verify_roi_pool((4, 4, 16, 16), (32, 5), pooled_size=7, spatial_scale=0.5)
 
 
 @tvm.testing.uses_gpu
-def test_proposal():
+def test_proposal(executor_kind):
     def verify_proposal(np_cls_prob, np_bbox_pred, np_im_info, np_out, attrs):
         cls_prob = relay.var("cls_prob", relay.ty.TensorType(np_cls_prob.shape, "float32"))
         bbox_pred = relay.var("bbox_pred", relay.ty.TensorType(np_bbox_pred.shape, "float32"))
@@ -905,14 +885,10 @@ def test_proposal():
                 print("Skip test because %s is not enabled." % target)
                 continue
             dev = tvm.device(target, 0)
-            op_res1 = relay.create_executor("graph", device=dev, target=target).evaluate(func)(
+            op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)(
                 np_cls_prob, np_bbox_pred, np_im_info
             )
-            tvm.testing.assert_allclose(op_res1.numpy(), np_out, rtol=1e-4)
-            op_res2 = relay.create_executor("debug", device=dev, target=target).evaluate(func)(
-                np_cls_prob, np_bbox_pred, np_im_info
-            )
-            tvm.testing.assert_allclose(op_res2.numpy(), np_out, rtol=1e-4)
+            tvm.testing.assert_allclose(op_res.numpy(), np_out, rtol=1e-4)
 
     attrs = {
         "scales": (0.5,),
@@ -986,7 +962,7 @@ def test_yolo_reorg_infer_shape():
 
 
 @tvm.testing.uses_gpu
-def test_yolo_reorg():
+def test_yolo_reorg(executor_kind):
     def verify_yolo_reorg(shape, stride):
         x_data = np.random.uniform(low=-1, high=1, size=shape).astype("float32")
         ref_res = tvm.topi.testing.reorg_python(x_data, stride)
@@ -1000,11 +976,10 @@ def test_yolo_reorg():
         func = relay.Function([x], z)
 
         for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, device=dev, target=target).evaluate(func)(
-                    x_data
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5)
+            op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)(
+                x_data
+            )
+            tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5)
 
     verify_yolo_reorg((1, 100, 20, 20), 10)
     verify_yolo_reorg((1, 4, 6, 6), 2)
@@ -1155,7 +1130,7 @@ class TestDeformableConv2D:
 
 
 @tvm.testing.uses_gpu
-def test_depth_to_space():
+def test_depth_to_space(executor_kind):
     def verify_depth_to_space(dshape, block_size, layout, mode):
         if layout == "NHWC":
             out_shape = [
@@ -1188,11 +1163,10 @@ def test_depth_to_space():
         func = relay.Function([x], z)
 
         for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, device=dev, target=target).evaluate(func)(
-                    x_data
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-4)
+            op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)(
+                x_data
+            )
+            tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-4)
 
     for layout in ["NHWC", "NCHW"]:
         for mode in ["DCR", "CDR"]:
@@ -1200,7 +1174,7 @@ def test_depth_to_space():
 
 
 @tvm.testing.uses_gpu
-def test_space_to_depth():
+def test_space_to_depth(executor_kind):
     def verify_space_to_depth(dshape, block_size, layout):
         if layout == "NHWC":
             out_shape = [
@@ -1233,11 +1207,10 @@ def test_space_to_depth():
         func = relay.Function([x], z)
 
         for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, device=dev, target=target).evaluate(func)(
-                    x_data
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-4)
+            op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)(
+                x_data
+            )
+            tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-4)
 
     for layout in ["NHWC", "NCHW"]:
         verify_space_to_depth((1, 4, 4, 4), 2, layout)
@@ -1369,7 +1342,7 @@ class TestDilation2DRun:
 
 
 @tvm.testing.uses_gpu
-def test_affine_grid():
+def test_affine_grid(executor_kind):
     def verify_affine_grid(num_batch, target_shape):
         dtype = "float32"
         data_shape = (num_batch, 2, 3)
@@ -1385,18 +1358,17 @@ def test_affine_grid():
         ref_res = tvm.topi.testing.affine_grid_python(data_np, target_shape)
 
         for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res1 = relay.create_executor(kind, device=dev, target=target).evaluate(func)(
-                    data_np
-                )
-                tvm.testing.assert_allclose(op_res1.numpy(), ref_res, rtol=1e-5, atol=1e-5)
+            op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)(
+                data_np
+            )
+            tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5, atol=1e-5)
 
     verify_affine_grid(1, (16, 32))
     verify_affine_grid(4, (16, 32))
 
 
 @tvm.testing.uses_gpu
-def test_grid_sample():
+def test_grid_sample(executor_kind):
     def verify_grid_sample(
         data_shape, grid_shape, method="bilinear", padding_mode="zeros", align_corners=True
     ):
@@ -1436,11 +1408,10 @@ def test_grid_sample():
         )
 
         for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res1 = relay.create_executor(kind, device=dev, target=target).evaluate(func)(
-                    data_np, grid_np
-                )
-                tvm.testing.assert_allclose(op_res1.numpy(), ref_res, rtol=1e-5, atol=1e-5)
+            op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)(
+                data_np, grid_np
+            )
+            tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5, atol=1e-5)
 
     methods = ["nearest", "bilinear", "bicubic"]
     padding_modes = ["zeros", "border", "reflection"]
@@ -1462,7 +1433,7 @@ def test_grid_sample():
 
 
 @tvm.testing.uses_gpu
-def test_space_to_batch_nd():
+def test_space_to_batch_nd(executor_kind):
     def verify_space_to_batch_nd(dshape, block_shape, paddings):
         x_data = np.random.uniform(size=dshape).astype("float32")
         pad_before, pad_after = map(list, zip(*paddings))
@@ -1479,18 +1450,17 @@ def test_space_to_batch_nd():
         func = relay.Function([x], z)
 
         for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, device=dev, target=target).evaluate(func)(
-                    x_data
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-4)
+            op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)(
+                x_data
+            )
+            tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-4)
 
     verify_space_to_batch_nd([3, 3, 2, 1], [3], [[0, 0]])
     verify_space_to_batch_nd([2, 2, 4, 1], [2, 2], [[0, 0], [2, 0]])
 
 
 @tvm.testing.uses_gpu
-def test_batch_to_space_nd():
+def test_batch_to_space_nd(executor_kind):
     def verify_batch_to_space_nd(dshape, block_shape, crops):
         x_data = np.random.uniform(size=dshape).astype("float32")
         crop_begin_list, crop_end_list = map(list, zip(*crops))
@@ -1507,18 +1477,17 @@ def test_batch_to_space_nd():
         func = relay.Function([x], z)
 
         for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, device=dev, target=target).evaluate(func)(
-                    x_data
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-4)
+            op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)(
+                x_data
+            )
+            tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-4)
 
     verify_batch_to_space_nd([4, 1, 1, 3], [2, 2], [[0, 0], [0, 0]])
     verify_batch_to_space_nd([8, 1, 3, 1], [2, 2], [[0, 0], [2, 0]])
 
 
 @tvm.testing.uses_gpu
-def test_all_class_non_max_suppression():
+def test_all_class_non_max_suppression(executor_kind):
     def verify_all_class_non_max_suppression(
         boxes_np,
         scores_np,
@@ -1542,12 +1511,11 @@ def test_all_class_non_max_suppression():
         func = run_infer_type(func)
 
         for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                selected_indices, num_detections = relay.create_executor(
-                    kind, device=dev, target=target
-                ).evaluate(func)(boxes_np, scores_np)
-                tvm_res = selected_indices.numpy()[: num_detections.numpy()[0]]
-                np.testing.assert_equal(tvm_res, expected_indices)
+            selected_indices, num_detections = relay.create_executor(
+                executor_kind, device=dev, target=target
+            ).evaluate(func)(boxes_np, scores_np)
+            tvm_res = selected_indices.numpy()[: num_detections.numpy()[0]]
+            np.testing.assert_equal(tvm_res, expected_indices)
 
     boxes = np.array(
         [
diff --git a/tests/python/relay/test_op_level6.py b/tests/python/relay/test_op_level6.py
index 48c58dc2dc..78db5b8738 100644
--- a/tests/python/relay/test_op_level6.py
+++ b/tests/python/relay/test_op_level6.py
@@ -23,6 +23,8 @@ from tvm import relay
 from tvm.topi.testing import searchsorted_ref
 import tvm.testing
 
+executor_kind = tvm.testing.parameter("graph", "vm")
+
 
 @tvm.testing.uses_gpu
 def test_sort():
@@ -40,16 +42,15 @@ def test_sort():
             ref_res = -np.sort(-x_data, axis=axis)
 
         if is_dyn:
-            backends = ["vm", "debug"]
+            backend = "vm"
         else:
-            backends = ["graph", "debug"]
+            backend = "graph"
         for target, dev in tvm.testing.enabled_targets():
-            for kind in backends:
-                mod = tvm.ir.IRModule.from_expr(func)
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    x_data
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5)
+            mod = tvm.ir.IRModule.from_expr(func)
+            op_res = relay.create_executor(backend, mod=mod, device=dev, target=target).evaluate()(
+                x_data
+            )
+            tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5)
 
     for is_dyn in [False, True]:
         verify_sort((2, 3, 4), axis=0, is_ascend=False, is_dyn=is_dyn)
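test_sort and test_argsort keep an explicit per-case backend instead of the executor_kind fixture because their dynamic-shape variants, is_dyn=True, can only run on the VM; the graph executor requires statically shaped programs. The following is a rough sketch of the dynamic path with made-up shapes and data, shown only to illustrate why "vm" is the lone backend there.

    import numpy as np
    import tvm
    import tvm.testing
    from tvm import relay

    # Input whose first dimension is unknown at compile time.
    x = relay.var("x", relay.TensorType((relay.Any(), 4), "float32"))
    func = relay.Function([x], relay.sort(x, axis=1))
    mod = tvm.ir.IRModule.from_expr(func)

    x_data = np.random.uniform(size=(3, 4)).astype("float32")
    # The VM (and the debug interpreter) can execute dynamically shaped
    # programs; the graph executor cannot, so the static case keeps "graph".
    op_res = relay.create_executor(
        "vm", mod=mod, device=tvm.cpu(0), target="llvm"
    ).evaluate()(x_data)
    tvm.testing.assert_allclose(op_res.numpy(), np.sort(x_data, axis=1), rtol=1e-5)
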
@@ -76,16 +77,15 @@ def test_argsort():
             ref_res = np.argsort(-x_data, axis=axis, kind="stable")
 
         if is_dyn:
-            backends = ["vm", "debug"]
+            backend = "vm"
         else:
-            backends = ["graph", "debug"]
+            backend = "graph"
         for target, dev in tvm.testing.enabled_targets():
-            for kind in backends:
-                mod = tvm.ir.IRModule.from_expr(func)
-                op_res = relay.create_executor(kind, mod=mod, device=dev, target=target).evaluate()(
-                    x_data
-                )
-                tvm.testing.assert_allclose(op_res.numpy(), ref_res.astype(dtype), rtol=1e-5)
+            mod = tvm.ir.IRModule.from_expr(func)
+            op_res = relay.create_executor(backend, mod=mod, device=dev, target=target).evaluate()(
+                x_data
+            )
+            tvm.testing.assert_allclose(op_res.numpy(), ref_res.astype(dtype), rtol=1e-5)
 
     for is_dyn in [False, True]:
         for dtype in ["int32", "int64", "float32", "float64"]:
@@ -102,7 +102,7 @@ def test_argsort():
 
 
 @tvm.testing.uses_gpu
-def test_topk():
+def test_topk(executor_kind):
     def verify_topk(k, axis, ret_type, is_ascend, dtype, in_dtype="float32"):
         shape = (20, 100)
         x = relay.var("x", relay.TensorType(shape, in_dtype))
@@ -129,17 +129,16 @@ def test_topk():
         np_indices = np_indices.astype(dtype)
 
         for target, dev in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                op_res = relay.create_executor(kind, device=dev, target=target).evaluate(func)(
-                    np_data
-                )
-                if ret_type == "both":
-                    tvm.testing.assert_allclose(op_res[0].numpy(), np_values)
-                    tvm.testing.assert_allclose(op_res[1].numpy(), np_indices)
-                elif ret_type == "values":
-                    tvm.testing.assert_allclose(op_res.numpy(), np_values)
-                else:
-                    tvm.testing.assert_allclose(op_res.numpy(), np_indices)
+            op_res = relay.create_executor(executor_kind, device=dev, target=target).evaluate(func)(
+                np_data
+            )
+            if ret_type == "both":
+                tvm.testing.assert_allclose(op_res[0].numpy(), np_values)
+                tvm.testing.assert_allclose(op_res[1].numpy(), np_indices)
+            elif ret_type == "values":
+                tvm.testing.assert_allclose(op_res.numpy(), np_values)
+            else:
+                tvm.testing.assert_allclose(op_res.numpy(), np_indices)
 
     np.random.seed(0)
     for k in [0, 1, 5]:
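In the rewritten test_topk body, relay.topk with ret_type="both" yields a values/indices pair, which is why the single evaluate call is indexed as op_res[0] and op_res[1], while the "values" and "indices" cases return one array. A hedged sketch of that result shape, with an arbitrary k, input shape, and index dtype (not the exact values used by verify_topk):

    import numpy as np
    import tvm
    import tvm.testing
    from tvm import relay

    x = relay.var("x", relay.TensorType((20, 100), "float32"))
    out = relay.topk(x, k=5, axis=-1, ret_type="both", is_ascend=False, dtype="int64")
    func = relay.Function([x], out.astuple())

    np_data = np.random.uniform(size=(20, 100)).astype("float32")
    op_res = relay.create_executor(
        "graph", device=tvm.cpu(0), target="llvm"
    ).evaluate(func)(np_data)

    # op_res[0] holds the top-5 values per row, op_res[1] the indices.
    np_values = -np.sort(-np_data, axis=-1)[:, :5]
    tvm.testing.assert_allclose(op_res[0].numpy(), np_values)
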