Posted to commits@tvm.apache.org by jc...@apache.org on 2021/03/30 09:54:16 UTC
[tvm] branch main updated: [Autoscheduler][Sparse] Add sparse dense
end to end model tuning support for x86/arm cpu & Some bug fix (#7635)
This is an automated email from the ASF dual-hosted git repository.
jcf94 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git
The following commit(s) were added to refs/heads/main by this push:
new 612f6ce [Autoscheduler][Sparse] Add sparse dense end to end model tuning support for x86/arm cpu & Some bug fix (#7635)
612f6ce is described below
commit 612f6cefcbaedb81789f4de85f9bb1180fc4924e
Author: Chenfan <jc...@outlook.com>
AuthorDate: Tue Mar 30 17:53:57 2021 +0800
[Autoscheduler][Sparse] Add sparse dense end to end model tuning support for x86/arm cpu & Some bug fix (#7635)
* Add sparse dense end to end model tuning support
* Add sparse tuning for arm network
* Bug fix for tflite frontend dense with layout rewrite
* Move the random_bsr_matrix to sparse.utils
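For context, the end-to-end flow this commit enables looks roughly like the following sketch (names follow the tutorials updated below; relay.testing.mlp is the only network wired up for sparse conversion there):

    import tvm
    from tvm import relay, auto_scheduler
    import tvm.relay.testing
    from tvm.topi.sparse.utils import convert_model_dense_to_sparse

    # Build a small dense MLP, then rewrite its dense ops to BSR sparse ones
    mod, params = relay.testing.mlp.get_workload(batch_size=1, num_classes=10)
    mod, params = convert_model_dense_to_sparse(mod, params, random_params=True)

    # Task extraction now records the sparse weight buffers as task inputs
    target = tvm.target.Target("llvm")
    tasks, task_weights = auto_scheduler.extract_tasks(mod["main"], params, target)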
---
python/tvm/auto_scheduler/measure.py | 6 +-
python/tvm/auto_scheduler/relay_integration.py | 30 +++++-
python/tvm/auto_scheduler/search_task.py | 9 +-
python/tvm/relay/analysis/sparse_dense.py | 23 +++++
python/tvm/relay/frontend/tflite.py | 2 +-
python/tvm/topi/nn/sparse.py | 2 +-
python/tvm/topi/sparse/utils.py | 126 +++++++++++++++++++++++++
tutorials/auto_scheduler/tune_network_arm.py | 36 +++++--
tutorials/auto_scheduler/tune_network_x86.py | 23 ++++-
tutorials/auto_scheduler/tune_sparse_x86.py | 33 +------
10 files changed, 243 insertions(+), 47 deletions(-)
diff --git a/python/tvm/auto_scheduler/measure.py b/python/tvm/auto_scheduler/measure.py
index 039914b..95d3942 100644
--- a/python/tvm/auto_scheduler/measure.py
+++ b/python/tvm/auto_scheduler/measure.py
@@ -775,7 +775,7 @@ def register_task_input_check_func(func_name, f=None, override=False):
return register
-def _prepare_input_map(args):
+def prepare_input_map(args):
"""This function deals with special task inputs. Map the input Tensor of a TVM subgraph
to a specific buffer name in the global buffer map.
@@ -861,7 +861,7 @@ def _timed_eval_func(
random_fill = tvm.get_global_func("tvm.contrib.random.random_fill", True)
assert random_fill, "Please make sure USE_RANDOM is ON in the config.cmake"
- tensor_input_map = _prepare_input_map(build_res.args) if task_input_names else {}
+ tensor_input_map = prepare_input_map(build_res.args) if task_input_names else {}
args = []
task_inputs_count = 0
for arg in build_res.args:
@@ -1076,7 +1076,7 @@ def _timed_rpc_run(
random_fill
), "Please make sure USE_RANDOM is ON in the config.cmake on the remote devices"
- tensor_input_map = _prepare_input_map(build_res.args) if task_input_names else {}
+ tensor_input_map = prepare_input_map(build_res.args) if task_input_names else {}
args = []
task_inputs_count = 0
for arg in build_res.args:
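The rename drops the leading underscore because relay_integration.py (next hunk) now calls this helper during task extraction. As a hedged sketch of its contract: given the argument tensors of a subgraph, it returns a map from special input tensors (recognized by registered check functions such as try_get_sparse_input) to buffer names, and an empty dict when no special inputs are present:

    from tvm import te
    from tvm.auto_scheduler.measure import prepare_input_map

    # A plain placeholder is not a special task input, so the map is empty
    data = te.placeholder((512, 512), name="A")
    print(prepare_input_map([data]))  # expected: {}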
diff --git a/python/tvm/auto_scheduler/relay_integration.py b/python/tvm/auto_scheduler/relay_integration.py
index 72e3e06..d10f0fb 100644
--- a/python/tvm/auto_scheduler/relay_integration.py
+++ b/python/tvm/auto_scheduler/relay_integration.py
@@ -142,6 +142,12 @@ def extract_tasks(
# When auto scheduler is used in end to end network, try to apply layout rewrite
# to improve the overall performance
layout_rewrite_option=LayoutRewriteOption.get_target_default(target, True),
+ task_inputs=(
+ env.wkl_key_to_input_names[wkl_key]
+ if wkl_key in env.wkl_key_to_input_names
+ else None
+ ),
+ task_inputs_save_to_file=True,
)
)
weights.append(weight)
@@ -166,6 +172,7 @@ class TracingEnvironment:
self.tracing_mode = tracing_mode
self.relay_disable_build_cache = "false"
self.wkl_key_to_weight = {}
+ self.wkl_key_to_input_names = {}
def __enter__(self):
TracingEnvironment.current = self
@@ -175,17 +182,30 @@ class TracingEnvironment:
TracingEnvironment.current = None
def add_workload_key(self, workload_key):
- """Add the workload key of a search task
+ """Add the workload key of a search task.
Parameters
----------
workload_key: str
- The workload key of a task
+ The workload key of a task.
"""
if workload_key not in self.wkl_key_to_weight:
self.wkl_key_to_weight[workload_key] = 0
self.wkl_key_to_weight[workload_key] += 1
+ def add_workload_input_names(self, workload_key, input_names):
+ """Add special task inputs to this workload.
+
+ Parameters
+ ----------
+ workload_key : str
+ The workload key of a task.
+
+ input_names : List[str]
+ A list of input names.
+ """
+ self.wkl_key_to_input_names[workload_key] = input_names
+
@tvm._ffi.register_func("auto_scheduler.enter_layout_rewrite")
def enter_layout_rewrite():
@@ -277,6 +297,9 @@ def auto_schedule_topi(func_name, outs):
None in the tracing mode so that the fallback topi schedule will be used.
"""
# pylint: disable=import-outside-toplevel
+ from tvm.auto_scheduler.measure import (
+ prepare_input_map,
+ ) # lazily import to avoid recursive dependency
io_tensors, has_layout_free, has_complex_op = traverse_to_get_io_tensors(outs)
if not io_tensors: # The compute includes dynamic shapes which are not supported yet.
@@ -308,6 +331,9 @@ def auto_schedule_topi(func_name, outs):
# in the task extraction mode
if has_complex_op or env.tracing_mode == TracingMode.EXTRACT_TASK:
env.add_workload_key(key)
+ input_map = prepare_input_map(io_tensors)
+ if input_map:
+ env.add_workload_input_names(key, list(input_map.values()))
elif env.tracing_mode == TracingMode.PREPARE_LAYOUT_REWRITE:
# in prepare_layout_rewrite mode
if (
diff --git a/python/tvm/auto_scheduler/search_task.py b/python/tvm/auto_scheduler/search_task.py
index 57e239c..c5c2b5b 100644
--- a/python/tvm/auto_scheduler/search_task.py
+++ b/python/tvm/auto_scheduler/search_task.py
@@ -299,13 +299,18 @@ def get_task_input_buffer(workload_key, input_name):
TASK_INPUT_BUFFER_TABLE[workload_key] = {}
input_table = TASK_INPUT_BUFFER_TABLE[workload_key]
- if input_name not in input_table.keys():
+ if input_name not in input_table:
# Try to load buffer data from local file
tensor_from_file = _try_load_buffer_from_file(input_name)
if tensor_from_file:
input_table[input_name] = tensor_from_file
- if input_name in input_table.keys():
+ # Then check the default table: input names extracted from a relay model are
+ # stored there, since the workload_key is not available at that time
+ if input_name not in input_table:
+ input_table = TASK_INPUT_BUFFER_TABLE["default"]
+
+ if input_name in input_table:
return input_table[input_name]
raise ValueError(
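A hedged sketch of the new fallback: buffers registered from a relay model (see the sparse_dense.py hunk below) live under the "default" workload key, so any task can find them by name even though the workload key was unknown at registration time:

    import numpy as np
    import tvm
    from tvm.auto_scheduler.search_task import (
        register_task_input_buffer,
        get_task_input_buffer,
    )

    name = "sparse_dense_bsr_512_512_16_1_0.60_W_data"
    register_task_input_buffer("default", name, tvm.nd.array(np.zeros((9831, 16, 1), "float32")))
    # Lookup under an arbitrary workload key now falls through to "default"
    buf = get_task_input_buffer("some_workload_key", name)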
diff --git a/python/tvm/relay/analysis/sparse_dense.py b/python/tvm/relay/analysis/sparse_dense.py
index d521748..23929f4 100644
--- a/python/tvm/relay/analysis/sparse_dense.py
+++ b/python/tvm/relay/analysis/sparse_dense.py
@@ -73,6 +73,12 @@ def process_params(expr, params, block_size, sparsity_threshold):
ret : Namedtuple[weight_name: Array[String], weight_shape: Array[Array[IntImm]]]
return names of qualified dense weight and the shape in BSR format
"""
+
+ # pylint: disable=import-outside-toplevel
+ from tvm.auto_scheduler.search_task import (
+ register_task_input_buffer,
+ ) # lazily import to avoid recursive dependency
+
memo = SparseAnalysisResult(weight_name=[], weight_shape=[])
weight_names = _search_dense_op_weight(expr)
for name in weight_names:
@@ -92,6 +98,23 @@ def process_params(expr, params, block_size, sparsity_threshold):
params[name + ".data"] = tvm.nd.array(sparse_weight.data)
params[name + ".indices"] = tvm.nd.array(sparse_weight.indices)
params[name + ".indptr"] = tvm.nd.array(sparse_weight.indptr)
+
+ prefix = "sparse_dense_bsr_%d_%d_%d_%d_%.2f_" % (
+ w_np.shape[0],
+ w_np.shape[1],
+ block_size[0],
+ block_size[1],
+ 1 - sparsity,
+ )
+ register_task_input_buffer(
+ "default", prefix + "W_data", tvm.runtime.ndarray.array(sparse_weight.data)
+ )
+ register_task_input_buffer(
+ "default", prefix + "W_indices", tvm.runtime.ndarray.array(sparse_weight.indices)
+ )
+ register_task_input_buffer(
+ "default", prefix + "W_indptr", tvm.runtime.ndarray.array(sparse_weight.indptr)
+ )
ret = SparseAnalysisResult(
weight_name=tvm.runtime.convert(memo.weight_name),
weight_shape=tvm.runtime.convert(memo.weight_shape),
diff --git a/python/tvm/relay/frontend/tflite.py b/python/tvm/relay/frontend/tflite.py
index a5c9a58..026583d 100644
--- a/python/tvm/relay/frontend/tflite.py
+++ b/python/tvm/relay/frontend/tflite.py
@@ -1872,7 +1872,7 @@ class OperatorConverter(object):
out_dtype="int32",
)
else:
- out = _op.nn.dense(in_expr, weight_expr)
+ out = _op.nn.dense(in_expr, weight_expr, units=weight_shape[0])
# if we have bias
if len(input_tensors) == 3:
diff --git a/python/tvm/topi/nn/sparse.py b/python/tvm/topi/nn/sparse.py
index 7561106..f5737d0 100644
--- a/python/tvm/topi/nn/sparse.py
+++ b/python/tvm/topi/nn/sparse.py
@@ -426,7 +426,7 @@ def try_get_sparse_input(args):
density *= i
density /= k * n
density = density.value
- sparse_prefix = "%s_%d_%d_%d_%d_%d_%.2f_" % (prefix_init, m, n, k, bs_r, bs_c, density)
+ sparse_prefix = "%s_%d_%d_%d_%d_%.2f_" % (prefix_init, n, k, bs_r, bs_c, density)
visited = set()
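The prefix change drops M (the dense activation's batch dimension) from the buffer name, since it is not a property of the sparse weight; the same registered buffer can therefore be shared across batch sizes. For example:

    n, k, bs_r, bs_c, density = 512, 512, 16, 1, 0.60
    prefix = "sparse_dense_bsr_%d_%d_%d_%d_%.2f_" % (n, k, bs_r, bs_c, density)
    assert prefix == "sparse_dense_bsr_512_512_16_1_0.60_"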
diff --git a/python/tvm/topi/sparse/utils.py b/python/tvm/topi/sparse/utils.py
new file mode 100644
index 0000000..43bc6e0
--- /dev/null
+++ b/python/tvm/topi/sparse/utils.py
@@ -0,0 +1,126 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Some utils for Sparse operation."""
+import tvm
+from tvm import relay
+from tvm.relay import data_dep_optimization as ddo
+
+
+def random_bsr_matrix(m, n, bs_r, bs_c, density, dtype):
+ """Generate a random sparse matrix in bsr format.
+
+ Returns
+ -------
+ scipy.sparse.bsr_matrix
+ """
+ # pylint: disable=import-outside-toplevel
+ import numpy as np
+ import itertools
+ import scipy.sparse as sp
+
+ y = np.zeros((m, n), dtype=dtype)
+ assert m % bs_r == 0
+ assert n % bs_c == 0
+ nnz = int(density * m * n)
+ num_blocks = int(nnz / (bs_r * bs_c)) + 1
+ candidate_blocks = np.asarray(list(itertools.product(range(0, m, bs_r), range(0, n, bs_c))))
+ assert candidate_blocks.shape[0] == m // bs_r * n // bs_c
+ chosen_blocks = candidate_blocks[
+ np.random.choice(candidate_blocks.shape[0], size=num_blocks, replace=False)
+ ]
+ # pylint: disable=invalid-name
+ for (r, c) in chosen_blocks:
+ y[r : r + bs_r, c : c + bs_c] = np.random.randn(bs_r, bs_c)
+ s = sp.bsr_matrix(y, blocksize=(bs_r, bs_c))
+ assert s.data.shape == (num_blocks, bs_r, bs_c)
+ assert s.indices.shape == (num_blocks,)
+ assert s.indptr.shape == (m // bs_r + 1,)
+ return s
+
+
+def random_sparse_dense_params(func, params, bs_r, bs_c, density):
+ """Replace the dense parameters with random sparse parameters. Mainly used for testing.
+
+ Parameters
+ ----------
+ func : tvm.relay.Expr
+ Expr will be optimized to sparse operation.
+ params : Dict[str, tvm.nd.array]
+ Parameters of the Expr.
+ bs_r : int
+ The row size of a BSR matrix block.
+ bs_c : int
+ The column size of a BSR matrix block.
+ density : float
+ The density of the random sparse parameters.
+
+ Returns
+ -------
+ Dict[str, tvm.nd.array]
+ The generated random parameters.
+ """
+
+ def deepcopy(param_dic):
+ ret = {}
+ for k, v in param_dic.items():
+ ret[k] = tvm.nd.array(v.asnumpy())
+ return ret
+
+ new_params = deepcopy(params)
+ dense_weight_names = relay.analysis.sparse_dense._search_dense_op_weight(func)
+ for item in dense_weight_names:
+ name = str(item)
+ shape = new_params[name].shape
+ if shape[0] % bs_r == 0 and shape[1] % bs_c == 0:
+ new_w = random_bsr_matrix(shape[0], shape[1], bs_r, bs_c, density, "float32").todense()
+ new_params[name] = tvm.nd.array(new_w)
+ return new_params
+
+
+def convert_model_dense_to_sparse(mod, params, random_params=False, bs_r=1, bs_c=1, sparsity=0.85):
+ """Convert a dense model to sparse model.
+
+ Parameters
+ ----------
+ mod : tvm.IRModule
+ The dense model.
+ params : Dict[str, tvm.nd.array]
+ Parameters of the dense model.
+ random_params : bool = False
+ True to replace the parameters of the dense model with some random sparse tensors.
+ This is mainly used for testing.
+ bs_r : int
+ The row size of a BSR matrix block.
+ bs_c : int
+ The column size of a BSR matrix block.
+ sparsity : float
+ The sparsity of the random sparse parameters.
+
+ Returns
+ -------
+ tvm.IRModule
+ The updated sparse model.
+ Dict[str, tvm.nd.array]
+ The updated parameters.
+ """
+ mod, params = ddo.simplify_fc_transpose.convert(mod["main"], params)
+ if random_params:
+ # Manually replace the parameters of the dense model with random sparse tensors
+ params = random_sparse_dense_params(mod, params, bs_r=bs_r, bs_c=bs_c, density=1 - sparsity)
+ # Currently we only support converting dense matmul to sparse dense matmul
+ mod, params = ddo.bsr_dense.convert(mod, params, (bs_r, bs_c), sparsity_threshold=0.8)
+
+ return tvm.IRModule.from_expr(mod), params
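A quick usage sketch of the relocated helper (the shapes are deterministic given these arguments; only the data is random):

    from tvm.topi.sparse.utils import random_bsr_matrix

    w = random_bsr_matrix(512, 512, 16, 1, density=0.6, dtype="float32")
    print(w.data.shape)     # (9831, 16, 1)
    print(w.indices.shape)  # (9831,)
    print(w.indptr.shape)   # (33,)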
diff --git a/tutorials/auto_scheduler/tune_network_arm.py b/tutorials/auto_scheduler/tune_network_arm.py
index 153143d..46d95c3 100644
--- a/tutorials/auto_scheduler/tune_network_arm.py
+++ b/tutorials/auto_scheduler/tune_network_arm.py
@@ -17,7 +17,9 @@
"""
Auto-scheduling a Neural Network for ARM CPU
=============================================
-**Author**: `Thierry Moreau <https://github.com/tmoreau89, Lianmin Zheng <https://github.com/merrymercy>>`_
+**Author**: `Thierry Moreau <https://github.com/tmoreau89>`_, \
+ `Lianmin Zheng <https://github.com/merrymercy>`_, \
+ `Chengfan Jia <https://github.com/jcf94/>`_
Auto-tuning for specific devices and workloads is critical for getting the
best performance. This is a tutorial on how to tune a whole neural
@@ -45,9 +47,11 @@ __name__ == "__main__":` block.
"""
import numpy as np
+import os
import tvm
from tvm import relay, auto_scheduler
+from tvm.relay import data_dep_optimization as ddo
import tvm.relay.testing
from tvm.contrib import graph_executor
from tvm.contrib.utils import tempdir
@@ -67,7 +71,7 @@ from tvm.contrib.utils import tempdir
# You can use :ref:`ConvertLayout <convert-layout-usage>` pass to do the layout conversion in TVM.
-def get_network(name, batch_size, layout="NHWC", dtype="float32"):
+def get_network(name, batch_size, layout="NHWC", dtype="float32", use_sparse=False):
"""Get the symbol definition and random weight of a network"""
# auto-scheduler prefers NHWC layout
@@ -127,6 +131,17 @@ def get_network(name, batch_size, layout="NHWC", dtype="float32"):
net.params, relay.nn.softmax(net.body), None, net.type_params, net.attrs
)
mod = tvm.IRModule.from_expr(net)
+ elif name == "mlp":
+ mod, params = relay.testing.mlp.get_workload(
+ batch_size=batch_size, dtype=dtype, image_shape=image_shape, num_classes=1000
+ )
+ else:
+ raise ValueError("Network not found.")
+
+ if use_sparse:
+ from tvm.topi.sparse.utils import convert_model_dense_to_sparse
+
+ mod, params = convert_model_dense_to_sparse(mod, params, random_params=True)
return mod, params, input_shape, output_shape
@@ -217,8 +232,10 @@ def get_network(name, batch_size, layout="NHWC", dtype="float32"):
# because we're sharing x86 op strategy.
target = tvm.target.Target("llvm -mtriple=aarch64-linux-gnu -mattr=+neon")
-# Also replace this with the device key in your tracker
+# Also replace this with the device key, RPC host, and RPC port of your tracker
device_key = "rasp4b-64"
+rpc_host = "0.0.0.0"
+rpc_port = 9191
# Set this to True if you use ndk tools for cross compiling
# And also set the environment variable below to point to the cross compiler
@@ -227,6 +244,7 @@ use_ndk = False
#### TUNING OPTION ####
network = "mobilenet"
+use_sparse = False
batch_size = 1
layout = "NHWC"
dtype = "float32"
@@ -244,8 +262,11 @@ log_file = "%s-%s-B%d-%s.json" % (network, layout, batch_size, target.kind.name)
# The task scheduler will just optimize this objective.
# Extract tasks from the network
+print("Get model...")
+mod, params, input_shape, output_shape = get_network(
+ network, batch_size, layout, dtype=dtype, use_sparse=use_sparse
+)
print("Extract tasks...")
-mod, params, input_shape, output_shape = get_network(network, batch_size, layout, dtype=dtype)
tasks, task_weights = auto_scheduler.extract_tasks(mod["main"], params, target)
for idx, task in enumerate(tasks):
@@ -280,10 +301,11 @@ def tune_and_evaluate():
tuner = auto_scheduler.TaskScheduler(tasks, task_weights)
tune_option = auto_scheduler.TuningOptions(
num_measure_trials=200, # change this to 20000 to achieve the best performance
+ builder=auto_scheduler.LocalBuilder(build_func="ndk" if use_ndk else "default"),
runner=auto_scheduler.RPCRunner(
device_key,
- host="0.0.0.0",
- port=9191,
+ host=rpc_host,
+ port=rpc_port,
timeout=30,
repeat=1,
min_repeat_ms=200,
@@ -315,7 +337,7 @@ def tune_and_evaluate():
# Upload module to device
print("Upload...")
- remote = auto_scheduler.utils.request_remote(device_key, "0.0.0.0", 9191, timeout=10000)
+ remote = auto_scheduler.utils.request_remote(device_key, rpc_host, rpc_port, timeout=10000)
remote.upload(tmp.relpath(filename))
rlib = remote.load_module(filename)
diff --git a/tutorials/auto_scheduler/tune_network_x86.py b/tutorials/auto_scheduler/tune_network_x86.py
index 91dc64e..55e9e4e 100644
--- a/tutorials/auto_scheduler/tune_network_x86.py
+++ b/tutorials/auto_scheduler/tune_network_x86.py
@@ -17,7 +17,8 @@
"""
Auto-scheduling a Neural Network for x86 CPU
============================================
-**Author**: `Lianmin Zheng <https://github.com/merrymercy>`_
+**Author**: `Lianmin Zheng <https://github.com/merrymercy>`_, \
+ `Chengfan Jia <https://github.com/jcf94/>`_
Auto-tuning for specific devices and workloads is critical for getting the
best performance. This is a tutorial on how to tune a whole neural
@@ -48,6 +49,7 @@ import numpy as np
import tvm
from tvm import relay, auto_scheduler
+from tvm.relay import data_dep_optimization as ddo
import tvm.relay.testing
from tvm.contrib import graph_executor
@@ -66,7 +68,7 @@ from tvm.contrib import graph_executor
# You can use :ref:`ConvertLayout <convert-layout-usage>` pass to do the layout conversion in TVM.
-def get_network(name, batch_size, layout="NHWC", dtype="float32"):
+def get_network(name, batch_size, layout="NHWC", dtype="float32", use_sparse=False):
"""Get the symbol definition and random weight of a network"""
# auto-scheduler prefers NHWC layout
@@ -126,6 +128,17 @@ def get_network(name, batch_size, layout="NHWC", dtype="float32"):
net.params, relay.nn.softmax(net.body), None, net.type_params, net.attrs
)
mod = tvm.IRModule.from_expr(net)
+ elif name == "mlp":
+ mod, params = relay.testing.mlp.get_workload(
+ batch_size=batch_size, dtype=dtype, image_shape=image_shape, num_classes=1000
+ )
+ else:
+ raise ValueError("Network not found.")
+
+ if use_sparse:
+ from tvm.topi.sparse.utils import convert_model_dense_to_sparse
+
+ mod, params = convert_model_dense_to_sparse(mod, params, random_params=True)
return mod, params, input_shape, output_shape
@@ -134,6 +147,7 @@ def get_network(name, batch_size, layout="NHWC", dtype="float32"):
# If the target machine supports avx512 instructions, replace the
# "llvm -mcpu=core-avx2" with "llvm -mcpu=skylake-avx512"
network = "resnet-50"
+use_sparse = False
batch_size = 1
layout = "NHWC"
target = tvm.target.Target("llvm -mcpu=core-avx2")
@@ -152,8 +166,11 @@ log_file = "%s-%s-B%d-%s.json" % (network, layout, batch_size, target.kind.name)
# The task scheduler will just optimize this objective.
# Extract tasks from the network
+print("Get model...")
+mod, params, input_shape, output_shape = get_network(
+ network, batch_size, layout, dtype=dtype, use_sparse=use_sparse
+)
print("Extract tasks...")
-mod, params, input_shape, output_shape = get_network(network, batch_size, layout, dtype=dtype)
tasks, task_weights = auto_scheduler.extract_tasks(mod["main"], params, target)
for idx, task in enumerate(tasks):
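To exercise the new sparse path in either tutorial, only the tuning options need to change (hedged: "mlp" is the only network the sparse conversion is wired up for here):

    network = "mlp"
    use_sparse = True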
diff --git a/tutorials/auto_scheduler/tune_sparse_x86.py b/tutorials/auto_scheduler/tune_sparse_x86.py
index 3df3720..a635a74 100644
--- a/tutorials/auto_scheduler/tune_sparse_x86.py
+++ b/tutorials/auto_scheduler/tune_sparse_x86.py
@@ -36,15 +36,13 @@ __name__ == "__main__":` block.
"""
import os
-import itertools
import numpy as np
import tvm
from tvm import te, auto_scheduler, runtime, topi
from tvm.auto_scheduler import _ffi_api
from tvm.topi.utils import get_const_tuple
-
-import scipy.sparse as sp
+from tvm.topi.sparse.utils import random_bsr_matrix
######################################################################
# Define the computation
@@ -53,29 +51,6 @@ import scipy.sparse as sp
# The function should return the list of input/output tensors.
# From these tensors, the auto-scheduler can get the whole computational graph.
-# We use this function to generate a random bsr matrix
-def random_bsr_matrix(M, N, BS_R, BS_C, density, dtype):
- import itertools
-
- Y = np.zeros((M, N), dtype=dtype)
- assert M % BS_R == 0
- assert N % BS_C == 0
- nnz = int(density * M * N)
- num_blocks = int(nnz / (BS_R * BS_C)) + 1
- candidate_blocks = np.asarray(list(itertools.product(range(0, M, BS_R), range(0, N, BS_C))))
- assert candidate_blocks.shape[0] == M // BS_R * N // BS_C
- chosen_blocks = candidate_blocks[
- np.random.choice(candidate_blocks.shape[0], size=num_blocks, replace=False)
- ]
- for i in range(len(chosen_blocks)):
- r, c = chosen_blocks[i]
- Y[r : r + BS_R, c : c + BS_C] = np.random.randn(BS_R, BS_C)
- s = sp.bsr_matrix(Y, blocksize=(BS_R, BS_C))
- assert s.data.shape == (num_blocks, BS_R, BS_C)
- assert s.indices.shape == (num_blocks,)
- assert s.indptr.shape == (M // BS_R + 1,)
- return s
-
@auto_scheduler.register_workload
def sparse_dense(M, N, K, w_data_shape, w_indices_shape, w_indptr_shape, dtype):
@@ -104,7 +79,9 @@ def sparse_dense(M, N, K, w_data_shape, w_indices_shape, w_indptr_shape, dtype):
# See the `tvm.auto_scheduler.measure.py` for more details.
# Define the basic shapes of this sparse computation
-M = K = N = 512
+M = 128
+K = 256
+N = 512
BS_R = 16
BS_C = 1
density = 0.6
@@ -131,7 +108,7 @@ Y_np = np.maximum(np.zeros((M, N), dtype="float32"), Y_np) # Relu
target = tvm.target.Target("llvm")
# Register the sparse data to task inputs
-prefix = "sparse_dense_bsr_%d_%d_%d_%d_%d_%.2f_" % (M, N, K, BS_R, BS_C, density)
+prefix = "sparse_dense_bsr_%d_%d_%d_%d_%.2f_" % (N, K, BS_R, BS_C, density)
task = tvm.auto_scheduler.SearchTask(
func=sparse_dense,
args=(M, N, K, W_sp_np.data.shape, W_sp_np.indices.shape, W_sp_np.indptr.shape, "float32"),
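The hunk above is truncated here; for reference, a hedged sketch of how the registered prefix is consumed when constructing the task, mirroring the task_inputs keyword arguments this commit adds in extract_tasks:

    task = tvm.auto_scheduler.SearchTask(
        func=sparse_dense,
        args=(M, N, K, W_sp_np.data.shape, W_sp_np.indices.shape, W_sp_np.indptr.shape, "float32"),
        target=target,
        task_inputs={
            prefix + "W_data": runtime.ndarray.array(W_sp_np.data),
            prefix + "W_indices": runtime.ndarray.array(W_sp_np.indices),
            prefix + "W_indptr": runtime.ndarray.array(W_sp_np.indptr),
        },
        task_inputs_save_to_file=True,
    )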