You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tvm.apache.org by GitBox <gi...@apache.org> on 2022/07/20 23:34:37 UTC

[GitHub] [tvm] huajsj commented on a diff in pull request #11557: [Runtime][PipelineExecutor] Tutorial of using pipeline executor.

huajsj commented on code in PR #11557:
URL: https://github.com/apache/tvm/pull/11557#discussion_r926132216


##########
gallery/how_to/work_with_relay/using_with_pipeline_executor.py:
##########
@@ -0,0 +1,262 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Using Pipeline Executor in Relay
+=================================
+**Author**: `Hua Jiang <https://github.com/huajsj>`_
+
+This is a short tutorial on how to use "Pipeline Executor" with Relay.
+"""
+import tvm
+from tvm import te
+import numpy as np
+from tvm.contrib import graph_executor as runtime
+from tvm.relay.op.contrib.cutlass import partition_for_cutlass
+from tvm import relay
+from tvm.relay import testing
+import tvm.testing
+import time
+from tvm.contrib.cutlass import (
+    has_cutlass,
+    num_cutlass_partitions,
+    finalize_modules,
+    finalize_modules_vm,
+)
+
+img_size = 8
+#######################################################################
+# Create a simple network, this network can be a pre-trained model too.
+# ---------------------------------------------------------------------
+# Let's create a very simple network for demonstration.
+# It consists of convolution, batch normalization, and ReLU activation.
+def get_network():
+    out_channels = 16
+    batch_size = 1
+    data = relay.var("data", relay.TensorType((batch_size, 3, img_size, img_size), "float16"))
+    dense_weight = relay.var(
+        "dweight", relay.TensorType((batch_size, 16 * img_size * img_size), "float16")
+    )
+    weight = relay.var("weight")
+    second_weight = relay.var("second_weight")
+    bn_gamma = relay.var("bn_gamma")
+    bn_beta = relay.var("bn_beta")
+    bn_mmean = relay.var("bn_mean")
+    bn_mvar = relay.var("bn_var")
+    simple_net = relay.nn.conv2d(
+        data=data, weight=weight, kernel_size=(3, 3), channels=out_channels, padding=(1, 1)
+    )
+    simple_net = relay.nn.batch_norm(simple_net, bn_gamma, bn_beta, bn_mmean, bn_mvar)[0]
+    simple_net = relay.nn.relu(simple_net)
+    simple_net = relay.nn.batch_flatten(simple_net)
+    simple_net = relay.nn.dense(simple_net, dense_weight)
+    simple_net = relay.Function(relay.analysis.free_vars(simple_net), simple_net)
+    data_shape = (batch_size, 3, img_size, img_size)
+    net, params = testing.create_workload(simple_net)
+    return net, params, data_shape
+
+
+net, params, data_shape = get_network()
+###########################################
+# Splitting the network into two subgraphs.
+# -----------------------------------------
+# We use a linear graph-splitting function from the test suite as an example. Users can also
+# create their own splitting logic.
+import inspect
+import os
+
+test_path = os.path.dirname(inspect.getfile(lambda: None))
+os.sys.path.append(os.path.join(test_path, "../../../tests/python/relay"))
+from test_pipeline_executor import graph_split
+
+###########################################
+# Splitting the network into two subgraphs.
+split_config = [{"op_name": "nn.relu", "op_index": 0}]
+subgraphs = graph_split(net["main"], split_config, params)
+###########################################################
+# The generated subgraphs should look something like below.
+
+"""
+#subgraphs[0]
+
+ def @main(%data: Tensor[(1, 3, img_size, img_size), float16]) {
+  %0 = nn.conv2d(%data, meta[relay.Constant][0] /* ty=Tensor[(16, 3, 3, 3), float16] */, padding=[1, 1, 1, 1], channels=16, kernel_size=[3, 3]) /* ty=Tensor[(1, 16, img_size, img_size), float16] */;
+  %1 = nn.batch_norm(%0, meta[relay.Constant][1] /* ty=Tensor[(16), float16] */, meta[relay.Constant][2] /* ty=Tensor[(16), float16]*/, meta[relay.Constant][3] /* ty=Tensor[(16), float16] */, meta[relay.Constant][4] /* ty=Tensor[(16), float16] */) /* ty=(Tensor[(1,16, img_size, img_size), float16], Tensor[(16), float16], Tensor[(16), float16]) */;
+  %2 = %1.0;
+  nn.relu(%2) /* ty=Tensor[(1, 16, img_size, img_size), float16] */
+ }
+
+
+
+#subgraphs[1]
+
+ def @main(%data_n_0: Tensor[(1, 16, 8, 8), float16] /* ty=Tensor[(1, 16, 8, 8), float16] */) {
+  %0 = nn.batch_flatten(%data_n_0) /* ty=Tensor[(1, 1024), float16] */;
+  nn.dense(%0, meta[relay.Constant][0] /* ty=Tensor[(1, 1024), float16] */, units=None) /* ty=Tensor[(1, 1), float16] */
+ }
+
+"""
+
+# sphinx_gallery_start_ignore
+from tvm import testing
+
+testing.utils.install_request_hook(depth=3)
+# sphinx_gallery_end_ignore
+
+#########################################
+# Build the subgraph with cutlass target.
+# ---------------------------------------
+
+cutlass = tvm.target.Target(
+    {
+        "kind": "cutlass",
+        "sm": int(tvm.target.Target("cuda").arch.split("_")[1]),
+        "use_3xtf32": True,
+        "split_k_slices": [1],
+        "profile_all_alignments": False,
+        "find_first_valid": True,
+        "use_multiprocessing": True,
+        "use_fast_math": False,
+        "tmp_dir": "./tmp",
+    },
+    host=tvm.target.Target("llvm"),
+)
+
+
+def cutlass_build(mod, target, params=None, target_host=None, mod_name="default"):
+    target = [target, cutlass]
+    lib = relay.build_module.build(
+        mod, target=target, params=params, target_host=target_host, mod_name=mod_name
+    )
+    return lib
+
+
+###########################################################
+# Run the two subgraphs in pipeline with pipeline executor.
+# ---------------------------------------------------------
+# Define a function to do all the codegen and pipeline executor works.
+# To run the pipeline executor with cutlass, both 'USE_PIPELINE_EXECUTOR'
+# and 'USE_CUTLASS' need to be set to ON in config.cmake.
+from tvm.contrib import graph_executor, pipeline_executor, pipeline_executor_build
+
+#########################################
+# Create subgraph pipeline configuration.
+# Associate the subgraph module with a target.
+# Using BYOC to set the codegen of the second subgraph module.
+# To use cutlass, 'USE_CUTLASS' should be set to ON.
+mod0, mod1 = subgraphs[0], subgraphs[1]
+# Use cutlass as the codegen.
+mod1 = partition_for_cutlass(mod1)
+#################################################
+# Get the pipeline executor configuration object.
+pipe_config = pipeline_executor_build.PipelineConfig()
+###########################################################################
+# Set the compile target of the second subgraph module for example as LLVM.
+pipe_config[mod0].target = "llvm"
+pipe_config[mod0].dev = tvm.cpu(0)
+###############################################################################
+# Set the cpu afinity for control flow, for example using cpu 0 for control flow.
+pipe_config[mod1].cpu_affinity = "0"
+pipe_config[mod1].export_cc = None
+##############################################################
+# Set the compile target of the second subgraph module as LLVM.
+pipe_config[mod1].target = "cuda"
+pipe_config[mod1].dev = tvm.device("cuda", 0)
+pipe_config[mod1].build_func = cutlass_build
+pipe_config[mod1].export_cc = "nvcc"
+#################################################################################
+# Set the cpu afinity for control flow, for example using cpu 1 for control flow.
+pipe_config[mod1].cpu_affinity = "1"
+pipe_config["input"]["data"].connect(pipe_config[mod0]["input"]["data"])
+pipe_config[mod0]["output"][0].connect(pipe_config[mod1]["input"]["data_n_0"])
+pipe_config[mod1]["output"]["0"].connect(pipe_config["output"][0])
+######################################
+# The pipeline configuration is shown below.
+"""
+print(pipe_config)
+ Inputs
+  |data: mod0:data
+
+ output
+  |output(0) : mod1.output(0)
+
+ connections
+  |mod0.output(0)-> mod1.data_n_0
+"""
+
+# sphinx_gallery_start_ignore
+from tvm import testing
+
+# testing.utils.install_request_hook(depth=3)
+# sphinx_gallery_end_ignore
+##############################
+# Build the pipeline executor.
+# ----------------------------
+with tvm.transform.PassContext(opt_level=3):
+    pipeline_mod_factory = pipeline_executor_build.build(pipe_config)
+###############################################
+# Export the parameter configuration to a file.
+directory_path = tvm.contrib.utils.tempdir().temp_dir
+#############################################
+# If the directory does not exist, create it.
+if not os.path.exists(directory_path):
+    os.makedirs(directory_path)
+config_file_name = pipeline_mod_factory.export_library(directory_path)
+################################################################
+# Use the load function to create and initialize PipelineModule.
+# --------------------------------------------------------------
+pipeline_module = pipeline_executor.PipelineModule.load_library(config_file_name)
+
+############################
+# Run the pipeline executor.
+# --------------------------
+# Allocate the input data.
+data = np.random.uniform(-1, 1, size=data_shape).astype("float16")
+pipeline_module.set_input("data", tvm.nd.array(data))
+##########################################################################
+# Run the two subgraphs in pipeline mode and get the output asynchronously.
+pipeline_module.run()
+outputs = []
+while not outputs:

Review Comment:
   `pipeline_module.run()` is an asynchronous function call, which means that after calling it the output may not be available yet; hence we need a while loop to keep checking for the output.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@tvm.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org