You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tvm.apache.org by an...@apache.org on 2022/09/28 20:58:42 UTC

[tvm] branch v0.10.0 updated: [usmp] Also remap VarNode to USMP-allocated buffer (#12880)

This is an automated email from the ASF dual-hosted git repository.

andrewzhaoluo pushed a commit to branch v0.10.0
in repository https://gitbox.apache.org/repos/asf/tvm.git


The following commit(s) were added to refs/heads/v0.10.0 by this push:
     new 58fc568f54 [usmp] Also remap VarNode to USMP-allocated buffer (#12880)
58fc568f54 is described below

commit 58fc568f5443572e35c56b12a82b60b0f72b33b4
Author: Andrew Reusch <ar...@gmail.com>
AuthorDate: Tue Sep 27 16:11:14 2022 -0700

    [usmp] Also remap VarNode to USMP-allocated buffer (#12880)
    
    Before this patch, ConvertPoolAllocationsToOffsets would generate TIR
    like the following:
    
      let dense_let: Pointer(global int32) = @tir.address_of(global_workspace_37_buffer_var[69952], dtype=handle)
      for (k.outer: int32, 0, 64) {
        @tir.call_extern("gemm_1x1x1_update_UKVNAEBL", ..., dense, ...)
      }
    
      T_multiply[ax1] = @tir.q_multiply_shift(((dense: Buffer(dense_let,
          int32, [10], [], align=32)[ax1], ...)
    
    This caused CodegenSourceBase to later fail with this error:
      "src/target/source/codegen_source_base.cc", line 67
      Check failed: (it != var_idmap_.end()) is false: Find undefined
        Variable dense
    
    After this patch, "dense" in the call_extern is changed to read "dense_let."
---
 src/tir/usmp/analysis/extract_buffer_info.cc       | 20 ++---
 .../convert_pool_allocations_to_offsets.cc         | 10 +++
 ...ransform_convert_pool_allocations_to_offsets.py | 93 ++++++++++++++++++++++
 3 files changed, 114 insertions(+), 9 deletions(-)

diff --git a/src/tir/usmp/analysis/extract_buffer_info.cc b/src/tir/usmp/analysis/extract_buffer_info.cc
index 74d428f6dd..2680589457 100644
--- a/src/tir/usmp/analysis/extract_buffer_info.cc
+++ b/src/tir/usmp/analysis/extract_buffer_info.cc
@@ -429,15 +429,17 @@ void BufferInfoExtractor::VisitExpr_(const VarNode* op) {
 
 Array<Var> static GetMatchedBuffers(const PrimFunc& func) {
   Array<Var> buffer_vars;
-  for (unsigned int i = 0; i < func->params.size() - 1; i++) {
-    Var param = func->params[i];
-    buffer_vars.push_back(func->buffer_map[param]->data);
-  }
-  Var last_param = func->params.back();
-  // Checks whether last var is present in the buffer map
-  // because it could be the resource handle
-  if (func->buffer_map.find(last_param) != func->buffer_map.end()) {
-    buffer_vars.push_back(func->buffer_map[last_param]->data);
+  if (func->params.size() > 0) {
+    for (unsigned int i = 0; i < func->params.size() - 1; i++) {
+      Var param = func->params[i];
+      buffer_vars.push_back(func->buffer_map[param]->data);
+    }
+    Var last_param = func->params.back();
+    // Checks whether last var is present in the buffer map
+    // because it could be the resource handle
+    if (func->buffer_map.find(last_param) != func->buffer_map.end()) {
+      buffer_vars.push_back(func->buffer_map[last_param]->data);
+    }
   }
   return buffer_vars;
 }
diff --git a/src/tir/usmp/transform/convert_pool_allocations_to_offsets.cc b/src/tir/usmp/transform/convert_pool_allocations_to_offsets.cc
index 601e347196..56aba654b5 100644
--- a/src/tir/usmp/transform/convert_pool_allocations_to_offsets.cc
+++ b/src/tir/usmp/transform/convert_pool_allocations_to_offsets.cc
@@ -96,6 +96,7 @@ class PoolAllocationToOffsetConverter : public StmtExprMutator {
  private:
   PrimExpr VisitExpr_(const CallNode* op) override;
   Stmt VisitStmt_(const AllocateNode* op) override;
+  PrimExpr VisitExpr_(const VarNode* op) override;
   PrimExpr VisitExpr_(const BufferLoadNode* op) override;
   Stmt VisitStmt_(const BufferStoreNode* op) override;
 
@@ -395,6 +396,15 @@ PrimExpr PoolAllocationToOffsetConverter::VisitExpr_(const BufferLoadNode* op) {
   return std::move(load);
 }
 
+PrimExpr PoolAllocationToOffsetConverter::VisitExpr_(const VarNode* op) {
+  auto it = allocate_var_to_let_var_.find(GetRef<Var>(op));
+  if (it != allocate_var_to_let_var_.end()) {
+    return (*it).second;
+  }
+
+  return StmtExprMutator::VisitExpr_(op);
+}
+
 Buffer PoolAllocationToOffsetConverter::GetRemappedBuffer(Buffer original) {
   {
     auto it = original_buf_to_let_buf_.find(original);
diff --git a/tests/python/unittest/test_tir_usmp_transform_convert_pool_allocations_to_offsets.py b/tests/python/unittest/test_tir_usmp_transform_convert_pool_allocations_to_offsets.py
index fdda400a77..31cc6e07de 100644
--- a/tests/python/unittest/test_tir_usmp_transform_convert_pool_allocations_to_offsets.py
+++ b/tests/python/unittest/test_tir_usmp_transform_convert_pool_allocations_to_offsets.py
@@ -600,5 +600,98 @@ def test_resnet_subgraph():
         tvm.ir.assert_structural_equal(actual_func, ref_func)
 
 
+@tvm.script.ir_module
+class TensorIntrinStructure:
+    @T.prim_func
+    def tensor_intrin_primfunc() -> None:
+        dense_data = T.allocate([10], "int32", "global")
+        T.evaluate(
+            T.call_extern(
+                "intrin_function",
+                T.tvm_access_ptr(
+                    T.type_annotation(dtype="int32"), dense_data, 0, 1, 2, dtype="handle"
+                ),
+                dtype="int32",
+            )
+        )
+
+        dense = T.buffer_decl([10], "int32", data=dense_data)
+        dense[0] = T.q_multiply_shift(dense[0], 1608879842, 31, -7, dtype="int32")
+
+    @T.prim_func
+    def __tvm_main__(input: T.handle, output: T.handle) -> None:
+        T.evaluate(T.call_extern("tensor_intrin_primfunc", dtype="int32"))
+
+
+@tvm.script.ir_module
+class TensorIntrinStructurePlanned:
+    @T.prim_func
+    def tensor_intrin_primfunc(global_workspace_1_var: T.Ptr[T.uint8]) -> None:
+        global_workspace_1_buffer_var = T.match_buffer(
+            global_workspace_1_var, [40], dtype="uint8", strides=[1], elem_offset=0, align=16
+        )
+        T.preflattened_buffer(
+            global_workspace_1_buffer_var, [40], dtype="uint8", strides=[1], elem_offset=0, align=16
+        )
+        dense_let = T.buffer_decl([10], "int32")
+        with T.let(dense_let.data, T.address_of(global_workspace_1_buffer_var[0], dtype="handle")):
+            T.evaluate(
+                T.call_extern(
+                    "intrin_function",
+                    T.tvm_access_ptr(
+                        T.type_annotation(dtype="int32"), dense_let.data, 0, 1, 2, dtype="handle"
+                    ),
+                    dtype="int32",
+                )
+            )
+            dense_let[0] = T.q_multiply_shift(dense_let[0], 1608879842, 31, -7, dtype="int32")
+
+    @T.prim_func
+    def __tvm_main__(
+        input: T.handle, global_workspace_1_var: T.Ptr[T.uint8], output: T.handle
+    ) -> None:
+        global_workspace_1_buffer_var = T.match_buffer(
+            global_workspace_1_var, [40], dtype="uint8", strides=[1], elem_offset=0, align=16
+        )
+        T.evaluate(
+            T.call_extern(
+                "tensor_intrin_primfunc", global_workspace_1_buffer_var.data, dtype="int32"
+            )
+        )
+
+
+def test_tensor_intrin():
+    target = Target("c")
+    global_workspace_pool = WorkspacePoolInfo(
+        "global_workspace",
+        [target],
+    )
+
+    tir_mod = TensorIntrinStructure
+    tir_mod = _assign_targets_to_primfuncs_irmodule(tir_mod, target)
+    tir_mod = assign_poolinfos_to_allocates_in_irmodule(tir_mod, [global_workspace_pool])
+    main_func = tir_mod["__tvm_main__"]
+    buffer_analysis = tvm.tir.usmp.analysis.extract_buffer_info(main_func, tir_mod)
+    buffer_info_map = buffer_analysis.buffer_info_stmts
+
+    fcreate_array_bi = tvm.get_global_func("tir.usmp.CreateArrayBufferInfo")
+    buffer_info_arr = fcreate_array_bi(buffer_info_map)
+    fusmp_algo_greedy_by_size = tvm.get_global_func("tir.usmp.algo.greedy_by_size")
+    buffer_pool_allocations = fusmp_algo_greedy_by_size(
+        buffer_info_arr, buffer_analysis.memory_pressure
+    )
+    fassign_stmt_pool_allocations = tvm.get_global_func("tir.usmp.AssignStmtPoolAllocations")
+    pool_allocations = fassign_stmt_pool_allocations(buffer_info_map, buffer_pool_allocations)
+    tir_mod_with_offsets = tvm.tir.usmp.transform.convert_pool_allocations_to_offsets(
+        pool_allocations, emit_tvmscript_printable=True
+    )(tir_mod)
+
+    expected = TensorIntrinStructurePlanned
+
+    for gv, ref_func in expected.functions.items():
+        actual_func = tir_mod_with_offsets[gv.name_hint]
+        tvm.ir.assert_structural_equal(actual_func, ref_func)
+
+
 if __name__ == "__main__":
     pytest.main([__file__] + sys.argv[1:])