You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tvm.apache.org by GitBox <gi...@apache.org> on 2021/10/30 02:25:39 UTC

[GitHub] [tvm] vinx13 commented on a change in pull request #9341: [CUDA] Support memory reuse for dynamic shared memory

vinx13 commented on a change in pull request #9341:
URL: https://github.com/apache/tvm/pull/9341#discussion_r739594556



##########
File path: tests/python/unittest/test_tir_transform_merge_dynamic_shared_memory_allocations.py
##########
@@ -82,24 +82,25 @@ def test_matmul_ir(A, B, C):
         # Create a dynamic shared memory for the accumulation.
         # This is for testing merging dynamic shared memory allocations with different data types.
         # In practice, there is no need to allocate a shared memory for C.
+        C_local = ib.allocate(C.dtype, (1), scope="local", name="C_local")

Review comment:
       ```suggestion
           C_local = ib.allocate(C.dtype, (1,), scope="local", name="C_local")
   ```
   

##########
File path: src/tir/transforms/merge_dynamic_shared_memory_allocations.cc
##########
@@ -31,51 +31,256 @@
 #include <unordered_set>
 
 #include "../../runtime/thread_storage_scope.h"
+#include "../../support/arena.h"
 #include "ir_utils.h"
 
 namespace tvm {
 namespace tir {
 
+using runtime::StorageRank;
+using runtime::StorageScope;
+
 bool IsDynamicSharedMemory(Var buffer_var) {
-  auto storage_scope = runtime::StorageScope::Create(GetPtrStorageScope(buffer_var));
+  StorageScope storage_scope = runtime::StorageScope::Create(GetPtrStorageScope(buffer_var));
   return storage_scope.rank == runtime::StorageRank::kShared && storage_scope.tag == ".dyn";
 }
 
+/*!
+ * \brief collect the mapping from the buffer var to its allocate
+ */
 class AllocateCollector : public StmtExprVisitor {
  public:
   void VisitStmt_(const AllocateNode* op) final {
     if (IsDynamicSharedMemory(op->buffer_var)) {
-      dyn_shmem_allocs_.insert(op);
+      dyn_shmem_allocs_[op->buffer_var.get()] = op;
+    }
+    StmtExprVisitor::VisitStmt_(op);
+  }
+  // The mapping from the original buffer var to its allocate
+  std::unordered_map<const VarNode*, const AllocateNode*> dyn_shmem_allocs_;
+};
+
+// Find a linear pattern of storage access
+// Used for liveness analysis.
+// "linear" means fitting a complex access pattern into an array of StmtEntry
+//
+// Define "scope" as the body of For/thread_launch/IfThenElse
+// Composite scopes (loop/thread_launch/IfThen) are represented by three StmtEntry:
+// before_scope -> scope_body -> after_scope
+//
+// This pass tries to detect the last point at which we need to keep memory
+// alive under the same scope as Allocate.
+// The storage needs to be kept alive between Allocate and the last access.
+// The free point is only inserted at the same scope as Allocate.
+//
+class DynSharedMemLinearAccessPatternFinder final : public StmtExprVisitor {
+ public:
+  /*! \brief record the touch list of statment. */

Review comment:
       ```suggestion
     /*! \brief record the touch list of statement. */
   ```
   




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@tvm.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org