You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tvm.apache.org by ju...@apache.org on 2022/05/03 22:50:30 UTC
[tvm] branch main updated: [TIR] Fix an index out of bound issue of compact buffer region (#11201)

This is an automated email from the ASF dual-hosted git repository.

junrushao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git


The following commit(s) were added to refs/heads/main by this push:
     new e1917f4943 [TIR] Fix an index out of bound issue of compact buffer region (#11201)
e1917f4943 is described below

commit e1917f4943a23d8a3f32e13855e7fae12478a175
Author: wrongtest <wr...@gmail.com>
AuthorDate: Wed May 4 06:50:25 2022 +0800

    [TIR] Fix an index out of bound issue of compact buffer region (#11201)
    
    After https://github.com/apache/tvm/pull/10557, the region extent after compaction is ensured to not exceed original shape. Now when the inferred region min is negative, the index remap rule `idx -> (idx - region_min)` would introduce out of bound accesses, which would cause crashes at runtime.
    
    The two updated cases in UT:
    - padding block inlined to pooling
    Current version results to out of bound accesses in `cache` block, since the H/W extents are compacted to no more than 224 but accesses are shifted by `- (-1)`.
    ```python
    @T.prim_func
    def func(X: T.Buffer[(224, 224), "float32"], Y: T.Buffer[(224, 224), "float32"]) -> None:
        cache = T.alloc_buffer([224, 224], dtype="float32")
        for h, w in T.grid(224, 224):
            with T.block("cache"):
                cache[h + 1, w + 1] = X[h, w]
        for h, w, kh, kw in T.grid(224, 224, 3, 3):
            with T.block("compute"):
                Y[h, w] = T.max(Y[h, w], T.if_then_else(T.likely(1 <= h + kh, dtype="bool") and T.likely(h + kh < 225, dtype="bool") and T.likely(1 <= w + kw, dtype="bool") and T.likely(w + kw < 225, dtype="bool"), cache[h + kh, w + kw], T.float32(0), dtype="float32"))
    ```
    
    -  sparse access
    `A_data_local[A_indptr[i] + k]` is rewritten to `A_data_local[T.min(A_indptr[i] + k, 0)]` instead of `A_data_local[0]`. Compared to current version, interestingly, it keeps the semantic that negative sparse index result to oob behavior.
---
 src/tir/transforms/compact_buffer_region.cc        |  5 +-
 .../test_tir_transform_compact_buffer_region.py    | 58 ++++++++++++++++++++--
 2 files changed, 58 insertions(+), 5 deletions(-)

diff --git a/src/tir/transforms/compact_buffer_region.cc b/src/tir/transforms/compact_buffer_region.cc
index 09f56194eb..fe7b38e67a 100644
--- a/src/tir/transforms/compact_buffer_region.cc
+++ b/src/tir/transforms/compact_buffer_region.cc
@@ -53,8 +53,9 @@ Region SimplifyAndNarrowBufferRegionFromNDIntSet(const NDIntSet& nd_int_set,
   for (size_t i = 0; i < nd_int_set.size(); ++i) {
     const arith::IntSet& int_set = nd_int_set[i];
     Range range = int_set.CoverRange(Range(/*begin=*/0, /*end=*/original_shape[i]));
-    result.push_back(Range::FromMinExtent(
-        range->min, analyzer->Simplify(min(original_shape[i], range->extent))));
+    result.push_back(
+        Range::FromMinExtent(analyzer->Simplify(max(0, range->min)),
+                             analyzer->Simplify(min(original_shape[i], range->extent))));
   }
   return result;
 }
diff --git a/tests/python/unittest/test_tir_transform_compact_buffer_region.py b/tests/python/unittest/test_tir_transform_compact_buffer_region.py
index 7d93038c0d..8ad95bd4bc 100644
--- a/tests/python/unittest/test_tir_transform_compact_buffer_region.py
+++ b/tests/python/unittest/test_tir_transform_compact_buffer_region.py
@@ -422,6 +422,54 @@ def compacted_padding_pattern_func(a: T.handle, c: T.handle) -> None:
                 )
 
 
+@T.prim_func
+def padding_pattern_inlined(a: T.handle, b: T.handle) -> None:
+    X = T.match_buffer(a, [224, 224], dtype="float32")
+    Y = T.match_buffer(b, [224, 224], dtype="float32")
+    cache = T.alloc_buffer([224, 224], dtype="float32")
+    for h, w in T.grid(224, 224):
+        with T.block("cache"):
+            cache[h, w] = X[h, w]
+    for h, w, kh, kw in T.grid(224, 224, 3, 3):
+        with T.block("compute"):
+            Y[h, w] = T.max(
+                Y[h, w],
+                T.if_then_else(
+                    T.likely(1 <= h + kh, dtype="bool")
+                    and T.likely(h + kh < 225, dtype="bool")
+                    and T.likely(1 <= w + kw, dtype="bool")
+                    and T.likely(w + kw < 225, dtype="bool"),
+                    cache[h + kh - 1, w + kw - 1],
+                    0.0,
+                    dtype="float32",
+                ),
+            )
+
+
+@T.prim_func
+def compacted_padding_pattern_inlined(
+    X: T.Buffer[(224, 224), "float32"], Y: T.Buffer[(224, 224), "float32"]
+) -> None:
+    cache = T.alloc_buffer([224, 224], dtype="float32")
+    for h, w in T.grid(224, 224):
+        with T.block("cache"):
+            cache[h, w] = X[h, w]
+    for h, w, kh, kw in T.grid(224, 224, 3, 3):
+        with T.block("compute"):
+            Y[h, w] = T.max(
+                Y[h, w],
+                T.if_then_else(
+                    T.likely(1 <= h + kh, dtype="bool")
+                    and T.likely(h + kh < 225, dtype="bool")
+                    and T.likely(1 <= w + kw, dtype="bool")
+                    and T.likely(w + kw < 225, dtype="bool"),
+                    cache[h + kh - 1, w + kw - 1],
+                    0.0,
+                    dtype="float32",
+                ),
+            )
+
+
 @T.prim_func
 def mem_access_in_branch_func(a: T.handle) -> None:
     A = T.match_buffer(a, (224, 224), "float32")
@@ -570,12 +618,12 @@ def compacted_sparse_read_cache(
                     A_data_local = T.alloc_buffer([1], dtype="float32", scope="local")
                     with T.block("A_data_cache_read"):
                         T.reads(A_indptr[i], A_data[A_indptr[i] + k])
-                        T.writes(A_data_local[A_indptr[i] + k - (A_indptr[i] + k)])
-                        A_data_local[A_indptr[i] + k - (A_indptr[i] + k)] = A_data[A_indptr[i] + k]
+                        T.writes(A_data_local[T.min(A_indptr[i] + k, 0)])
+                        A_data_local[T.min(A_indptr[i] + k, 0)] = A_data[A_indptr[i] + k]
                     with T.block("rowsum_inner"):
                         T.reads(B[i], A_indptr[i], A_data[A_indptr[i] + k])
                         T.writes(B[i])
-                        B[i] = B[i] + A_data_local[A_indptr[i] + k - (A_indptr[i] + k)]
+                        B[i] = B[i] + A_data_local[T.min(A_indptr[i] + k, 0)]
 
 
 @T.prim_func
@@ -654,6 +702,10 @@ def test_padding_pattern():
     _check(padding_pattern_func, compacted_padding_pattern_func)
 
 
+def test_padding_pattern_inlined():
+    _check(padding_pattern_inlined, compacted_padding_pattern_inlined)
+
+
 def test_mem_access_in_branch_func():
     _check(mem_access_in_branch_func, compacted_mem_access_in_branch_func)