You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tvm.apache.org by ju...@apache.org on 2022/05/03 22:50:30 UTC
[tvm] branch main updated: [TIR] Fix an index out of bound issue of compact buffer region (#11201)
This is an automated email from the ASF dual-hosted git repository.
junrushao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git
The following commit(s) were added to refs/heads/main by this push:
new e1917f4943 [TIR] Fix an index out of bound issue of compact buffer region (#11201)
e1917f4943 is described below
commit e1917f4943a23d8a3f32e13855e7fae12478a175
Author: wrongtest <wr...@gmail.com>
AuthorDate: Wed May 4 06:50:25 2022 +0800
[TIR] Fix an index out of bound issue of compact buffer region (#11201)
After https://github.com/apache/tvm/pull/10557, the region extent after compaction is ensured to not exceed original shape. Now when the inferred region min is negative, the index remap rule `idx -> (idx - region_min)` would introduce out of bound accesses, which would cause crashes at runtime.
The two updated cases in UT:
- padding block inlined to pooling
Current version results to out of bound accesses in `cache` block, since the H/W extents are compacted to no more than 224 but accesses are shifted by `- (-1)`.
```python
@T.prim_func
def func(X: T.Buffer[(224, 224), "float32"], Y: T.Buffer[(224, 224), "float32"]) -> None:
cache = T.alloc_buffer([224, 224], dtype="float32")
for h, w in T.grid(224, 224):
with T.block("cache"):
cache[h + 1, w + 1] = X[h, w]
for h, w, kh, kw in T.grid(224, 224, 3, 3):
with T.block("compute"):
Y[h, w] = T.max(Y[h, w], T.if_then_else(T.likely(1 <= h + kh, dtype="bool") and T.likely(h + kh < 225, dtype="bool") and T.likely(1 <= w + kw, dtype="bool") and T.likely(w + kw < 225, dtype="bool"), cache[h + kh, w + kw], T.float32(0), dtype="float32"))
```
- sparse access
`A_data_local[A_indptr[i] + k]` is rewritten to `A_data_local[T.min(A_indptr[i] + k, 0)]` instead of `A_data_local[0]`. Compared to current version, interestingly, it keeps the semantic that negative sparse index result to oob behavior.
---
src/tir/transforms/compact_buffer_region.cc | 5 +-
.../test_tir_transform_compact_buffer_region.py | 58 ++++++++++++++++++++--
2 files changed, 58 insertions(+), 5 deletions(-)
diff --git a/src/tir/transforms/compact_buffer_region.cc b/src/tir/transforms/compact_buffer_region.cc
index 09f56194eb..fe7b38e67a 100644
--- a/src/tir/transforms/compact_buffer_region.cc
+++ b/src/tir/transforms/compact_buffer_region.cc
@@ -53,8 +53,9 @@ Region SimplifyAndNarrowBufferRegionFromNDIntSet(const NDIntSet& nd_int_set,
for (size_t i = 0; i < nd_int_set.size(); ++i) {
const arith::IntSet& int_set = nd_int_set[i];
Range range = int_set.CoverRange(Range(/*begin=*/0, /*end=*/original_shape[i]));
- result.push_back(Range::FromMinExtent(
- range->min, analyzer->Simplify(min(original_shape[i], range->extent))));
+ result.push_back(
+ Range::FromMinExtent(analyzer->Simplify(max(0, range->min)),
+ analyzer->Simplify(min(original_shape[i], range->extent))));
}
return result;
}
diff --git a/tests/python/unittest/test_tir_transform_compact_buffer_region.py b/tests/python/unittest/test_tir_transform_compact_buffer_region.py
index 7d93038c0d..8ad95bd4bc 100644
--- a/tests/python/unittest/test_tir_transform_compact_buffer_region.py
+++ b/tests/python/unittest/test_tir_transform_compact_buffer_region.py
@@ -422,6 +422,54 @@ def compacted_padding_pattern_func(a: T.handle, c: T.handle) -> None:
)
+@T.prim_func
+def padding_pattern_inlined(a: T.handle, b: T.handle) -> None:
+ X = T.match_buffer(a, [224, 224], dtype="float32")
+ Y = T.match_buffer(b, [224, 224], dtype="float32")
+ cache = T.alloc_buffer([224, 224], dtype="float32")
+ for h, w in T.grid(224, 224):
+ with T.block("cache"):
+ cache[h, w] = X[h, w]
+ for h, w, kh, kw in T.grid(224, 224, 3, 3):
+ with T.block("compute"):
+ Y[h, w] = T.max(
+ Y[h, w],
+ T.if_then_else(
+ T.likely(1 <= h + kh, dtype="bool")
+ and T.likely(h + kh < 225, dtype="bool")
+ and T.likely(1 <= w + kw, dtype="bool")
+ and T.likely(w + kw < 225, dtype="bool"),
+ cache[h + kh - 1, w + kw - 1],
+ 0.0,
+ dtype="float32",
+ ),
+ )
+
+
+@T.prim_func
+def compacted_padding_pattern_inlined(
+ X: T.Buffer[(224, 224), "float32"], Y: T.Buffer[(224, 224), "float32"]
+) -> None:
+ cache = T.alloc_buffer([224, 224], dtype="float32")
+ for h, w in T.grid(224, 224):
+ with T.block("cache"):
+ cache[h, w] = X[h, w]
+ for h, w, kh, kw in T.grid(224, 224, 3, 3):
+ with T.block("compute"):
+ Y[h, w] = T.max(
+ Y[h, w],
+ T.if_then_else(
+ T.likely(1 <= h + kh, dtype="bool")
+ and T.likely(h + kh < 225, dtype="bool")
+ and T.likely(1 <= w + kw, dtype="bool")
+ and T.likely(w + kw < 225, dtype="bool"),
+ cache[h + kh - 1, w + kw - 1],
+ 0.0,
+ dtype="float32",
+ ),
+ )
+
+
@T.prim_func
def mem_access_in_branch_func(a: T.handle) -> None:
A = T.match_buffer(a, (224, 224), "float32")
@@ -570,12 +618,12 @@ def compacted_sparse_read_cache(
A_data_local = T.alloc_buffer([1], dtype="float32", scope="local")
with T.block("A_data_cache_read"):
T.reads(A_indptr[i], A_data[A_indptr[i] + k])
- T.writes(A_data_local[A_indptr[i] + k - (A_indptr[i] + k)])
- A_data_local[A_indptr[i] + k - (A_indptr[i] + k)] = A_data[A_indptr[i] + k]
+ T.writes(A_data_local[T.min(A_indptr[i] + k, 0)])
+ A_data_local[T.min(A_indptr[i] + k, 0)] = A_data[A_indptr[i] + k]
with T.block("rowsum_inner"):
T.reads(B[i], A_indptr[i], A_data[A_indptr[i] + k])
T.writes(B[i])
- B[i] = B[i] + A_data_local[A_indptr[i] + k - (A_indptr[i] + k)]
+ B[i] = B[i] + A_data_local[T.min(A_indptr[i] + k, 0)]
@T.prim_func
@@ -654,6 +702,10 @@ def test_padding_pattern():
_check(padding_pattern_func, compacted_padding_pattern_func)
+def test_padding_pattern_inlined():
+ _check(padding_pattern_inlined, compacted_padding_pattern_inlined)
+
+
def test_mem_access_in_branch_func():
_check(mem_access_in_branch_func, compacted_mem_access_in_branch_func)