You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tvm.apache.org by GitBox <gi...@apache.org> on 2022/11/14 20:11:21 UTC

[GitHub] [tvm] csullivan commented on a diff in pull request #13130: [TIR][Arith] Implement basic data-flow analysis

csullivan commented on code in PR #13130:
URL: https://github.com/apache/tvm/pull/13130#discussion_r1019359743


##########
src/arith/conjunctive_normal_form.cc:
##########
@@ -264,14 +264,14 @@ void AndOfOrs::TrySimplifyAnd(Key* a_ptr, Key* b_ptr, Analyzer* analyzer) {
   Key& a = *a_ptr;
   Key& b = *b_ptr;
   PrimExpr joint = GetExpr(a) && GetExpr(b);
-  PrimExpr simplified = analyzer->Simplify(joint);
+  PrimExpr simplified = analyzer->rewrite_simplify(joint);
   if (!ExprDeepEqual()(simplified, joint)) {
     if (auto* simplified_and = simplified.as<AndNode>()) {
       a = GetKey(simplified_and->a);
       b = GetKey(simplified_and->b);
     } else {
-      a = GetKey(simplified);
-      b = key_true_;
+      a = key_true_;
+      b = GetKey(simplified);

Review Comment:
   Same question here as above



##########
src/tir/analysis/control_flow_graph.cc:
##########
@@ -0,0 +1,1641 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file control_flow_graph.cc
+ * \brief Utility to deduce bound of expression
+ */
+
+#include "control_flow_graph.h"
+
+#include <tvm/runtime/registry.h>
+#include <tvm/tir/analysis.h>
+#include <tvm/tir/builtin.h>
+#include <tvm/tir/expr.h>
+#include <tvm/tir/op.h>
+#include <tvm/tir/stmt_functor.h>
+
+#include <numeric>
+#include <optional>
+#include <queue>
+#include <set>
+#include <sstream>
+#include <unordered_set>
+
+#include "../../arith/conjunctive_normal_form.h"
+#include "../../arith/constraint_extract.h"
+#include "../../arith/ir_mutator_with_analyzer.h"
+#include "../../arith/ir_visitor_with_analyzer.h"
+#include "../../arith/narrow_predicate_expression.h"
+#include "../../arith/unwrap_vector_expr.h"
+
+namespace tvm {
+namespace tir {
+
+using namespace arith;
+
+namespace {
+bool HasBufferLoad(PrimExpr expr) {
+  struct Visitor : public ExprVisitor {
+    void VisitExpr_(const BufferLoadNode* node) override { found_buffer_load = true; }
+    bool found_buffer_load{false};
+  };
+
+  Visitor visitor;
+  visitor(expr);
+  return visitor.found_buffer_load;
+}
+
+Optional<PrimExpr> SubstituteParamValues(const Array<Var>& param_vars,
+                                         const Array<PrimExpr>& param_values,
+                                         const PrimExpr& expr) {
+  ICHECK_EQ(param_vars.size(), param_values.size())
+      << "Expression was defined as having " << param_vars.size() << " parameters, but received "
+      << param_values.size() << " arguments.";
+
+  Map<tir::Var, PrimExpr> var_map;
+  for (size_t i = 0; i < param_values.size(); i++) {
+    var_map.Set(param_vars[i], param_values[i]);
+  }
+
+  return Substitute(expr, var_map);
+}
+}  // namespace
+
+PrimExpr BufferTouch::BeforeLoopIteration() const {
+  PrimExpr loop_predicate = Bool(true);
+  for (auto it = loop_var_expressions.rbegin(); it != loop_var_expressions.rend(); it++) {
+    const Var& loop_var = it->first;
+    const PrimExpr& loop_expr = it->second;
+    loop_predicate = (loop_var <= loop_expr) || ((loop_var == loop_expr) && loop_predicate);
+  }
+  return loop_predicate;
+}
+
+PrimExpr BufferTouch::AtLoopIteration() const {
+  PrimExpr loop_predicate = Bool(true);
+  for (auto it = loop_var_expressions.rbegin(); it != loop_var_expressions.rend(); it++) {
+    const Var& loop_var = it->first;
+    const PrimExpr& loop_expr = it->second;
+    loop_predicate = (loop_var == loop_expr) && loop_predicate;
+  }
+  return loop_predicate;
+}
+
+PrimExpr BufferTouch::AfterLoopIteration() const {
+  PrimExpr loop_predicate = Bool(true);
+  for (auto it = loop_var_expressions.rbegin(); it != loop_var_expressions.rend(); it++) {
+    const Var& loop_var = it->first;
+    const PrimExpr& loop_expr = it->second;
+    loop_predicate = (loop_var >= loop_expr) || ((loop_var == loop_expr) && loop_predicate);
+  }
+  return loop_predicate;
+}
+
+bool BufferTouch::IsSubsetOf(const BufferTouch& other, Analyzer* analyzer) const {
+  if (this->buffer.same_as(other.buffer)) {
+    With<ConstraintContext> constraint(analyzer, predicate);
+
+    return analyzer->CanProve(other.predicate);
+  } else {
+    return false;
+  }
+}
+
+bool BufferTouch::IsDistinctFrom(const BufferTouch& other, Analyzer* analyzer) const {
+  if (this->buffer.same_as(other.buffer)) {
+    With<ConstraintContext> constraint(analyzer, predicate);
+
+    return analyzer->CanProve(!other.predicate);
+  } else {
+    return true;
+  }
+}
+
+std::ostream& operator<<(std::ostream& os, const BufferTouch& tp) {
+  auto touch_type = [&]() {
+    if (tp.touch_type == BufferTouch::AccessType::Read) {
+      return "read";
+    } else if (tp.touch_type == BufferTouch::AccessType::Write) {
+      return "write";
+    } else if (tp.touch_type == BufferTouch::AccessType::Assume) {
+      return "assume";
+    } else {
+      return "???";
+    }
+  }();
+
+  os << "BufferTouch(" << tp.buffer->name << ", " << touch_type << ", " << tp.predicate
+     << ", value = " << tp.value << ")";
+  return os;
+}
+
+class BufferConstraintApply : public IRMutatorWithAnalyzer {
+ public:
+  using Parent = IRMutatorWithAnalyzer;
+
+  BufferConstraintApply(const Map<Buffer, Array<Var>>& axis_var_lookup,
+                        const std::vector<BufferTouch>& knowns, Analyzer* analyzer)
+      : Parent(analyzer), axis_var_lookup_(axis_var_lookup), knowns_(knowns) {}
+
+  using Parent::VisitExpr_;
+
+  PrimExpr VisitExpr_(const BufferLoadNode* op) override {
+    for (const auto& known : knowns_) {
+      if (!op->buffer.same_as(known.buffer)) {
+        continue;
+      }
+
+      Optional<Var> lane_var = NullOpt;
+      IntImm num_lanes;
+
+      Array<PrimExpr> indices = op->indices.Map([&](const auto& index) {
+        if (index.dtype().lanes() == 1) {
+          return index;
+        } else {
+          ICHECK(!lane_var) << "Multiple indices found with non-scalar values";
+          lane_var = Var("lane", index.dtype().element_of());
+          num_lanes = IntImm(index.dtype().element_of(), index.dtype().lanes());
+          return UnwrapVectorExpr(index, lane_var.value());
+        }
+      });
+
+      auto axis_vars = axis_var_lookup_.at(op->buffer);
+      PrimExpr predicate = SubstituteParamValues(axis_vars, indices, known.predicate).value();
+
+      std::optional<With<ConstraintContext>> context;
+      if (lane_var.defined()) {
+        Var lanes = lane_var.value();
+        PrimExpr known = (IntImm(lanes.dtype(), 0) <= lanes) && (lanes < num_lanes);
+        context.emplace(analyzer_, known);
+      }
+
+      if (analyzer_->CanProve(predicate)) {
+        return SubstituteParamValues(axis_vars, op->indices, known.value).value();
+      }
+    }
+
+    return GetRef<PrimExpr>(op);
+  }
+
+ private:
+  const Map<Buffer, Array<Var>>& axis_var_lookup_;
+  const std::vector<BufferTouch>& knowns_;
+};
+
+/*! \brief Extract the control-flow graph
+ *
+ * Walk through a statement, populating the control-flow graph.
+ */
+class ControlFlowGraphBuilder final : public IRVisitorWithAnalyzer {
+ public:
+  static void Build(ControlFlowGraph* out, const Stmt& stmt) {
+    ControlFlowGraphBuilder extractor(out);
+    extractor.AppendControlBlock();
+    extractor(stmt);
+  }
+
+ private:
+  ControlFlowGraphBuilder(ControlFlowGraph* out) : out_(out) {}
+
+  using Parent = IRVisitorWithAnalyzer;
+  using Parent::VisitExpr_;
+  using Parent::VisitStmt_;
+
+  void VisitStmt(const Stmt& stmt) override {
+    // Update the lookup table to determine which control-flow block
+    // contains the start of the specified statement.  This is used
+    // later to determine which set of known values should be used to
+    // simplify a statement.
+    out_->control_flow_lookup_[stmt.get()] = CurrentControlBlock();
+    Stmt prev_stmt = current_stmt_;
+    current_stmt_ = stmt;
+    Parent::VisitStmt(stmt);
+    current_stmt_ = prev_stmt;
+  }
+
+  void VisitStmt_(const EvaluateNode* op) override {
+    if (auto* call = op->value.as<CallNode>()) {
+      if (call->op.same_as(builtin::assume())) {
+        Assume(call->args[0], true);
+        return;
+      }
+    }
+
+    Parent::VisitStmt_(op);
+  }
+
+  void Assume(PrimExpr assumption, bool from_assume_statement) {
+    for (const auto& expr : ExtractConstraints(assumption, false)) {
+      AssumeConstraintComponent(expr, from_assume_statement);
+    }
+  }
+
+  void AssumeConstraintComponent(PrimExpr assumption, bool from_assume_statement) {
+    PrimExpr additional_predicate = Bool(true);
+
+    std::vector<PrimExpr> buffer_exprs;
+    for (const auto& expr : ExtractComponents(assumption)) {
+      auto side_effect = tir::SideEffect(expr);
+      if (side_effect <= tir::CallEffectKind::kPure) {
+        // Pulling out portions of the assumption that do not depend
+        // on a buffer value allows the following two forms to be
+        // treated identically.
+        //
+        // if i < 3: T.assume(buf[i] == value)
+        // T.assume(i>=3 or buf[i] == value)
+        additional_predicate = additional_predicate && logical_not(expr);
+      } else if (side_effect == tir::CallEffectKind::kReadState) {
+        buffer_exprs.push_back(expr);
+      } else {
+        LOG(FATAL) << "Assumption must be pure or read-only";
+      }
+    }
+
+    if (buffer_exprs.empty()) {
+      out_->non_buffer_assumptions_.push_back(!CurrentScopePredicate() || assumption);
+      return;
+    }
+
+    CHECK_EQ(buffer_exprs.size(), 1) << "T.assume must contain only a single buffer expression";
+
+    auto* as_equal_node = buffer_exprs[0].as<tir::EQNode>();
+    CHECK(as_equal_node || !from_assume_statement)
+        << "T.assume buffer constraint must be of the form 'buffer[indices] == "
+           "value', but received "
+        << assumption;
+    if (!as_equal_node) {
+      // This assumption is an inequality a data-dependent
+      // conditional.  Not an error for this to occur, but also not
+      // something that is currently supported.
+      return;
+    }
+
+    tir::BufferLoad load;
+    PrimExpr value;
+    if (auto* as_load = as_equal_node->a.as<tir::BufferLoadNode>()) {
+      load = GetRef<tir::BufferLoad>(as_load);
+      value = as_equal_node->b;
+    } else if (auto* as_load = as_equal_node->b.as<tir::BufferLoadNode>()) {
+      load = GetRef<tir::BufferLoad>(as_load);
+      value = as_equal_node->a;
+    } else if (!from_assume_statement) {
+      return;
+    } else {
+      LOG(FATAL) << "T.assume buffer constraint must be of the form 'buffer[indices] == value'";
+    }
+
+    auto has_side_effect = tir::SideEffect(value) > tir::CallEffectKind::kPure;
+    CHECK(!has_side_effect || !from_assume_statement)
+        << "Buffer value in constraint must be pure expression, but was " << value;
+    if (has_side_effect) {
+      return;
+    }
+
+    {
+      InternalConstraintContext context(this, additional_predicate);
+      VisitAccess(load, BufferTouch::AccessType::Assume, value);
+    }
+    // Appending a control block ensures that all control blocks have
+    // at most one statement that changes the known buffer contents.
+    auto prev_block = CurrentControlBlock();
+    auto new_block = AppendControlBlock();
+    MarkControlFlow(prev_block, new_block);
+  }
+
+  void VisitExpr_(const LetNode* op) override {
+    std::optional<BindLetVar> binding;
+    if (UsesLoopVar(op->value)) {
+      binding.emplace(this, op->var, op->value);
+    }
+    Parent::VisitExpr_(op);
+  }
+
+  void VisitStmt_(const LetStmtNode* op) override {
+    std::optional<BindLetVar> binding;
+    if (UsesLoopVar(op->value)) {
+      binding.emplace(this, op->var, op->value);
+    }
+    Parent::VisitStmt_(op);
+  }
+
+  void VisitExpr_(const BufferLoadNode* op) override {
+    Parent::VisitExpr_(op);
+    BufferLoad load = GetRef<BufferLoad>(op);
+    VisitAccess(load, BufferTouch::AccessType::Read, load);
+  }
+
+  void VisitStmt_(const BufferStoreNode* op) override {
+    Parent::VisitStmt_(op);
+    VisitAccess(GetRef<BufferStore>(op), BufferTouch::AccessType::Write, op->value);
+    // Appending a control block ensures that all control blocks have
+    // at most one statement that changes the buffer contents.
+    auto prev_block = CurrentControlBlock();
+    auto new_block = AppendControlBlock();
+    MarkControlFlow(prev_block, new_block);
+  }
+
+  void VisitStmt_(const ForNode* op) override {
+    out_->iterator_ranges_.Set(op->loop_var, Range::FromMinExtent(op->min, op->extent));
+
+    auto before_loop = CurrentControlBlock();
+    size_t loop_start = -1;
+
+    {
+      BindActiveLoopVar binding(this, op->loop_var, op->min, op->extent);
+      loop_start = AppendControlBlock();
+      Parent::VisitStmt_(op);
+    }
+
+    auto loop_end = CurrentControlBlock();
+    auto after_loop = AppendControlBlock();
+    PrimExpr max_iterator_value = analyzer_.Simplify(op->min + op->extent - 1);
+    {
+      auto [forward, backward] = MarkControlFlow(before_loop, loop_start);
+      backward.post_condition = (op->loop_var == op->min);
+      forward.var_remap = {{op->loop_var, op->min}};
+    }
+    {
+      auto [forward, backward] = MarkControlFlow(loop_end, after_loop);
+      backward.var_remap = {{op->loop_var, max_iterator_value}};
+      forward.post_condition = (op->loop_var == max_iterator_value);
+    }
+    {
+      auto [forward, backward] = MarkControlFlow(loop_end, loop_start);
+      backward.var_remap = {{op->loop_var, op->loop_var - 1}};
+      forward.var_remap = {{op->loop_var, op->loop_var + 1}};
+      backward.post_condition = (op->loop_var > op->min);
+      forward.post_condition = (op->loop_var < max_iterator_value);
+    }
+  }
+
+  void VisitStmt_(const IfThenElseNode* op) override {
+    this->VisitExpr(op->condition);
+
+    PrimExpr real_condition = ExtractRealCondition(op->condition);
+
+    auto before_branching = CurrentControlBlock();
+
+    auto branch_start = AppendControlBlock();
+    MarkControlFlow(before_branching, branch_start);
+
+    {
+      InternalConstraintContext context(this, real_condition);
+      auto then_start = AppendControlBlock();
+      if (context.assume.defined()) {
+        Assume(context.assume.value(), false);
+      }
+      auto [forward, backward] = MarkControlFlow(branch_start, then_start);
+      backward.post_condition = real_condition;
+      forward.post_condition = real_condition;
+      this->VisitStmt(op->then_case);
+    }
+    auto then_end = CurrentControlBlock();
+
+    auto negation = analyzer_.rewrite_simplify(!real_condition);
+    {
+      InternalConstraintContext context(this, negation);
+      auto else_start = AppendControlBlock();
+      if (context.assume.defined()) {
+        Assume(context.assume.value(), false);
+      }
+      auto [forward, backward] = MarkControlFlow(branch_start, else_start);
+      backward.post_condition = negation;
+      forward.post_condition = negation;
+
+      if (op->else_case.defined()) {
+        this->VisitStmt(op->else_case.value());
+      }
+    }
+
+    auto else_end = CurrentControlBlock();
+    auto after_branching = AppendControlBlock();
+
+    if (HasBufferLoad(real_condition)) {
+      // The buffer value may have changed during the body of the
+      // condition, so we can't provide it as a post-condition.
+      MarkControlFlow(then_end, after_branching);
+      MarkControlFlow(else_end, after_branching);
+    } else {
+      {
+        auto [forward, backward] = MarkControlFlow(then_end, after_branching);
+        backward.post_condition = real_condition;
+        forward.post_condition = real_condition;
+      }
+      {
+        auto [forward, backward] = MarkControlFlow(else_end, after_branching);
+        backward.post_condition = negation;
+        forward.post_condition = negation;
+      }
+    }
+  }
+
+  /*! \brief Internal utility, returns true if the expression depends
+   *  on a loop iterator
+   */
+  bool UsesLoopVar(const PrimExpr& expr) {
+    return UsesVar(expr, [&](const VarNode* expr_var) {
+      return loop_dependent_vars_.find(expr_var) != loop_dependent_vars_.end();
+    });
+  }
+
+  /*! \brief Record the interaction with the buffer.
+   *
+   * \param node The TIR node that accesses the buffer.  Should be
+   * either a BufferLoad or BufferStore node.
+   *
+   * \param touch_type The type of buffer access being performed.  A
+   * BufferStore should always use AccessType::Write.  A BufferLoad
+   * may use either AccessType::Read or AccessType::Assume, depending
+   * on whether the BufferLoad occurs within `builtin::assume`.
+   *
+   * \param known_value_expr The value in the buffer following the access.
+   */
+  template <typename BufferAccess>
+  void VisitAccess(const BufferAccess& node, BufferTouch::AccessType touch_type,
+                   PrimExpr known_value_expr) {
+    auto& current_block = out_->control_flow_.back();
+    BufferTouch buffer_touch = current_block.MakeBufferTouch(out_, node->buffer, node->indices,
+                                                             touch_type, known_value_expr);
+    current_block.touch_points.push_back(buffer_touch);
+  }
+
+  /*! \brief Return a predicate for having reached the current
+   *  control-flow block
+   *
+   * For example, while inside an IfThenElse, will return the
+   * IfThenElse's condition.
+   */
+  PrimExpr CurrentScopePredicate() const {
+    PrimExpr predicate = Bool(true);
+    for (const auto& condition : conditions_) {
+      predicate = predicate && condition;
+    }
+    return predicate;
+  }
+
+  /* \brief Add a new control block, returning its index */
+  size_t AppendControlBlock() {
+    size_t index = out_->control_flow_.size();
+    auto& block = out_->control_flow_.emplace_back();
+    block.active_loop_iterators = active_loop_iterators_;
+    block.let_bindings_using_loop = let_bindings_using_loop_;
+    block.scope_predicate = CurrentScopePredicate();
+    return index;
+  }
+
+  /* \brief The index of the current control block */
+  size_t CurrentControlBlock() { return out_->control_flow_.size() - 1; }
+
+  /* \brief Mark a possible control from one block to another
+   *
+   * \param from_block The block from which control leaves
+   *
+   * \param to_block The block to which control enters
+   *
+   * \param var_remap Variable replacements that should be made in
+   * known expression while traversing this edge.  For example,
+   * replacing `i` with `i-1` when entering the next loop iteration,
+   * or replacing `i` with `n-1` when concluding a loop.
+   */
+  std::pair<ControlFlowGraph::ControlFlowEdge&, ControlFlowGraph::ControlFlowEdge&> MarkControlFlow(
+      size_t from_block, size_t to_block) {
+    ICHECK_LE(from_block, out_->control_flow_.size());
+    ICHECK_LE(to_block, out_->control_flow_.size());
+
+    auto& forward = out_->control_flow_[from_block].successors.emplace_back(
+        ControlFlowGraph::ControlFlowEdge{to_block, {}, NullOpt});
+    auto& backward = out_->control_flow_[to_block].predecessors.emplace_back(
+        ControlFlowGraph::ControlFlowEdge{from_block, {}, NullOpt});
+    return {forward, backward};
+  }
+
+  // Internal utility, context manager for entering/leaving a scoped constraint
+  struct InternalConstraintContext {
+    InternalConstraintContext(ControlFlowGraphBuilder* self, PrimExpr constraint)
+        : self(self), analyzer_context(&self->analyzer_, constraint) {
+      old_num_constraints = self->conditions_.size();
+
+      auto side_effect = tir::SideEffect(constraint);
+      if (side_effect <= tir::CallEffectKind::kPure) {
+        self->conditions_.push_back(constraint);
+      } else if (side_effect <= tir::CallEffectKind::kReadState) {
+        assume = constraint;
+      }
+
+      new_num_constraints = self->conditions_.size();
+    }
+    ~InternalConstraintContext() {
+      ICHECK_EQ(self->conditions_.size(), new_num_constraints)
+          << "Internal error: Each condition should only be popped once.";
+      self->conditions_.erase(self->conditions_.begin() + old_num_constraints,
+                              self->conditions_.end());
+    }
+
+    ControlFlowGraphBuilder* self{nullptr};
+    With<ConstraintContext> analyzer_context;
+    size_t old_num_constraints{0};
+    size_t new_num_constraints{0};
+    Optional<PrimExpr> assume{NullOpt};
+
+    // Disable default-generated copy/move assignment and constructors
+    InternalConstraintContext(const InternalConstraintContext&) = delete;
+    InternalConstraintContext& operator=(const InternalConstraintContext&) = delete;
+    InternalConstraintContext(InternalConstraintContext&&) = delete;
+    InternalConstraintContext& operator=(InternalConstraintContext&&) = delete;
+  };
+
+  // Internal utility, context manager for tracking a loop
+  struct BindActiveLoopVar {
+    BindActiveLoopVar(ControlFlowGraphBuilder* self, Var var, PrimExpr loop_min,
+                      PrimExpr loop_extent)
+        : self(self), var(var) {
+      PrimExpr loop_max = loop_min + (loop_extent - 1);
+      auto loop_range = Range::FromMinExtent(loop_min, loop_extent);
+      self->active_loop_iterators_.push_back({var, loop_min, loop_max, loop_range});
+      self->loop_dependent_vars_.insert(var.get());
+    }
+    ~BindActiveLoopVar() { self->active_loop_iterators_.pop_back(); }
+
+    ControlFlowGraphBuilder* self;
+    Var var;
+
+    // Disable default-generated copy/move assignment and constructors
+    BindActiveLoopVar(const BindActiveLoopVar&) = delete;
+    BindActiveLoopVar& operator=(const BindActiveLoopVar&) = delete;
+    BindActiveLoopVar(BindActiveLoopVar&&) = delete;
+    BindActiveLoopVar& operator=(BindActiveLoopVar&&) = delete;
+  };
+
+  // Internal utility, context manager for tracking a variable binding
+  struct BindLetVar {
+    BindLetVar(ControlFlowGraphBuilder* self, Var var, PrimExpr value) : self(self), var(var) {
+      self->let_bindings_using_loop_.Set(var, value);
+      self->loop_dependent_vars_.insert(var.get());
+    }
+    ~BindLetVar() {
+      self->loop_dependent_vars_.erase(var.get());
+      self->let_bindings_using_loop_.erase(var);
+    }
+    ControlFlowGraphBuilder* self;
+    Var var;
+
+    // Disable default-generated copy/move assignment and constructors
+    BindLetVar(const BindLetVar&) = delete;
+    BindLetVar& operator=(const BindLetVar&) = delete;
+    BindLetVar(BindLetVar&&) = delete;
+    BindLetVar& operator=(BindLetVar&&) = delete;
+  };
+
+  struct LoopEntry {
+    Var loop_var;
+    PrimExpr loop_min;
+    PrimExpr loop_max;
+    Range loop_range;
+  };
+
+  // Track in order to know which Vars to write in terms of the buffer
+  // indices and substitute out of the predicate.
+  std::vector<ControlFlowGraph::ControlFlowBlock::LoopEntry> active_loop_iterators_;
+
+  // Track all loop iterators, along with values derived from loop iterators.
+  std::unordered_set<const VarNode*> loop_dependent_vars_;
+
+  // Any let binding that depends, directly or indirectly, on a loop
+  // binding.  When making a predicate in terms of the buffer indices,
+  // these need to be substituted out.
+  // std::unordered_map<const VarNode*, PrimExpr> let_bindings_using_loop_;
+  Map<Var, PrimExpr> let_bindings_using_loop_;
+
+  // Track in order to know what conditions limit the buffer access
+  std::vector<PrimExpr> conditions_;
+
+  // Track in order to know what statement initiated the buffer access
+  Stmt current_stmt_;
+
+  // Output data structure
+  ControlFlowGraph* out_;
+};
+
+std::pair<BufferTouch, Map<Var, Range>> ControlFlowGraph::ControlFlowBlock::MakeBufferTouch(
+    const tir::Buffer& buf, Array<Var> index_variables, Array<PrimExpr> indices,
+    BufferTouch::AccessType touch_type, PrimExpr known_value_expr) const {
+  const auto& current_block = *this;
+
+  Analyzer local_analyzer;
+
+  Optional<Var> lane_var = NullOpt;
+  IntImm num_lanes;
+
+  Array<PrimExpr> index_expressions = indices.Map([&](const auto& index) {
+    if (index.dtype().lanes() == 1) {
+      return index;
+    } else {
+      ICHECK(!lane_var) << "Multiple indices found with non-scalar values";
+      lane_var = Var("lane", index.dtype().element_of());
+      num_lanes = IntImm(index.dtype().element_of(), index.dtype().lanes());
+      return UnwrapVectorExpr(index, lane_var.value());
+    }
+  });
+
+  Array<Var> loop_vars;
+
+  Map<Var, Range> loop_ranges;
+  for (const auto& loop_entry : current_block.active_loop_iterators) {
+    loop_vars.push_back(loop_entry.loop_var);
+    loop_ranges.Set(loop_entry.loop_var, loop_entry.loop_range);
+  }
+
+  // If the indices contain multiple lanes, treat the lane variable
+  // as an additional loop iterator to be solved for and substituted
+  // out.
+  if (lane_var) {
+    loop_vars.push_back(lane_var.value());
+    loop_ranges.Set(lane_var.value(), Range::FromMinExtent(0, num_lanes));
+  }
+
+  IntConstraintsTransform transform = [&]() {
+    ICHECK_EQ(index_variables.size(), index_expressions.size());
+
+    Array<PrimExpr> relations;
+
+    for (size_t i = 0; i < index_expressions.size(); i++) {
+      PrimExpr expr = index_expressions[i];
+      Var var = index_variables[i];
+
+      expr = Substitute(expr, current_block.let_bindings_using_loop);
+      relations.push_back(var == expr);
+    }
+
+    IntConstraints system(loop_vars, loop_ranges, relations);
+    return arith::SolveLinearEquations(system);
+  }();
+
+  Map<Var, PrimExpr> loop_var_to_axis_var = transform->src_to_dst;
+  Map<Var, Range> free_params = transform->dst->ranges;
+  PrimExpr transform_predicate =
+      std::accumulate(transform->dst->relations.begin(), transform->dst->relations.end(),
+                      PrimExpr(Bool(true)), [](PrimExpr a, PrimExpr b) { return a && b; });
+
+  transform_predicate = SimplifyAsAndOfOrs(transform_predicate, &local_analyzer);
+
+  auto find_removable_params = [&]() -> Map<Var, PrimExpr> {
+    Map<Var, PrimExpr> removable_params;
+
+    // The arith::SolveLinearEquations is more general than the
+    // utilities in iter_affine_map.h, but can introduce free
+    // parameters that could later be determined with the known
+    // constraints.  This step removes all such free parameters.
+    for (const auto& expr : ExtractConstraints(transform_predicate)) {
+      if (auto* as_equal = expr.as<EQNode>()) {
+        auto check_expr = [&](const PrimExpr& a, const PrimExpr& b) {
+          auto* var_ptr = a.as<VarNode>();
+          if (!var_ptr) {
+            return;
+          }
+
+          Var var = GetRef<Var>(var_ptr);
+          if (free_params.count(var) == 0) {
+            return;
+          }
+
+          bool uses_free_param =
+              UsesVar(b, [&](const VarNode* v) { return free_params.count(GetRef<Var>(v)) > 0; });
+          if (uses_free_param) {
+            return;
+          }
+          removable_params.Set(var, b);
+        };
+        check_expr(as_equal->a, as_equal->b);
+        check_expr(as_equal->b, as_equal->a);
+      }
+    }
+
+    // In addition, the arith::SolveLinearEquation can introduce
+    // free parameters with an extent of one.  Filtering them out here
+    // avoids needing to track them through later simplifications.
+    for (const auto [var, range] : free_params) {
+      if (is_one(range->extent)) {
+        removable_params.Set(var, range->min);
+      }
+    }
+
+    return removable_params;
+  };
+  for (auto removable_params = find_removable_params(); removable_params.size() > 0;
+       removable_params = find_removable_params()) {
+    auto update = [&](const PrimExpr& expr) {
+      return local_analyzer.Simplify(Substitute(expr, removable_params));
+    };
+
+    Map<Var, PrimExpr> new_map;
+    for (const auto [loop_var, expr] : loop_var_to_axis_var) {
+      static_cast<void>(expr);  // gcc 7.x bug, https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81767
+      new_map.Set(loop_var, update(expr));
+    }
+    loop_var_to_axis_var = new_map;
+
+    transform_predicate = update(transform_predicate);
+
+    for (const auto [var, expr] : removable_params) {
+      static_cast<void>(expr);  // gcc 7.x bug, https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81767
+      free_params.erase(var);
+    }
+  }
+
+  // Normalization function, applied to both the predicate and the
+  // known value.  Converts from an expression in terms of loop
+  // iterators to an expression in terms of buffer indices.
+  auto normalize_expr = [&](PrimExpr expr) -> PrimExpr {
+    expr = Substitute(expr, current_block.let_bindings_using_loop);
+
+    if (lane_var) {
+      expr = UnwrapVectorExpr(expr, lane_var.value());
+    }
+    expr = Substitute(expr, loop_var_to_axis_var);
+
+    return expr;
+  };
+
+  // Collect the current loop variables, along with an expression for
+  // the loop variables in terms of the buffer axis variables.  This
+  // is used during forward/backward propagation to generate predicate
+  // tracking whether a loop iteration has been reached.
+  std::vector<std::pair<Var, PrimExpr>> loop_var_expressions;
+  for (const auto& entry : current_block.active_loop_iterators) {
+    auto expr_it = loop_var_to_axis_var.find(entry.loop_var);
+    ICHECK(expr_it != loop_var_to_axis_var.end());
+    loop_var_expressions.push_back({entry.loop_var, (*expr_it).second});
+  }
+
+  // The full predicate is composed of the values required to reach
+  // the scope of the BufferStore or builtin::assume(), any bounds
+  // implied by solving for the axis variables, and any additional
+  // statements resulting from unpacking the expression contained in
+  // builtin::assume().
+  PrimExpr scope_predicate = normalize_expr(current_block.scope_predicate);
+  transform_predicate = normalize_expr(transform_predicate);
+
+  known_value_expr = local_analyzer.Simplify(normalize_expr(known_value_expr));
+
+  // Deliberately use an analyzer without scope-based information,
+  // to avoid simplifying `scope_predicate` to True.
+  PrimExpr predicate_expr = local_analyzer.Simplify(transform_predicate && scope_predicate);
+
+  BufferTouch buffer_touch = {buf, predicate_expr, known_value_expr, loop_var_expressions,
+                              touch_type};
+
+  return {buffer_touch, free_params};
+}
+
+BufferTouch ControlFlowGraph::ControlFlowBlock::MakeBufferTouch(ControlFlowGraph* graph,
+                                                                const tir::Buffer& buf,
+                                                                const Array<PrimExpr>& indices,
+                                                                BufferTouch::AccessType touch_type,
+                                                                PrimExpr known_value_expr) const {
+  ICHECK(graph);
+  auto [buffer_touch, free_params] = MakeBufferTouch(buf, graph->GetIndexVariables(buf, indices),
+                                                     indices, touch_type, known_value_expr);
+  for (const auto& pair : free_params) {
+    graph->free_predicate_parameters_.Set(pair.first, pair.second);
+  }
+  return buffer_touch;
+}
+
+ControlFlowGraph::ControlFlowGraph(const tir::Stmt& stmt, size_t max_revisits) {
+  ControlFlowGraphBuilder::Build(this, stmt);
+  ForwardPropagateKnownValues(max_revisits);
+  BackwardPropagateUnusedValues(max_revisits);
+}
+
+std::ostream& operator<<(std::ostream& os, const ControlFlowGraph::ControlFlowEdge& edge) {
+  os << edge.index;
+  if (edge.var_remap.size()) {
+    os << " with remap " << edge.var_remap;
+  }
+  if (edge.post_condition) {
+    os << " with postcondition " << edge.post_condition;
+  }
+
+  return os;
+}
+
+std::ostream& operator<<(std::ostream& os, const ControlFlowGraph::ControlFlowBlock& block) {
+  os << "Predecessors: [";
+  for (size_t i = 0; i < block.predecessors.size(); i++) {
+    if (i) {
+      os << ", ";
+    }
+    os << block.predecessors[i];
+  }
+  os << "]\n";
+
+  os << "Active loop iterators: [";
+  for (size_t i = 0; i < block.active_loop_iterators.size(); i++) {
+    if (i) {
+      os << ", ";
+    }
+    os << block.active_loop_iterators[i].loop_var;
+  }
+  os << "]\n";
+
+  os << "Before block knowns: " << block.known_at_block_start << "\n";
+
+  os << "Before block unused: " << block.unused_at_block_start << "\n";
+
+  for (size_t i = 0; i < block.touch_points.size(); i++) {
+    os << "Touch[" << i << "] = " << block.touch_points[i] << "\n";
+  }
+  os << "After block: " << block.known_at_block_end << "\n";
+
+  os << "After block unused: " << block.unused_at_block_end << "\n";
+
+  os << "Successors: [";
+  for (size_t i = 0; i < block.successors.size(); i++) {
+    if (i) {
+      os << ", ";
+    }
+    os << block.successors[i];
+  }
+  os << "]";
+  return os;
+}
+
+std::ostream& operator<<(std::ostream& os, const ControlFlowGraph& pattern) {
+  os << "Touch pattern contains " << pattern.control_flow_.size() << " control blocks."
+     << (pattern.control_flow_.size() ? "\n" : "");
+  for (size_t i = 0; i < pattern.control_flow_.size(); i++) {
+    os << "\t"
+       << "ControlBlock[" << i << "] = " << pattern.control_flow_[i] << "\n";
+  }
+
+  return os;
+}
+
+bool BufferTouch::IsEquivalentTo(const BufferTouch& other, Analyzer* analyzer) const {
+  // Constraints must apply to the same buffer to be equivalent
+  if (!buffer.same_as(other.buffer) || touch_type != other.touch_type) {
+    return false;
+  }
+
+  ExprDeepEqual deep_equal;
+
+  auto implies = [&](const PrimExpr& a, const PrimExpr& b) -> bool {
+    With<ConstraintContext> context(analyzer, a);
+    return analyzer->CanProve(b);
+  };
+
+  // Predicates must be equivalent expressions, or must both be undefined
+  bool equivalent_predicates =
+      deep_equal(predicate, other.predicate) ||
+      (implies(predicate, other.predicate) && implies(other.predicate, predicate));
+  if (!equivalent_predicates) {
+    return false;
+  }
+
+  // The known value must be equal
+  if (!deep_equal(value, other.value) && !analyzer->CanProveEqual(value, other.value)) {
+    return false;
+  }
+
+  return true;
+}
+
+std::ostream& operator<<(std::ostream& os, const BufferState& state) {
+  for (size_t i = 0; i < state.constraints.size(); i++) {
+    os << "constraints[" << i << "] = " << state.constraints[i]
+       << (i + 1 == state.constraints.size() ? "" : "\n");
+  }
+  return os;
+}
+
+PrimExpr BufferState::SubstituteKnownBufferValues(
+    PrimExpr expr, const Map<tir::Buffer, Array<tir::Var>>& axis_var_lookup,
+    Analyzer* analyzer) const {
+  BufferConstraintApply mutator(axis_var_lookup, constraints, analyzer);
+  return mutator(std::move(expr));
+}
+
+void BufferState::AddCondition(const PrimExpr& condition) {
+  for (auto& constraint : constraints) {
+    constraint.predicate = constraint.predicate && condition;
+  }
+}
+
+void BufferState::Substitute(const Map<Var, PrimExpr>& var_remap, Analyzer* analyzer) {
+  if (var_remap.size()) {
+    for (auto& prior : constraints) {
+      PrimExpr updated = tvm::tir::Substitute(prior.predicate, var_remap);
+      if (!updated.same_as(prior.predicate)) {
+        prior.predicate = SimplifyAsAndOfOrs(updated, analyzer);
+      }
+    }
+  }
+}
+
+void BufferState::Simplify(Analyzer* analyzer) {
+  for (auto& constraint : constraints) {
+    constraint.predicate = SimplifyAsAndOfOrs(constraint.predicate, analyzer);
+  }
+}
+
+void BufferState::Union(const BufferState& b, Analyzer* analyzer) {
+  for (const auto& b_constraint : b.constraints) {
+    bool used = false;
+    for (auto& a_constraint : constraints) {
+      if (a_constraint.buffer.same_as(b_constraint.buffer) &&
+          analyzer->CanProveEqual(a_constraint.value, b_constraint.value)) {
+        a_constraint.predicate =
+            SimplifyAsAndOfOrs(a_constraint.predicate || b_constraint.predicate, analyzer);
+        used = true;
+        break;
+      }
+    }
+    if (!used) {
+      constraints.push_back(b_constraint);
+    }
+  }
+}
+
+void BufferState::Intersection(const BufferState& b, Analyzer* analyzer) {
+  // For a constraint to be in the output, it must be present in both
+  // inputs.
+
+  std::vector<BufferTouch> new_constraints;
+  for (const auto& ai : constraints) {
+    for (const auto& bi : b.constraints) {
+      if (ai.buffer.same_as(bi.buffer)) {
+        PrimExpr predicate = SimplifyAsAndOfOrs(ai.predicate && bi.predicate, analyzer);
+        if (!is_zero(predicate)) {
+          With<ConstraintContext> context(analyzer, predicate);
+          PrimExpr known_value_a = ai.value;
+          PrimExpr known_value_b = bi.value;
+
+          bool is_consistent = analyzer->CanProveEqual(known_value_a, known_value_b);
+          if (is_consistent) {
+            new_constraints.push_back({ai.buffer, predicate, known_value_a});
+          }
+        }
+      }
+    }
+  }
+
+  constraints = std::move(new_constraints);
+}
+
+class BufferRegionCollector : public ExprVisitor {
+ public:
+  struct Region {
+    PrimExpr region_predicate;
+    std::unordered_map<const BufferLoadNode*, Optional<PrimExpr>> known_values;
+  };
+
+  static std::vector<Region> Collect(const Map<Buffer, Array<Var>>& axis_var_lookup,
+                                     const std::vector<BufferTouch>& knowns,
+                                     const std::vector<Optional<PrimExpr>>& exprs,
+                                     Analyzer* analyzer) {
+    BufferRegionCollector collector(axis_var_lookup, knowns, analyzer);
+    for (const auto& expr : exprs) {
+      if (expr) {
+        collector(expr.value());
+      }
+    }
+
+    return collector.regions_;
+  }
+
+ private:
+  using Parent = ExprVisitor;
+
+  BufferRegionCollector(const Map<Buffer, Array<Var>>& axis_var_lookup,
+                        const std::vector<BufferTouch>& knowns, Analyzer* analyzer)
+      : analyzer_(analyzer), axis_var_lookup_(axis_var_lookup), knowns_(knowns) {
+    regions_.push_back(Region{Bool(true), {}});
+  }
+
+  using Parent::VisitExpr_;
+
+  void VisitExpr_(const BufferLoadNode* op) override {
+    // Helper struct for the known values of this BufferLoad
+    struct Known {
+      PrimExpr predicate;
+      Optional<PrimExpr> value;
+    };
+
+    std::vector<Known> new_regions;
+
+    PrimExpr unknown_region = Bool(true);
+
+    for (const BufferTouch& constraint : knowns_) {
+      if (!op->buffer.same_as(constraint.buffer)) {
+        // This is a different buffer, so continue searching.
+        continue;
+      }
+
+      auto axis_vars = axis_var_lookup_.at(op->buffer);
+      PrimExpr touch_predicate =
+          SubstituteParamValues(axis_vars, op->indices, constraint.predicate).value();
+      touch_predicate = SimplifyAsAndOfOrs(touch_predicate, analyzer_);
+
+      if (!is_zero(touch_predicate)) {
+        Optional<PrimExpr> known_value =
+            SubstituteParamValues(axis_vars, op->indices, constraint.value);
+        new_regions.push_back(Known{touch_predicate, known_value});
+
+        unknown_region = unknown_region && !touch_predicate;
+        unknown_region = SimplifyAsAndOfOrs(unknown_region, analyzer_);
+      }
+    }
+
+    if (new_regions.size()) {
+      Analyzer local_analyzer;
+
+      if (!is_zero(unknown_region)) {
+        new_regions.insert(new_regions.begin(), Known{unknown_region, NullOpt});
+      }
+
+      std::vector<Region> updated_regions;
+      for (const auto& prev_region : regions_) {
+        for (const auto& new_region : new_regions) {
+          PrimExpr intersection =
+              SimplifyAsAndOfOrs(prev_region.region_predicate && new_region.predicate, analyzer_);
+
+          if (!is_zero(intersection)) {
+            Region merged{intersection, prev_region.known_values};
+            merged.known_values[op] = new_region.value;
+            updated_regions.push_back(std::move(merged));
+          }
+        }
+      }
+      regions_ = updated_regions;
+    }
+  }
+
+  Analyzer* analyzer_;
+  std::vector<Region> regions_;
+  const Map<Buffer, Array<Var>>& axis_var_lookup_;
+  const std::vector<BufferTouch>& knowns_;
+};
+
+class BufferRegionValueReplacer : public IRMutatorWithAnalyzer {
+ public:
+  static PrimExpr Apply(
+      const std::unordered_map<const BufferLoadNode*, Optional<PrimExpr>>& known_values,
+      PrimExpr expr, Analyzer* analyzer) {
+    BufferRegionValueReplacer mutator(known_values, analyzer);
+    PrimExpr result = mutator(expr);
+    // Simplification must occur after the substitution, as known
+    // values may provide enable simplifications.  Also, cannot track
+    // whether a BufferLoad was
+    result = analyzer->Simplify(result);
+    return result;
+  }
+
+ private:
+  using Parent = IRMutatorWithAnalyzer;
+
+  BufferRegionValueReplacer(
+      const std::unordered_map<const BufferLoadNode*, Optional<PrimExpr>>& known_values,
+      Analyzer* analyzer)
+      : Parent(analyzer), known_values_(known_values) {}
+
+  using Parent::VisitExpr_;
+
+  PrimExpr VisitExpr_(const BufferLoadNode* op) override {
+    auto it = known_values_.find(op);
+    if (it != known_values_.end() && it->second) {
+      return it->second.value();
+    } else {
+      return GetRef<PrimExpr>(op);
+    }
+  }
+
+  const std::unordered_map<const BufferLoadNode*, Optional<PrimExpr>>& known_values_;
+};
+
+void BufferState::ApplyTouches(const Map<Buffer, Array<Var>>& axis_var_lookup,
+                               const std::vector<BufferTouch>& touch_points, Analyzer* analyzer) {
+  std::vector<BufferTouch> new_knowns;
+  Map<Buffer, PrimExpr> keep_prior_known_at;
+
+  for (auto& touch : touch_points) {
+    if (touch.touch_type == BufferTouch::AccessType::Read) {
+      continue;
+    }
+
+    PrimExpr known_value = touch.value;
+
+    PrimExpr predicate = touch.predicate && touch.AfterLoopIteration();
+    auto regions = BufferRegionCollector::Collect(axis_var_lookup, constraints,
+                                                  {predicate, touch.value}, analyzer);
+
+    for (const auto& region : regions) {
+      PrimExpr updated_predicate = BufferRegionValueReplacer::Apply(
+          region.known_values, region.region_predicate && predicate, analyzer);
+
+      updated_predicate = SimplifyAsAndOfOrs(updated_predicate, analyzer);
+      PrimExpr updated_value =
+          BufferRegionValueReplacer::Apply(region.known_values, known_value, analyzer);
+
+      if (!is_zero(updated_predicate)) {
+        if (auto it = keep_prior_known_at.find(touch.buffer); it != keep_prior_known_at.end()) {
+          keep_prior_known_at.Set(touch.buffer, (*it).second && !updated_predicate);
+        } else {
+          keep_prior_known_at.Set(touch.buffer, !updated_predicate);
+        }
+
+        if (!HasBufferLoad(updated_value)) {
+          BufferTouch new_constraint{touch.buffer, updated_predicate, updated_value};
+          new_knowns.push_back(new_constraint);
+        }
+      }
+    }
+  }
+
+  if (keep_prior_known_at.size()) {
+    for (auto& constraint : constraints) {
+      if (auto it = keep_prior_known_at.find(constraint.buffer); it != keep_prior_known_at.end()) {
+        constraint.predicate = SimplifyAsAndOfOrs(constraint.predicate && (*it).second, analyzer);
+      }
+    }
+  }
+
+  if (new_knowns.size()) {
+    std::vector<bool> used(new_knowns.size(), false);
+
+    for (auto& constraint : constraints) {
+      PrimExpr expand_known_at = Bool(false);
+
+      PrimExpr prev_value = constraint.value;
+
+      for (size_t i = 0; i < new_knowns.size(); i++) {
+        if (new_knowns[i].buffer.same_as(constraint.buffer)) {
+          Optional<PrimExpr> overwritten_with = new_knowns[i].value;
+          if (overwritten_with && analyzer->CanProveEqual(prev_value, overwritten_with.value())) {
+            expand_known_at =
+                SimplifyAsAndOfOrs(expand_known_at || new_knowns[i].predicate, analyzer);
+            used[i] = true;
+          }
+        }
+      }
+
+      if (!is_zero(expand_known_at)) {
+        constraint.predicate =
+            SimplifyAsAndOfOrs(constraint.predicate || expand_known_at, analyzer);
+      }
+    }
+
+    for (size_t i = 0; i < new_knowns.size(); i++) {
+      if (!used[i]) {
+        constraints.push_back(new_knowns[i]);
+      }
+    }
+  }
+
+  constraints.erase(
+      std::remove_if(constraints.begin(), constraints.end(),
+                     [&](const auto& constraint) { return is_zero(constraint.predicate); }),
+      constraints.end());
+}
+
+void BufferState::BackpropUnusedIndices(const Map<Buffer, Array<Var>>& axis_var_lookup,
+                                        const std::vector<BufferTouch>& touch_points,
+                                        Analyzer* analyzer) {
+  std::vector<BufferTouch> new_knowns;
+  Map<Buffer, PrimExpr> keep_prior_known_at;
+
+  Map<Buffer, PrimExpr> regions_written;
+  Map<Buffer, PrimExpr> regions_read;
+  for (auto it = touch_points.rbegin(); it != touch_points.rend(); it++) {
+    const auto& touch = *it;
+
+    Map<Buffer, PrimExpr>* to_update{nullptr};
+    if (touch.touch_type == BufferTouch::AccessType::Write) {
+      to_update = &regions_written;
+
+    } else if (touch.touch_type == BufferTouch::AccessType::Read) {
+      to_update = &regions_read;
+    } else {
+      continue;
+    }
+
+    PrimExpr prev = to_update->Get(touch.buffer).value_or(Bool(false));
+    PrimExpr new_predicate = touch.predicate && touch.BeforeLoopIteration();
+    to_update->Set(touch.buffer, prev || new_predicate);
+  }
+
+  auto update_map = [&](auto& map) {
+    Map<Buffer, PrimExpr> new_map;
+    for (auto [buffer, predicate] : map) {
+      new_map.Set(buffer, SimplifyAsAndOfOrs(predicate, analyzer));
+    }
+    map = std::move(new_map);
+  };
+  update_map(regions_written);
+  update_map(regions_read);
+
+  // If buffer is already in used, widen the predicate
+  for (auto& prev_unused : constraints) {
+    if (auto opt_predicate = regions_written.Get(prev_unused.buffer)) {
+      PrimExpr new_predicate = prev_unused.predicate || opt_predicate.value();
+      prev_unused.predicate = SimplifyAsAndOfOrs(new_predicate, analyzer);
+      regions_written.erase(prev_unused.buffer);
+    }
+  }
+
+  // Otherwise, add new "touch" to represent the unused values
+  for (auto [buffer, predicate] : regions_written) {
+    constraints.push_back(
+        BufferTouch{buffer, predicate, tir::Call(buffer->dtype, builtin::undef(), {})});
+  }
+
+  // If buffer is read out, narrow the predicate
+  for (auto& prev_unused : constraints) {
+    if (auto opt_pred = regions_read.Get(prev_unused.buffer)) {
+      PrimExpr predicate = opt_pred.value();
+      prev_unused.predicate = SimplifyAsAndOfOrs(prev_unused.predicate && !predicate, analyzer);
+    }
+  }
+
+  // Clean-up and remove any empty constraints
+  constraints.erase(
+      std::remove_if(constraints.begin(), constraints.end(),
+                     [](const auto& constraint) { return is_zero(constraint.predicate); }),
+      constraints.end());
+}
+
+void BufferState::RemoveFreeParameters(const Map<Var, Range>& free_predicate_parameters,
+                                       Analyzer* analyzer) {
+  for (auto& known : constraints) {
+    known.predicate = NarrowPredicateExpression(known.predicate, free_predicate_parameters);
+    known.predicate = SimplifyAsAndOfOrs(known.predicate, analyzer);
+  }
+}
+
+bool BufferState::IsEquivalentTo(const BufferState& other, Analyzer* analyzer) const {
+  if (constraints.size() != other.constraints.size()) {
+    return false;
+  }
+
+  for (size_t i = 0; i < constraints.size(); i++) {
+    if (!constraints[i].IsEquivalentTo(other.constraints[i], analyzer)) {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+Optional<Array<Var>> ControlFlowGraph::GetIndexVariables(const Buffer& buf) const {
+  if (auto it = axis_var_lookup_.find(buf); it != axis_var_lookup_.end()) {
+    return (*it).second;
+  } else {
+    return NullOpt;
+  }
+}
+
+Array<Var> ControlFlowGraph::GetIndexVariables(const Buffer& buf, const Array<PrimExpr>& indices) {
+  if (auto it = axis_var_lookup_.find(buf); it != axis_var_lookup_.end()) {
+    return (*it).second;
+  }
+
+  Array<Var> vars;
+  for (size_t i = 0; i < indices.size(); i++) {
+    std::stringstream ss;
+    ss << buf->name << "_axis_" << i;
+    vars.push_back(Var(ss.str(), indices[i].dtype().element_of()));
+  }
+
+  axis_var_lookup_.Set(buf, vars);
+  return vars;
+}
+
+void ControlFlowGraph::ForwardPropagateKnownValues(size_t max_revisits) {
+  // Values to visit when searching.  Using a std::set to
+  // preferentially visit nodes near the start of the control flow.
+  std::set<size_t> to_visit;
+
+  // Map from a block's index
+  std::unordered_map<size_t, size_t> visit_count_lookup;
+
+  // Initiatize the locations to search from, propagating values
+  // forward from all locations that have a known value.
+  for (size_t i = 0; i < control_flow_.size(); i++) {
+    bool has_known_value = false;
+    for (const auto& touch : control_flow_[i].touch_points) {
+      if (!HasBufferLoad(touch.value)) {
+        has_known_value = true;
+        break;
+      }
+    }
+
+    if (has_known_value) {
+      to_visit.insert(i);
+    }
+  }
+
+  Analyzer analyzer;
+  analyzer.rewrite_simplify.SetEnabledExtensions(arith::RewriteSimplifier::Extension(
+      arith::RewriteSimplifier::kTransitivelyProveInequalities |
+      arith::RewriteSimplifier::kApplyConstraintsToBooleanBranches));
+
+  analyzer.Bind(iterator_ranges_);
+  analyzer.Bind(free_predicate_parameters_);
+
+  while (to_visit.size()) {
+    size_t visiting = *to_visit.begin();
+    to_visit.erase(visiting);
+
+    size_t num_previous_visits = visit_count_lookup[visiting]++;
+
+    ControlFlowBlock& block = control_flow_[visiting];
+
+    // Step 1: Collect known values provided from each precedessor
+    block.known_at_block_start = [&]() -> BufferState {
+      if (num_previous_visits >= max_revisits) {
+        return BufferState();
+      }
+      ICHECK_LE(block.predecessors.size(), 2) << "Each block should have at most two predecessors";

Review Comment:
   Why at most two? Is this for limiting loop dependency analysis? (1) the initial predecessor from loop start, and (2) from a previous loop?



##########
src/tir/analysis/control_flow_graph.h:
##########
@@ -0,0 +1,597 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file control_flow_graph.h
+ * \brief Utility for extracting and interacting with buffer touch points
+ */
+
+#include <tvm/arith/analyzer.h>
+#include <tvm/arith/int_solver.h>
+#include <tvm/runtime/container/array.h>
+#include <tvm/tir/buffer.h>
+#include <tvm/tir/stmt.h>
+#include <tvm/tir/var.h>
+
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#ifndef TVM_TIR_ANALYSIS_CONTROL_FLOW_GRAPH_H_
+#define TVM_TIR_ANALYSIS_CONTROL_FLOW_GRAPH_H_
+
+namespace tvm {
+namespace tir {
+
+/*! \brief Represents an interaction with a buffer */
+struct BufferTouch {
+  enum class AccessType {
+    /*! \brief Buffer access occurs in BufferLoad */
+    Read,
+
+    /*! \brief Buffer access occurs in BufferStore */
+    Write,
+
+    /*! \brief Buffer access occurs in tir::builtin::assume() */
+    Assume,
+  };
+
+  BufferTouch(Buffer buffer, PrimExpr predicate, PrimExpr value)
+      : buffer(buffer),
+        predicate(predicate),
+        value(value),
+        loop_var_expressions({}),
+        touch_type(AccessType::Assume) {}
+
+  BufferTouch(Buffer buffer, PrimExpr predicate, PrimExpr value,
+              std::vector<std::pair<Var, PrimExpr>> loop_var_expressions, AccessType touch_type)
+      : buffer(buffer),
+        predicate(predicate),
+        value(value),
+        loop_var_expressions(loop_var_expressions),
+        touch_type(touch_type) {}
+
+  /*! \brief The buffer being touched */
+  Buffer buffer;
+
+  /*! \brief A predicate that is true when this touch applies
+   *
+   * May be in terms of axis variables to indicate touches that impact
+   * only a portion of a buffer.
+   */
+  PrimExpr predicate;
+
+  /*! \brief The value in this buffer after the touch
+   *
+   * May be in terms of axis variables to indicate a known
+   * non-constant value.  May be in terms of a BufferLoad to indicate
+   * an unknown value.
+   */
+  PrimExpr value;
+
+  /*! \brief Active loops during the buffer touch
+   *
+   * The vector contains one entry for each loop that contains the
+   * buffer touch.  The `Var` item in each entry is the loop variable
+   * itself.  The `PrimExpr` item is an expression for the loop
+   * variable in terms of the buffer axis variables in
+   * `ControlFlowGraph::axis_var_lookup_`.
+   *
+   * Used to construct boolean expressions indicating whether the loop
+   * iteration that performs this touch has been reached.
+   */
+  std::vector<std::pair<Var, PrimExpr>> loop_var_expressions;
+
+  /*! \brief How the buffer was interacted with
+   *
+   * When used as a constraint (e.g. in BufferState), should use
+   * Assume.
+   */
+  AccessType touch_type{AccessType::Assume};
+
+  /*! \brief Generate a boolean expression that is true for indices
+   *  accessed by this touch during this iteration or a previous
+   *  loop iteration.
+   *
+   * Used during forward propagation, to track known values that were
+   * written in the current loop iteration, or in a preceding loop
+   * iteration.
+   */
+  PrimExpr BeforeLoopIteration() const;
+
+  /*! \brief Generate a boolean expression that is true for indices
+   *  accessed by this touch during this loop iteration.
+   *
+   * Used during speculative no-op insertion checks, to specify which
+   * indices must be later overwritten for a store to have no impact
+   * on final results.
+   */
+  PrimExpr AtLoopIteration() const;
+
+  /*! \brief Generate a boolean expression that is true for indices
+   *  accessed by this touch during this loop iteration or a
+   *  subsequent loop iteration.
+   *
+   * Used during backward propagation, to track indices that that are
+   * overwritten in the current loop iteration or in a later loop
+   * iteration.
+   */
+  PrimExpr AfterLoopIteration() const;
+
+  /* \brief Checks if this touch affects a subset of indices of another
+   *
+   * Returns true if the indices accessed by this touch are a subset
+   * of predicate is true can be proven to be a subset of the other
+   * subset.  Returns false if it cannot be proven to be a subset of
+   * ther other subset.
+   */
+  bool IsSubsetOf(const BufferTouch& other, arith::Analyzer* analyzer) const;
+
+  /* \brief Checks if this touch affects distinct indices from another
+   *
+   * Returns true if it can be proven that the two predicates cannot
+   * be simultaneously true.  Returns false if it cannot be proven
+   * that the two predicates are distinct.
+   */
+  bool IsDistinctFrom(const BufferTouch& other, arith::Analyzer* analyzer) const;
+
+  /* \brief Checks if this touch affects distinct indices from another
+   *
+   * Returns true if it can be proven that the two predicates cannot
+   * be simultaneously true.  Returns false if it cannot be proven
+   * that the two predicates are distinct.
+   */
+  bool IsEquivalentTo(const BufferTouch& other, arith::Analyzer* analyzer) const;
+
+  friend std::ostream& operator<<(std::ostream& os, const BufferTouch& expr);
+};
+
+/*! \brief Represents the known state of buffers at a specific point */
+class BufferState {
+ public:
+  /*! Default constructor
+   *
+   * Initialize the buffer state with no known information.
+   */
+  BufferState() {}
+
+  /*! \brief Replace BufferLoad instances with known values
+   *
+   * \param expr The expression to be updated.
+   *
+   * \param axis_var_lookup A map from buffer to the variables
+   * representing positions along the buffer's axes.
+   *
+   * \param analyzer The analyzer to use when validating a
+   * constraint's predicate.
+   *
+   * \returns The modified expression.  If no substitutions are made,
+   * the original expression is returned.
+   */
+  PrimExpr SubstituteKnownBufferValues(PrimExpr expr,
+                                       const Map<Buffer, Array<Var>>& axis_var_lookup,
+                                       arith::Analyzer* analyzer) const;
+
+  /*! \brief Apply a condition to all known constraints
+   *
+   * For example, when propagating pre-loop constraints into the body
+   * of a loop, add a condition that the loop iterator is zero.
+   *
+   * \param condition The condition to apply
+   */
+  void AddCondition(const PrimExpr& condition);
+
+  /*! \brief Perform a variable substitution for all constraints
+   *
+   * For example, when propagating constraints from the end of a loop
+   * to the beginning, replace `i` with `i-1`.
+   *
+   * \param var_remap The variable remapping to apply.
+   */
+  void Substitute(const Map<Var, PrimExpr>& var_remap, arith::Analyzer* analyzer);
+
+  /*! \brief Simplify the predicate of all constraints
+   *
+   * \param analyzer The analyzer with which to simplify
+   */
+  void Simplify(arith::Analyzer* analyzer);
+
+  /*! \brief Update the known buffer values based on buffer touches
+   *
+   * For any Write or Assume touches, update the known values.  For
+   * any Read touches, ignore.  Used to determine known values at the
+   * end of a control flow block, given the known values at the start.
+   *
+   * \param axis_var_lookup A map from buffer to the variables
+   * representing positions along the buffer's axes.
+   *
+   * \param touch_points The buffer touch points to apply
+   *
+   * \param analyzer The analyzer to use for simplifications
+   */
+  void ApplyTouches(const Map<Buffer, Array<Var>>& axis_var_lookup,
+                    const std::vector<BufferTouch>& touch_points, arith::Analyzer* analyzer);
+
+  /*! \brief Update unused buffer locations based on buffer touches
+   *
+   * For any Write, mark the written-to indices as unused.  (That is,
+   * immediately prior to assigning `buf[i] = expr`, the value stored
+   * at `buf[i]` is irrelevant.)  For any Read, mark the read-from
+   * indices as used.  This method is used to determine unused buffer
+   * indices at the start of a control flow block, given the unused
+   * buffer indices values at the end.
+   *
+   * \param axis_var_lookup A map from buffer to the variables
+   * representing positions along the buffer's axes.
+   *
+   * \param touch_points The buffer touch points to apply
+   *
+   * \param analyzer The analyzer to use for simplifications
+   */
+  void BackpropUnusedIndices(const Map<Buffer, Array<Var>>& axis_var_lookup,
+                             const std::vector<BufferTouch>& touch_points,
+                             arith::Analyzer* analyzer);
+
+  /*! \brief Remove free parameters from the constraints
+   *
+   * \param free_predicate_parameters
+   *
+   * \param analyzer The analyzer with which to simplify after removal
+   */
+  void RemoveFreeParameters(const Map<Var, Range>& free_predicate_parameters,
+                            arith::Analyzer* analyzer);
+
+  /*! \brief Check if two buffer states are equivalent
+   *
+   * \param other
+   *
+   * \param analyzer The analyzer used to check equality of PrimExpr
+   *
+   * \return True if the two states are provably equivalent, false otherwise.
+   */
+  bool IsEquivalentTo(const BufferState& other, arith::Analyzer* analyzer) const;
+
+  /* \brief Add known values provided by another state
+   *
+   * \param other The state with which to merge constraints
+   *
+   * \param analyzer The analyzer with which to simplify the result
+   */
+  void Union(const BufferState& other, arith::Analyzer* analyzer);
+
+  /* \brief Remove all known values not consistent with another state
+   *
+   * \param other The state with which to merge constraints
+   *
+   * \param analyzer The analyzer with which to simplify the result
+   */
+  void Intersection(const BufferState& other, arith::Analyzer* analyzer);
+
+  friend std::ostream& operator<<(std::ostream& os, const BufferState&);
+
+ private:
+  friend class ControlFlowGraph;
+  /*! \brief The known constraints */
+  std::vector<BufferTouch> constraints;
+};
+
+/*! \brief Represents the flow of control through a `tir::Stmt`

Review Comment:
   Fantastic documentation for building a mental model of the ControlFlowGraph structure and intention behind its design. Thanks!



##########
src/tir/analysis/control_flow_graph.h:
##########
@@ -0,0 +1,597 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file control_flow_graph.h
+ * \brief Utility for extracting and interacting with buffer touch points
+ */
+
+#include <tvm/arith/analyzer.h>
+#include <tvm/arith/int_solver.h>
+#include <tvm/runtime/container/array.h>
+#include <tvm/tir/buffer.h>
+#include <tvm/tir/stmt.h>
+#include <tvm/tir/var.h>
+
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#ifndef TVM_TIR_ANALYSIS_CONTROL_FLOW_GRAPH_H_
+#define TVM_TIR_ANALYSIS_CONTROL_FLOW_GRAPH_H_
+
+namespace tvm {
+namespace tir {
+
+/*! \brief Represents an interaction with a buffer */
+struct BufferTouch {
+  enum class AccessType {
+    /*! \brief Buffer access occurs in BufferLoad */
+    Read,
+
+    /*! \brief Buffer access occurs in BufferStore */
+    Write,
+
+    /*! \brief Buffer access occurs in tir::builtin::assume() */
+    Assume,
+  };
+
+  BufferTouch(Buffer buffer, PrimExpr predicate, PrimExpr value)
+      : buffer(buffer),
+        predicate(predicate),
+        value(value),
+        loop_var_expressions({}),
+        touch_type(AccessType::Assume) {}
+
+  BufferTouch(Buffer buffer, PrimExpr predicate, PrimExpr value,
+              std::vector<std::pair<Var, PrimExpr>> loop_var_expressions, AccessType touch_type)
+      : buffer(buffer),
+        predicate(predicate),
+        value(value),
+        loop_var_expressions(loop_var_expressions),
+        touch_type(touch_type) {}
+
+  /*! \brief The buffer being touched */
+  Buffer buffer;
+
+  /*! \brief A predicate that is true when this touch applies
+   *
+   * May be in terms of axis variables to indicate touches that impact
+   * only a portion of a buffer.
+   */
+  PrimExpr predicate;
+
+  /*! \brief The value in this buffer after the touch
+   *
+   * May be in terms of axis variables to indicate a known
+   * non-constant value.  May be in terms of a BufferLoad to indicate
+   * an unknown value.
+   */
+  PrimExpr value;
+
+  /*! \brief Active loops during the buffer touch
+   *
+   * The vector contains one entry for each loop that contains the
+   * buffer touch.  The `Var` item in each entry is the loop variable
+   * itself.  The `PrimExpr` item is an expression for the loop
+   * variable in terms of the buffer axis variables in
+   * `ControlFlowGraph::axis_var_lookup_`.
+   *
+   * Used to construct boolean expressions indicating whether the loop
+   * iteration that performs this touch has been reached.
+   */
+  std::vector<std::pair<Var, PrimExpr>> loop_var_expressions;
+
+  /*! \brief How the buffer was interacted with
+   *
+   * When used as a constraint (e.g. in BufferState), should use
+   * Assume.
+   */
+  AccessType touch_type{AccessType::Assume};
+
+  /*! \brief Generate a boolean expression that is true for indices
+   *  accessed by this touch during this iteration or a previous
+   *  loop iteration.
+   *
+   * Used during forward propagation, to track known values that were
+   * written in the current loop iteration, or in a preceding loop
+   * iteration.
+   */
+  PrimExpr BeforeLoopIteration() const;
+
+  /*! \brief Generate a boolean expression that is true for indices
+   *  accessed by this touch during this loop iteration.
+   *
+   * Used during speculative no-op insertion checks, to specify which
+   * indices must be later overwritten for a store to have no impact
+   * on final results.
+   */
+  PrimExpr AtLoopIteration() const;
+
+  /*! \brief Generate a boolean expression that is true for indices
+   *  accessed by this touch during this loop iteration or a
+   *  subsequent loop iteration.
+   *
+   * Used during backward propagation, to track indices that that are
+   * overwritten in the current loop iteration or in a later loop
+   * iteration.
+   */
+  PrimExpr AfterLoopIteration() const;
+
+  /* \brief Checks if this touch affects a subset of indices of another
+   *
+   * Returns true if the indices accessed by this touch are a subset
+   * of predicate is true can be proven to be a subset of the other
+   * subset.  Returns false if it cannot be proven to be a subset of
+   * ther other subset.
+   */
+  bool IsSubsetOf(const BufferTouch& other, arith::Analyzer* analyzer) const;
+
+  /* \brief Checks if this touch affects distinct indices from another
+   *
+   * Returns true if it can be proven that the two predicates cannot
+   * be simultaneously true.  Returns false if it cannot be proven
+   * that the two predicates are distinct.
+   */
+  bool IsDistinctFrom(const BufferTouch& other, arith::Analyzer* analyzer) const;
+
+  /* \brief Checks if this touch affects distinct indices from another
+   *
+   * Returns true if it can be proven that the two predicates cannot
+   * be simultaneously true.  Returns false if it cannot be proven
+   * that the two predicates are distinct.
+   */
+  bool IsEquivalentTo(const BufferTouch& other, arith::Analyzer* analyzer) const;
+
+  friend std::ostream& operator<<(std::ostream& os, const BufferTouch& expr);
+};
+
+/*! \brief Represents the known state of buffers at a specific point */
+class BufferState {
+ public:
+  /*! Default constructor
+   *
+   * Initialize the buffer state with no known information.
+   */
+  BufferState() {}
+
+  /*! \brief Replace BufferLoad instances with known values
+   *
+   * \param expr The expression to be updated.
+   *
+   * \param axis_var_lookup A map from buffer to the variables
+   * representing positions along the buffer's axes.
+   *
+   * \param analyzer The analyzer to use when validating a
+   * constraint's predicate.
+   *
+   * \returns The modified expression.  If no substitutions are made,
+   * the original expression is returned.
+   */
+  PrimExpr SubstituteKnownBufferValues(PrimExpr expr,
+                                       const Map<Buffer, Array<Var>>& axis_var_lookup,
+                                       arith::Analyzer* analyzer) const;
+
+  /*! \brief Apply a condition to all known constraints
+   *
+   * For example, when propagating pre-loop constraints into the body
+   * of a loop, add a condition that the loop iterator is zero.
+   *
+   * \param condition The condition to apply
+   */
+  void AddCondition(const PrimExpr& condition);
+
+  /*! \brief Perform a variable substitution for all constraints
+   *
+   * For example, when propagating constraints from the end of a loop
+   * to the beginning, replace `i` with `i-1`.
+   *
+   * \param var_remap The variable remapping to apply.
+   */
+  void Substitute(const Map<Var, PrimExpr>& var_remap, arith::Analyzer* analyzer);
+
+  /*! \brief Simplify the predicate of all constraints
+   *
+   * \param analyzer The analyzer with which to simplify
+   */
+  void Simplify(arith::Analyzer* analyzer);
+
+  /*! \brief Update the known buffer values based on buffer touches
+   *
+   * For any Write or Assume touches, update the known values.  For
+   * any Read touches, ignore.  Used to determine known values at the
+   * end of a control flow block, given the known values at the start.
+   *
+   * \param axis_var_lookup A map from buffer to the variables
+   * representing positions along the buffer's axes.
+   *
+   * \param touch_points The buffer touch points to apply
+   *
+   * \param analyzer The analyzer to use for simplifications
+   */
+  void ApplyTouches(const Map<Buffer, Array<Var>>& axis_var_lookup,
+                    const std::vector<BufferTouch>& touch_points, arith::Analyzer* analyzer);
+
+  /*! \brief Update unused buffer locations based on buffer touches
+   *
+   * For any Write, mark the written-to indices as unused.  (That is,
+   * immediately prior to assigning `buf[i] = expr`, the value stored
+   * at `buf[i]` is irrelevant.)  For any Read, mark the read-from
+   * indices as used.  This method is used to determine unused buffer
+   * indices at the start of a control flow block, given the unused
+   * buffer indices values at the end.
+   *
+   * \param axis_var_lookup A map from buffer to the variables
+   * representing positions along the buffer's axes.
+   *
+   * \param touch_points The buffer touch points to apply
+   *
+   * \param analyzer The analyzer to use for simplifications
+   */
+  void BackpropUnusedIndices(const Map<Buffer, Array<Var>>& axis_var_lookup,
+                             const std::vector<BufferTouch>& touch_points,
+                             arith::Analyzer* analyzer);
+
+  /*! \brief Remove free parameters from the constraints
+   *
+   * \param free_predicate_parameters
+   *
+   * \param analyzer The analyzer with which to simplify after removal
+   */
+  void RemoveFreeParameters(const Map<Var, Range>& free_predicate_parameters,
+                            arith::Analyzer* analyzer);
+
+  /*! \brief Check if two buffer states are equivalent
+   *
+   * \param other
+   *
+   * \param analyzer The analyzer used to check equality of PrimExpr
+   *
+   * \return True if the two states are provably equivalent, false otherwise.
+   */
+  bool IsEquivalentTo(const BufferState& other, arith::Analyzer* analyzer) const;
+
+  /* \brief Add known values provided by another state
+   *
+   * \param other The state with which to merge constraints
+   *
+   * \param analyzer The analyzer with which to simplify the result
+   */
+  void Union(const BufferState& other, arith::Analyzer* analyzer);
+
+  /* \brief Remove all known values not consistent with another state
+   *
+   * \param other The state with which to merge constraints
+   *
+   * \param analyzer The analyzer with which to simplify the result
+   */
+  void Intersection(const BufferState& other, arith::Analyzer* analyzer);
+
+  friend std::ostream& operator<<(std::ostream& os, const BufferState&);
+
+ private:
+  friend class ControlFlowGraph;
+  /*! \brief The known constraints */
+  std::vector<BufferTouch> constraints;
+};
+
+/*! \brief Represents the flow of control through a `tir::Stmt`
+ *
+ * This class contains an internal representation of the possible
+ * control flow that may occur during execution of a `tir::Stmt`.  It
+ * consists of a collection of ControlFlowBlock objects, each of which
+ * represents a subset of operations performed during execution, along
+ * with edges that represent allowed transitions between
+ * `ControlFlowBlock`.
+ *
+ * In addition, the following restrictions are used.
+ *
+ * 1. Each block may have at most two predecessors, and at most two
+ *    successors.
+ *
+ * 2. Within each block, values stored in a buffer do not change.
+ *    That is, encountering a `BufferStore` node requires creating a
+ *    new block.
+ *
+ * For example, consider the following PrimFunc
+ *
+ * ```python
+ * @T.prim_func
+ * def func(T.Buffer[16, "float32"]):
+ *     for i in T.serial(16):
+ *         if i < 8:
+ *              B[i] = i
+ *         else:
+ *              B[i] = i-8
+ * ```
+ *
+ * The control flow graph would have eight control blocks.
+ *
+ * 1. function_entry, from the start of the function through the
+ *    evaluation of the loop's extent.
+ *
+ *    Predecessors: n/a
+ *    Successors: loop_start
+ *
+ * 2. loop_start, after entering the body of the loop, through the
+ *    evaluation of the conditional `i < 8`
+ *
+ *    Predecessors: function_entry, after_conditional
+ *    Successors: then_clause_start, else_clause_start
+ *
+ * 3. then_clause_start, after entering the then_clause of `i < 8`,
+ *    through evaluation of the value `i`.
+ *
+ *    Predecessors: loop_start
+ *    Successors: then_clause_end
+ *
+ * 4. then_clause_end, after storing to `B[i]` prior to exiting the
+ *    then_clause.
+ *
+ *    Predecessors: then_clause_start
+ *    Successors: after_conditional
+ *
+ * 5. else_clause_start, after entering the else_clause of `i < 8`,
+ *    through evaluation of the value `i-8`.
+ *
+ *    Predecessors: loop_start
+ *    Successors: else_clause_end
+ *
+ * 6. else_clause_end, after storing to `B[i]` prior to exiting the
+ *    else_clause.
+ *
+ *    Predecessors: else_clause_start
+ *    Successors: after_conditional
+ *
+ * 7. after_conditional, after the end of the if/then/else, before the
+ *    end of the loop body
+ *
+ *    Predecessors: then_clause_end, else_clause_end
+ *    Successors: loop_start, after_loop
+ *
+ * 8. after_loop, after the loop
+ *
+ *    Predecessors: after_conditional
+ *    Successors: n/a
+ *
+ *
+ * By identifying `BufferStore` nodes whose value does not depend on
+ * values stored in input buffers (e.g. initializing `buf[i] = 0.0`),
+ * or whose values are provided using `builtin::assume()`
+ * (e.g. `T.assume(buf[i] == 0.0)`), the value stored in a buffer at
+ * those indices may be known for a given control block.  These known
+ * values can then be propagated forward to successor blocks, to be
+ * used in context-dependent simplifications.
+ */
+class ControlFlowGraph {
+ public:
+  /* \brief Extract the touch pattern from a TIR statement
+   */
+  explicit ControlFlowGraph(const Stmt& stmt, size_t max_revisits = 5);
+
+  /* \brief Check if a write is overwritten without impacting final results
+   *
+   * \param store The store to be examined
+   *
+   * \param context The context in which the buffer store occurs, used
+   * to identify the control-flow block in which the store occurs.  In
+   * most cases, this will be the same object as the `store` itself.
+   *
+   * \param analyzer The analyzer to be used for simplifications
+   *
+   * \return True if the specified store can be proven to be
+   * overwritten without contributing to any later statements.
+   * Returns false otherwise.
+   */
+  bool IsOverwrittenWithoutEffect(const BufferStore& store, const Stmt& context,
+                                  arith::Analyzer* analyzer) const;
+
+  /* \brief Simplify the expression, assuming it occurs within the given context
+   *
+   * \param expr The expression to be simplified.  Does not need to
+   * have occurred within the statement used to construct this
+   * BufferTouchPattern.
+   *
+   * \param context The statement where this expression occurred, or
+   * is to be inserted.  Must occur within the statement used to
+   * construct this BufferTouchPattern.
+   *
+   * \param analyzer The analyzer to be used for simplifications
+   *
+   * \returns The simplified statement
+   */
+  PrimExpr SimplifyInContext(PrimExpr expr, const Stmt& context, arith::Analyzer* analyzer) const;
+
+  /*! \brief Remove the specified BufferStore from the control-flow
+   *  graph
+   *
+   * Removing the specified store, which may reflow known values.
+   * This is necessary when simplifying sequential stores of the same
+   * value.  Otherwise, the first could be removed as a no-op because
+   * it is overwritten by the second, and the second could be removed
+   * as a no-op because it is the same value as the first.
+   *
+   * \param store The store to remove
+   */
+  void RemoveStore(const tir::BufferStore& store);
+
+  friend std::ostream& operator<<(std::ostream& os, const ControlFlowGraph& pattern);
+
+ private:
+  /*! \brief Return index variables representing locations within a
+   *   buffer.
+   *
+   * For a given buffer, will always return the same set of variables.
+   *
+   * \param buf The buffer being accessed
+   *
+   * \param indices The indices at which the buffer is being accessed.
+   * These are used to set the dtype of the buffer axis variables.
+   *
+   * \returns Variables representing a position along the buffer's axis.
+   */
+  Array<Var> GetIndexVariables(const Buffer& buf, const Array<PrimExpr>& indices);
+
+  /*! \brief Return index variables representing locations within a
+   *   buffer, if they have been generated before.
+   *
+   * For a given buffer, will always return the same set of variables.
+   *
+   * \param buf The buffer being accessed
+   *
+   * \returns Variables representing a position along the buffer's axis.
+   */
+  Optional<Array<Var>> GetIndexVariables(const Buffer& buf) const;
+
+  /*! \brief Propagate known values from known BufferStore/assume
+   *  subsequent control flow blocks
+   */
+  void ForwardPropagateKnownValues(size_t max_revisits);
+
+  void BackwardPropagateUnusedValues(size_t max_revisits);

Review Comment:
   Missing docs



##########
src/arith/conjunctive_normal_form.cc:
##########
@@ -248,14 +248,14 @@ void AndOfOrs::TrySimplifyOr(Key* a_ptr, Key* b_ptr, Analyzer* analyzer) {
   Key& a = *a_ptr;
   Key& b = *b_ptr;
   PrimExpr joint = GetExpr(a) || GetExpr(b);
-  PrimExpr simplified = analyzer->Simplify(joint);
+  PrimExpr simplified = analyzer->rewrite_simplify(joint);
   if (!ExprDeepEqual()(simplified, joint)) {
     if (auto* simplified_or = simplified.as<OrNode>()) {
       a = GetKey(simplified_or->a);
       b = GetKey(simplified_or->b);
     } else {
-      a = GetKey(simplified);
-      b = key_false_;
+      a = key_false_;
+      b = GetKey(simplified);

Review Comment:
   Is this a bug fix? Do we have a test to reproduce if so?



##########
src/tir/analysis/control_flow_graph.h:
##########
@@ -0,0 +1,597 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file control_flow_graph.h
+ * \brief Utility for extracting and interacting with buffer touch points
+ */
+
+#include <tvm/arith/analyzer.h>
+#include <tvm/arith/int_solver.h>
+#include <tvm/runtime/container/array.h>
+#include <tvm/tir/buffer.h>
+#include <tvm/tir/stmt.h>
+#include <tvm/tir/var.h>
+
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#ifndef TVM_TIR_ANALYSIS_CONTROL_FLOW_GRAPH_H_
+#define TVM_TIR_ANALYSIS_CONTROL_FLOW_GRAPH_H_
+
+namespace tvm {
+namespace tir {
+
+/*! \brief Represents an interaction with a buffer */
+struct BufferTouch {
+  enum class AccessType {
+    /*! \brief Buffer access occurs in BufferLoad */
+    Read,
+
+    /*! \brief Buffer access occurs in BufferStore */
+    Write,
+
+    /*! \brief Buffer access occurs in tir::builtin::assume() */
+    Assume,
+  };
+
+  BufferTouch(Buffer buffer, PrimExpr predicate, PrimExpr value)
+      : buffer(buffer),
+        predicate(predicate),
+        value(value),
+        loop_var_expressions({}),
+        touch_type(AccessType::Assume) {}
+
+  BufferTouch(Buffer buffer, PrimExpr predicate, PrimExpr value,
+              std::vector<std::pair<Var, PrimExpr>> loop_var_expressions, AccessType touch_type)
+      : buffer(buffer),
+        predicate(predicate),
+        value(value),
+        loop_var_expressions(loop_var_expressions),
+        touch_type(touch_type) {}
+
+  /*! \brief The buffer being touched */
+  Buffer buffer;
+
+  /*! \brief A predicate that is true when this touch applies
+   *
+   * May be in terms of axis variables to indicate touches that impact
+   * only a portion of a buffer.
+   */
+  PrimExpr predicate;
+
+  /*! \brief The value in this buffer after the touch
+   *
+   * May be in terms of axis variables to indicate a known
+   * non-constant value.  May be in terms of a BufferLoad to indicate
+   * an unknown value.
+   */
+  PrimExpr value;
+
+  /*! \brief Active loops during the buffer touch
+   *
+   * The vector contains one entry for each loop that contains the
+   * buffer touch.  The `Var` item in each entry is the loop variable
+   * itself.  The `PrimExpr` item is an expression for the loop
+   * variable in terms of the buffer axis variables in
+   * `ControlFlowGraph::axis_var_lookup_`.
+   *
+   * Used to construct boolean expressions indicating whether the loop
+   * iteration that performs this touch has been reached.
+   */
+  std::vector<std::pair<Var, PrimExpr>> loop_var_expressions;
+
+  /*! \brief How the buffer was interacted with
+   *
+   * When used as a constraint (e.g. in BufferState), should use
+   * Assume.
+   */
+  AccessType touch_type{AccessType::Assume};
+
+  /*! \brief Generate a boolean expression that is true for indices
+   *  accessed by this touch during this iteration or a previous
+   *  loop iteration.
+   *
+   * Used during forward propagation, to track known values that were
+   * written in the current loop iteration, or in a preceding loop
+   * iteration.
+   */
+  PrimExpr BeforeLoopIteration() const;
+
+  /*! \brief Generate a boolean expression that is true for indices
+   *  accessed by this touch during this loop iteration.
+   *
+   * Used during speculative no-op insertion checks, to specify which
+   * indices must be later overwritten for a store to have no impact
+   * on final results.
+   */
+  PrimExpr AtLoopIteration() const;
+
+  /*! \brief Generate a boolean expression that is true for indices
+   *  accessed by this touch during this loop iteration or a
+   *  subsequent loop iteration.
+   *
+   * Used during backward propagation, to track indices that that are
+   * overwritten in the current loop iteration or in a later loop
+   * iteration.
+   */
+  PrimExpr AfterLoopIteration() const;
+
+  /* \brief Checks if this touch affects a subset of indices of another
+   *
+   * Returns true if the indices accessed by this touch are a subset
+   * of predicate is true can be proven to be a subset of the other
+   * subset.  Returns false if it cannot be proven to be a subset of
+   * ther other subset.
+   */
+  bool IsSubsetOf(const BufferTouch& other, arith::Analyzer* analyzer) const;
+
+  /* \brief Checks if this touch affects distinct indices from another
+   *
+   * Returns true if it can be proven that the two predicates cannot
+   * be simultaneously true.  Returns false if it cannot be proven
+   * that the two predicates are distinct.
+   */
+  bool IsDistinctFrom(const BufferTouch& other, arith::Analyzer* analyzer) const;
+
+  /* \brief Checks if this touch affects distinct indices from another
+   *
+   * Returns true if it can be proven that the two predicates cannot
+   * be simultaneously true.  Returns false if it cannot be proven
+   * that the two predicates are distinct.
+   */
+  bool IsEquivalentTo(const BufferTouch& other, arith::Analyzer* analyzer) const;
+
+  friend std::ostream& operator<<(std::ostream& os, const BufferTouch& expr);
+};
+
+/*! \brief Represents the known state of buffers at a specific point */
+class BufferState {
+ public:
+  /*! Default constructor
+   *
+   * Initialize the buffer state with no known information.
+   */
+  BufferState() {}
+
+  /*! \brief Replace BufferLoad instances with known values
+   *
+   * \param expr The expression to be updated.
+   *
+   * \param axis_var_lookup A map from buffer to the variables
+   * representing positions along the buffer's axes.
+   *
+   * \param analyzer The analyzer to use when validating a
+   * constraint's predicate.
+   *
+   * \returns The modified expression.  If no substitutions are made,
+   * the original expression is returned.
+   */
+  PrimExpr SubstituteKnownBufferValues(PrimExpr expr,
+                                       const Map<Buffer, Array<Var>>& axis_var_lookup,
+                                       arith::Analyzer* analyzer) const;
+
+  /*! \brief Apply a condition to all known constraints
+   *
+   * For example, when propagating pre-loop constraints into the body
+   * of a loop, add a condition that the loop iterator is zero.
+   *
+   * \param condition The condition to apply
+   */
+  void AddCondition(const PrimExpr& condition);
+
+  /*! \brief Perform a variable substitution for all constraints
+   *
+   * For example, when propagating constraints from the end of a loop
+   * to the beginning, replace `i` with `i-1`.
+   *
+   * \param var_remap The variable remapping to apply.
+   */
+  void Substitute(const Map<Var, PrimExpr>& var_remap, arith::Analyzer* analyzer);
+
+  /*! \brief Simplify the predicate of all constraints
+   *
+   * \param analyzer The analyzer with which to simplify
+   */
+  void Simplify(arith::Analyzer* analyzer);
+
+  /*! \brief Update the known buffer values based on buffer touches
+   *
+   * For any Write or Assume touches, update the known values.  For
+   * any Read touches, ignore.  Used to determine known values at the
+   * end of a control flow block, given the known values at the start.
+   *
+   * \param axis_var_lookup A map from buffer to the variables
+   * representing positions along the buffer's axes.
+   *
+   * \param touch_points The buffer touch points to apply
+   *
+   * \param analyzer The analyzer to use for simplifications
+   */
+  void ApplyTouches(const Map<Buffer, Array<Var>>& axis_var_lookup,
+                    const std::vector<BufferTouch>& touch_points, arith::Analyzer* analyzer);
+
+  /*! \brief Update unused buffer locations based on buffer touches
+   *
+   * For any Write, mark the written-to indices as unused.  (That is,
+   * immediately prior to assigning `buf[i] = expr`, the value stored
+   * at `buf[i]` is irrelevant.)  For any Read, mark the read-from
+   * indices as used.  This method is used to determine unused buffer
+   * indices at the start of a control flow block, given the unused
+   * buffer indices values at the end.
+   *
+   * \param axis_var_lookup A map from buffer to the variables
+   * representing positions along the buffer's axes.
+   *
+   * \param touch_points The buffer touch points to apply
+   *
+   * \param analyzer The analyzer to use for simplifications
+   */
+  void BackpropUnusedIndices(const Map<Buffer, Array<Var>>& axis_var_lookup,
+                             const std::vector<BufferTouch>& touch_points,
+                             arith::Analyzer* analyzer);
+
+  /*! \brief Remove free parameters from the constraints
+   *
+   * \param free_predicate_parameters
+   *
+   * \param analyzer The analyzer with which to simplify after removal
+   */
+  void RemoveFreeParameters(const Map<Var, Range>& free_predicate_parameters,
+                            arith::Analyzer* analyzer);
+
+  /*! \brief Check if two buffer states are equivalent
+   *
+   * \param other
+   *
+   * \param analyzer The analyzer used to check equality of PrimExpr
+   *
+   * \return True if the two states are provably equivalent, false otherwise.
+   */
+  bool IsEquivalentTo(const BufferState& other, arith::Analyzer* analyzer) const;
+
+  /* \brief Add known values provided by another state
+   *
+   * \param other The state with which to merge constraints
+   *
+   * \param analyzer The analyzer with which to simplify the result
+   */
+  void Union(const BufferState& other, arith::Analyzer* analyzer);
+
+  /* \brief Remove all known values not consistent with another state
+   *
+   * \param other The state with which to merge constraints
+   *
+   * \param analyzer The analyzer with which to simplify the result
+   */
+  void Intersection(const BufferState& other, arith::Analyzer* analyzer);
+
+  friend std::ostream& operator<<(std::ostream& os, const BufferState&);
+
+ private:
+  friend class ControlFlowGraph;
+  /*! \brief The known constraints */
+  std::vector<BufferTouch> constraints;
+};
+
+/*! \brief Represents the flow of control through a `tir::Stmt`
+ *
+ * This class contains an internal representation of the possible
+ * control flow that may occur during execution of a `tir::Stmt`.  It
+ * consists of a collection of ControlFlowBlock objects, each of which
+ * represents a subset of operations performed during execution, along
+ * with edges that represent allowed transitions between
+ * `ControlFlowBlock`.
+ *
+ * In addition, the following restrictions are used.
+ *
+ * 1. Each block may have at most two predecessors, and at most two
+ *    successors.
+ *
+ * 2. Within each block, values stored in a buffer do not change.
+ *    That is, encountering a `BufferStore` node requires creating a
+ *    new block.
+ *
+ * For example, consider the following PrimFunc
+ *
+ * ```python
+ * @T.prim_func
+ * def func(T.Buffer[16, "float32"]):
+ *     for i in T.serial(16):
+ *         if i < 8:
+ *              B[i] = i
+ *         else:
+ *              B[i] = i-8
+ * ```
+ *
+ * The control flow graph would have eight control blocks.
+ *
+ * 1. function_entry, from the start of the function through the
+ *    evaluation of the loop's extent.
+ *
+ *    Predecessors: n/a
+ *    Successors: loop_start
+ *
+ * 2. loop_start, after entering the body of the loop, through the
+ *    evaluation of the conditional `i < 8`
+ *
+ *    Predecessors: function_entry, after_conditional
+ *    Successors: then_clause_start, else_clause_start
+ *
+ * 3. then_clause_start, after entering the then_clause of `i < 8`,
+ *    through evaluation of the value `i`.
+ *
+ *    Predecessors: loop_start
+ *    Successors: then_clause_end
+ *
+ * 4. then_clause_end, after storing to `B[i]` prior to exiting the
+ *    then_clause.
+ *
+ *    Predecessors: then_clause_start
+ *    Successors: after_conditional
+ *
+ * 5. else_clause_start, after entering the else_clause of `i < 8`,
+ *    through evaluation of the value `i-8`.
+ *
+ *    Predecessors: loop_start
+ *    Successors: else_clause_end
+ *
+ * 6. else_clause_end, after storing to `B[i]` prior to exiting the
+ *    else_clause.
+ *
+ *    Predecessors: else_clause_start
+ *    Successors: after_conditional
+ *
+ * 7. after_conditional, after the end of the if/then/else, before the
+ *    end of the loop body
+ *
+ *    Predecessors: then_clause_end, else_clause_end
+ *    Successors: loop_start, after_loop
+ *
+ * 8. after_loop, after the loop
+ *
+ *    Predecessors: after_conditional
+ *    Successors: n/a
+ *
+ *
+ * By identifying `BufferStore` nodes whose value does not depend on
+ * values stored in input buffers (e.g. initializing `buf[i] = 0.0`),
+ * or whose values are provided using `builtin::assume()`
+ * (e.g. `T.assume(buf[i] == 0.0)`), the value stored in a buffer at
+ * those indices may be known for a given control block.  These known
+ * values can then be propagated forward to successor blocks, to be
+ * used in context-dependent simplifications.
+ */
+class ControlFlowGraph {
+ public:
+  /* \brief Extract the touch pattern from a TIR statement
+   */
+  explicit ControlFlowGraph(const Stmt& stmt, size_t max_revisits = 5);
+
+  /* \brief Check if a write is overwritten without impacting final results
+   *
+   * \param store The store to be examined
+   *
+   * \param context The context in which the buffer store occurs, used
+   * to identify the control-flow block in which the store occurs.  In
+   * most cases, this will be the same object as the `store` itself.
+   *
+   * \param analyzer The analyzer to be used for simplifications
+   *
+   * \return True if the specified store can be proven to be
+   * overwritten without contributing to any later statements.
+   * Returns false otherwise.
+   */
+  bool IsOverwrittenWithoutEffect(const BufferStore& store, const Stmt& context,
+                                  arith::Analyzer* analyzer) const;
+
+  /* \brief Simplify the expression, assuming it occurs within the given context
+   *
+   * \param expr The expression to be simplified.  Does not need to
+   * have occurred within the statement used to construct this
+   * BufferTouchPattern.
+   *
+   * \param context The statement where this expression occurred, or
+   * is to be inserted.  Must occur within the statement used to
+   * construct this BufferTouchPattern.
+   *
+   * \param analyzer The analyzer to be used for simplifications
+   *
+   * \returns The simplified statement
+   */
+  PrimExpr SimplifyInContext(PrimExpr expr, const Stmt& context, arith::Analyzer* analyzer) const;
+
+  /*! \brief Remove the specified BufferStore from the control-flow
+   *  graph
+   *
+   * Removing the specified store, which may reflow known values.
+   * This is necessary when simplifying sequential stores of the same
+   * value.  Otherwise, the first could be removed as a no-op because
+   * it is overwritten by the second, and the second could be removed
+   * as a no-op because it is the same value as the first.
+   *
+   * \param store The store to remove
+   */
+  void RemoveStore(const tir::BufferStore& store);
+
+  friend std::ostream& operator<<(std::ostream& os, const ControlFlowGraph& pattern);
+
+ private:
+  /*! \brief Return index variables representing locations within a
+   *   buffer.
+   *
+   * For a given buffer, will always return the same set of variables.
+   *
+   * \param buf The buffer being accessed
+   *
+   * \param indices The indices at which the buffer is being accessed.
+   * These are used to set the dtype of the buffer axis variables.
+   *
+   * \returns Variables representing a position along the buffer's axis.
+   */
+  Array<Var> GetIndexVariables(const Buffer& buf, const Array<PrimExpr>& indices);
+
+  /*! \brief Return index variables representing locations within a
+   *   buffer, if they have been generated before.
+   *
+   * For a given buffer, will always return the same set of variables.
+   *
+   * \param buf The buffer being accessed
+   *
+   * \returns Variables representing a position along the buffer's axis.
+   */
+  Optional<Array<Var>> GetIndexVariables(const Buffer& buf) const;
+
+  /*! \brief Propagate known values from known BufferStore/assume
+   *  subsequent control flow blocks
+   */
+  void ForwardPropagateKnownValues(size_t max_revisits);
+
+  void BackwardPropagateUnusedValues(size_t max_revisits);
+
+  struct ControlFlowEdge {
+    /* \brief The source block of the control flow edge
+     *
+     * Lookup index into `control_flow_`
+     */
+    size_t index;
+
+    /*! \brief Variable remaps
+     *
+     * e.g. Replacing loop iterator `i` with `i-1` when following an
+     * edge from the end of a loop to the beginning of the loop.
+     */
+    Map<Var, PrimExpr> var_remap;
+
+    /*! \brief Condition that must to true after following this edge
+     *
+     * This is applied after variable remapping.  For example, `i >
+     * loop_min` when following the an edge from the end of a loop to
+     * the beginning of the loop.
+     */
+    Optional<PrimExpr> post_condition;
+  };
+  friend std::ostream& operator<<(std::ostream& os, const ControlFlowEdge& edge);
+
+  struct ControlFlowBlock {
+    struct LoopEntry {
+      Var loop_var;
+      PrimExpr loop_min;
+      PrimExpr loop_max;
+      Range loop_range;
+    };
+
+    /*! \brief Loop iterators that are active during this block */
+    std::vector<LoopEntry> active_loop_iterators;
+
+    /*! \brief Loop-dependent Let bindings that may appear within the block */
+    Map<Var, PrimExpr> let_bindings_using_loop;
+
+    /*! \brief Predicate that must be true to have reached this block */
+    PrimExpr scope_predicate{Bool(true)};
+
+    /*! \brief All known values prior to executing the block */
+    BufferState known_at_block_start;
+
+    /*! \brief All known values after executing the block */
+    BufferState known_at_block_end;
+
+    /*! \brief Indices whose value at the start of the block is known to be unused */
+    BufferState unused_at_block_start;
+
+    /*! \brief Indices whose value at the end of the block is known to be unused */
+    BufferState unused_at_block_end;
+
+    /* \brief Buffer touches that occur within the block
+     *
+     * All buffer touches within a block can be treated as occurring
+     * simultaneously.
+     */
+    std::vector<BufferTouch> touch_points;
+
+    /* \brief The blocks that occur after this block
+     *
+     * Lookup index into `control_flow_`
+     */
+    std::vector<ControlFlowEdge> successors;
+
+    /* \brief The blocks that occur before this block */
+    std::vector<ControlFlowEdge> predecessors;
+
+    BufferTouch MakeBufferTouch(ControlFlowGraph* graph, const Buffer& buf,
+                                const Array<PrimExpr>& indices, BufferTouch::AccessType touch_type,
+                                PrimExpr known_value_expr) const;
+
+    std::pair<BufferTouch, Map<Var, Range>> MakeBufferTouch(const Buffer& buf,
+                                                            Array<Var> index_variables,
+                                                            Array<PrimExpr> indices,
+                                                            BufferTouch::AccessType touch_type,
+                                                            PrimExpr known_value_expr) const;

Review Comment:
   Missing docs



##########
src/tir/analysis/control_flow_graph.h:
##########
@@ -0,0 +1,597 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file control_flow_graph.h
+ * \brief Utility for extracting and interacting with buffer touch points
+ */
+
+#include <tvm/arith/analyzer.h>
+#include <tvm/arith/int_solver.h>
+#include <tvm/runtime/container/array.h>
+#include <tvm/tir/buffer.h>
+#include <tvm/tir/stmt.h>
+#include <tvm/tir/var.h>
+
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#ifndef TVM_TIR_ANALYSIS_CONTROL_FLOW_GRAPH_H_
+#define TVM_TIR_ANALYSIS_CONTROL_FLOW_GRAPH_H_
+
+namespace tvm {
+namespace tir {
+
+/*! \brief Represents an interaction with a buffer */
+struct BufferTouch {
+  enum class AccessType {
+    /*! \brief Buffer access occurs in BufferLoad */
+    Read,
+
+    /*! \brief Buffer access occurs in BufferStore */
+    Write,
+
+    /*! \brief Buffer access occurs in tir::builtin::assume() */
+    Assume,
+  };
+
+  BufferTouch(Buffer buffer, PrimExpr predicate, PrimExpr value)
+      : buffer(buffer),
+        predicate(predicate),
+        value(value),
+        loop_var_expressions({}),
+        touch_type(AccessType::Assume) {}
+
+  BufferTouch(Buffer buffer, PrimExpr predicate, PrimExpr value,
+              std::vector<std::pair<Var, PrimExpr>> loop_var_expressions, AccessType touch_type)
+      : buffer(buffer),
+        predicate(predicate),
+        value(value),
+        loop_var_expressions(loop_var_expressions),
+        touch_type(touch_type) {}
+
+  /*! \brief The buffer being touched */
+  Buffer buffer;
+
+  /*! \brief A predicate that is true when this touch applies
+   *
+   * May be in terms of axis variables to indicate touches that impact
+   * only a portion of a buffer.
+   */
+  PrimExpr predicate;
+
+  /*! \brief The value in this buffer after the touch
+   *
+   * May be in terms of axis variables to indicate a known
+   * non-constant value.  May be in terms of a BufferLoad to indicate
+   * an unknown value.
+   */
+  PrimExpr value;
+
+  /*! \brief Active loops during the buffer touch
+   *
+   * The vector contains one entry for each loop that contains the
+   * buffer touch.  The `Var` item in each entry is the loop variable
+   * itself.  The `PrimExpr` item is an expression for the loop
+   * variable in terms of the buffer axis variables in
+   * `ControlFlowGraph::axis_var_lookup_`.
+   *
+   * Used to construct boolean expressions indicating whether the loop
+   * iteration that performs this touch has been reached.
+   */
+  std::vector<std::pair<Var, PrimExpr>> loop_var_expressions;
+
+  /*! \brief How the buffer was interacted with
+   *
+   * When used as a constraint (e.g. in BufferState), should use
+   * Assume.
+   */
+  AccessType touch_type{AccessType::Assume};

Review Comment:
   Perhaps it would be better to have an unknown access type than to default to `Assume`?



##########
src/tir/analysis/control_flow_graph.cc:
##########
@@ -0,0 +1,1641 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file control_flow_graph.cc
+ * \brief Utility to deduce bound of expression
+ */
+
+#include "control_flow_graph.h"
+
+#include <tvm/runtime/registry.h>
+#include <tvm/tir/analysis.h>
+#include <tvm/tir/builtin.h>
+#include <tvm/tir/expr.h>
+#include <tvm/tir/op.h>
+#include <tvm/tir/stmt_functor.h>
+
+#include <numeric>
+#include <optional>
+#include <queue>
+#include <set>
+#include <sstream>
+#include <unordered_set>
+
+#include "../../arith/conjunctive_normal_form.h"
+#include "../../arith/constraint_extract.h"
+#include "../../arith/ir_mutator_with_analyzer.h"
+#include "../../arith/ir_visitor_with_analyzer.h"
+#include "../../arith/narrow_predicate_expression.h"
+#include "../../arith/unwrap_vector_expr.h"
+
+namespace tvm {
+namespace tir {
+
+using namespace arith;
+
+namespace {
+bool HasBufferLoad(PrimExpr expr) {
+  struct Visitor : public ExprVisitor {
+    void VisitExpr_(const BufferLoadNode* node) override { found_buffer_load = true; }
+    bool found_buffer_load{false};
+  };
+
+  Visitor visitor;
+  visitor(expr);
+  return visitor.found_buffer_load;
+}
+
+Optional<PrimExpr> SubstituteParamValues(const Array<Var>& param_vars,
+                                         const Array<PrimExpr>& param_values,
+                                         const PrimExpr& expr) {
+  ICHECK_EQ(param_vars.size(), param_values.size())
+      << "Expression was defined as having " << param_vars.size() << " parameters, but received "
+      << param_values.size() << " arguments.";
+
+  Map<tir::Var, PrimExpr> var_map;
+  for (size_t i = 0; i < param_values.size(); i++) {
+    var_map.Set(param_vars[i], param_values[i]);
+  }
+
+  return Substitute(expr, var_map);
+}
+}  // namespace
+
+PrimExpr BufferTouch::BeforeLoopIteration() const {
+  PrimExpr loop_predicate = Bool(true);
+  for (auto it = loop_var_expressions.rbegin(); it != loop_var_expressions.rend(); it++) {
+    const Var& loop_var = it->first;
+    const PrimExpr& loop_expr = it->second;
+    loop_predicate = (loop_var <= loop_expr) || ((loop_var == loop_expr) && loop_predicate);
+  }
+  return loop_predicate;
+}
+
+PrimExpr BufferTouch::AtLoopIteration() const {
+  PrimExpr loop_predicate = Bool(true);
+  for (auto it = loop_var_expressions.rbegin(); it != loop_var_expressions.rend(); it++) {
+    const Var& loop_var = it->first;
+    const PrimExpr& loop_expr = it->second;
+    loop_predicate = (loop_var == loop_expr) && loop_predicate;
+  }
+  return loop_predicate;
+}
+
+PrimExpr BufferTouch::AfterLoopIteration() const {
+  PrimExpr loop_predicate = Bool(true);
+  for (auto it = loop_var_expressions.rbegin(); it != loop_var_expressions.rend(); it++) {
+    const Var& loop_var = it->first;
+    const PrimExpr& loop_expr = it->second;
+    loop_predicate = (loop_var >= loop_expr) || ((loop_var == loop_expr) && loop_predicate);
+  }
+  return loop_predicate;
+}
+
+bool BufferTouch::IsSubsetOf(const BufferTouch& other, Analyzer* analyzer) const {
+  if (this->buffer.same_as(other.buffer)) {
+    With<ConstraintContext> constraint(analyzer, predicate);
+
+    return analyzer->CanProve(other.predicate);
+  } else {
+    return false;
+  }
+}
+
+bool BufferTouch::IsDistinctFrom(const BufferTouch& other, Analyzer* analyzer) const {
+  if (this->buffer.same_as(other.buffer)) {
+    With<ConstraintContext> constraint(analyzer, predicate);
+
+    return analyzer->CanProve(!other.predicate);
+  } else {
+    return true;
+  }
+}
+
+std::ostream& operator<<(std::ostream& os, const BufferTouch& tp) {
+  auto touch_type = [&]() {
+    if (tp.touch_type == BufferTouch::AccessType::Read) {
+      return "read";
+    } else if (tp.touch_type == BufferTouch::AccessType::Write) {
+      return "write";
+    } else if (tp.touch_type == BufferTouch::AccessType::Assume) {
+      return "assume";
+    } else {
+      return "???";
+    }
+  }();
+
+  os << "BufferTouch(" << tp.buffer->name << ", " << touch_type << ", " << tp.predicate
+     << ", value = " << tp.value << ")";
+  return os;
+}
+
+class BufferConstraintApply : public IRMutatorWithAnalyzer {
+ public:
+  using Parent = IRMutatorWithAnalyzer;
+
+  BufferConstraintApply(const Map<Buffer, Array<Var>>& axis_var_lookup,
+                        const std::vector<BufferTouch>& knowns, Analyzer* analyzer)
+      : Parent(analyzer), axis_var_lookup_(axis_var_lookup), knowns_(knowns) {}
+
+  using Parent::VisitExpr_;
+
+  PrimExpr VisitExpr_(const BufferLoadNode* op) override {
+    for (const auto& known : knowns_) {
+      if (!op->buffer.same_as(known.buffer)) {
+        continue;
+      }
+
+      Optional<Var> lane_var = NullOpt;
+      IntImm num_lanes;
+
+      Array<PrimExpr> indices = op->indices.Map([&](const auto& index) {
+        if (index.dtype().lanes() == 1) {
+          return index;
+        } else {
+          ICHECK(!lane_var) << "Multiple indices found with non-scalar values";
+          lane_var = Var("lane", index.dtype().element_of());
+          num_lanes = IntImm(index.dtype().element_of(), index.dtype().lanes());
+          return UnwrapVectorExpr(index, lane_var.value());
+        }
+      });
+
+      auto axis_vars = axis_var_lookup_.at(op->buffer);
+      PrimExpr predicate = SubstituteParamValues(axis_vars, indices, known.predicate).value();
+
+      std::optional<With<ConstraintContext>> context;
+      if (lane_var.defined()) {
+        Var lanes = lane_var.value();
+        PrimExpr known = (IntImm(lanes.dtype(), 0) <= lanes) && (lanes < num_lanes);
+        context.emplace(analyzer_, known);
+      }
+
+      if (analyzer_->CanProve(predicate)) {
+        return SubstituteParamValues(axis_vars, op->indices, known.value).value();
+      }
+    }
+
+    return GetRef<PrimExpr>(op);
+  }
+
+ private:
+  const Map<Buffer, Array<Var>>& axis_var_lookup_;
+  const std::vector<BufferTouch>& knowns_;
+};
+
+/*! \brief Extract the control-flow graph
+ *
+ * Walk through a statement, populating the control-flow graph.
+ */
+class ControlFlowGraphBuilder final : public IRVisitorWithAnalyzer {
+ public:
+  static void Build(ControlFlowGraph* out, const Stmt& stmt) {
+    ControlFlowGraphBuilder extractor(out);
+    extractor.AppendControlBlock();
+    extractor(stmt);
+  }
+
+ private:
+  ControlFlowGraphBuilder(ControlFlowGraph* out) : out_(out) {}
+
+  using Parent = IRVisitorWithAnalyzer;
+  using Parent::VisitExpr_;
+  using Parent::VisitStmt_;
+
+  void VisitStmt(const Stmt& stmt) override {
+    // Update the lookup table to determine which control-flow block
+    // contains the start of the specified statement.  This is used
+    // later to determine which set of known values should be used to
+    // simplify a statement.
+    out_->control_flow_lookup_[stmt.get()] = CurrentControlBlock();
+    Stmt prev_stmt = current_stmt_;
+    current_stmt_ = stmt;
+    Parent::VisitStmt(stmt);
+    current_stmt_ = prev_stmt;
+  }
+
+  void VisitStmt_(const EvaluateNode* op) override {
+    if (auto* call = op->value.as<CallNode>()) {
+      if (call->op.same_as(builtin::assume())) {
+        Assume(call->args[0], true);
+        return;
+      }
+    }
+
+    Parent::VisitStmt_(op);
+  }
+
+  void Assume(PrimExpr assumption, bool from_assume_statement) {
+    for (const auto& expr : ExtractConstraints(assumption, false)) {
+      AssumeConstraintComponent(expr, from_assume_statement);
+    }
+  }
+
+  void AssumeConstraintComponent(PrimExpr assumption, bool from_assume_statement) {
+    PrimExpr additional_predicate = Bool(true);
+
+    std::vector<PrimExpr> buffer_exprs;
+    for (const auto& expr : ExtractComponents(assumption)) {
+      auto side_effect = tir::SideEffect(expr);
+      if (side_effect <= tir::CallEffectKind::kPure) {
+        // Pulling out portions of the assumption that do not depend
+        // on a buffer value allows the following two forms to be
+        // treated identically.
+        //
+        // if i < 3: T.assume(buf[i] == value)
+        // T.assume(i>=3 or buf[i] == value)
+        additional_predicate = additional_predicate && logical_not(expr);
+      } else if (side_effect == tir::CallEffectKind::kReadState) {
+        buffer_exprs.push_back(expr);
+      } else {
+        LOG(FATAL) << "Assumption must be pure or read-only";
+      }
+    }
+
+    if (buffer_exprs.empty()) {
+      out_->non_buffer_assumptions_.push_back(!CurrentScopePredicate() || assumption);
+      return;
+    }
+
+    CHECK_EQ(buffer_exprs.size(), 1) << "T.assume must contain only a single buffer expression";
+
+    auto* as_equal_node = buffer_exprs[0].as<tir::EQNode>();
+    CHECK(as_equal_node || !from_assume_statement)
+        << "T.assume buffer constraint must be of the form 'buffer[indices] == "
+           "value', but received "
+        << assumption;
+    if (!as_equal_node) {
+      // This assumption is an inequality a data-dependent
+      // conditional.  Not an error for this to occur, but also not
+      // something that is currently supported.
+      return;
+    }
+
+    tir::BufferLoad load;
+    PrimExpr value;
+    if (auto* as_load = as_equal_node->a.as<tir::BufferLoadNode>()) {
+      load = GetRef<tir::BufferLoad>(as_load);
+      value = as_equal_node->b;
+    } else if (auto* as_load = as_equal_node->b.as<tir::BufferLoadNode>()) {
+      load = GetRef<tir::BufferLoad>(as_load);
+      value = as_equal_node->a;
+    } else if (!from_assume_statement) {
+      return;
+    } else {
+      LOG(FATAL) << "T.assume buffer constraint must be of the form 'buffer[indices] == value'";
+    }
+
+    auto has_side_effect = tir::SideEffect(value) > tir::CallEffectKind::kPure;
+    CHECK(!has_side_effect || !from_assume_statement)
+        << "Buffer value in constraint must be pure expression, but was " << value;
+    if (has_side_effect) {
+      return;
+    }
+
+    {
+      InternalConstraintContext context(this, additional_predicate);
+      VisitAccess(load, BufferTouch::AccessType::Assume, value);
+    }
+    // Appending a control block ensures that all control blocks have
+    // at most one statement that changes the known buffer contents.
+    auto prev_block = CurrentControlBlock();
+    auto new_block = AppendControlBlock();
+    MarkControlFlow(prev_block, new_block);
+  }
+
+  void VisitExpr_(const LetNode* op) override {
+    std::optional<BindLetVar> binding;
+    if (UsesLoopVar(op->value)) {
+      binding.emplace(this, op->var, op->value);
+    }
+    Parent::VisitExpr_(op);
+  }
+
+  void VisitStmt_(const LetStmtNode* op) override {
+    std::optional<BindLetVar> binding;
+    if (UsesLoopVar(op->value)) {
+      binding.emplace(this, op->var, op->value);
+    }
+    Parent::VisitStmt_(op);
+  }
+
+  void VisitExpr_(const BufferLoadNode* op) override {
+    Parent::VisitExpr_(op);
+    BufferLoad load = GetRef<BufferLoad>(op);
+    VisitAccess(load, BufferTouch::AccessType::Read, load);
+  }
+
+  void VisitStmt_(const BufferStoreNode* op) override {
+    Parent::VisitStmt_(op);
+    VisitAccess(GetRef<BufferStore>(op), BufferTouch::AccessType::Write, op->value);
+    // Appending a control block ensures that all control blocks have
+    // at most one statement that changes the buffer contents.
+    auto prev_block = CurrentControlBlock();
+    auto new_block = AppendControlBlock();
+    MarkControlFlow(prev_block, new_block);
+  }
+
+  void VisitStmt_(const ForNode* op) override {
+    out_->iterator_ranges_.Set(op->loop_var, Range::FromMinExtent(op->min, op->extent));
+
+    auto before_loop = CurrentControlBlock();
+    size_t loop_start = -1;
+
+    {
+      BindActiveLoopVar binding(this, op->loop_var, op->min, op->extent);
+      loop_start = AppendControlBlock();
+      Parent::VisitStmt_(op);
+    }
+
+    auto loop_end = CurrentControlBlock();
+    auto after_loop = AppendControlBlock();
+    PrimExpr max_iterator_value = analyzer_.Simplify(op->min + op->extent - 1);
+    {
+      auto [forward, backward] = MarkControlFlow(before_loop, loop_start);
+      backward.post_condition = (op->loop_var == op->min);
+      forward.var_remap = {{op->loop_var, op->min}};
+    }
+    {
+      auto [forward, backward] = MarkControlFlow(loop_end, after_loop);
+      backward.var_remap = {{op->loop_var, max_iterator_value}};
+      forward.post_condition = (op->loop_var == max_iterator_value);
+    }
+    {
+      auto [forward, backward] = MarkControlFlow(loop_end, loop_start);
+      backward.var_remap = {{op->loop_var, op->loop_var - 1}};
+      forward.var_remap = {{op->loop_var, op->loop_var + 1}};
+      backward.post_condition = (op->loop_var > op->min);
+      forward.post_condition = (op->loop_var < max_iterator_value);
+    }
+  }
+
+  void VisitStmt_(const IfThenElseNode* op) override {
+    this->VisitExpr(op->condition);
+
+    PrimExpr real_condition = ExtractRealCondition(op->condition);
+
+    auto before_branching = CurrentControlBlock();
+
+    auto branch_start = AppendControlBlock();
+    MarkControlFlow(before_branching, branch_start);
+
+    {
+      InternalConstraintContext context(this, real_condition);
+      auto then_start = AppendControlBlock();
+      if (context.assume.defined()) {
+        Assume(context.assume.value(), false);
+      }
+      auto [forward, backward] = MarkControlFlow(branch_start, then_start);
+      backward.post_condition = real_condition;
+      forward.post_condition = real_condition;
+      this->VisitStmt(op->then_case);
+    }
+    auto then_end = CurrentControlBlock();
+
+    auto negation = analyzer_.rewrite_simplify(!real_condition);
+    {
+      InternalConstraintContext context(this, negation);
+      auto else_start = AppendControlBlock();
+      if (context.assume.defined()) {
+        Assume(context.assume.value(), false);
+      }
+      auto [forward, backward] = MarkControlFlow(branch_start, else_start);
+      backward.post_condition = negation;
+      forward.post_condition = negation;
+
+      if (op->else_case.defined()) {
+        this->VisitStmt(op->else_case.value());
+      }
+    }
+
+    auto else_end = CurrentControlBlock();
+    auto after_branching = AppendControlBlock();
+
+    if (HasBufferLoad(real_condition)) {
+      // The buffer value may have changed during the body of the
+      // condition, so we can't provide it as a post-condition.
+      MarkControlFlow(then_end, after_branching);
+      MarkControlFlow(else_end, after_branching);
+    } else {
+      {
+        auto [forward, backward] = MarkControlFlow(then_end, after_branching);
+        backward.post_condition = real_condition;
+        forward.post_condition = real_condition;
+      }
+      {
+        auto [forward, backward] = MarkControlFlow(else_end, after_branching);
+        backward.post_condition = negation;
+        forward.post_condition = negation;
+      }
+    }
+  }
+
+  /*! \brief Internal utility, returns true if the expression depends
+   *  on a loop iterator
+   */
+  bool UsesLoopVar(const PrimExpr& expr) {
+    return UsesVar(expr, [&](const VarNode* expr_var) {
+      return loop_dependent_vars_.find(expr_var) != loop_dependent_vars_.end();
+    });
+  }
+
+  /*! \brief Record the interaction with the buffer.
+   *
+   * \param node The TIR node that accesses the buffer.  Should be
+   * either a BufferLoad or BufferStore node.
+   *
+   * \param touch_type The type of buffer access being performed.  A
+   * BufferStore should always use AccessType::Write.  A BufferLoad
+   * may use either AccessType::Read or AccessType::Assume, depending
+   * on whether the BufferLoad occurs within `builtin::assume`.
+   *
+   * \param known_value_expr The value in the buffer following the access.
+   */
+  template <typename BufferAccess>
+  void VisitAccess(const BufferAccess& node, BufferTouch::AccessType touch_type,
+                   PrimExpr known_value_expr) {
+    auto& current_block = out_->control_flow_.back();
+    BufferTouch buffer_touch = current_block.MakeBufferTouch(out_, node->buffer, node->indices,
+                                                             touch_type, known_value_expr);
+    current_block.touch_points.push_back(buffer_touch);
+  }
+
+  /*! \brief Return a predicate for having reached the current
+   *  control-flow block
+   *
+   * For example, while inside an IfThenElse, will return the
+   * IfThenElse's condition.
+   */
+  PrimExpr CurrentScopePredicate() const {
+    PrimExpr predicate = Bool(true);
+    for (const auto& condition : conditions_) {
+      predicate = predicate && condition;
+    }
+    return predicate;
+  }
+
+  /* \brief Add a new control block, returning its index */
+  size_t AppendControlBlock() {
+    size_t index = out_->control_flow_.size();
+    auto& block = out_->control_flow_.emplace_back();
+    block.active_loop_iterators = active_loop_iterators_;
+    block.let_bindings_using_loop = let_bindings_using_loop_;
+    block.scope_predicate = CurrentScopePredicate();
+    return index;
+  }
+
+  /* \brief The index of the current control block */
+  size_t CurrentControlBlock() { return out_->control_flow_.size() - 1; }
+
+  /* \brief Mark a possible control from one block to another
+   *
+   * \param from_block The block from which control leaves
+   *
+   * \param to_block The block to which control enters
+   *
+   * \param var_remap Variable replacements that should be made in
+   * known expression while traversing this edge.  For example,
+   * replacing `i` with `i-1` when entering the next loop iteration,
+   * or replacing `i` with `n-1` when concluding a loop.
+   */
+  std::pair<ControlFlowGraph::ControlFlowEdge&, ControlFlowGraph::ControlFlowEdge&> MarkControlFlow(
+      size_t from_block, size_t to_block) {
+    ICHECK_LE(from_block, out_->control_flow_.size());
+    ICHECK_LE(to_block, out_->control_flow_.size());
+
+    auto& forward = out_->control_flow_[from_block].successors.emplace_back(
+        ControlFlowGraph::ControlFlowEdge{to_block, {}, NullOpt});
+    auto& backward = out_->control_flow_[to_block].predecessors.emplace_back(
+        ControlFlowGraph::ControlFlowEdge{from_block, {}, NullOpt});
+    return {forward, backward};
+  }
+
+  // Internal utility, context manager for entering/leaving a scoped constraint
+  struct InternalConstraintContext {
+    InternalConstraintContext(ControlFlowGraphBuilder* self, PrimExpr constraint)
+        : self(self), analyzer_context(&self->analyzer_, constraint) {
+      old_num_constraints = self->conditions_.size();
+
+      auto side_effect = tir::SideEffect(constraint);
+      if (side_effect <= tir::CallEffectKind::kPure) {
+        self->conditions_.push_back(constraint);
+      } else if (side_effect <= tir::CallEffectKind::kReadState) {
+        assume = constraint;
+      }
+
+      new_num_constraints = self->conditions_.size();
+    }
+    ~InternalConstraintContext() {
+      ICHECK_EQ(self->conditions_.size(), new_num_constraints)
+          << "Internal error: Each condition should only be popped once.";
+      self->conditions_.erase(self->conditions_.begin() + old_num_constraints,
+                              self->conditions_.end());
+    }
+
+    ControlFlowGraphBuilder* self{nullptr};
+    With<ConstraintContext> analyzer_context;
+    size_t old_num_constraints{0};
+    size_t new_num_constraints{0};
+    Optional<PrimExpr> assume{NullOpt};
+
+    // Disable default-generated copy/move assignment and constructors
+    InternalConstraintContext(const InternalConstraintContext&) = delete;
+    InternalConstraintContext& operator=(const InternalConstraintContext&) = delete;
+    InternalConstraintContext(InternalConstraintContext&&) = delete;
+    InternalConstraintContext& operator=(InternalConstraintContext&&) = delete;
+  };
+
+  // Internal utility, context manager for tracking a loop
+  struct BindActiveLoopVar {
+    BindActiveLoopVar(ControlFlowGraphBuilder* self, Var var, PrimExpr loop_min,
+                      PrimExpr loop_extent)
+        : self(self), var(var) {
+      PrimExpr loop_max = loop_min + (loop_extent - 1);
+      auto loop_range = Range::FromMinExtent(loop_min, loop_extent);
+      self->active_loop_iterators_.push_back({var, loop_min, loop_max, loop_range});
+      self->loop_dependent_vars_.insert(var.get());
+    }
+    ~BindActiveLoopVar() { self->active_loop_iterators_.pop_back(); }
+
+    ControlFlowGraphBuilder* self;
+    Var var;
+
+    // Disable default-generated copy/move assignment and constructors
+    BindActiveLoopVar(const BindActiveLoopVar&) = delete;
+    BindActiveLoopVar& operator=(const BindActiveLoopVar&) = delete;
+    BindActiveLoopVar(BindActiveLoopVar&&) = delete;
+    BindActiveLoopVar& operator=(BindActiveLoopVar&&) = delete;
+  };
+
+  // Internal utility, context manager for tracking a variable binding
+  struct BindLetVar {
+    BindLetVar(ControlFlowGraphBuilder* self, Var var, PrimExpr value) : self(self), var(var) {
+      self->let_bindings_using_loop_.Set(var, value);
+      self->loop_dependent_vars_.insert(var.get());
+    }
+    ~BindLetVar() {
+      self->loop_dependent_vars_.erase(var.get());
+      self->let_bindings_using_loop_.erase(var);
+    }
+    ControlFlowGraphBuilder* self;
+    Var var;
+
+    // Disable default-generated copy/move assignment and constructors
+    BindLetVar(const BindLetVar&) = delete;
+    BindLetVar& operator=(const BindLetVar&) = delete;
+    BindLetVar(BindLetVar&&) = delete;
+    BindLetVar& operator=(BindLetVar&&) = delete;
+  };
+
+  struct LoopEntry {
+    Var loop_var;
+    PrimExpr loop_min;
+    PrimExpr loop_max;
+    Range loop_range;
+  };
+
+  // Track in order to know which Vars to write in terms of the buffer
+  // indices and substitute out of the predicate.
+  std::vector<ControlFlowGraph::ControlFlowBlock::LoopEntry> active_loop_iterators_;
+
+  // Track all loop iterators, along with values derived from loop iterators.
+  std::unordered_set<const VarNode*> loop_dependent_vars_;
+
+  // Any let binding that depends, directly or indirectly, on a loop
+  // binding.  When making a predicate in terms of the buffer indices,
+  // these need to be substituted out.
+  // std::unordered_map<const VarNode*, PrimExpr> let_bindings_using_loop_;
+  Map<Var, PrimExpr> let_bindings_using_loop_;
+
+  // Track in order to know what conditions limit the buffer access
+  std::vector<PrimExpr> conditions_;
+
+  // Track in order to know what statement initiated the buffer access
+  Stmt current_stmt_;
+
+  // Output data structure
+  ControlFlowGraph* out_;
+};
+
+std::pair<BufferTouch, Map<Var, Range>> ControlFlowGraph::ControlFlowBlock::MakeBufferTouch(
+    const tir::Buffer& buf, Array<Var> index_variables, Array<PrimExpr> indices,
+    BufferTouch::AccessType touch_type, PrimExpr known_value_expr) const {
+  const auto& current_block = *this;
+
+  Analyzer local_analyzer;
+
+  Optional<Var> lane_var = NullOpt;
+  IntImm num_lanes;
+
+  Array<PrimExpr> index_expressions = indices.Map([&](const auto& index) {
+    if (index.dtype().lanes() == 1) {
+      return index;
+    } else {
+      ICHECK(!lane_var) << "Multiple indices found with non-scalar values";
+      lane_var = Var("lane", index.dtype().element_of());
+      num_lanes = IntImm(index.dtype().element_of(), index.dtype().lanes());
+      return UnwrapVectorExpr(index, lane_var.value());
+    }
+  });
+
+  Array<Var> loop_vars;
+
+  Map<Var, Range> loop_ranges;
+  for (const auto& loop_entry : current_block.active_loop_iterators) {
+    loop_vars.push_back(loop_entry.loop_var);
+    loop_ranges.Set(loop_entry.loop_var, loop_entry.loop_range);
+  }
+
+  // If the indices contain multiple lanes, treat the lane variable
+  // as an additional loop iterator to be solved for and substituted
+  // out.
+  if (lane_var) {
+    loop_vars.push_back(lane_var.value());
+    loop_ranges.Set(lane_var.value(), Range::FromMinExtent(0, num_lanes));
+  }
+
+  IntConstraintsTransform transform = [&]() {
+    ICHECK_EQ(index_variables.size(), index_expressions.size());
+
+    Array<PrimExpr> relations;
+
+    for (size_t i = 0; i < index_expressions.size(); i++) {
+      PrimExpr expr = index_expressions[i];
+      Var var = index_variables[i];
+
+      expr = Substitute(expr, current_block.let_bindings_using_loop);
+      relations.push_back(var == expr);
+    }
+
+    IntConstraints system(loop_vars, loop_ranges, relations);
+    return arith::SolveLinearEquations(system);
+  }();
+
+  Map<Var, PrimExpr> loop_var_to_axis_var = transform->src_to_dst;
+  Map<Var, Range> free_params = transform->dst->ranges;
+  PrimExpr transform_predicate =
+      std::accumulate(transform->dst->relations.begin(), transform->dst->relations.end(),
+                      PrimExpr(Bool(true)), [](PrimExpr a, PrimExpr b) { return a && b; });
+
+  transform_predicate = SimplifyAsAndOfOrs(transform_predicate, &local_analyzer);
+
+  auto find_removable_params = [&]() -> Map<Var, PrimExpr> {
+    Map<Var, PrimExpr> removable_params;
+
+    // The arith::SolveLinearEquations is more general than the
+    // utilities in iter_affine_map.h, but can introduce free
+    // parameters that could later be determined with the known
+    // constraints.  This step removes all such free parameters.
+    for (const auto& expr : ExtractConstraints(transform_predicate)) {
+      if (auto* as_equal = expr.as<EQNode>()) {
+        auto check_expr = [&](const PrimExpr& a, const PrimExpr& b) {
+          auto* var_ptr = a.as<VarNode>();
+          if (!var_ptr) {
+            return;
+          }
+
+          Var var = GetRef<Var>(var_ptr);
+          if (free_params.count(var) == 0) {
+            return;
+          }
+
+          bool uses_free_param =
+              UsesVar(b, [&](const VarNode* v) { return free_params.count(GetRef<Var>(v)) > 0; });
+          if (uses_free_param) {
+            return;
+          }
+          removable_params.Set(var, b);
+        };
+        check_expr(as_equal->a, as_equal->b);
+        check_expr(as_equal->b, as_equal->a);
+      }
+    }
+
+    // In addition, the arith::SolveLinearEquation can introduce
+    // free parameters with an extent of one.  Filtering them out here
+    // avoids needing to track them through later simplifications.
+    for (const auto [var, range] : free_params) {
+      if (is_one(range->extent)) {
+        removable_params.Set(var, range->min);
+      }
+    }
+
+    return removable_params;
+  };
+  for (auto removable_params = find_removable_params(); removable_params.size() > 0;
+       removable_params = find_removable_params()) {
+    auto update = [&](const PrimExpr& expr) {
+      return local_analyzer.Simplify(Substitute(expr, removable_params));
+    };
+
+    Map<Var, PrimExpr> new_map;
+    for (const auto [loop_var, expr] : loop_var_to_axis_var) {
+      static_cast<void>(expr);  // gcc 7.x bug, https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81767
+      new_map.Set(loop_var, update(expr));
+    }
+    loop_var_to_axis_var = new_map;
+
+    transform_predicate = update(transform_predicate);
+
+    for (const auto [var, expr] : removable_params) {
+      static_cast<void>(expr);  // gcc 7.x bug, https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81767
+      free_params.erase(var);
+    }
+  }
+
+  // Normalization function, applied to both the predicate and the
+  // known value.  Converts from an expression in terms of loop
+  // iterators to an expression in terms of buffer indices.
+  auto normalize_expr = [&](PrimExpr expr) -> PrimExpr {
+    expr = Substitute(expr, current_block.let_bindings_using_loop);
+
+    if (lane_var) {
+      expr = UnwrapVectorExpr(expr, lane_var.value());
+    }
+    expr = Substitute(expr, loop_var_to_axis_var);
+
+    return expr;
+  };
+
+  // Collect the current loop variables, along with an expression for
+  // the loop variables in terms of the buffer axis variables.  This
+  // is used during forward/backward propagation to generate predicate
+  // tracking whether a loop iteration has been reached.
+  std::vector<std::pair<Var, PrimExpr>> loop_var_expressions;
+  for (const auto& entry : current_block.active_loop_iterators) {
+    auto expr_it = loop_var_to_axis_var.find(entry.loop_var);
+    ICHECK(expr_it != loop_var_to_axis_var.end());
+    loop_var_expressions.push_back({entry.loop_var, (*expr_it).second});
+  }
+
+  // The full predicate is composed of the values required to reach
+  // the scope of the BufferStore or builtin::assume(), any bounds
+  // implied by solving for the axis variables, and any additional
+  // statements resulting from unpacking the expression contained in
+  // builtin::assume().
+  PrimExpr scope_predicate = normalize_expr(current_block.scope_predicate);
+  transform_predicate = normalize_expr(transform_predicate);
+
+  known_value_expr = local_analyzer.Simplify(normalize_expr(known_value_expr));
+
+  // Deliberately use an analyzer without scope-based information,
+  // to avoid simplifying `scope_predicate` to True.
+  PrimExpr predicate_expr = local_analyzer.Simplify(transform_predicate && scope_predicate);
+
+  BufferTouch buffer_touch = {buf, predicate_expr, known_value_expr, loop_var_expressions,
+                              touch_type};
+
+  return {buffer_touch, free_params};
+}
+
+BufferTouch ControlFlowGraph::ControlFlowBlock::MakeBufferTouch(ControlFlowGraph* graph,
+                                                                const tir::Buffer& buf,
+                                                                const Array<PrimExpr>& indices,
+                                                                BufferTouch::AccessType touch_type,
+                                                                PrimExpr known_value_expr) const {
+  ICHECK(graph);
+  auto [buffer_touch, free_params] = MakeBufferTouch(buf, graph->GetIndexVariables(buf, indices),
+                                                     indices, touch_type, known_value_expr);
+  for (const auto& pair : free_params) {
+    graph->free_predicate_parameters_.Set(pair.first, pair.second);
+  }
+  return buffer_touch;
+}
+
+ControlFlowGraph::ControlFlowGraph(const tir::Stmt& stmt, size_t max_revisits) {
+  ControlFlowGraphBuilder::Build(this, stmt);
+  ForwardPropagateKnownValues(max_revisits);
+  BackwardPropagateUnusedValues(max_revisits);
+}
+
+std::ostream& operator<<(std::ostream& os, const ControlFlowGraph::ControlFlowEdge& edge) {
+  os << edge.index;
+  if (edge.var_remap.size()) {
+    os << " with remap " << edge.var_remap;
+  }
+  if (edge.post_condition) {
+    os << " with postcondition " << edge.post_condition;
+  }
+
+  return os;
+}
+
+std::ostream& operator<<(std::ostream& os, const ControlFlowGraph::ControlFlowBlock& block) {
+  os << "Predecessors: [";
+  for (size_t i = 0; i < block.predecessors.size(); i++) {
+    if (i) {
+      os << ", ";
+    }
+    os << block.predecessors[i];
+  }
+  os << "]\n";
+
+  os << "Active loop iterators: [";
+  for (size_t i = 0; i < block.active_loop_iterators.size(); i++) {
+    if (i) {
+      os << ", ";
+    }
+    os << block.active_loop_iterators[i].loop_var;
+  }
+  os << "]\n";
+
+  os << "Before block knowns: " << block.known_at_block_start << "\n";
+
+  os << "Before block unused: " << block.unused_at_block_start << "\n";
+
+  for (size_t i = 0; i < block.touch_points.size(); i++) {
+    os << "Touch[" << i << "] = " << block.touch_points[i] << "\n";
+  }
+  os << "After block: " << block.known_at_block_end << "\n";
+
+  os << "After block unused: " << block.unused_at_block_end << "\n";
+
+  os << "Successors: [";
+  for (size_t i = 0; i < block.successors.size(); i++) {
+    if (i) {
+      os << ", ";
+    }
+    os << block.successors[i];
+  }
+  os << "]";
+  return os;
+}
+
+std::ostream& operator<<(std::ostream& os, const ControlFlowGraph& pattern) {
+  os << "Touch pattern contains " << pattern.control_flow_.size() << " control blocks."
+     << (pattern.control_flow_.size() ? "\n" : "");
+  for (size_t i = 0; i < pattern.control_flow_.size(); i++) {
+    os << "\t"
+       << "ControlBlock[" << i << "] = " << pattern.control_flow_[i] << "\n";
+  }
+
+  return os;
+}
+
+bool BufferTouch::IsEquivalentTo(const BufferTouch& other, Analyzer* analyzer) const {
+  // Constraints must apply to the same buffer to be equivalent
+  if (!buffer.same_as(other.buffer) || touch_type != other.touch_type) {
+    return false;
+  }
+
+  ExprDeepEqual deep_equal;
+
+  auto implies = [&](const PrimExpr& a, const PrimExpr& b) -> bool {
+    With<ConstraintContext> context(analyzer, a);
+    return analyzer->CanProve(b);
+  };
+
+  // Predicates must be equivalent expressions, or must both be undefined
+  bool equivalent_predicates =
+      deep_equal(predicate, other.predicate) ||
+      (implies(predicate, other.predicate) && implies(other.predicate, predicate));
+  if (!equivalent_predicates) {
+    return false;
+  }
+
+  // The known value must be equal
+  if (!deep_equal(value, other.value) && !analyzer->CanProveEqual(value, other.value)) {
+    return false;
+  }
+
+  return true;
+}
+
+std::ostream& operator<<(std::ostream& os, const BufferState& state) {
+  for (size_t i = 0; i < state.constraints.size(); i++) {
+    os << "constraints[" << i << "] = " << state.constraints[i]
+       << (i + 1 == state.constraints.size() ? "" : "\n");
+  }
+  return os;
+}
+
+PrimExpr BufferState::SubstituteKnownBufferValues(
+    PrimExpr expr, const Map<tir::Buffer, Array<tir::Var>>& axis_var_lookup,
+    Analyzer* analyzer) const {
+  BufferConstraintApply mutator(axis_var_lookup, constraints, analyzer);
+  return mutator(std::move(expr));
+}
+
+void BufferState::AddCondition(const PrimExpr& condition) {
+  for (auto& constraint : constraints) {
+    constraint.predicate = constraint.predicate && condition;
+  }
+}
+
+void BufferState::Substitute(const Map<Var, PrimExpr>& var_remap, Analyzer* analyzer) {
+  if (var_remap.size()) {
+    for (auto& prior : constraints) {
+      PrimExpr updated = tvm::tir::Substitute(prior.predicate, var_remap);
+      if (!updated.same_as(prior.predicate)) {
+        prior.predicate = SimplifyAsAndOfOrs(updated, analyzer);
+      }
+    }
+  }
+}
+
+void BufferState::Simplify(Analyzer* analyzer) {
+  for (auto& constraint : constraints) {
+    constraint.predicate = SimplifyAsAndOfOrs(constraint.predicate, analyzer);
+  }
+}
+
+void BufferState::Union(const BufferState& b, Analyzer* analyzer) {
+  for (const auto& b_constraint : b.constraints) {
+    bool used = false;
+    for (auto& a_constraint : constraints) {
+      if (a_constraint.buffer.same_as(b_constraint.buffer) &&
+          analyzer->CanProveEqual(a_constraint.value, b_constraint.value)) {
+        a_constraint.predicate =
+            SimplifyAsAndOfOrs(a_constraint.predicate || b_constraint.predicate, analyzer);
+        used = true;
+        break;
+      }
+    }
+    if (!used) {
+      constraints.push_back(b_constraint);
+    }
+  }
+}
+
+void BufferState::Intersection(const BufferState& b, Analyzer* analyzer) {
+  // For a constraint to be in the output, it must be present in both
+  // inputs.
+
+  std::vector<BufferTouch> new_constraints;
+  for (const auto& ai : constraints) {
+    for (const auto& bi : b.constraints) {
+      if (ai.buffer.same_as(bi.buffer)) {
+        PrimExpr predicate = SimplifyAsAndOfOrs(ai.predicate && bi.predicate, analyzer);
+        if (!is_zero(predicate)) {
+          With<ConstraintContext> context(analyzer, predicate);
+          PrimExpr known_value_a = ai.value;
+          PrimExpr known_value_b = bi.value;
+
+          bool is_consistent = analyzer->CanProveEqual(known_value_a, known_value_b);
+          if (is_consistent) {
+            new_constraints.push_back({ai.buffer, predicate, known_value_a});
+          }
+        }
+      }
+    }
+  }
+
+  constraints = std::move(new_constraints);
+}
+
+class BufferRegionCollector : public ExprVisitor {
+ public:
+  struct Region {
+    PrimExpr region_predicate;
+    std::unordered_map<const BufferLoadNode*, Optional<PrimExpr>> known_values;
+  };
+
+  static std::vector<Region> Collect(const Map<Buffer, Array<Var>>& axis_var_lookup,
+                                     const std::vector<BufferTouch>& knowns,
+                                     const std::vector<Optional<PrimExpr>>& exprs,
+                                     Analyzer* analyzer) {
+    BufferRegionCollector collector(axis_var_lookup, knowns, analyzer);
+    for (const auto& expr : exprs) {
+      if (expr) {
+        collector(expr.value());
+      }
+    }
+
+    return collector.regions_;
+  }
+
+ private:
+  using Parent = ExprVisitor;
+
+  BufferRegionCollector(const Map<Buffer, Array<Var>>& axis_var_lookup,
+                        const std::vector<BufferTouch>& knowns, Analyzer* analyzer)
+      : analyzer_(analyzer), axis_var_lookup_(axis_var_lookup), knowns_(knowns) {
+    regions_.push_back(Region{Bool(true), {}});
+  }
+
+  using Parent::VisitExpr_;
+
+  void VisitExpr_(const BufferLoadNode* op) override {
+    // Helper struct for the known values of this BufferLoad
+    struct Known {
+      PrimExpr predicate;
+      Optional<PrimExpr> value;
+    };
+
+    std::vector<Known> new_regions;
+
+    PrimExpr unknown_region = Bool(true);
+
+    for (const BufferTouch& constraint : knowns_) {
+      if (!op->buffer.same_as(constraint.buffer)) {
+        // This is a different buffer, so continue searching.
+        continue;
+      }
+
+      auto axis_vars = axis_var_lookup_.at(op->buffer);
+      PrimExpr touch_predicate =
+          SubstituteParamValues(axis_vars, op->indices, constraint.predicate).value();
+      touch_predicate = SimplifyAsAndOfOrs(touch_predicate, analyzer_);
+
+      if (!is_zero(touch_predicate)) {
+        Optional<PrimExpr> known_value =
+            SubstituteParamValues(axis_vars, op->indices, constraint.value);
+        new_regions.push_back(Known{touch_predicate, known_value});
+
+        unknown_region = unknown_region && !touch_predicate;
+        unknown_region = SimplifyAsAndOfOrs(unknown_region, analyzer_);
+      }
+    }
+
+    if (new_regions.size()) {
+      Analyzer local_analyzer;
+
+      if (!is_zero(unknown_region)) {
+        new_regions.insert(new_regions.begin(), Known{unknown_region, NullOpt});
+      }
+
+      std::vector<Region> updated_regions;
+      for (const auto& prev_region : regions_) {
+        for (const auto& new_region : new_regions) {
+          PrimExpr intersection =
+              SimplifyAsAndOfOrs(prev_region.region_predicate && new_region.predicate, analyzer_);
+
+          if (!is_zero(intersection)) {
+            Region merged{intersection, prev_region.known_values};
+            merged.known_values[op] = new_region.value;
+            updated_regions.push_back(std::move(merged));
+          }
+        }
+      }
+      regions_ = updated_regions;
+    }
+  }
+
+  Analyzer* analyzer_;
+  std::vector<Region> regions_;
+  const Map<Buffer, Array<Var>>& axis_var_lookup_;
+  const std::vector<BufferTouch>& knowns_;
+};
+
+class BufferRegionValueReplacer : public IRMutatorWithAnalyzer {
+ public:
+  static PrimExpr Apply(
+      const std::unordered_map<const BufferLoadNode*, Optional<PrimExpr>>& known_values,
+      PrimExpr expr, Analyzer* analyzer) {
+    BufferRegionValueReplacer mutator(known_values, analyzer);
+    PrimExpr result = mutator(expr);
+    // Simplification must occur after the substitution, as known
+    // values may provide enable simplifications.  Also, cannot track
+    // whether a BufferLoad was
+    result = analyzer->Simplify(result);
+    return result;
+  }
+
+ private:
+  using Parent = IRMutatorWithAnalyzer;
+
+  BufferRegionValueReplacer(
+      const std::unordered_map<const BufferLoadNode*, Optional<PrimExpr>>& known_values,
+      Analyzer* analyzer)
+      : Parent(analyzer), known_values_(known_values) {}
+
+  using Parent::VisitExpr_;
+
+  PrimExpr VisitExpr_(const BufferLoadNode* op) override {
+    auto it = known_values_.find(op);
+    if (it != known_values_.end() && it->second) {
+      return it->second.value();
+    } else {
+      return GetRef<PrimExpr>(op);
+    }
+  }
+
+  const std::unordered_map<const BufferLoadNode*, Optional<PrimExpr>>& known_values_;
+};
+
+void BufferState::ApplyTouches(const Map<Buffer, Array<Var>>& axis_var_lookup,
+                               const std::vector<BufferTouch>& touch_points, Analyzer* analyzer) {
+  std::vector<BufferTouch> new_knowns;
+  Map<Buffer, PrimExpr> keep_prior_known_at;
+
+  for (auto& touch : touch_points) {
+    if (touch.touch_type == BufferTouch::AccessType::Read) {
+      continue;
+    }
+
+    PrimExpr known_value = touch.value;
+
+    PrimExpr predicate = touch.predicate && touch.AfterLoopIteration();
+    auto regions = BufferRegionCollector::Collect(axis_var_lookup, constraints,
+                                                  {predicate, touch.value}, analyzer);
+
+    for (const auto& region : regions) {
+      PrimExpr updated_predicate = BufferRegionValueReplacer::Apply(
+          region.known_values, region.region_predicate && predicate, analyzer);
+
+      updated_predicate = SimplifyAsAndOfOrs(updated_predicate, analyzer);
+      PrimExpr updated_value =
+          BufferRegionValueReplacer::Apply(region.known_values, known_value, analyzer);
+
+      if (!is_zero(updated_predicate)) {
+        if (auto it = keep_prior_known_at.find(touch.buffer); it != keep_prior_known_at.end()) {
+          keep_prior_known_at.Set(touch.buffer, (*it).second && !updated_predicate);
+        } else {
+          keep_prior_known_at.Set(touch.buffer, !updated_predicate);
+        }
+
+        if (!HasBufferLoad(updated_value)) {
+          BufferTouch new_constraint{touch.buffer, updated_predicate, updated_value};
+          new_knowns.push_back(new_constraint);
+        }
+      }
+    }
+  }
+
+  if (keep_prior_known_at.size()) {
+    for (auto& constraint : constraints) {
+      if (auto it = keep_prior_known_at.find(constraint.buffer); it != keep_prior_known_at.end()) {
+        constraint.predicate = SimplifyAsAndOfOrs(constraint.predicate && (*it).second, analyzer);
+      }
+    }
+  }
+
+  if (new_knowns.size()) {
+    std::vector<bool> used(new_knowns.size(), false);
+
+    for (auto& constraint : constraints) {
+      PrimExpr expand_known_at = Bool(false);
+
+      PrimExpr prev_value = constraint.value;
+
+      for (size_t i = 0; i < new_knowns.size(); i++) {
+        if (new_knowns[i].buffer.same_as(constraint.buffer)) {
+          Optional<PrimExpr> overwritten_with = new_knowns[i].value;
+          if (overwritten_with && analyzer->CanProveEqual(prev_value, overwritten_with.value())) {
+            expand_known_at =
+                SimplifyAsAndOfOrs(expand_known_at || new_knowns[i].predicate, analyzer);
+            used[i] = true;
+          }
+        }
+      }
+
+      if (!is_zero(expand_known_at)) {
+        constraint.predicate =
+            SimplifyAsAndOfOrs(constraint.predicate || expand_known_at, analyzer);
+      }
+    }
+
+    for (size_t i = 0; i < new_knowns.size(); i++) {
+      if (!used[i]) {
+        constraints.push_back(new_knowns[i]);
+      }
+    }
+  }
+
+  constraints.erase(
+      std::remove_if(constraints.begin(), constraints.end(),
+                     [&](const auto& constraint) { return is_zero(constraint.predicate); }),
+      constraints.end());
+}
+
+void BufferState::BackpropUnusedIndices(const Map<Buffer, Array<Var>>& axis_var_lookup,
+                                        const std::vector<BufferTouch>& touch_points,
+                                        Analyzer* analyzer) {
+  std::vector<BufferTouch> new_knowns;
+  Map<Buffer, PrimExpr> keep_prior_known_at;
+
+  Map<Buffer, PrimExpr> regions_written;
+  Map<Buffer, PrimExpr> regions_read;
+  for (auto it = touch_points.rbegin(); it != touch_points.rend(); it++) {
+    const auto& touch = *it;
+
+    Map<Buffer, PrimExpr>* to_update{nullptr};
+    if (touch.touch_type == BufferTouch::AccessType::Write) {
+      to_update = &regions_written;
+
+    } else if (touch.touch_type == BufferTouch::AccessType::Read) {
+      to_update = &regions_read;
+    } else {
+      continue;
+    }
+
+    PrimExpr prev = to_update->Get(touch.buffer).value_or(Bool(false));
+    PrimExpr new_predicate = touch.predicate && touch.BeforeLoopIteration();
+    to_update->Set(touch.buffer, prev || new_predicate);
+  }
+
+  auto update_map = [&](auto& map) {
+    Map<Buffer, PrimExpr> new_map;
+    for (auto [buffer, predicate] : map) {
+      new_map.Set(buffer, SimplifyAsAndOfOrs(predicate, analyzer));
+    }
+    map = std::move(new_map);
+  };
+  update_map(regions_written);
+  update_map(regions_read);
+
+  // If buffer is already in used, widen the predicate
+  for (auto& prev_unused : constraints) {
+    if (auto opt_predicate = regions_written.Get(prev_unused.buffer)) {
+      PrimExpr new_predicate = prev_unused.predicate || opt_predicate.value();
+      prev_unused.predicate = SimplifyAsAndOfOrs(new_predicate, analyzer);
+      regions_written.erase(prev_unused.buffer);
+    }
+  }
+
+  // Otherwise, add new "touch" to represent the unused values
+  for (auto [buffer, predicate] : regions_written) {
+    constraints.push_back(
+        BufferTouch{buffer, predicate, tir::Call(buffer->dtype, builtin::undef(), {})});
+  }
+
+  // If buffer is read out, narrow the predicate
+  for (auto& prev_unused : constraints) {
+    if (auto opt_pred = regions_read.Get(prev_unused.buffer)) {
+      PrimExpr predicate = opt_pred.value();
+      prev_unused.predicate = SimplifyAsAndOfOrs(prev_unused.predicate && !predicate, analyzer);
+    }
+  }
+
+  // Clean-up and remove any empty constraints
+  constraints.erase(
+      std::remove_if(constraints.begin(), constraints.end(),
+                     [](const auto& constraint) { return is_zero(constraint.predicate); }),
+      constraints.end());
+}
+
+void BufferState::RemoveFreeParameters(const Map<Var, Range>& free_predicate_parameters,
+                                       Analyzer* analyzer) {
+  for (auto& known : constraints) {
+    known.predicate = NarrowPredicateExpression(known.predicate, free_predicate_parameters);
+    known.predicate = SimplifyAsAndOfOrs(known.predicate, analyzer);
+  }
+}
+
+bool BufferState::IsEquivalentTo(const BufferState& other, Analyzer* analyzer) const {
+  if (constraints.size() != other.constraints.size()) {
+    return false;
+  }
+
+  for (size_t i = 0; i < constraints.size(); i++) {
+    if (!constraints[i].IsEquivalentTo(other.constraints[i], analyzer)) {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+Optional<Array<Var>> ControlFlowGraph::GetIndexVariables(const Buffer& buf) const {
+  if (auto it = axis_var_lookup_.find(buf); it != axis_var_lookup_.end()) {
+    return (*it).second;
+  } else {
+    return NullOpt;
+  }
+}
+
+Array<Var> ControlFlowGraph::GetIndexVariables(const Buffer& buf, const Array<PrimExpr>& indices) {
+  if (auto it = axis_var_lookup_.find(buf); it != axis_var_lookup_.end()) {
+    return (*it).second;
+  }
+
+  Array<Var> vars;
+  for (size_t i = 0; i < indices.size(); i++) {
+    std::stringstream ss;
+    ss << buf->name << "_axis_" << i;
+    vars.push_back(Var(ss.str(), indices[i].dtype().element_of()));
+  }
+
+  axis_var_lookup_.Set(buf, vars);
+  return vars;
+}
+
+void ControlFlowGraph::ForwardPropagateKnownValues(size_t max_revisits) {
+  // Values to visit when searching.  Using a std::set to
+  // preferentially visit nodes near the start of the control flow.
+  std::set<size_t> to_visit;
+
+  // Map from a block's index
+  std::unordered_map<size_t, size_t> visit_count_lookup;
+
+  // Initiatize the locations to search from, propagating values
+  // forward from all locations that have a known value.
+  for (size_t i = 0; i < control_flow_.size(); i++) {
+    bool has_known_value = false;
+    for (const auto& touch : control_flow_[i].touch_points) {
+      if (!HasBufferLoad(touch.value)) {
+        has_known_value = true;
+        break;
+      }
+    }
+
+    if (has_known_value) {
+      to_visit.insert(i);
+    }
+  }
+
+  Analyzer analyzer;
+  analyzer.rewrite_simplify.SetEnabledExtensions(arith::RewriteSimplifier::Extension(
+      arith::RewriteSimplifier::kTransitivelyProveInequalities |
+      arith::RewriteSimplifier::kApplyConstraintsToBooleanBranches));
+
+  analyzer.Bind(iterator_ranges_);
+  analyzer.Bind(free_predicate_parameters_);
+
+  while (to_visit.size()) {
+    size_t visiting = *to_visit.begin();
+    to_visit.erase(visiting);
+
+    size_t num_previous_visits = visit_count_lookup[visiting]++;
+
+    ControlFlowBlock& block = control_flow_[visiting];
+
+    // Step 1: Collect known values provided from each precedessor
+    block.known_at_block_start = [&]() -> BufferState {
+      if (num_previous_visits >= max_revisits) {
+        return BufferState();
+      }
+      ICHECK_LE(block.predecessors.size(), 2) << "Each block should have at most two predecessors";

Review Comment:
   nit: If my above comment ⬆️  was a correct interpretation, it might be worth documenting somewhere if there is a limitation imposed by the assumption that, or if not, why it is complete.



##########
src/tir/analysis/control_flow_graph.cc:
##########
@@ -0,0 +1,1641 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file control_flow_graph.cc
+ * \brief Utility to deduce bound of expression
+ */
+
+#include "control_flow_graph.h"
+
+#include <tvm/runtime/registry.h>
+#include <tvm/tir/analysis.h>
+#include <tvm/tir/builtin.h>
+#include <tvm/tir/expr.h>
+#include <tvm/tir/op.h>
+#include <tvm/tir/stmt_functor.h>
+
+#include <numeric>
+#include <optional>
+#include <queue>
+#include <set>
+#include <sstream>
+#include <unordered_set>
+
+#include "../../arith/conjunctive_normal_form.h"
+#include "../../arith/constraint_extract.h"
+#include "../../arith/ir_mutator_with_analyzer.h"
+#include "../../arith/ir_visitor_with_analyzer.h"
+#include "../../arith/narrow_predicate_expression.h"
+#include "../../arith/unwrap_vector_expr.h"
+
+namespace tvm {
+namespace tir {
+
+using namespace arith;
+
+namespace {
+bool HasBufferLoad(PrimExpr expr) {
+  struct Visitor : public ExprVisitor {
+    void VisitExpr_(const BufferLoadNode* node) override { found_buffer_load = true; }
+    bool found_buffer_load{false};
+  };
+
+  Visitor visitor;
+  visitor(expr);
+  return visitor.found_buffer_load;
+}
+
+Optional<PrimExpr> SubstituteParamValues(const Array<Var>& param_vars,
+                                         const Array<PrimExpr>& param_values,
+                                         const PrimExpr& expr) {
+  ICHECK_EQ(param_vars.size(), param_values.size())
+      << "Expression was defined as having " << param_vars.size() << " parameters, but received "
+      << param_values.size() << " arguments.";
+
+  Map<tir::Var, PrimExpr> var_map;
+  for (size_t i = 0; i < param_values.size(); i++) {
+    var_map.Set(param_vars[i], param_values[i]);
+  }
+
+  return Substitute(expr, var_map);
+}
+}  // namespace
+
+PrimExpr BufferTouch::BeforeLoopIteration() const {
+  PrimExpr loop_predicate = Bool(true);
+  for (auto it = loop_var_expressions.rbegin(); it != loop_var_expressions.rend(); it++) {
+    const Var& loop_var = it->first;
+    const PrimExpr& loop_expr = it->second;
+    loop_predicate = (loop_var <= loop_expr) || ((loop_var == loop_expr) && loop_predicate);
+  }
+  return loop_predicate;
+}
+
+PrimExpr BufferTouch::AtLoopIteration() const {
+  PrimExpr loop_predicate = Bool(true);
+  for (auto it = loop_var_expressions.rbegin(); it != loop_var_expressions.rend(); it++) {
+    const Var& loop_var = it->first;
+    const PrimExpr& loop_expr = it->second;
+    loop_predicate = (loop_var == loop_expr) && loop_predicate;
+  }
+  return loop_predicate;
+}
+
+PrimExpr BufferTouch::AfterLoopIteration() const {
+  PrimExpr loop_predicate = Bool(true);
+  for (auto it = loop_var_expressions.rbegin(); it != loop_var_expressions.rend(); it++) {
+    const Var& loop_var = it->first;
+    const PrimExpr& loop_expr = it->second;
+    loop_predicate = (loop_var >= loop_expr) || ((loop_var == loop_expr) && loop_predicate);
+  }
+  return loop_predicate;
+}
+
+bool BufferTouch::IsSubsetOf(const BufferTouch& other, Analyzer* analyzer) const {
+  if (this->buffer.same_as(other.buffer)) {
+    With<ConstraintContext> constraint(analyzer, predicate);
+
+    return analyzer->CanProve(other.predicate);
+  } else {
+    return false;
+  }
+}
+
+bool BufferTouch::IsDistinctFrom(const BufferTouch& other, Analyzer* analyzer) const {
+  if (this->buffer.same_as(other.buffer)) {
+    With<ConstraintContext> constraint(analyzer, predicate);
+
+    return analyzer->CanProve(!other.predicate);
+  } else {
+    return true;
+  }
+}
+
+std::ostream& operator<<(std::ostream& os, const BufferTouch& tp) {
+  auto touch_type = [&]() {
+    if (tp.touch_type == BufferTouch::AccessType::Read) {
+      return "read";
+    } else if (tp.touch_type == BufferTouch::AccessType::Write) {
+      return "write";
+    } else if (tp.touch_type == BufferTouch::AccessType::Assume) {
+      return "assume";
+    } else {
+      return "???";
+    }
+  }();
+
+  os << "BufferTouch(" << tp.buffer->name << ", " << touch_type << ", " << tp.predicate
+     << ", value = " << tp.value << ")";
+  return os;
+}
+
+class BufferConstraintApply : public IRMutatorWithAnalyzer {
+ public:
+  using Parent = IRMutatorWithAnalyzer;
+
+  BufferConstraintApply(const Map<Buffer, Array<Var>>& axis_var_lookup,
+                        const std::vector<BufferTouch>& knowns, Analyzer* analyzer)
+      : Parent(analyzer), axis_var_lookup_(axis_var_lookup), knowns_(knowns) {}
+
+  using Parent::VisitExpr_;
+
+  PrimExpr VisitExpr_(const BufferLoadNode* op) override {
+    for (const auto& known : knowns_) {
+      if (!op->buffer.same_as(known.buffer)) {
+        continue;
+      }
+
+      Optional<Var> lane_var = NullOpt;
+      IntImm num_lanes;
+
+      Array<PrimExpr> indices = op->indices.Map([&](const auto& index) {
+        if (index.dtype().lanes() == 1) {
+          return index;
+        } else {
+          ICHECK(!lane_var) << "Multiple indices found with non-scalar values";
+          lane_var = Var("lane", index.dtype().element_of());
+          num_lanes = IntImm(index.dtype().element_of(), index.dtype().lanes());
+          return UnwrapVectorExpr(index, lane_var.value());
+        }
+      });
+
+      auto axis_vars = axis_var_lookup_.at(op->buffer);
+      PrimExpr predicate = SubstituteParamValues(axis_vars, indices, known.predicate).value();
+
+      std::optional<With<ConstraintContext>> context;
+      if (lane_var.defined()) {
+        Var lanes = lane_var.value();
+        PrimExpr known = (IntImm(lanes.dtype(), 0) <= lanes) && (lanes < num_lanes);
+        context.emplace(analyzer_, known);
+      }
+
+      if (analyzer_->CanProve(predicate)) {
+        return SubstituteParamValues(axis_vars, op->indices, known.value).value();
+      }
+    }
+
+    return GetRef<PrimExpr>(op);
+  }
+
+ private:
+  const Map<Buffer, Array<Var>>& axis_var_lookup_;
+  const std::vector<BufferTouch>& knowns_;
+};
+
+/*! \brief Extract the control-flow graph
+ *
+ * Walk through a statement, populating the control-flow graph.
+ */
+class ControlFlowGraphBuilder final : public IRVisitorWithAnalyzer {
+ public:
+  static void Build(ControlFlowGraph* out, const Stmt& stmt) {
+    ControlFlowGraphBuilder extractor(out);
+    extractor.AppendControlBlock();
+    extractor(stmt);
+  }
+
+ private:
+  ControlFlowGraphBuilder(ControlFlowGraph* out) : out_(out) {}
+
+  using Parent = IRVisitorWithAnalyzer;
+  using Parent::VisitExpr_;
+  using Parent::VisitStmt_;
+
+  void VisitStmt(const Stmt& stmt) override {
+    // Update the lookup table to determine which control-flow block
+    // contains the start of the specified statement.  This is used
+    // later to determine which set of known values should be used to
+    // simplify a statement.
+    out_->control_flow_lookup_[stmt.get()] = CurrentControlBlock();
+    Stmt prev_stmt = current_stmt_;
+    current_stmt_ = stmt;
+    Parent::VisitStmt(stmt);
+    current_stmt_ = prev_stmt;
+  }
+
+  void VisitStmt_(const EvaluateNode* op) override {
+    if (auto* call = op->value.as<CallNode>()) {
+      if (call->op.same_as(builtin::assume())) {
+        Assume(call->args[0], true);
+        return;
+      }
+    }
+
+    Parent::VisitStmt_(op);
+  }
+
+  void Assume(PrimExpr assumption, bool from_assume_statement) {
+    for (const auto& expr : ExtractConstraints(assumption, false)) {
+      AssumeConstraintComponent(expr, from_assume_statement);
+    }
+  }
+
+  void AssumeConstraintComponent(PrimExpr assumption, bool from_assume_statement) {
+    PrimExpr additional_predicate = Bool(true);
+
+    std::vector<PrimExpr> buffer_exprs;
+    for (const auto& expr : ExtractComponents(assumption)) {
+      auto side_effect = tir::SideEffect(expr);
+      if (side_effect <= tir::CallEffectKind::kPure) {
+        // Pulling out portions of the assumption that do not depend
+        // on a buffer value allows the following two forms to be
+        // treated identically.
+        //
+        // if i < 3: T.assume(buf[i] == value)
+        // T.assume(i>=3 or buf[i] == value)
+        additional_predicate = additional_predicate && logical_not(expr);
+      } else if (side_effect == tir::CallEffectKind::kReadState) {
+        buffer_exprs.push_back(expr);
+      } else {
+        LOG(FATAL) << "Assumption must be pure or read-only";
+      }
+    }
+
+    if (buffer_exprs.empty()) {
+      out_->non_buffer_assumptions_.push_back(!CurrentScopePredicate() || assumption);
+      return;
+    }
+
+    CHECK_EQ(buffer_exprs.size(), 1) << "T.assume must contain only a single buffer expression";
+
+    auto* as_equal_node = buffer_exprs[0].as<tir::EQNode>();
+    CHECK(as_equal_node || !from_assume_statement)
+        << "T.assume buffer constraint must be of the form 'buffer[indices] == "
+           "value', but received "
+        << assumption;
+    if (!as_equal_node) {
+      // This assumption is an inequality a data-dependent
+      // conditional.  Not an error for this to occur, but also not
+      // something that is currently supported.
+      return;
+    }
+
+    tir::BufferLoad load;
+    PrimExpr value;
+    if (auto* as_load = as_equal_node->a.as<tir::BufferLoadNode>()) {
+      load = GetRef<tir::BufferLoad>(as_load);
+      value = as_equal_node->b;
+    } else if (auto* as_load = as_equal_node->b.as<tir::BufferLoadNode>()) {
+      load = GetRef<tir::BufferLoad>(as_load);
+      value = as_equal_node->a;
+    } else if (!from_assume_statement) {
+      return;
+    } else {
+      LOG(FATAL) << "T.assume buffer constraint must be of the form 'buffer[indices] == value'";
+    }
+
+    auto has_side_effect = tir::SideEffect(value) > tir::CallEffectKind::kPure;
+    CHECK(!has_side_effect || !from_assume_statement)
+        << "Buffer value in constraint must be pure expression, but was " << value;
+    if (has_side_effect) {
+      return;
+    }
+
+    {
+      InternalConstraintContext context(this, additional_predicate);
+      VisitAccess(load, BufferTouch::AccessType::Assume, value);
+    }
+    // Appending a control block ensures that all control blocks have
+    // at most one statement that changes the known buffer contents.
+    auto prev_block = CurrentControlBlock();
+    auto new_block = AppendControlBlock();
+    MarkControlFlow(prev_block, new_block);
+  }
+
+  void VisitExpr_(const LetNode* op) override {
+    std::optional<BindLetVar> binding;
+    if (UsesLoopVar(op->value)) {
+      binding.emplace(this, op->var, op->value);
+    }
+    Parent::VisitExpr_(op);
+  }
+
+  void VisitStmt_(const LetStmtNode* op) override {
+    std::optional<BindLetVar> binding;
+    if (UsesLoopVar(op->value)) {
+      binding.emplace(this, op->var, op->value);
+    }
+    Parent::VisitStmt_(op);
+  }
+
+  void VisitExpr_(const BufferLoadNode* op) override {
+    Parent::VisitExpr_(op);
+    BufferLoad load = GetRef<BufferLoad>(op);
+    VisitAccess(load, BufferTouch::AccessType::Read, load);
+  }
+
+  void VisitStmt_(const BufferStoreNode* op) override {
+    Parent::VisitStmt_(op);
+    VisitAccess(GetRef<BufferStore>(op), BufferTouch::AccessType::Write, op->value);
+    // Appending a control block ensures that all control blocks have
+    // at most one statement that changes the buffer contents.
+    auto prev_block = CurrentControlBlock();
+    auto new_block = AppendControlBlock();
+    MarkControlFlow(prev_block, new_block);
+  }
+
+  void VisitStmt_(const ForNode* op) override {
+    out_->iterator_ranges_.Set(op->loop_var, Range::FromMinExtent(op->min, op->extent));
+
+    auto before_loop = CurrentControlBlock();
+    size_t loop_start = -1;
+
+    {
+      BindActiveLoopVar binding(this, op->loop_var, op->min, op->extent);
+      loop_start = AppendControlBlock();
+      Parent::VisitStmt_(op);
+    }
+
+    auto loop_end = CurrentControlBlock();
+    auto after_loop = AppendControlBlock();
+    PrimExpr max_iterator_value = analyzer_.Simplify(op->min + op->extent - 1);
+    {
+      auto [forward, backward] = MarkControlFlow(before_loop, loop_start);
+      backward.post_condition = (op->loop_var == op->min);
+      forward.var_remap = {{op->loop_var, op->min}};
+    }
+    {
+      auto [forward, backward] = MarkControlFlow(loop_end, after_loop);
+      backward.var_remap = {{op->loop_var, max_iterator_value}};
+      forward.post_condition = (op->loop_var == max_iterator_value);
+    }
+    {
+      auto [forward, backward] = MarkControlFlow(loop_end, loop_start);
+      backward.var_remap = {{op->loop_var, op->loop_var - 1}};
+      forward.var_remap = {{op->loop_var, op->loop_var + 1}};
+      backward.post_condition = (op->loop_var > op->min);
+      forward.post_condition = (op->loop_var < max_iterator_value);
+    }
+  }
+
+  void VisitStmt_(const IfThenElseNode* op) override {
+    this->VisitExpr(op->condition);
+
+    PrimExpr real_condition = ExtractRealCondition(op->condition);
+
+    auto before_branching = CurrentControlBlock();
+
+    auto branch_start = AppendControlBlock();
+    MarkControlFlow(before_branching, branch_start);
+
+    {
+      InternalConstraintContext context(this, real_condition);
+      auto then_start = AppendControlBlock();
+      if (context.assume.defined()) {
+        Assume(context.assume.value(), false);
+      }
+      auto [forward, backward] = MarkControlFlow(branch_start, then_start);
+      backward.post_condition = real_condition;
+      forward.post_condition = real_condition;
+      this->VisitStmt(op->then_case);
+    }
+    auto then_end = CurrentControlBlock();
+
+    auto negation = analyzer_.rewrite_simplify(!real_condition);
+    {
+      InternalConstraintContext context(this, negation);
+      auto else_start = AppendControlBlock();
+      if (context.assume.defined()) {
+        Assume(context.assume.value(), false);
+      }
+      auto [forward, backward] = MarkControlFlow(branch_start, else_start);
+      backward.post_condition = negation;
+      forward.post_condition = negation;
+
+      if (op->else_case.defined()) {
+        this->VisitStmt(op->else_case.value());
+      }
+    }
+
+    auto else_end = CurrentControlBlock();
+    auto after_branching = AppendControlBlock();
+
+    if (HasBufferLoad(real_condition)) {
+      // The buffer value may have changed during the body of the
+      // condition, so we can't provide it as a post-condition.
+      MarkControlFlow(then_end, after_branching);
+      MarkControlFlow(else_end, after_branching);
+    } else {
+      {
+        auto [forward, backward] = MarkControlFlow(then_end, after_branching);
+        backward.post_condition = real_condition;
+        forward.post_condition = real_condition;
+      }
+      {
+        auto [forward, backward] = MarkControlFlow(else_end, after_branching);
+        backward.post_condition = negation;
+        forward.post_condition = negation;
+      }
+    }
+  }
+
+  /*! \brief Internal utility, returns true if the expression depends
+   *  on a loop iterator
+   */
+  bool UsesLoopVar(const PrimExpr& expr) {
+    return UsesVar(expr, [&](const VarNode* expr_var) {
+      return loop_dependent_vars_.find(expr_var) != loop_dependent_vars_.end();
+    });
+  }
+
+  /*! \brief Record the interaction with the buffer.
+   *
+   * \param node The TIR node that accesses the buffer.  Should be
+   * either a BufferLoad or BufferStore node.
+   *
+   * \param touch_type The type of buffer access being performed.  A
+   * BufferStore should always use AccessType::Write.  A BufferLoad
+   * may use either AccessType::Read or AccessType::Assume, depending
+   * on whether the BufferLoad occurs within `builtin::assume`.
+   *
+   * \param known_value_expr The value in the buffer following the access.
+   */
+  template <typename BufferAccess>
+  void VisitAccess(const BufferAccess& node, BufferTouch::AccessType touch_type,
+                   PrimExpr known_value_expr) {
+    auto& current_block = out_->control_flow_.back();
+    BufferTouch buffer_touch = current_block.MakeBufferTouch(out_, node->buffer, node->indices,
+                                                             touch_type, known_value_expr);
+    current_block.touch_points.push_back(buffer_touch);
+  }
+
+  /*! \brief Return a predicate for having reached the current
+   *  control-flow block
+   *
+   * For example, while inside an IfThenElse, will return the
+   * IfThenElse's condition.
+   */
+  PrimExpr CurrentScopePredicate() const {
+    PrimExpr predicate = Bool(true);
+    for (const auto& condition : conditions_) {
+      predicate = predicate && condition;
+    }
+    return predicate;
+  }
+
+  /* \brief Add a new control block, returning its index */
+  size_t AppendControlBlock() {
+    size_t index = out_->control_flow_.size();
+    auto& block = out_->control_flow_.emplace_back();
+    block.active_loop_iterators = active_loop_iterators_;
+    block.let_bindings_using_loop = let_bindings_using_loop_;
+    block.scope_predicate = CurrentScopePredicate();
+    return index;
+  }
+
+  /* \brief The index of the current control block */
+  size_t CurrentControlBlock() { return out_->control_flow_.size() - 1; }
+
+  /* \brief Mark a possible control from one block to another
+   *
+   * \param from_block The block from which control leaves
+   *
+   * \param to_block The block to which control enters
+   *
+   * \param var_remap Variable replacements that should be made in
+   * known expression while traversing this edge.  For example,
+   * replacing `i` with `i-1` when entering the next loop iteration,
+   * or replacing `i` with `n-1` when concluding a loop.
+   */
+  std::pair<ControlFlowGraph::ControlFlowEdge&, ControlFlowGraph::ControlFlowEdge&> MarkControlFlow(
+      size_t from_block, size_t to_block) {
+    ICHECK_LE(from_block, out_->control_flow_.size());
+    ICHECK_LE(to_block, out_->control_flow_.size());
+
+    auto& forward = out_->control_flow_[from_block].successors.emplace_back(
+        ControlFlowGraph::ControlFlowEdge{to_block, {}, NullOpt});
+    auto& backward = out_->control_flow_[to_block].predecessors.emplace_back(
+        ControlFlowGraph::ControlFlowEdge{from_block, {}, NullOpt});
+    return {forward, backward};
+  }
+
+  // Internal utility, context manager for entering/leaving a scoped constraint
+  struct InternalConstraintContext {
+    InternalConstraintContext(ControlFlowGraphBuilder* self, PrimExpr constraint)
+        : self(self), analyzer_context(&self->analyzer_, constraint) {
+      old_num_constraints = self->conditions_.size();
+
+      auto side_effect = tir::SideEffect(constraint);
+      if (side_effect <= tir::CallEffectKind::kPure) {
+        self->conditions_.push_back(constraint);
+      } else if (side_effect <= tir::CallEffectKind::kReadState) {
+        assume = constraint;
+      }
+
+      new_num_constraints = self->conditions_.size();
+    }
+    ~InternalConstraintContext() {
+      ICHECK_EQ(self->conditions_.size(), new_num_constraints)
+          << "Internal error: Each condition should only be popped once.";
+      self->conditions_.erase(self->conditions_.begin() + old_num_constraints,
+                              self->conditions_.end());
+    }
+
+    ControlFlowGraphBuilder* self{nullptr};
+    With<ConstraintContext> analyzer_context;
+    size_t old_num_constraints{0};
+    size_t new_num_constraints{0};
+    Optional<PrimExpr> assume{NullOpt};
+
+    // Disable default-generated copy/move assignment and constructors
+    InternalConstraintContext(const InternalConstraintContext&) = delete;
+    InternalConstraintContext& operator=(const InternalConstraintContext&) = delete;
+    InternalConstraintContext(InternalConstraintContext&&) = delete;
+    InternalConstraintContext& operator=(InternalConstraintContext&&) = delete;
+  };
+
+  // Internal utility, context manager for tracking a loop
+  struct BindActiveLoopVar {
+    BindActiveLoopVar(ControlFlowGraphBuilder* self, Var var, PrimExpr loop_min,
+                      PrimExpr loop_extent)
+        : self(self), var(var) {
+      PrimExpr loop_max = loop_min + (loop_extent - 1);
+      auto loop_range = Range::FromMinExtent(loop_min, loop_extent);
+      self->active_loop_iterators_.push_back({var, loop_min, loop_max, loop_range});
+      self->loop_dependent_vars_.insert(var.get());
+    }
+    ~BindActiveLoopVar() { self->active_loop_iterators_.pop_back(); }
+
+    ControlFlowGraphBuilder* self;
+    Var var;
+
+    // Disable default-generated copy/move assignment and constructors
+    BindActiveLoopVar(const BindActiveLoopVar&) = delete;
+    BindActiveLoopVar& operator=(const BindActiveLoopVar&) = delete;
+    BindActiveLoopVar(BindActiveLoopVar&&) = delete;
+    BindActiveLoopVar& operator=(BindActiveLoopVar&&) = delete;
+  };
+
+  // Internal utility, context manager for tracking a variable binding
+  struct BindLetVar {
+    BindLetVar(ControlFlowGraphBuilder* self, Var var, PrimExpr value) : self(self), var(var) {
+      self->let_bindings_using_loop_.Set(var, value);
+      self->loop_dependent_vars_.insert(var.get());
+    }
+    ~BindLetVar() {
+      self->loop_dependent_vars_.erase(var.get());
+      self->let_bindings_using_loop_.erase(var);
+    }
+    ControlFlowGraphBuilder* self;
+    Var var;
+
+    // Disable default-generated copy/move assignment and constructors
+    BindLetVar(const BindLetVar&) = delete;
+    BindLetVar& operator=(const BindLetVar&) = delete;
+    BindLetVar(BindLetVar&&) = delete;
+    BindLetVar& operator=(BindLetVar&&) = delete;
+  };
+
+  struct LoopEntry {
+    Var loop_var;
+    PrimExpr loop_min;
+    PrimExpr loop_max;
+    Range loop_range;
+  };
+
+  // Track in order to know which Vars to write in terms of the buffer
+  // indices and substitute out of the predicate.
+  std::vector<ControlFlowGraph::ControlFlowBlock::LoopEntry> active_loop_iterators_;
+
+  // Track all loop iterators, along with values derived from loop iterators.
+  std::unordered_set<const VarNode*> loop_dependent_vars_;
+
+  // Any let binding that depends, directly or indirectly, on a loop
+  // binding.  When making a predicate in terms of the buffer indices,
+  // these need to be substituted out.
+  // std::unordered_map<const VarNode*, PrimExpr> let_bindings_using_loop_;
+  Map<Var, PrimExpr> let_bindings_using_loop_;
+
+  // Track in order to know what conditions limit the buffer access
+  std::vector<PrimExpr> conditions_;
+
+  // Track in order to know what statement initiated the buffer access
+  Stmt current_stmt_;
+
+  // Output data structure
+  ControlFlowGraph* out_;
+};
+
+std::pair<BufferTouch, Map<Var, Range>> ControlFlowGraph::ControlFlowBlock::MakeBufferTouch(
+    const tir::Buffer& buf, Array<Var> index_variables, Array<PrimExpr> indices,
+    BufferTouch::AccessType touch_type, PrimExpr known_value_expr) const {
+  const auto& current_block = *this;
+
+  Analyzer local_analyzer;
+
+  Optional<Var> lane_var = NullOpt;
+  IntImm num_lanes;
+
+  Array<PrimExpr> index_expressions = indices.Map([&](const auto& index) {
+    if (index.dtype().lanes() == 1) {
+      return index;
+    } else {
+      ICHECK(!lane_var) << "Multiple indices found with non-scalar values";
+      lane_var = Var("lane", index.dtype().element_of());
+      num_lanes = IntImm(index.dtype().element_of(), index.dtype().lanes());
+      return UnwrapVectorExpr(index, lane_var.value());
+    }
+  });
+
+  Array<Var> loop_vars;
+
+  Map<Var, Range> loop_ranges;
+  for (const auto& loop_entry : current_block.active_loop_iterators) {
+    loop_vars.push_back(loop_entry.loop_var);
+    loop_ranges.Set(loop_entry.loop_var, loop_entry.loop_range);
+  }
+
+  // If the indices contain multiple lanes, treat the lane variable
+  // as an additional loop iterator to be solved for and substituted
+  // out.
+  if (lane_var) {
+    loop_vars.push_back(lane_var.value());
+    loop_ranges.Set(lane_var.value(), Range::FromMinExtent(0, num_lanes));
+  }
+
+  IntConstraintsTransform transform = [&]() {
+    ICHECK_EQ(index_variables.size(), index_expressions.size());
+
+    Array<PrimExpr> relations;
+
+    for (size_t i = 0; i < index_expressions.size(); i++) {
+      PrimExpr expr = index_expressions[i];
+      Var var = index_variables[i];
+
+      expr = Substitute(expr, current_block.let_bindings_using_loop);
+      relations.push_back(var == expr);
+    }
+
+    IntConstraints system(loop_vars, loop_ranges, relations);
+    return arith::SolveLinearEquations(system);
+  }();
+
+  Map<Var, PrimExpr> loop_var_to_axis_var = transform->src_to_dst;
+  Map<Var, Range> free_params = transform->dst->ranges;
+  PrimExpr transform_predicate =
+      std::accumulate(transform->dst->relations.begin(), transform->dst->relations.end(),
+                      PrimExpr(Bool(true)), [](PrimExpr a, PrimExpr b) { return a && b; });
+
+  transform_predicate = SimplifyAsAndOfOrs(transform_predicate, &local_analyzer);
+
+  auto find_removable_params = [&]() -> Map<Var, PrimExpr> {
+    Map<Var, PrimExpr> removable_params;
+
+    // The arith::SolveLinearEquations is more general than the
+    // utilities in iter_affine_map.h, but can introduce free
+    // parameters that could later be determined with the known
+    // constraints.  This step removes all such free parameters.
+    for (const auto& expr : ExtractConstraints(transform_predicate)) {
+      if (auto* as_equal = expr.as<EQNode>()) {
+        auto check_expr = [&](const PrimExpr& a, const PrimExpr& b) {
+          auto* var_ptr = a.as<VarNode>();
+          if (!var_ptr) {
+            return;
+          }
+
+          Var var = GetRef<Var>(var_ptr);
+          if (free_params.count(var) == 0) {
+            return;
+          }
+
+          bool uses_free_param =
+              UsesVar(b, [&](const VarNode* v) { return free_params.count(GetRef<Var>(v)) > 0; });
+          if (uses_free_param) {
+            return;
+          }
+          removable_params.Set(var, b);
+        };
+        check_expr(as_equal->a, as_equal->b);
+        check_expr(as_equal->b, as_equal->a);
+      }
+    }
+
+    // In addition, the arith::SolveLinearEquation can introduce
+    // free parameters with an extent of one.  Filtering them out here
+    // avoids needing to track them through later simplifications.
+    for (const auto [var, range] : free_params) {
+      if (is_one(range->extent)) {
+        removable_params.Set(var, range->min);
+      }
+    }
+
+    return removable_params;
+  };
+  for (auto removable_params = find_removable_params(); removable_params.size() > 0;
+       removable_params = find_removable_params()) {
+    auto update = [&](const PrimExpr& expr) {
+      return local_analyzer.Simplify(Substitute(expr, removable_params));
+    };
+
+    Map<Var, PrimExpr> new_map;
+    for (const auto [loop_var, expr] : loop_var_to_axis_var) {
+      static_cast<void>(expr);  // gcc 7.x bug, https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81767
+      new_map.Set(loop_var, update(expr));
+    }
+    loop_var_to_axis_var = new_map;
+
+    transform_predicate = update(transform_predicate);
+
+    for (const auto [var, expr] : removable_params) {
+      static_cast<void>(expr);  // gcc 7.x bug, https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81767
+      free_params.erase(var);
+    }
+  }
+
+  // Normalization function, applied to both the predicate and the
+  // known value.  Converts from an expression in terms of loop
+  // iterators to an expression in terms of buffer indices.
+  auto normalize_expr = [&](PrimExpr expr) -> PrimExpr {
+    expr = Substitute(expr, current_block.let_bindings_using_loop);
+
+    if (lane_var) {
+      expr = UnwrapVectorExpr(expr, lane_var.value());
+    }
+    expr = Substitute(expr, loop_var_to_axis_var);
+
+    return expr;
+  };
+
+  // Collect the current loop variables, along with an expression for
+  // the loop variables in terms of the buffer axis variables.  This
+  // is used during forward/backward propagation to generate predicate
+  // tracking whether a loop iteration has been reached.
+  std::vector<std::pair<Var, PrimExpr>> loop_var_expressions;
+  for (const auto& entry : current_block.active_loop_iterators) {
+    auto expr_it = loop_var_to_axis_var.find(entry.loop_var);
+    ICHECK(expr_it != loop_var_to_axis_var.end());
+    loop_var_expressions.push_back({entry.loop_var, (*expr_it).second});
+  }
+
+  // The full predicate is composed of the values required to reach
+  // the scope of the BufferStore or builtin::assume(), any bounds
+  // implied by solving for the axis variables, and any additional
+  // statements resulting from unpacking the expression contained in
+  // builtin::assume().
+  PrimExpr scope_predicate = normalize_expr(current_block.scope_predicate);
+  transform_predicate = normalize_expr(transform_predicate);
+
+  known_value_expr = local_analyzer.Simplify(normalize_expr(known_value_expr));
+
+  // Deliberately use an analyzer without scope-based information,
+  // to avoid simplifying `scope_predicate` to True.
+  PrimExpr predicate_expr = local_analyzer.Simplify(transform_predicate && scope_predicate);
+
+  BufferTouch buffer_touch = {buf, predicate_expr, known_value_expr, loop_var_expressions,
+                              touch_type};
+
+  return {buffer_touch, free_params};
+}
+
+BufferTouch ControlFlowGraph::ControlFlowBlock::MakeBufferTouch(ControlFlowGraph* graph,
+                                                                const tir::Buffer& buf,
+                                                                const Array<PrimExpr>& indices,
+                                                                BufferTouch::AccessType touch_type,
+                                                                PrimExpr known_value_expr) const {
+  ICHECK(graph);
+  auto [buffer_touch, free_params] = MakeBufferTouch(buf, graph->GetIndexVariables(buf, indices),
+                                                     indices, touch_type, known_value_expr);
+  for (const auto& pair : free_params) {
+    graph->free_predicate_parameters_.Set(pair.first, pair.second);
+  }
+  return buffer_touch;
+}
+
+ControlFlowGraph::ControlFlowGraph(const tir::Stmt& stmt, size_t max_revisits) {
+  ControlFlowGraphBuilder::Build(this, stmt);
+  ForwardPropagateKnownValues(max_revisits);
+  BackwardPropagateUnusedValues(max_revisits);
+}
+
+std::ostream& operator<<(std::ostream& os, const ControlFlowGraph::ControlFlowEdge& edge) {
+  os << edge.index;
+  if (edge.var_remap.size()) {
+    os << " with remap " << edge.var_remap;
+  }
+  if (edge.post_condition) {
+    os << " with postcondition " << edge.post_condition;
+  }
+
+  return os;
+}
+
+std::ostream& operator<<(std::ostream& os, const ControlFlowGraph::ControlFlowBlock& block) {
+  os << "Predecessors: [";
+  for (size_t i = 0; i < block.predecessors.size(); i++) {
+    if (i) {
+      os << ", ";
+    }
+    os << block.predecessors[i];
+  }
+  os << "]\n";
+
+  os << "Active loop iterators: [";
+  for (size_t i = 0; i < block.active_loop_iterators.size(); i++) {
+    if (i) {
+      os << ", ";
+    }
+    os << block.active_loop_iterators[i].loop_var;
+  }
+  os << "]\n";
+
+  os << "Before block knowns: " << block.known_at_block_start << "\n";
+
+  os << "Before block unused: " << block.unused_at_block_start << "\n";
+
+  for (size_t i = 0; i < block.touch_points.size(); i++) {
+    os << "Touch[" << i << "] = " << block.touch_points[i] << "\n";
+  }
+  os << "After block: " << block.known_at_block_end << "\n";
+
+  os << "After block unused: " << block.unused_at_block_end << "\n";
+
+  os << "Successors: [";
+  for (size_t i = 0; i < block.successors.size(); i++) {
+    if (i) {
+      os << ", ";
+    }
+    os << block.successors[i];
+  }
+  os << "]";
+  return os;
+}
+
+std::ostream& operator<<(std::ostream& os, const ControlFlowGraph& pattern) {
+  os << "Touch pattern contains " << pattern.control_flow_.size() << " control blocks."
+     << (pattern.control_flow_.size() ? "\n" : "");
+  for (size_t i = 0; i < pattern.control_flow_.size(); i++) {
+    os << "\t"
+       << "ControlBlock[" << i << "] = " << pattern.control_flow_[i] << "\n";
+  }
+
+  return os;
+}
+
+bool BufferTouch::IsEquivalentTo(const BufferTouch& other, Analyzer* analyzer) const {
+  // Constraints must apply to the same buffer to be equivalent
+  if (!buffer.same_as(other.buffer) || touch_type != other.touch_type) {
+    return false;
+  }
+
+  ExprDeepEqual deep_equal;
+
+  auto implies = [&](const PrimExpr& a, const PrimExpr& b) -> bool {
+    With<ConstraintContext> context(analyzer, a);
+    return analyzer->CanProve(b);
+  };
+
+  // Predicates must be equivalent expressions, or must both be undefined
+  bool equivalent_predicates =
+      deep_equal(predicate, other.predicate) ||
+      (implies(predicate, other.predicate) && implies(other.predicate, predicate));
+  if (!equivalent_predicates) {
+    return false;
+  }
+
+  // The known value must be equal
+  if (!deep_equal(value, other.value) && !analyzer->CanProveEqual(value, other.value)) {
+    return false;
+  }
+
+  return true;
+}
+
+std::ostream& operator<<(std::ostream& os, const BufferState& state) {
+  for (size_t i = 0; i < state.constraints.size(); i++) {
+    os << "constraints[" << i << "] = " << state.constraints[i]
+       << (i + 1 == state.constraints.size() ? "" : "\n");
+  }
+  return os;
+}
+
+PrimExpr BufferState::SubstituteKnownBufferValues(
+    PrimExpr expr, const Map<tir::Buffer, Array<tir::Var>>& axis_var_lookup,
+    Analyzer* analyzer) const {
+  BufferConstraintApply mutator(axis_var_lookup, constraints, analyzer);
+  return mutator(std::move(expr));
+}
+
+void BufferState::AddCondition(const PrimExpr& condition) {
+  for (auto& constraint : constraints) {
+    constraint.predicate = constraint.predicate && condition;
+  }
+}
+
+void BufferState::Substitute(const Map<Var, PrimExpr>& var_remap, Analyzer* analyzer) {
+  if (var_remap.size()) {
+    for (auto& prior : constraints) {
+      PrimExpr updated = tvm::tir::Substitute(prior.predicate, var_remap);
+      if (!updated.same_as(prior.predicate)) {
+        prior.predicate = SimplifyAsAndOfOrs(updated, analyzer);
+      }
+    }
+  }
+}
+
+void BufferState::Simplify(Analyzer* analyzer) {
+  for (auto& constraint : constraints) {
+    constraint.predicate = SimplifyAsAndOfOrs(constraint.predicate, analyzer);
+  }
+}
+
+void BufferState::Union(const BufferState& b, Analyzer* analyzer) {
+  for (const auto& b_constraint : b.constraints) {
+    bool used = false;
+    for (auto& a_constraint : constraints) {
+      if (a_constraint.buffer.same_as(b_constraint.buffer) &&
+          analyzer->CanProveEqual(a_constraint.value, b_constraint.value)) {
+        a_constraint.predicate =
+            SimplifyAsAndOfOrs(a_constraint.predicate || b_constraint.predicate, analyzer);
+        used = true;
+        break;
+      }
+    }
+    if (!used) {
+      constraints.push_back(b_constraint);
+    }
+  }
+}
+
+void BufferState::Intersection(const BufferState& b, Analyzer* analyzer) {
+  // For a constraint to be in the output, it must be present in both
+  // inputs.
+
+  std::vector<BufferTouch> new_constraints;
+  for (const auto& ai : constraints) {
+    for (const auto& bi : b.constraints) {
+      if (ai.buffer.same_as(bi.buffer)) {
+        PrimExpr predicate = SimplifyAsAndOfOrs(ai.predicate && bi.predicate, analyzer);
+        if (!is_zero(predicate)) {
+          With<ConstraintContext> context(analyzer, predicate);
+          PrimExpr known_value_a = ai.value;
+          PrimExpr known_value_b = bi.value;
+
+          bool is_consistent = analyzer->CanProveEqual(known_value_a, known_value_b);
+          if (is_consistent) {
+            new_constraints.push_back({ai.buffer, predicate, known_value_a});
+          }
+        }
+      }
+    }
+  }
+
+  constraints = std::move(new_constraints);
+}
+
+class BufferRegionCollector : public ExprVisitor {
+ public:
+  struct Region {
+    PrimExpr region_predicate;
+    std::unordered_map<const BufferLoadNode*, Optional<PrimExpr>> known_values;
+  };
+
+  static std::vector<Region> Collect(const Map<Buffer, Array<Var>>& axis_var_lookup,
+                                     const std::vector<BufferTouch>& knowns,
+                                     const std::vector<Optional<PrimExpr>>& exprs,
+                                     Analyzer* analyzer) {
+    BufferRegionCollector collector(axis_var_lookup, knowns, analyzer);
+    for (const auto& expr : exprs) {
+      if (expr) {
+        collector(expr.value());
+      }
+    }
+
+    return collector.regions_;
+  }
+
+ private:
+  using Parent = ExprVisitor;
+
+  BufferRegionCollector(const Map<Buffer, Array<Var>>& axis_var_lookup,
+                        const std::vector<BufferTouch>& knowns, Analyzer* analyzer)
+      : analyzer_(analyzer), axis_var_lookup_(axis_var_lookup), knowns_(knowns) {
+    regions_.push_back(Region{Bool(true), {}});
+  }
+
+  using Parent::VisitExpr_;
+
+  void VisitExpr_(const BufferLoadNode* op) override {
+    // Helper struct for the known values of this BufferLoad
+    struct Known {
+      PrimExpr predicate;
+      Optional<PrimExpr> value;
+    };
+
+    std::vector<Known> new_regions;
+
+    PrimExpr unknown_region = Bool(true);
+
+    for (const BufferTouch& constraint : knowns_) {
+      if (!op->buffer.same_as(constraint.buffer)) {
+        // This is a different buffer, so continue searching.
+        continue;
+      }
+
+      auto axis_vars = axis_var_lookup_.at(op->buffer);
+      PrimExpr touch_predicate =
+          SubstituteParamValues(axis_vars, op->indices, constraint.predicate).value();
+      touch_predicate = SimplifyAsAndOfOrs(touch_predicate, analyzer_);
+
+      if (!is_zero(touch_predicate)) {
+        Optional<PrimExpr> known_value =
+            SubstituteParamValues(axis_vars, op->indices, constraint.value);
+        new_regions.push_back(Known{touch_predicate, known_value});
+
+        unknown_region = unknown_region && !touch_predicate;
+        unknown_region = SimplifyAsAndOfOrs(unknown_region, analyzer_);
+      }
+    }
+
+    if (new_regions.size()) {
+      Analyzer local_analyzer;
+
+      if (!is_zero(unknown_region)) {
+        new_regions.insert(new_regions.begin(), Known{unknown_region, NullOpt});
+      }
+
+      std::vector<Region> updated_regions;
+      for (const auto& prev_region : regions_) {
+        for (const auto& new_region : new_regions) {
+          PrimExpr intersection =
+              SimplifyAsAndOfOrs(prev_region.region_predicate && new_region.predicate, analyzer_);
+
+          if (!is_zero(intersection)) {
+            Region merged{intersection, prev_region.known_values};
+            merged.known_values[op] = new_region.value;
+            updated_regions.push_back(std::move(merged));
+          }
+        }
+      }
+      regions_ = updated_regions;
+    }
+  }
+
+  Analyzer* analyzer_;
+  std::vector<Region> regions_;
+  const Map<Buffer, Array<Var>>& axis_var_lookup_;
+  const std::vector<BufferTouch>& knowns_;
+};
+
+class BufferRegionValueReplacer : public IRMutatorWithAnalyzer {
+ public:
+  static PrimExpr Apply(
+      const std::unordered_map<const BufferLoadNode*, Optional<PrimExpr>>& known_values,
+      PrimExpr expr, Analyzer* analyzer) {
+    BufferRegionValueReplacer mutator(known_values, analyzer);
+    PrimExpr result = mutator(expr);
+    // Simplification must occur after the substitution, as known
+    // values may provide enable simplifications.  Also, cannot track
+    // whether a BufferLoad was
+    result = analyzer->Simplify(result);
+    return result;
+  }
+
+ private:
+  using Parent = IRMutatorWithAnalyzer;
+
+  BufferRegionValueReplacer(
+      const std::unordered_map<const BufferLoadNode*, Optional<PrimExpr>>& known_values,
+      Analyzer* analyzer)
+      : Parent(analyzer), known_values_(known_values) {}
+
+  using Parent::VisitExpr_;
+
+  PrimExpr VisitExpr_(const BufferLoadNode* op) override {
+    auto it = known_values_.find(op);
+    if (it != known_values_.end() && it->second) {
+      return it->second.value();
+    } else {
+      return GetRef<PrimExpr>(op);
+    }
+  }
+
+  const std::unordered_map<const BufferLoadNode*, Optional<PrimExpr>>& known_values_;
+};
+
+void BufferState::ApplyTouches(const Map<Buffer, Array<Var>>& axis_var_lookup,
+                               const std::vector<BufferTouch>& touch_points, Analyzer* analyzer) {
+  std::vector<BufferTouch> new_knowns;
+  Map<Buffer, PrimExpr> keep_prior_known_at;
+
+  for (auto& touch : touch_points) {
+    if (touch.touch_type == BufferTouch::AccessType::Read) {
+      continue;
+    }
+
+    PrimExpr known_value = touch.value;
+
+    PrimExpr predicate = touch.predicate && touch.AfterLoopIteration();
+    auto regions = BufferRegionCollector::Collect(axis_var_lookup, constraints,
+                                                  {predicate, touch.value}, analyzer);
+
+    for (const auto& region : regions) {
+      PrimExpr updated_predicate = BufferRegionValueReplacer::Apply(
+          region.known_values, region.region_predicate && predicate, analyzer);
+
+      updated_predicate = SimplifyAsAndOfOrs(updated_predicate, analyzer);
+      PrimExpr updated_value =
+          BufferRegionValueReplacer::Apply(region.known_values, known_value, analyzer);
+
+      if (!is_zero(updated_predicate)) {
+        if (auto it = keep_prior_known_at.find(touch.buffer); it != keep_prior_known_at.end()) {
+          keep_prior_known_at.Set(touch.buffer, (*it).second && !updated_predicate);
+        } else {
+          keep_prior_known_at.Set(touch.buffer, !updated_predicate);
+        }
+
+        if (!HasBufferLoad(updated_value)) {
+          BufferTouch new_constraint{touch.buffer, updated_predicate, updated_value};
+          new_knowns.push_back(new_constraint);
+        }
+      }
+    }
+  }
+
+  if (keep_prior_known_at.size()) {
+    for (auto& constraint : constraints) {
+      if (auto it = keep_prior_known_at.find(constraint.buffer); it != keep_prior_known_at.end()) {
+        constraint.predicate = SimplifyAsAndOfOrs(constraint.predicate && (*it).second, analyzer);
+      }
+    }
+  }
+
+  if (new_knowns.size()) {
+    std::vector<bool> used(new_knowns.size(), false);
+
+    for (auto& constraint : constraints) {
+      PrimExpr expand_known_at = Bool(false);
+
+      PrimExpr prev_value = constraint.value;
+
+      for (size_t i = 0; i < new_knowns.size(); i++) {
+        if (new_knowns[i].buffer.same_as(constraint.buffer)) {
+          Optional<PrimExpr> overwritten_with = new_knowns[i].value;
+          if (overwritten_with && analyzer->CanProveEqual(prev_value, overwritten_with.value())) {
+            expand_known_at =
+                SimplifyAsAndOfOrs(expand_known_at || new_knowns[i].predicate, analyzer);
+            used[i] = true;
+          }
+        }
+      }
+
+      if (!is_zero(expand_known_at)) {
+        constraint.predicate =
+            SimplifyAsAndOfOrs(constraint.predicate || expand_known_at, analyzer);
+      }
+    }
+
+    for (size_t i = 0; i < new_knowns.size(); i++) {
+      if (!used[i]) {
+        constraints.push_back(new_knowns[i]);
+      }
+    }
+  }
+
+  constraints.erase(
+      std::remove_if(constraints.begin(), constraints.end(),
+                     [&](const auto& constraint) { return is_zero(constraint.predicate); }),
+      constraints.end());
+}
+
+void BufferState::BackpropUnusedIndices(const Map<Buffer, Array<Var>>& axis_var_lookup,
+                                        const std::vector<BufferTouch>& touch_points,
+                                        Analyzer* analyzer) {
+  std::vector<BufferTouch> new_knowns;
+  Map<Buffer, PrimExpr> keep_prior_known_at;
+
+  Map<Buffer, PrimExpr> regions_written;
+  Map<Buffer, PrimExpr> regions_read;
+  for (auto it = touch_points.rbegin(); it != touch_points.rend(); it++) {
+    const auto& touch = *it;
+
+    Map<Buffer, PrimExpr>* to_update{nullptr};
+    if (touch.touch_type == BufferTouch::AccessType::Write) {
+      to_update = &regions_written;
+
+    } else if (touch.touch_type == BufferTouch::AccessType::Read) {
+      to_update = &regions_read;
+    } else {
+      continue;
+    }
+
+    PrimExpr prev = to_update->Get(touch.buffer).value_or(Bool(false));
+    PrimExpr new_predicate = touch.predicate && touch.BeforeLoopIteration();
+    to_update->Set(touch.buffer, prev || new_predicate);
+  }
+
+  auto update_map = [&](auto& map) {
+    Map<Buffer, PrimExpr> new_map;
+    for (auto [buffer, predicate] : map) {
+      new_map.Set(buffer, SimplifyAsAndOfOrs(predicate, analyzer));
+    }
+    map = std::move(new_map);
+  };
+  update_map(regions_written);
+  update_map(regions_read);
+
+  // If buffer is already in used, widen the predicate
+  for (auto& prev_unused : constraints) {
+    if (auto opt_predicate = regions_written.Get(prev_unused.buffer)) {
+      PrimExpr new_predicate = prev_unused.predicate || opt_predicate.value();
+      prev_unused.predicate = SimplifyAsAndOfOrs(new_predicate, analyzer);
+      regions_written.erase(prev_unused.buffer);
+    }
+  }
+
+  // Otherwise, add new "touch" to represent the unused values
+  for (auto [buffer, predicate] : regions_written) {
+    constraints.push_back(
+        BufferTouch{buffer, predicate, tir::Call(buffer->dtype, builtin::undef(), {})});
+  }
+
+  // If buffer is read out, narrow the predicate
+  for (auto& prev_unused : constraints) {
+    if (auto opt_pred = regions_read.Get(prev_unused.buffer)) {
+      PrimExpr predicate = opt_pred.value();
+      prev_unused.predicate = SimplifyAsAndOfOrs(prev_unused.predicate && !predicate, analyzer);
+    }
+  }
+
+  // Clean-up and remove any empty constraints
+  constraints.erase(
+      std::remove_if(constraints.begin(), constraints.end(),
+                     [](const auto& constraint) { return is_zero(constraint.predicate); }),
+      constraints.end());
+}
+
+void BufferState::RemoveFreeParameters(const Map<Var, Range>& free_predicate_parameters,
+                                       Analyzer* analyzer) {
+  for (auto& known : constraints) {
+    known.predicate = NarrowPredicateExpression(known.predicate, free_predicate_parameters);
+    known.predicate = SimplifyAsAndOfOrs(known.predicate, analyzer);
+  }
+}
+
+bool BufferState::IsEquivalentTo(const BufferState& other, Analyzer* analyzer) const {
+  if (constraints.size() != other.constraints.size()) {
+    return false;
+  }
+
+  for (size_t i = 0; i < constraints.size(); i++) {
+    if (!constraints[i].IsEquivalentTo(other.constraints[i], analyzer)) {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+Optional<Array<Var>> ControlFlowGraph::GetIndexVariables(const Buffer& buf) const {
+  if (auto it = axis_var_lookup_.find(buf); it != axis_var_lookup_.end()) {
+    return (*it).second;
+  } else {
+    return NullOpt;
+  }
+}
+
+Array<Var> ControlFlowGraph::GetIndexVariables(const Buffer& buf, const Array<PrimExpr>& indices) {
+  if (auto it = axis_var_lookup_.find(buf); it != axis_var_lookup_.end()) {
+    return (*it).second;
+  }
+
+  Array<Var> vars;
+  for (size_t i = 0; i < indices.size(); i++) {
+    std::stringstream ss;
+    ss << buf->name << "_axis_" << i;
+    vars.push_back(Var(ss.str(), indices[i].dtype().element_of()));
+  }
+
+  axis_var_lookup_.Set(buf, vars);
+  return vars;
+}
+
+void ControlFlowGraph::ForwardPropagateKnownValues(size_t max_revisits) {
+  // Values to visit when searching.  Using a std::set to
+  // preferentially visit nodes near the start of the control flow.
+  std::set<size_t> to_visit;
+
+  // Map from a block's index
+  std::unordered_map<size_t, size_t> visit_count_lookup;
+
+  // Initiatize the locations to search from, propagating values
+  // forward from all locations that have a known value.
+  for (size_t i = 0; i < control_flow_.size(); i++) {
+    bool has_known_value = false;
+    for (const auto& touch : control_flow_[i].touch_points) {
+      if (!HasBufferLoad(touch.value)) {
+        has_known_value = true;
+        break;
+      }
+    }
+
+    if (has_known_value) {
+      to_visit.insert(i);
+    }
+  }
+
+  Analyzer analyzer;
+  analyzer.rewrite_simplify.SetEnabledExtensions(arith::RewriteSimplifier::Extension(
+      arith::RewriteSimplifier::kTransitivelyProveInequalities |
+      arith::RewriteSimplifier::kApplyConstraintsToBooleanBranches));
+
+  analyzer.Bind(iterator_ranges_);
+  analyzer.Bind(free_predicate_parameters_);
+
+  while (to_visit.size()) {
+    size_t visiting = *to_visit.begin();
+    to_visit.erase(visiting);
+
+    size_t num_previous_visits = visit_count_lookup[visiting]++;
+
+    ControlFlowBlock& block = control_flow_[visiting];
+
+    // Step 1: Collect known values provided from each precedessor

Review Comment:
   nit: predecessor 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@tvm.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org