You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tvm.apache.org by jr...@apache.org on 2021/12/15 06:39:04 UTC

[tvm] branch main updated: [Relay] Support large constants saved/loaded outside of VM executable (#9734)

This is an automated email from the ASF dual-hosted git repository.

jroesch pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git


The following commit(s) were added to refs/heads/main by this push:
     new e7b5d2d  [Relay] Support large constants saved/loaded outside of VM executable (#9734)
e7b5d2d is described below

commit e7b5d2dde89acaad523a081047f7f26e957faefe
Author: Mark Shields <87...@users.noreply.github.com>
AuthorDate: Tue Dec 14 22:38:41 2021 -0800

    [Relay] Support large constants saved/loaded outside of VM executable (#9734)
    
    * [Relay] Support large constants.
    
    This allows constant tensors at or above a given byte limit to be marked as
    'late bound' and saved/reloaded to a file independently of the overall
    executable. Since the executable is often embedded in the data segment of
    generated runtime Modules this avoids problems with external tools which can't
    handle multi-gigabyte data segments.
    
    [ACE-466 in OctoML JIRA]
    
    * [checkpoint] fix latent bytecode/code bug
---
 include/tvm/runtime/vm/executable.h |  79 ++++++++++++---
 include/tvm/runtime/vm/vm.h         |   4 +-
 python/tvm/runtime/vm.py            |  14 ++-
 src/runtime/vm/executable.cc        | 187 ++++++++++++++++++++++++++++++------
 src/runtime/vm/profiler/vm.cc       |   6 +-
 src/runtime/vm/profiler/vm.h        |   2 +-
 src/runtime/vm/vm.cc                |  14 ++-
 tests/python/relay/test_vm.py       |  34 +++++++
 8 files changed, 284 insertions(+), 56 deletions(-)

diff --git a/include/tvm/runtime/vm/executable.h b/include/tvm/runtime/vm/executable.h
index f07db36..6359da0 100644
--- a/include/tvm/runtime/vm/executable.h
+++ b/include/tvm/runtime/vm/executable.h
@@ -68,12 +68,20 @@ class Executable : public ModuleNode {
 
   /*!
    * \brief Write the Executable to the binary stream in serialized form.
+   *
+   * Late-bound constants (if any) must have already been saved by \p
+   * MoveLateBoundConstantsToStream (or \p MoveLateBoundConstantsToFile).
+   *
    * \param stream The binary stream to save the executable to.
    */
   void SaveToBinary(dmlc::Stream* stream) final;
 
   /*!
-   * \brief Write the Executable to the provided path as a file contianing its serialized content.
+   * \brief Write the Executable to the provided path as a file containing its serialized content.
+   *
+   * Late-bound constants (if any) must have already been saved by \p
+   * MoveLateBoundConstantsToStream (or \p MoveLateBoundConstantsToFile).
+   *
    * \param path The path to write the serialized data to.
    * \param format The format of the serialized blob.
    */
@@ -81,7 +89,10 @@ class Executable : public ModuleNode {
 
   /*!
    * \brief Serialize the executable into global section, constant section, and
-   * code section.
+   * code section. This object must outlive the returned byte array.
+   *
+   * Late-bound constants (if any) must have already been saved by \p
+   * MoveLateBoundConstantsToStream (or \p MoveLateBoundConstantsToFile).
    *
    * \return The binary representation of the VM.
    */
@@ -90,6 +101,8 @@ class Executable : public ModuleNode {
   /*!
    * \brief Load the saved VM executable.
    *
+   * Late-bound constants (if any) must then be loaded by \p LoadLateBoundConstantsFromStream.
+   *
    * \param code The bytecode in string.
    * \param lib The compiled runtime library.
    *
@@ -98,6 +111,35 @@ class Executable : public ModuleNode {
   static runtime::Module Load(const std::string& code, const runtime::Module lib);
 
   /*!
+   * \brief Returns the late-bound constants for the executable (if any) as a byte-stream.
+   * Leaves the executable's late-bound constants map empty. Only constants whose tensor
+   * byte size is greater than or equal to \p byte_limit are marked as late-bound. \p byte_limit
+   * may be zero.
+   *
+   * Must be called before \p SaveToBinary and friends if late-bound constants are
+   * desired. Otherwise can be ignored.
+   */
+  void MoveLateBoundConstantsToStream(dmlc::Stream* stream, size_t byte_limit);
+
+  /*!
+   * \brief As for \p MoveLateBoundConstantsToStream, but save to file at \p path.
+   */
+  void MoveLateBoundConstantsToFile(const std::string& path, size_t byte_limit);
+
+  /*!
+   * \brief Restores the late-bound constants for the executable (if any) from given byte-stream.
+   *
+   * Must be called after \p Load but before any other methods if \p MoveLateBoundConstantsToStream
+   * (or \p MoveLateBoundConstantsToFile) was used when saving. Otherwise can be ignored.
+   */
+  void LoadLateBoundConstantsFromStream(dmlc::Stream* stream);
+
+  /*!
+   * \brief As for \p LoadLateBoundConstantsFromStream, but load from file at \p path.
+   */
+  void LoadLateBoundConstantsFromFile(const std::string& path);
+
+  /*!
    * \brief Get the serialized form of the `functions`. This is
    * essentially bytecode serialization.
    *
@@ -125,7 +167,7 @@ class Executable : public ModuleNode {
    * example, `DLDataType` will be unpacked into three fields (code, bits, lanes).
    *   4. The rest of the line indicates the field with variable length, e.g.,
    * the shape of a tensor, the args used by an `InvokPacked` instruction, etc.
-
+   *
    * The field starting from # is only used for debugging. The serialized code
    * doesn't contain it, therefore the deserializer doens't need to handle it.
    */
@@ -205,8 +247,19 @@ class Executable : public ModuleNode {
    * shape-related data and code.
    */
   int host_device_index = -1;
-  /*! \brief The global constant pool. */
+  /*!
+   * \brief The global constant array.
+   *
+   * LoadConst instruction indexes are w.r.t. this vector. A late-bound constant's entry is left
+   * undefined (the vector is not shrunk) once the late-bound constants have been moved out.
+   */
   std::vector<ObjectRef> constants;
+  /*!
+   * \brief For each constant index the name of the late-bound constant, or null if constant is
+   * immediate. Only populated after loading executable but before loading late-bound constants.
+   */
+  std::vector<String> late_bound_constant_names;
+
   /*! \brief A map from globals (as strings) to their index in the Relay function map. */
   std::unordered_map<std::string, Index> global_map;
   /*! \brief A mapping from the packed function's global name (as string) to the index that
@@ -238,9 +291,16 @@ class Executable : public ModuleNode {
   /*!
    * \brief Save the constant pool.
    *
-   * \param strm The output stream.
+   * \param stream The output stream.
+   */
+  void SaveConstantSection(dmlc::Stream* stream);
+
+  /*!
+   * \brief Load the constant pool.
+   *
+   * \param stream The input stream.
    */
-  void SaveConstantSection(dmlc::Stream* strm);
+  void LoadConstantSection(dmlc::Stream* stream);
 
   /*!
    * \brief Save primitive op names.
@@ -271,13 +331,6 @@ class Executable : public ModuleNode {
   void LoadGlobalSection(dmlc::Stream* strm);
 
   /*!
-   * \brief Load the constant pool.
-   *
-   * \param strm The input stream.
-   */
-  void LoadConstantSection(dmlc::Stream* strm);
-
-  /*!
    * \brief Load primitive op names.
    *
    * \param strm The input stream.
diff --git a/include/tvm/runtime/vm/vm.h b/include/tvm/runtime/vm/vm.h
index 604c973..67c21a1 100644
--- a/include/tvm/runtime/vm/vm.h
+++ b/include/tvm/runtime/vm/vm.h
@@ -174,7 +174,7 @@ class VirtualMachine : public runtime::ModuleNode {
    * \brief load the executable for the virtual machine.
    * \param exec The executable.
    */
-  virtual void LoadExecutable(const Executable* exec);
+  virtual void LoadExecutable(Executable* exec);
 
  protected:
   /*! \brief Push a call frame on to the call stack. */
@@ -300,7 +300,7 @@ class VirtualMachine : public runtime::ModuleNode {
   /*! \brief The special return register. */
   ObjectRef return_register_;
   /*! \brief The executable the VM will operate on. */
-  const Executable* exec_;
+  Executable* exec_;
   /*! \brief The function name to inputs mapping. */
   std::unordered_map<std::string, std::vector<ObjectRef>> inputs_;
   /*!
diff --git a/python/tvm/runtime/vm.py b/python/tvm/runtime/vm.py
index 65609ff..d9cab84 100644
--- a/python/tvm/runtime/vm.py
+++ b/python/tvm/runtime/vm.py
@@ -77,6 +77,8 @@ class Executable(object):
         self._get_stats = self.mod["get_stats"]
         self._get_function_arity = self.mod["get_function_arity"]
         self._get_function_param_name = self.mod["get_function_param_name"]
+        self._move_late_bound_consts = self.mod["move_late_bound_consts"]
+        self._load_late_bound_consts = self.mod["load_late_bound_consts"]
 
     def save(self):
         """Save the Relay VM Executable.
@@ -162,11 +164,11 @@ class Executable(object):
             An executable constructed using the provided artifacts.
         """
         if isinstance(bytecode, (bytes, str)):
-            code = bytearray(bytecode)
+            bytecode = bytearray(bytecode)
         elif not isinstance(bytecode, (bytearray, TVMByteArray)):
             raise TypeError(
                 "bytecode is expected to be the type of bytearray "
-                + "or TVMByteArray, but received {}".format(type(code))
+                + "or TVMByteArray, but received {}".format(type(bytecode))
             )
 
         if lib is not None and not isinstance(lib, tvm.runtime.Module):
@@ -298,6 +300,14 @@ class Executable(object):
         self._function_params[func_name] = params
         return params
 
+    def move_late_bound_consts(self, path, byte_limit):
+        """Move all constants of byte size greater or equal to byte_limit to file at path"""
+        return self._move_late_bound_consts(path, byte_limit)
+
+    def load_late_bound_consts(self, path):
+        """Re-load constants previously saved to file at path"""
+        return self._load_late_bound_consts(path, bytes)
+
 
 class VirtualMachine(object):
     """Relay VM runtime.
diff --git a/src/runtime/vm/executable.cc b/src/runtime/vm/executable.cc
index 44971c0..76c385a 100644
--- a/src/runtime/vm/executable.cc
+++ b/src/runtime/vm/executable.cc
@@ -88,6 +88,19 @@ PackedFunc Executable::GetFunction(const std::string& name, const ObjectPtr<Obje
       vm->LoadExecutable(this);
       *rv = Module(vm);
     });
+  } else if (name == "move_late_bound_consts") {
+    return PackedFunc([this](TVMArgs args, TVMRetValue* rv) {
+      CHECK_EQ(args.size(), 2);
+      std::string path = args[0];
+      uint64_t byte_limit = args[1];
+      MoveLateBoundConstantsToFile(path, static_cast<size_t>(byte_limit));
+    });
+  } else if (name == "load_late_bound_consts") {
+    return PackedFunc([this](TVMArgs args, TVMRetValue* rv) {
+      CHECK_EQ(args.size(), 1);
+      std::string path = args[0];
+      LoadLateBoundConstantsFromFile(path);
+    });
   } else {
     LOG(FATAL) << "Unknown packed function: " << name;
     return PackedFunc(nullptr);
@@ -306,6 +319,68 @@ void Executable::SaveVirtualDevicesSection(dmlc::Stream* strm) {
   strm->Write(host_device_index);
 }
 
+// Splits the constant table into immediate and late-bound entries.
+//
+// Every constant whose data size is at least `byte_limit` bytes is given the name
+// "const_<index>", moved into a name -> tensor map, and that map is written to `stream` with
+// runtime::SaveParams. `late_bound_constant_names` receives one entry per constant: the name
+// for late-bound constants, a null String for immediate ones.
+void Executable::MoveLateBoundConstantsToStream(dmlc::Stream* stream, size_t byte_limit) {
+  // The names table must still be empty on entry; this is checked, not repaired.
+  ICHECK(late_bound_constant_names.empty());
+  late_bound_constant_names.reserve(constants.size());
+  Map<String, NDArray> map;
+  size_t total_late_bound_bytes = 0;
+  for (size_t const_index = 0; const_index < constants.size(); ++const_index) {
+    const auto ndarray = Downcast<NDArray>(constants[const_index]);
+    ICHECK(ndarray.defined()) << "Undefined constant at index " << const_index;
+    size_t num_bytes = runtime::GetDataSize(*ndarray.operator->());
+    if (num_bytes < byte_limit) {
+      // Leave as immediate.
+      late_bound_constant_names.emplace_back(nullptr);
+      continue;
+    }
+    total_late_bound_bytes += num_bytes;
+    // Late-bound names are derived from the constant's index in the table.
+    std::ostringstream os;
+    os << "const_" << const_index;
+    String name = os.str();
+    // The table entry is moved from here; SaveConstantSection later checks that late-bound
+    // entries are undefined, so the slot is expected to be left null by this move.
+    map.Set(name, Downcast<NDArray>(std::move(constants[const_index])));
+    late_bound_constant_names.emplace_back(std::move(name));
+  }
+  VLOG(1) << "moved " << map.size() << " constants of " << total_late_bound_bytes
+          << " bytes (out of " << constants.size() << " overall) to be late-bound";
+  // The map is written even when it is empty.
+  runtime::SaveParams(stream, map);
+}
+
+// File-based variant of MoveLateBoundConstantsToStream: the late-bound constants are first
+// serialized into an in-memory buffer, which is then written to `path`.
+void Executable::MoveLateBoundConstantsToFile(const std::string& path, size_t byte_limit) {
+  std::string serialized;
+  dmlc::MemoryStringStream buffer_stream(&serialized);
+  this->MoveLateBoundConstantsToStream(&buffer_stream, byte_limit);
+  SaveBinaryToFile(path, serialized);
+}
+
+// Binds every late-bound slot of `constants` to the tensor of the same name read from `stream`.
+//
+// `stream` must hold a name -> tensor map in the format read by runtime::LoadParams. Every
+// late-bound name must be present in that map and no map entry may be left unused; both
+// conditions are enforced with ICHECK. On success `late_bound_constant_names` is cleared.
+//
+// If the executable has no late-bound constants the call is a no-op and `stream` is not read.
+void Executable::LoadLateBoundConstantsFromStream(dmlc::Stream* stream) {
+  if (late_bound_constant_names.empty()) {
+    // LoadConstantSection clears the names table when no constant was tagged late-bound, so
+    // there is nothing to bind. Return here rather than tripping the size check below, which
+    // would otherwise abort for any executable that has constants but none late-bound.
+    VLOG(1) << "Found no late-bound constants to load";
+    return;
+  }
+  ICHECK_EQ(late_bound_constant_names.size(), constants.size());
+  Map<String, NDArray> map = runtime::LoadParams(stream);
+  VLOG(1) << "loaded " << map.size() << " late-bound constants";
+  for (size_t const_index = 0; const_index < constants.size(); ++const_index) {
+    if (!late_bound_constant_names[const_index].defined()) {
+      // Immediate constant: it must already be present in the table.
+      ICHECK(constants[const_index].defined())
+          << "Undefined immediate constant at index " << const_index;
+      continue;
+    }
+    const String& name = late_bound_constant_names[const_index];
+    // Late-bound constant: the slot must still be empty and the name must be bound in the map.
+    ICHECK(!constants[const_index].defined()) << "Unexpected constant at index " << const_index;
+    auto itr = map.find(name);
+    ICHECK(itr != map.end()) << "No binding for late-bound constant at index " << const_index
+                             << " with name '" << name << "'";
+    constants[const_index] = (*itr).second;
+    // Erase consumed entries so leftovers can be detected after the loop.
+    map.erase(name);
+  }
+  late_bound_constant_names.clear();
+  ICHECK(map.empty()) << "Have " << map.size() << " unused late-bound constants";
+}
+
+// File-based variant of LoadLateBoundConstantsFromStream: the whole file at `path` is read into
+// memory and then parsed through an in-memory stream.
+void Executable::LoadLateBoundConstantsFromFile(const std::string& path) {
+  std::string file_contents;
+  LoadBinaryFromFile(path, &file_contents);
+  dmlc::MemoryStringStream reader(&file_contents);
+  this->LoadLateBoundConstantsFromStream(&reader);
+}
+
 void Executable::SaveGlobalSection(dmlc::Stream* strm) {
   std::vector<std::pair<std::string, Index>> globals(this->global_map.begin(),
                                                      this->global_map.end());
@@ -321,19 +396,88 @@ void Executable::SaveGlobalSection(dmlc::Stream* strm) {
   strm->Write(glbs);
 }
 
-void Executable::SaveConstantSection(dmlc::Stream* strm) {
-  std::vector<DLTensor*> arrays;
-  for (const auto& obj : this->constants) {
-    const auto cell = Downcast<runtime::NDArray>(obj);
-    arrays.push_back(const_cast<DLTensor*>(cell.operator->()));
-  }
-  strm->Write(static_cast<uint64_t>(this->constants.size()));
-  for (const auto& it : arrays) {
-    runtime::SaveDLTensor(strm, it);
+namespace {
+// Per-constant tags written ahead of each entry in the serialized constants table.
+// An immediate constant is followed by its tensor data.
+constexpr uint32_t kImmediateConstTag{0};
+// A late-bound constant is followed only by its name.
+constexpr uint32_t kLateBoundConstTag{1};
+}  // namespace
+
+void Executable::SaveConstantSection(dmlc::Stream* stream) {
+  // Save the overall number of constants.
+  stream->Write(static_cast<uint64_t>(constants.size()));
+
+  for (size_t const_index = 0; const_index < constants.size(); ++const_index) {
+    if (late_bound_constant_names.empty() || !late_bound_constant_names[const_index].defined()) {
+      // Tag immediate constants by 0.
+      stream->Write(kImmediateConstTag);
+      // Write as DLTensor.
+      const auto ndarray = Downcast<runtime::NDArray>(constants[const_index]);
+      ICHECK(ndarray.defined());
+      runtime::SaveDLTensor(stream, ndarray.operator->());
+      VLOG(1) << "save " << const_index << " as immediate";
+    } else {
+      // Tag late-bound constants by 1.
+      const String& name = late_bound_constant_names[const_index];
+      ICHECK(!constants[const_index].defined());
+      stream->Write(kLateBoundConstTag);
+      // Write a string.
+      stream->Write(std::string(name));
+      VLOG(1) << "save " << const_index << " as late-bound";
+    }
   }
 
+  VLOG(1) << "saved " << constants.size() << " constants";
+
   // Save the const to device index mapping.
-  strm->Write(this->const_device_indexes);
+  stream->Write(const_device_indexes);
+}
+
+// Reads the constants table from `stream`, in the layout written by SaveConstantSection:
+//   1. a uint64 count of constants;
+//   2. for each constant a uint32 tag followed by either the tensor (immediate) or the
+//      constant's name (late-bound);
+//   3. the constant -> device index vector, which must have one entry per constant.
+// Each read is wrapped in STREAM_CHECK, which is given a label naming the field being read.
+void Executable::LoadConstantSection(dmlc::Stream* stream) {
+  uint64_t sz;
+  // Load the overall number of constants.
+  STREAM_CHECK(stream->Read(&sz, sizeof(sz)), "constants table size");
+  size_t size = static_cast<size_t>(sz);
+
+  VLOG(1) << "loading " << size << " constants";
+
+  // Both tables are sized up front so they can be filled by index below.
+  constants.resize(size);
+  late_bound_constant_names.resize(size);
+  bool any_late_bound = false;
+
+  // Load each of the constants.
+  for (size_t const_index = 0; const_index < size; const_index++) {
+    uint32_t tag;
+    STREAM_CHECK(stream->Read(&tag, sizeof(tag)), "constant tag");
+    if (tag == kImmediateConstTag) {
+      // Immediate constants tagged by 0.
+      VLOG(1) << "load " << const_index << " as immediate";
+      runtime::NDArray ndarray;
+      STREAM_CHECK(ndarray.Load(stream), "constant tensor");
+      constants[const_index] = std::move(ndarray);
+      // A null name marks this index as immediate.
+      late_bound_constant_names[const_index] = String(ObjectPtr<StringObj>(nullptr));
+    } else if (tag == kLateBoundConstTag) {
+      // Late-bound constants tagged by 1.
+      VLOG(1) << "load " << const_index << " as late-bound";
+      std::string name;
+      STREAM_CHECK(stream->Read(&name), "late-bound constant name");
+      // The slot stays undefined until the late-bound constants are loaded separately.
+      constants[const_index] = NDArray(nullptr);
+      late_bound_constant_names[const_index] = std::move(name);
+      any_late_bound = true;
+    } else {
+      // Any other tag value is rejected.
+      STREAM_CHECK(false, "constant tag");
+    }
+  }
+
+  // With no late-bound constants the names table is emptied; VirtualMachine::LoadExecutable
+  // requires it to be empty before a VM can be created.
+  if (!any_late_bound) {
+    late_bound_constant_names.clear();
+  }
+
+  // Load the const to device index mapping.
+  std::vector<Index> indexes;
+  indexes.reserve(size);
+  STREAM_CHECK(stream->Read(&indexes), "constant devices");
+  ICHECK_EQ(size, indexes.size());
+  const_device_indexes = std::move(indexes);
 }
 
 void Executable::SavePrimitiveOpNames(dmlc::Stream* strm) {
@@ -597,7 +741,7 @@ runtime::Module Executable::Load(const std::string& code, const runtime::Module
   auto exec = make_object<Executable>();
 
   // Support null-initialization of lib, to enable initialization during
-  // deserialization before we have we have deserialized the imports.
+  // deserialization before we have deserialized the imports.
   if (lib.defined()) {
     exec->SetLib(lib);
   }
@@ -640,27 +784,6 @@ void Executable::LoadGlobalSection(dmlc::Stream* strm) {
   }
 }
 
-void Executable::LoadConstantSection(dmlc::Stream* strm) {
-  uint64_t sz;
-  // Load the number of constants.
-  STREAM_CHECK(strm->Read(&sz, sizeof(sz)), "constant");
-
-  size_t size = static_cast<size_t>(sz);
-  // Load each of the constants.
-  for (size_t i = 0; i < size; i++) {
-    runtime::NDArray constant;
-    STREAM_CHECK(constant.Load(strm), "constant");
-    this->constants.emplace_back(std::move(constant));
-  }
-
-  // Load the const to device index mapping.
-  std::vector<Index> const_device_indexes;
-  const_device_indexes.reserve(size);
-  STREAM_CHECK(strm->Read(&const_device_indexes), "constant");
-  ICHECK_EQ(size, const_device_indexes.size());
-  this->const_device_indexes = std::move(const_device_indexes);
-}
-
 void Executable::LoadPrimitiveOpNames(dmlc::Stream* strm) {
   std::vector<std::string> primitive_names;
   STREAM_CHECK(strm->Read(&primitive_names), "primitive name");
diff --git a/src/runtime/vm/profiler/vm.cc b/src/runtime/vm/profiler/vm.cc
index fe27595..67344df 100644
--- a/src/runtime/vm/profiler/vm.cc
+++ b/src/runtime/vm/profiler/vm.cc
@@ -90,7 +90,7 @@ PackedFunc VirtualMachineDebug::GetFunction(const std::string& name,
   }
 }
 
-void VirtualMachineDebug::LoadExecutable(const Executable* exec) {
+void VirtualMachineDebug::LoadExecutable(Executable* exec) {
   VirtualMachine::LoadExecutable(exec);
   ICHECK(exec_);
   for (auto kv : exec_->primitive_map) {
@@ -202,7 +202,7 @@ void VirtualMachineDebug::InvokePacked(Index packed_index, const PackedFunc& fun
   }
 }
 
-runtime::Module CreateVirtualMachineDebug(const Executable* exec) {
+runtime::Module CreateVirtualMachineDebug(Executable* exec) {
   auto vm = make_object<VirtualMachineDebug>();
   vm->LoadExecutable(exec);
   return runtime::Module(vm);
@@ -210,7 +210,7 @@ runtime::Module CreateVirtualMachineDebug(const Executable* exec) {
 
 TVM_REGISTER_GLOBAL("runtime._VirtualMachineDebug").set_body([](TVMArgs args, TVMRetValue* rv) {
   runtime::Module mod = args[0];
-  const auto* exec = dynamic_cast<Executable*>(mod.operator->());
+  auto* exec = dynamic_cast<Executable*>(mod.operator->());
   ICHECK(exec) << "Virtual machine has not been defined yet."
                << "\n";
   *rv = CreateVirtualMachineDebug(exec);
diff --git a/src/runtime/vm/profiler/vm.h b/src/runtime/vm/profiler/vm.h
index 4325fa8..4a09b51 100644
--- a/src/runtime/vm/profiler/vm.h
+++ b/src/runtime/vm/profiler/vm.h
@@ -44,7 +44,7 @@ class VirtualMachineDebug : public VirtualMachine {
 
   PackedFunc GetFunction(const std::string& name, const ObjectPtr<Object>& sptr_to_self) final;
 
-  void LoadExecutable(const Executable* exec) final;
+  void LoadExecutable(Executable* exec) final;
 
   ~VirtualMachineDebug() {}
 
diff --git a/src/runtime/vm/vm.cc b/src/runtime/vm/vm.cc
index e057022..acbbec0 100644
--- a/src/runtime/vm/vm.cc
+++ b/src/runtime/vm/vm.cc
@@ -219,6 +219,12 @@ PackedFunc VirtualMachine::GetFunction(const std::string& name,
   } else if (name == "set_input") {
     return PackedFunc(
         [sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { SetInput(args[0], args, 1); });
+  } else if (name == "load_late_bound_consts") {
+    return PackedFunc([this](TVMArgs args, TVMRetValue* rv) {
+      CHECK_EQ(args.size(), 1);
+      std::string path = args[0];
+      exec_->LoadLateBoundConstantsFromFile(path);
+    });
   } else {
     LOG(FATAL) << "Unknown packed function: " << name;
     return PackedFunc([sptr_to_self, name](TVMArgs args, TVMRetValue* rv) {});
@@ -365,8 +371,10 @@ void VirtualMachine::InvokePacked(Index packed_index, const PackedFunc& func, In
   }
 }
 
-void VirtualMachine::LoadExecutable(const Executable* exec) {
+void VirtualMachine::LoadExecutable(Executable* exec) {
   ICHECK(exec) << "The executable is not created yet.";
+  ICHECK(exec->late_bound_constant_names.empty())
+      << "Need to load late-bound-constants before creating VM";
   exec_ = exec;
 
   runtime::Module lib = exec_->GetLib();
@@ -753,7 +761,7 @@ void VirtualMachine::RunLoop() {
   }
 }
 
-runtime::Module CreateVirtualMachine(const Executable* exec) {
+runtime::Module CreateVirtualMachine(Executable* exec) {
   auto vm = make_object<VirtualMachine>();
   vm->LoadExecutable(exec);
   return runtime::Module(vm);
@@ -761,7 +769,7 @@ runtime::Module CreateVirtualMachine(const Executable* exec) {
 
 TVM_REGISTER_GLOBAL("runtime._VirtualMachine").set_body([](TVMArgs args, TVMRetValue* rv) {
   runtime::Module mod = args[0];
-  const auto* exec = dynamic_cast<Executable*>(mod.operator->());
+  auto* exec = dynamic_cast<Executable*>(mod.operator->());
   ICHECK(exec) << "The virtual machine executable has not been defined yet.";
   *rv = CreateVirtualMachine(exec);
 });
diff --git a/tests/python/relay/test_vm.py b/tests/python/relay/test_vm.py
index ea1f4dd..1c60702 100644
--- a/tests/python/relay/test_vm.py
+++ b/tests/python/relay/test_vm.py
@@ -1112,6 +1112,40 @@ def test_multi_targets():
     tvm.testing.assert_allclose(actual_result.numpy(), expected_result)
 
 
+def test_large_constants():
+    """Large constants can be serialized outside of executable"""
+    target = tvm.target.Target("llvm")
+    dev = tvm.cpu()
+
+    # fn(x) { add(x, <large constant>) }
+    x = relay.var("x", shape=(1000, 1000))
+    const_data = np.random.rand(1000, 1000).astype("float32")
+    const = relay.const(const_data, dtype="float32")
+    func = relay.Function([x], relay.op.add(x, const))
+    mod = tvm.IRModule.from_expr(func)
+
+    # Compile to executable.
+    vm_exec = vm.compile(mod, target=target)
+
+    # Save to constants and library files
+    temp = utils.tempdir()
+    path_consts = temp.relpath("consts")
+    vm_exec.move_late_bound_consts(path_consts, byte_limit=256)
+    path_dso = temp.relpath("lib.so")
+    vm_exec.mod.export_library(path_dso)
+
+    # Load library files and constants
+    mod = runtime.load_module(path_dso)
+    mod["load_late_bound_consts"](path_consts)
+
+    # Test main
+    x_data = np.random.rand(1000, 1000).astype("float32")
+    the_vm = runtime.vm.VirtualMachine(mod, dev)
+    actual = the_vm.invoke("main", x_data)
+    expected = x_data + const_data
+    tvm.testing.assert_allclose(expected, actual.numpy())
+
+
 if __name__ == "__main__":
     import sys