You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tvm.apache.org by GitBox <gi...@apache.org> on 2022/09/08 19:06:48 UTC
[GitHub] [tvm] tkonolige commented on a diff in pull request #11358: [Virtual Machine] Implementation of 'set_output_zero_copy'

tkonolige commented on code in PR #11358:
URL: https://github.com/apache/tvm/pull/11358#discussion_r966291114


##########
include/tvm/runtime/vm/vm.h:
##########
@@ -339,6 +371,51 @@ class TVM_DLL VirtualMachine : public runtime::ModuleNode {
   void SetInputTensorWithIndex(std::vector<ObjectRef>& tensors,  // NOLINT(*)
                                const TVMArgValue& tensor, int index, Device dev);
 
+  /*!
+   * \brief Convert tensor from TVMArgValue to ObjectRef.
+   * DLTensor and NDArray types are supported.
+   * \param tensor given arg value containing tensor.
+   * \return tensor in ObjectRef format
+   */
+  ObjectRef TensorFromTVMArgValueToObjectRef(const TVMArgValue& tensor) const;
+
+  /*!
+   * \brief Get index of outputs in register_file from func code
+   * \return result register index
+   */
+  Index GetResultRegisterIndex() const;
+
+  /*!
+   * \brief Calculate the index of operation which destination is result
+   * \param res_index is the index of op returning result
+   */
+  void CalculatePreResultOpIndex(Index res_index);
+
+  /*!
+   * \brief Collect indices from register_file for output tensors.
+   * It helps to replace output tensors allocated in RunLoop by
+   * tensors pre-allocated outside. Scenario is when `set_output` is used
+   * \param func_name The function's name.
+   */
+  void CollectOutputTensorRegIndices(const std::string& func_name);

Review Comment:
   I'd prefer this to return the tensor indices instead of setting some internal state. There are now a lot of stateful functions, and it is unclear how they all interact.



##########
src/runtime/vm/vm.cc:
##########
@@ -810,21 +906,90 @@ void VirtualMachine::RunLoop() {
         WriteRegister(instr.dst, dst_data);
         OpStopHook();
         pc_++;
-        goto main_loop;
+        break;
       }
       case Opcode::KillRegister: {
         OpStartHook(instr);
         WriteRegister(instr.dst, ObjectRef());
         OpStopHook();
         pc_++;
-        goto main_loop;
+        break;
       }
       default:
         LOG(FATAL) << "Unknown instruction opcode: " << int(instr.op);
     }
   }
 }
 
+void VirtualMachine::WriteAllocatedTensor(const Instruction& instr) {
+  auto shape = std::vector<int64_t>(instr.alloc_tensor.ndim);
+
+  for (uint32_t i = 0; i < instr.alloc_tensor.ndim; ++i) {
+    shape[i] = instr.alloc_tensor.shape[i];
+  }
+
+  auto storage_obj = ReadRegister(instr.alloc_tensor.storage);
+  auto offset = LoadScalarInt(instr.alloc_tensor.offset);
+  auto storage = Downcast<Storage>(storage_obj);
+  auto obj = storage->AllocNDArray(offset, shape, instr.alloc_tensor.dtype);
+  VLOG(2) << "allocated "
+          << RuntimeObject2String(obj, GetDevice(exec_->host_device_index),
+                                  /*show_contents=*/false);
+
+  WriteRegister(instr.dst, obj);
+}
+
+void VirtualMachine::WriteAllocatedTensorFromOutside(const Instruction& instr) {
+  // External tensor(s) has been already written to the register (instr.dst)
+  auto ex_arr = Downcast<NDArray>(ReadRegister(instr.dst));
+  auto ex_shape = ex_arr.Shape();
+  auto ex_size = ex_shape.size();
+  auto ex_dtype = ex_arr->dtype;
+
+  auto in_size = instr.alloc_tensor.ndim;
+  auto in_dtype = instr.alloc_tensor.dtype;
+  ICHECK_EQ(TypeEqual(in_dtype, ex_dtype), true)
+      << "Data types mismatching for internal and external output tensors";
+
+  bool size_check = false;
+  if (ex_size != in_size) {
+    size_check = true;
+  } else {
+    for (size_t i = 0; i < in_size; ++i) {
+      if (ex_shape[i] != instr.alloc_tensor.shape[i]) {
+        size_check = true;
+        break;
+      }
+    }
+  }
+
+  if (size_check) {
+    // Match element number
+    size_t in_el_num = 1, ex_el_num = 1;
+    for (size_t i = 0; i < ex_size; ++i) {
+      ex_el_num *= ex_shape[i];
+    }
+    for (size_t i = 0; i < in_size; ++i) {
+      in_el_num *= instr.alloc_tensor.shape[i];
+    }
+    ICHECK_EQ(in_el_num, ex_el_num)
+        << "Element number mismatching of internal and external output tensors";
+    if (code_[preresult_op_index_].op == Opcode::ReshapeTensor) {
+      int64_t* dims = instr.alloc_tensor.shape;
+      std::vector<int64_t> ref_shape(dims, dims + int64_t(in_size));
+      auto reshaped_tensor = ex_arr.CreateView(ref_shape, ex_dtype);
+      WriteRegister(instr.dst, reshaped_tensor);
+    } else {
+      LOG_ERROR << "Internal and external output tensor shapes are mismatched";

Review Comment:
   ```suggestion
         LOG(FATAL) << "Internal and external output tensor shapes are mismatched";
   ```



##########
src/runtime/vm/vm.cc:
##########
@@ -518,13 +584,54 @@ int64_t VirtualMachine::LoadScalarInt(Index r) const {
   return result;
 }
 
-void VirtualMachine::RunLoop() {
+Index VirtualMachine::GetResultRegisterIndex() const {
+  Index op_index = 0;
+  while (code_[op_index].op != Opcode::Ret) {
+    ++op_index;
+  }
+
+  return code_[op_index].result;
+}
+
+void VirtualMachine::CalculatePreResultOpIndex(Index res_index) {
+  if (preresult_op_index_ == -1) {
+    preresult_op_index_ = 0;
+    while (code_[preresult_op_index_].dst != res_index) {
+      ++preresult_op_index_;
+    }
+  }
+}
+
+void VirtualMachine::CollectOutputTensorRegIndices(const std::string& func_name) {
+  if (!output_tensor_reg_indices_[func_name].empty()) {
+    return;
+  }
+
+  auto& reg_indices = output_tensor_reg_indices_[func_name];
+  Index res_index = GetResultRegisterIndex();
+  CalculatePreResultOpIndex(res_index);
+  auto& preres_instr = code_[preresult_op_index_];
+  auto op_code = preres_instr.op;
+  if (op_code == Opcode::AllocTensor) {
+    reg_indices.emplace_back(res_index);
+  } else if (op_code == Opcode::AllocADT) {
+    for (Index i = 0; i < preres_instr.num_fields; ++i) {
+      reg_indices.push_back(preres_instr.datatype_fields[i]);
+    }
+  } else if (op_code == Opcode::ReshapeTensor) {
+    reg_indices.push_back(preres_instr.reshape_tensor.tensor);
+  } else {
+    LOG(WARNING) << "Operation " << size_t(op_code) << " is not supported for set_outputs method";

Review Comment:
   This should be a fatal error (`LOG(FATAL)`) rather than a warning.



##########
src/runtime/vm/vm.cc:
##########
@@ -518,13 +584,54 @@ int64_t VirtualMachine::LoadScalarInt(Index r) const {
   return result;
 }
 
-void VirtualMachine::RunLoop() {
+Index VirtualMachine::GetResultRegisterIndex() const {
+  Index op_index = 0;
+  while (code_[op_index].op != Opcode::Ret) {
+    ++op_index;
+  }
+
+  return code_[op_index].result;
+}
+
+void VirtualMachine::CalculatePreResultOpIndex(Index res_index) {
+  if (preresult_op_index_ == -1) {
+    preresult_op_index_ = 0;
+    while (code_[preresult_op_index_].dst != res_index) {
+      ++preresult_op_index_;
+    }
+  }
+}
+
+void VirtualMachine::CollectOutputTensorRegIndices(const std::string& func_name) {
+  if (!output_tensor_reg_indices_[func_name].empty()) {
+    return;
+  }
+
+  auto& reg_indices = output_tensor_reg_indices_[func_name];
+  Index res_index = GetResultRegisterIndex();
+  CalculatePreResultOpIndex(res_index);
+  auto& preres_instr = code_[preresult_op_index_];
+  auto op_code = preres_instr.op;
+  if (op_code == Opcode::AllocTensor) {
+    reg_indices.emplace_back(res_index);
+  } else if (op_code == Opcode::AllocADT) {
+    for (Index i = 0; i < preres_instr.num_fields; ++i) {
+      reg_indices.push_back(preres_instr.datatype_fields[i]);
+    }
+  } else if (op_code == Opcode::ReshapeTensor) {
+    reg_indices.push_back(preres_instr.reshape_tensor.tensor);
+  } else {
+    LOG(WARNING) << "Operation " << size_t(op_code) << " is not supported for set_outputs method";
+  }
+}
+
+void VirtualMachine::RunLoop(const std::vector<Index>& output_tensor_reg_indices) {
   ICHECK(this->exec_);
   ICHECK(this->code_);
   pc_ = 0;
   Index frame_start = frames_.size();
-  while (true) {
-  main_loop:
+  bool iterate = true;
+  while (iterate) {

Review Comment:
   Remove the change from goto to loop. It doesn't seem necessary for this PR.



##########
python/tvm/runtime/vm.py:
##########
@@ -550,6 +551,24 @@ def invoke_stateful(self, func_name, *args, **kwargs):
             self.set_input(func_name, *args, **kwargs)
         self._invoke_stateful(func_name)
 
+    def invoke_with_outputs(self, func_name, *args):

Review Comment:
   Can you make this function take input arguments too, instead of requiring `set_input`?



##########
src/runtime/vm/vm.cc:
##########
@@ -143,8 +143,15 @@ PackedFunc VirtualMachine::GetFunction(const std::string& name,
       } else {
         auto it = inputs_.find(func_name);
         ICHECK(it != inputs_.end()) << "Input has not been set for function " << func_name;
-        const std::vector<ObjectRef>& func_args = it->second;
-        *rv = Invoke(func, func_args);
+        const std::vector<ObjectRef>& input_args = it->second;
+        if (set_outputs_enabled_.count(func_name) && set_outputs_enabled_[func_name]) {
+          ICHECK(outputs_.count(func_name))
+              << "Outputs have not been set for function " << func_name;
+          *rv = Invoke(func, input_args, outputs_[func_name]);
+          set_outputs_enabled_[func_name] = false;

Review Comment:
   I think you need to clear `outputs_` here so you don't hold an unnecessary reference.



##########
include/tvm/runtime/vm/vm.h:
##########
@@ -281,6 +291,28 @@ class TVM_DLL VirtualMachine : public runtime::ModuleNode {
    */
   void SetOneInput(std::string name, const TVMArgValue& tag, const TVMArgValue& tensor);
 
+  /*!
+   * \brief Set pre-allocated outputs to a function.
+   * \param name The function name
+   * \param args outputs to the function.
+   */
+  void SetOutputs(std::string name, TVMArgs args);

Review Comment:
   Clarify that this only applies to the next single `Invoke` call.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@tvm.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org