Posted to commits@tvm.apache.org by GitBox <gi...@apache.org> on 2020/11/07 01:37:39 UTC

[GitHub] [incubator-tvm] comaniac commented on a change in pull request #6872: [BYOC][TRT] Allocate GPU data buffers and transfer data when needed

comaniac commented on a change in pull request #6872:
URL: https://github.com/apache/incubator-tvm/pull/6872#discussion_r519076124



##########
File path: src/runtime/contrib/tensorrt/tensorrt_runtime.cc
##########
@@ -141,26 +150,38 @@ class TensorRTRuntime : public JSONRuntimeBase {
 #else
     ICHECK(context->execute(batch_size_, bindings.data())) << "Running TensorRT failed.";
 #endif
+
+    // Copy outputs from GPU buffers if needed.
+    for (size_t i = 0; i < outputs_.size(); ++i) {
+      uint32_t eid = EntryID(outputs_[i]);
+      const std::string& name = engine_and_context.outputs[i];
+      int binding_index = engine->getBindingIndex(name.c_str());
+      ICHECK_NE(binding_index, -1);
+      if (data_entry_[eid]->ctx.device_type != kDLGPU) {
+        device_buffers[binding_index].CopyTo(const_cast<DLTensor*>(data_entry_[eid]));
+      }
+    }
   }
 
  private:
   /*!
    * \brief Build TensorRT engine from JSON representation.
    */
   void BuildEngine() {
+    if (trt_engine_cache_.count(symbol_name_)) return;

Review comment:
       Improve the docstring to explicitly mention the caching functionality.
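       For example, the docstring could be expanded along these lines (one
       possible wording, not the final text from the PR):

           /*!
            * \brief Build a TensorRT engine from the JSON representation and
            * cache it in trt_engine_cache_, keyed by symbol_name_. Returns
            * immediately if an engine for this subgraph has already been
            * built and cached.
            */
           void BuildEngine() {
             if (trt_engine_cache_.count(symbol_name_)) return;
             ...
           }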

##########
File path: src/runtime/contrib/tensorrt/tensorrt_builder.cc
##########
@@ -217,6 +231,20 @@ void TensorRTBuilder::CleanUp() {
   }
 }
 
+void TensorRTBuilder::AllocateDeviceBufferIfNeeded(nvinfer1::ICudaEngine* engine,

Review comment:
       We can just name it `AllocateDeviceBuffer` and add a comment mentioning that we bypass allocation if the data entry is already on the GPU.
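       A sketch of what that could look like (the remaining parameters are
       elided in the diff above, so the signature and the `device_buffers`
       container here are hypothetical):

           // Allocate a GPU staging buffer for one engine binding.
           // Bypass allocation when the data entry already lives on the GPU,
           // since TensorRT can then read/write it directly.
           void TensorRTBuilder::AllocateDeviceBuffer(
               nvinfer1::ICudaEngine* engine, int binding_index, const DLTensor* data_entry,
               std::unordered_map<int, runtime::NDArray>* device_buffers) {
             if (data_entry->ctx.device_type == kDLGPU) return;  // Already on GPU: bypass.
             nvinfer1::Dims dims = engine->getBindingDimensions(binding_index);
             std::vector<int64_t> shape(dims.d, dims.d + dims.nbDims);
             DLContext gpu_ctx{kDLGPU, 0};
             (*device_buffers)[binding_index] =
                 runtime::NDArray::Empty(shape, data_entry->dtype, gpu_ctx);
           }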

##########
File path: src/runtime/contrib/tensorrt/tensorrt_runtime.cc
##########
@@ -106,9 +104,11 @@ class TensorRTRuntime : public JSONRuntimeBase {
 #ifdef TVM_GRAPH_RUNTIME_TENSORRT
   /*! \brief Run inference using built engine. */
   void Run() override {
+    BuildEngine();

Review comment:
       Is the reason for moving `BuildEngine` from `Init` to `Run` that you need subgraph-specific information (e.g., I/O data entry IDs) to allocate the device buffers?
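       If so, it may be worth stating that at the call site, e.g. (a sketch,
       assuming that is indeed the reason):

           /*! \brief Run inference using built engine. */
           void Run() override {
             // Build the engine lazily on the first run (later runs hit
             // trt_engine_cache_): device buffers can only be allocated once
             // the subgraph's I/O data entries are known.
             BuildEngine();
             ...
           }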




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org