Posted to commits@mxnet.apache.org by je...@apache.org on 2022/05/16 15:25:48 UTC

[incubator-mxnet] branch master updated: Fix CI in master (#21026)

This is an automated email from the ASF dual-hosted git repository.

jevans pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
     new eb8982deb2 Fix CI in master (#21026)
eb8982deb2 is described below

commit eb8982deb2cb7bc724146b774f5a8d299feab4d4
Author: Joe Evans <jo...@gmail.com>
AuthorDate: Mon May 16 08:25:38 2022 -0700

    Fix CI in master (#21026)
    
    * Update the minor versions of the nvidia cuda containers we use so that the images have the latest signing keys pre-installed.
    
    * Update the TensorRT pipeline to Cuda 11.2.
    
    * Update TensorRT pipeline to use Cuda 11.4 and update libnvinfer to 8.2.4.
    
    * Allow setting TRT version as argument in docker-compose.yml and update to TRT 8.2.4 for cuda 11.4.
    
    * Use python3 executable when building tensorrt (so we can update to ubuntu 20.04 base) and enable int64 build.
    
    * Remove unneeded line.
    
    * Support TRT 8+.
    
    * Update onnx-tensorrt to 22.02 release.
    
    * Add support for trt >= 8.
    
    * Fix lint
    
    * Remove debug line.
    
    * Don't upgrade libcudnn; use what is in the latest container from nvidia.
    
    * Set CUDNN_VERSION inside nvidia containers when NV_CUDNN_VERSION is set instead.
    
    * Go back to updating libcudnn8.
---
 3rdparty/onnx-tensorrt                             |  2 +-
 ci/docker/Dockerfile.build.ubuntu                  | 28 +++++++++-------------
 ci/docker/docker-compose.yml                       | 13 +++++-----
 ci/docker/runtime_functions.sh                     | 18 +++++++++++---
 ci/jenkins/Jenkins_steps.groovy                    |  2 +-
 src/operator/subgraph/tensorrt/onnx_to_tensorrt.cc | 15 ++++++++++++
 src/operator/subgraph/tensorrt/onnx_to_tensorrt.h  |  2 +-
 7 files changed, 51 insertions(+), 29 deletions(-)

diff --git a/3rdparty/onnx-tensorrt b/3rdparty/onnx-tensorrt
index 2eb74d933f..8b6144a885 160000
--- a/3rdparty/onnx-tensorrt
+++ b/3rdparty/onnx-tensorrt
@@ -1 +1 @@
-Subproject commit 2eb74d933f89e1590fdbfc64971a36e5f72df720
+Subproject commit 8b6144a88534e6fd15b6ef5457f1c1a1fc153939
diff --git a/ci/docker/Dockerfile.build.ubuntu b/ci/docker/Dockerfile.build.ubuntu
index cde9bf9c60..8501908992 100644
--- a/ci/docker/Dockerfile.build.ubuntu
+++ b/ci/docker/Dockerfile.build.ubuntu
@@ -159,25 +159,19 @@ SHELL ["/bin/bash", "-c"]
 # We need to redeclare ARG due to
 # https://docs.docker.com/engine/reference/builder/#understand-how-arg-and-from-interact
 ARG BASE_IMAGE
-RUN export SHORT_CUDA_VERSION=${CUDA_VERSION%.*} && \
-    export OS_RELEASE="$(cat /etc/os-release)" && \
-    apt-get update && \
-    apt-get install -y --allow-change-held-packages libcudnn8 libcudnn8-dev && \
-    if [[ ${OS_RELEASE} == *"Bionic"* ]]; then \
-        if [ ${SHORT_CUDA_VERSION} = 11.0 ]; then \
-            TRT_VERSION="7.2.0-1+cuda11.0"; \
-            TRT_MAJOR_VERSION=7; \
-        elif [ ${SHORT_CUDA_VERSION} = 11.1 ]; then \
-            TRT_VERSION="7.2.1-1+cuda11.1"; \
-            TRT_MAJOR_VERSION=7; \
-        else \
-            echo "ERROR: Cuda ${SHORT_CUDA_VERSION} not yet supported in Dockerfile.build.ubuntu"; \
-            exit 1; \
-        fi; \
+RUN apt-get update && \
+        apt-get install -y --allow-change-held-packages libcudnn8 libcudnn8-dev && \
+        rm -rf /var/lib/apt/lists/*
+
+ARG TRT_VERSION
+RUN if [ ! -z "${TRT_VERSION}" ]; then \
+        apt-get update && \
+        TRT_MAJOR_VERSION=$(echo $TRT_VERSION | cut -d. -f 1) && \
         apt-get install -y libnvinfer${TRT_MAJOR_VERSION}=${TRT_VERSION} \
                            libnvinfer-dev=${TRT_VERSION} \
                            libnvinfer-plugin${TRT_MAJOR_VERSION}=${TRT_VERSION} \
                            libnvinfer-plugin-dev=${TRT_VERSION}; \
-    fi && \
-    rm -rf /var/lib/apt/lists/*
+        rm -rf /var/lib/apt/lists/*; \
+    fi
+
 
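Note: the Dockerfile change above replaces the per-CUDA hard-coded TensorRT pins with a TRT_VERSION build argument and derives the major version from it. A minimal shell sketch of that derivation, using the version string that docker-compose.yml (below) now passes in:

    # e.g. TRT_VERSION="8.2.4-1+cuda11.4" (build arg supplied by docker-compose.yml)
    TRT_VERSION="8.2.4-1+cuda11.4"
    TRT_MAJOR_VERSION=$(echo "$TRT_VERSION" | cut -d. -f 1)   # -> 8
    # the major version selects the package names, the full string pins them, e.g.
    #   libnvinfer8=8.2.4-1+cuda11.4   libnvinfer-plugin8=8.2.4-1+cuda11.4
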
diff --git a/ci/docker/docker-compose.yml b/ci/docker/docker-compose.yml
index 0a3f320f42..9e19bb2e09 100644
--- a/ci/docker/docker-compose.yml
+++ b/ci/docker/docker-compose.yml
@@ -68,7 +68,7 @@ services:
       dockerfile: Dockerfile.build.centos7
       target: base
       args:
-        BASE_IMAGE: nvidia/cuda:11.0-cudnn8-devel-centos7
+        BASE_IMAGE: nvidia/cuda:11.0.3-cudnn8-devel-centos7
       cache_from:
         - ${DOCKER_CACHE_REGISTRY}/build.centos7_gpu_cu110:latest
   centos7_gpu_cu112:
@@ -98,16 +98,17 @@ services:
         BASE_IMAGE: ubuntu:20.04
       cache_from:
         - ${DOCKER_CACHE_REGISTRY}/build.ubuntu_cpu:latest
-  ubuntu_tensorrt_cu111:
-    image: ${DOCKER_CACHE_REGISTRY}/build.ubuntu_tensorrt_cu111:latest
+  ubuntu_tensorrt_cu114:
+    image: ${DOCKER_CACHE_REGISTRY}/build.ubuntu_tensorrt_cu114:latest
     build:
       context: .
       dockerfile: Dockerfile.build.ubuntu
       target: gpu
       args:
-        BASE_IMAGE: nvidia/cuda:11.1-cudnn8-devel-ubuntu18.04
+        BASE_IMAGE: nvidia/cuda:11.4.0-cudnn8-devel-ubuntu20.04
+        TRT_VERSION: 8.2.4-1+cuda11.4
       cache_from:
-        - ${DOCKER_CACHE_REGISTRY}/build.ubuntu_tensorrt_cu111:latest
+        - ${DOCKER_CACHE_REGISTRY}/build.ubuntu_tensorrt_cu114:latest
   ubuntu_gpu_cu111:
     image: ${DOCKER_CACHE_REGISTRY}/build.ubuntu_gpu_cu111:latest
     build:
@@ -115,7 +116,7 @@ services:
       dockerfile: Dockerfile.build.ubuntu
       target: gpu
       args:
-        BASE_IMAGE: nvidia/cuda:11.1-cudnn8-devel-ubuntu20.04
+        BASE_IMAGE: nvidia/cuda:11.1.1-cudnn8-devel-ubuntu20.04
       cache_from:
         - ${DOCKER_CACHE_REGISTRY}/build.ubuntu_gpu_cu111:latest
   ###################################################################################################
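Note: with the service renamed from ubuntu_tensorrt_cu111 to ubuntu_tensorrt_cu114 and TRT_VERSION exposed as a build arg, a local rebuild of the TensorRT image could look like the sketch below. The registry value is only an assumption for illustration; CI drives the build through its own tooling.

    # hypothetical local build of the renamed compose service, run from ci/docker
    export DOCKER_CACHE_REGISTRY=example.registry/mxnet    # assumed value
    cd ci/docker
    docker-compose build ubuntu_tensorrt_cu114             # uses the BASE_IMAGE/TRT_VERSION args from docker-compose.yml
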
diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh
index 185913254a..04105d488d 100755
--- a/ci/docker/runtime_functions.sh
+++ b/ci/docker/runtime_functions.sh
@@ -28,6 +28,14 @@ CI_CMAKE_CUDA10_ARCH="5.2 7.5"
 # compute capabilities for CI instances supported by CUDA >= 11.1 (i.e. p3, g4, g5)
 CI_CMAKE_CUDA_ARCH="5.2 7.5 8.6"
 
+# On newer nvidia cuda containers, these environment variables
+#  are prefixed with NV_, so provide compatibility
+if [ ! -z "$NV_CUDNN_VERSION" ]; then
+    if [ -z "$CUDNN_VERSION" ]; then
+        export CUDNN_VERSION=$NV_CUDNN_VERSION
+    fi
+fi
+
 clean_repo() {
     set -ex
     git clean -xfd
@@ -548,6 +556,9 @@ build_ubuntu_gpu_tensorrt() {
     export CC=gcc-7
     export CXX=g++-7
     export ONNX_NAMESPACE=onnx
+    export PYBIN=$(which python3)
+    PYVERFULL=$($PYBIN -V | awk '{print $2}')
+    export PYVER=${PYVERFULL%.*}
 
     # Build ONNX
     pushd .
@@ -556,7 +567,7 @@ build_ubuntu_gpu_tensorrt() {
     rm -rf build
     mkdir -p build
     cd build
-    cmake -DCMAKE_CXX_FLAGS=-I/usr/include/python${PYVER} -DBUILD_SHARED_LIBS=ON ..
+    cmake -DPYTHON_EXECUTABLE=$PYBIN -DCMAKE_CXX_FLAGS=-I/usr/include/python${PYVER} -DBUILD_SHARED_LIBS=ON ..
     make -j$(nproc)
     export LIBRARY_PATH=`pwd`:`pwd`/onnx/:$LIBRARY_PATH
     export CPLUS_INCLUDE_PATH=`pwd`:$CPLUS_INCLUDE_PATH
@@ -566,12 +577,12 @@ build_ubuntu_gpu_tensorrt() {
 
     # Build ONNX-TensorRT
     export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib
-    export CPLUS_INCLUDE_PATH=${CPLUS_INCLUDE_PATH}:/usr/local/cuda-10.2/targets/x86_64-linux/include/
+    export CPLUS_INCLUDE_PATH=${CPLUS_INCLUDE_PATH}:/usr/local/cuda/targets/x86_64-linux/include/
     pushd .
     cd 3rdparty/onnx-tensorrt/
     mkdir -p build
     cd build
-    cmake -DONNX_NAMESPACE=$ONNX_NAMESPACE ..
+    cmake -DPYTHON_EXECUTABLE=$PYBIN -DONNX_NAMESPACE=$ONNX_NAMESPACE ..
     make -j$(nproc)
     export LIBRARY_PATH=`pwd`:$LIBRARY_PATH
     popd
@@ -585,6 +596,7 @@ build_ubuntu_gpu_tensorrt() {
           -DUSE_CUDNN=1                           \
           -DUSE_OPENCV=1                          \
           -DUSE_TENSORRT=1                        \
+          -DUSE_INT64_TENSOR_SIZE=1               \
           -DUSE_OPENMP=0                          \
           -DUSE_BLAS=Open                         \
           -DUSE_ONEDNN=0                          \
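Note: two of the runtime_functions.sh changes above are easy to skim past: the NV_CUDNN_VERSION compatibility shim and the python3 probing that locates the interpreter and its headers for the TensorRT build. A worked example of what those expansions produce on the new ubuntu 20.04 base (version numbers are illustrative):

    # newer nvidia containers export NV_CUDNN_VERSION rather than CUDNN_VERSION
    NV_CUDNN_VERSION=8.2.4.15                    # illustrative value from the base image
    if [ ! -z "$NV_CUDNN_VERSION" ] && [ -z "$CUDNN_VERSION" ]; then
        export CUDNN_VERSION=$NV_CUDNN_VERSION   # scripts keep reading CUDNN_VERSION as before
    fi

    PYBIN=$(which python3)                       # e.g. /usr/bin/python3
    PYVERFULL=$($PYBIN -V | awk '{print $2}')    # "Python 3.8.10" -> "3.8.10"
    PYVER=${PYVERFULL%.*}                        # "3.8.10" -> "3.8"
    # PYVER feeds -I/usr/include/python${PYVER} when configuring the ONNX build
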
diff --git a/ci/jenkins/Jenkins_steps.groovy b/ci/jenkins/Jenkins_steps.groovy
index aec2b6564f..81dfe4e8f3 100644
--- a/ci/jenkins/Jenkins_steps.groovy
+++ b/ci/jenkins/Jenkins_steps.groovy
@@ -263,7 +263,7 @@ def compile_unix_tensorrt_gpu(lib_name) {
         ws('workspace/build-tensorrt') {
           timeout(time: max_time, unit: 'MINUTES') {
             utils.init_git()
-            utils.docker_run('ubuntu_tensorrt_cu111', 'build_ubuntu_gpu_tensorrt', false)
+            utils.docker_run('ubuntu_tensorrt_cu114', 'build_ubuntu_gpu_tensorrt', false)
             utils.pack_lib(lib_name, mx_tensorrt_lib)
           }
         }
diff --git a/src/operator/subgraph/tensorrt/onnx_to_tensorrt.cc b/src/operator/subgraph/tensorrt/onnx_to_tensorrt.cc
index 25efcb67a8..24e945dae1 100644
--- a/src/operator/subgraph/tensorrt/onnx_to_tensorrt.cc
+++ b/src/operator/subgraph/tensorrt/onnx_to_tensorrt.cc
@@ -103,17 +103,32 @@ onnxToTrtCtx(const std::string& onnx_model,
     }
     throw dmlc::Error("Cannot parse ONNX into TensorRT Engine");
   }
+#if NV_TENSORRT_MAJOR >= 8
+  auto trt_config = InferObject(trt_builder->createBuilderConfig());
+#endif
   if (dmlc::GetEnv("MXNET_TENSORRT_USE_FP16", true)) {
     if (trt_builder->platformHasFastFp16()) {
+#if NV_TENSORRT_MAJOR >= 8
+      trt_config->setFlag(nvinfer1::BuilderFlag::kFP16);
+#else
       trt_builder->setFp16Mode(true);
+#endif
     } else {
       LOG(WARNING) << "TensorRT can't use fp16 on this platform";
     }
   }
   trt_builder->setMaxBatchSize(max_batch_size);
+#if NV_TENSORRT_MAJOR >= 8
+  trt_config->setMaxWorkspaceSize(max_workspace_size);
+  if (debug_builder) {
+    trt_config->setFlag(nvinfer1::BuilderFlag::kDEBUG);
+  }
+  auto trt_engine = InferObject(trt_builder->buildEngineWithConfig(*trt_network, *trt_config));
+#else
   trt_builder->setMaxWorkspaceSize(max_workspace_size);
   trt_builder->setDebugSync(debug_builder);
   auto trt_engine = InferObject(trt_builder->buildCudaEngine(*trt_network));
+#endif
   return std::make_tuple(std::move(trt_engine), std::move(trt_parser), std::move(trt_logger));
 }
 
diff --git a/src/operator/subgraph/tensorrt/onnx_to_tensorrt.h b/src/operator/subgraph/tensorrt/onnx_to_tensorrt.h
index 834b20a441..5b93faafdd 100644
--- a/src/operator/subgraph/tensorrt/onnx_to_tensorrt.h
+++ b/src/operator/subgraph/tensorrt/onnx_to_tensorrt.h
@@ -68,7 +68,7 @@ class TRT_Logger : public nvinfer1::ILogger {
  public:
   TRT_Logger(Severity verbosity = Severity::kWARNING, std::ostream& ostream = std::cout)  // NOLINT
       : _verbosity(verbosity), _ostream(&ostream) {}
-  void log(Severity severity, const char* msg) override {
+  void log(Severity severity, const char* msg) noexcept override {
     if (severity <= _verbosity) {
       time_t rawtime = std::time(0);
       char buf[256];