You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by je...@apache.org on 2022/05/16 15:25:48 UTC
[incubator-mxnet] branch master updated: Fix CI in master (#21026)
This is an automated email from the ASF dual-hosted git repository.
jevans pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/master by this push:
new eb8982deb2 Fix CI in master (#21026)
eb8982deb2 is described below
commit eb8982deb2cb7bc724146b774f5a8d299feab4d4
Author: Joe Evans <jo...@gmail.com>
AuthorDate: Mon May 16 08:25:38 2022 -0700
Fix CI in master (#21026)
* Update minor versions of nvidia cuda containers to use versions that have the latest keys pre-installed.
* Update the TensorRT pipeline to Cuda 11.2.
* Update TensorRT pipeline to use Cuda 11.4 and update libnvinfer to 8.2.4.
* Allow setting TRT version as argument in docker-compose.yml and update to TRT 8.2.4 for cuda 11.4.
* Use python3 executable when building tensorrt (so we can update to ubuntu 20.04 base) and enable int64 build.
* Remove unneeded line.
* Support TRT 8+.
* Update onnx-tensorrt to 22.02 release.
* Add support for trt >= 8.
* Fix lint
* Remove debug line.
* Don't upgrade libcudnn, use what is in the latest container from nvidia.
* Set CUDNN_VERSION inside nvidia containers when NV_CUDNN_VERSION is set instead.
* Go back to updating libcudnn8.
---
3rdparty/onnx-tensorrt | 2 +-
ci/docker/Dockerfile.build.ubuntu | 28 +++++++++-------------
ci/docker/docker-compose.yml | 13 +++++-----
ci/docker/runtime_functions.sh | 18 +++++++++++---
ci/jenkins/Jenkins_steps.groovy | 2 +-
src/operator/subgraph/tensorrt/onnx_to_tensorrt.cc | 15 ++++++++++++
src/operator/subgraph/tensorrt/onnx_to_tensorrt.h | 2 +-
7 files changed, 51 insertions(+), 29 deletions(-)
diff --git a/3rdparty/onnx-tensorrt b/3rdparty/onnx-tensorrt
index 2eb74d933f..8b6144a885 160000
--- a/3rdparty/onnx-tensorrt
+++ b/3rdparty/onnx-tensorrt
@@ -1 +1 @@
-Subproject commit 2eb74d933f89e1590fdbfc64971a36e5f72df720
+Subproject commit 8b6144a88534e6fd15b6ef5457f1c1a1fc153939
diff --git a/ci/docker/Dockerfile.build.ubuntu b/ci/docker/Dockerfile.build.ubuntu
index cde9bf9c60..8501908992 100644
--- a/ci/docker/Dockerfile.build.ubuntu
+++ b/ci/docker/Dockerfile.build.ubuntu
@@ -159,25 +159,19 @@ SHELL ["/bin/bash", "-c"]
# We need to redeclare ARG due to
# https://docs.docker.com/engine/reference/builder/#understand-how-arg-and-from-interact
ARG BASE_IMAGE
-RUN export SHORT_CUDA_VERSION=${CUDA_VERSION%.*} && \
- export OS_RELEASE="$(cat /etc/os-release)" && \
- apt-get update && \
- apt-get install -y --allow-change-held-packages libcudnn8 libcudnn8-dev && \
- if [[ ${OS_RELEASE} == *"Bionic"* ]]; then \
- if [ ${SHORT_CUDA_VERSION} = 11.0 ]; then \
- TRT_VERSION="7.2.0-1+cuda11.0"; \
- TRT_MAJOR_VERSION=7; \
- elif [ ${SHORT_CUDA_VERSION} = 11.1 ]; then \
- TRT_VERSION="7.2.1-1+cuda11.1"; \
- TRT_MAJOR_VERSION=7; \
- else \
- echo "ERROR: Cuda ${SHORT_CUDA_VERSION} not yet supported in Dockerfile.build.ubuntu"; \
- exit 1; \
- fi; \
+RUN apt-get update && \
+ apt-get install -y --allow-change-held-packages libcudnn8 libcudnn8-dev && \
+ rm -rf /var/lib/apt/lists/*
+
+ARG TRT_VERSION
+RUN if [ ! -z "${TRT_VERSION}" ]; then \
+ apt-get update && \
+ TRT_MAJOR_VERSION=$(echo $TRT_VERSION | cut -d. -f 1) && \
apt-get install -y libnvinfer${TRT_MAJOR_VERSION}=${TRT_VERSION} \
libnvinfer-dev=${TRT_VERSION} \
libnvinfer-plugin${TRT_MAJOR_VERSION}=${TRT_VERSION} \
libnvinfer-plugin-dev=${TRT_VERSION}; \
- fi && \
- rm -rf /var/lib/apt/lists/*
+ rm -rf /var/lib/apt/lists/*; \
+ fi
+
diff --git a/ci/docker/docker-compose.yml b/ci/docker/docker-compose.yml
index 0a3f320f42..9e19bb2e09 100644
--- a/ci/docker/docker-compose.yml
+++ b/ci/docker/docker-compose.yml
@@ -68,7 +68,7 @@ services:
dockerfile: Dockerfile.build.centos7
target: base
args:
- BASE_IMAGE: nvidia/cuda:11.0-cudnn8-devel-centos7
+ BASE_IMAGE: nvidia/cuda:11.0.3-cudnn8-devel-centos7
cache_from:
- ${DOCKER_CACHE_REGISTRY}/build.centos7_gpu_cu110:latest
centos7_gpu_cu112:
@@ -98,16 +98,17 @@ services:
BASE_IMAGE: ubuntu:20.04
cache_from:
- ${DOCKER_CACHE_REGISTRY}/build.ubuntu_cpu:latest
- ubuntu_tensorrt_cu111:
- image: ${DOCKER_CACHE_REGISTRY}/build.ubuntu_tensorrt_cu111:latest
+ ubuntu_tensorrt_cu114:
+ image: ${DOCKER_CACHE_REGISTRY}/build.ubuntu_tensorrt_cu114:latest
build:
context: .
dockerfile: Dockerfile.build.ubuntu
target: gpu
args:
- BASE_IMAGE: nvidia/cuda:11.1-cudnn8-devel-ubuntu18.04
+ BASE_IMAGE: nvidia/cuda:11.4.0-cudnn8-devel-ubuntu20.04
+ TRT_VERSION: 8.2.4-1+cuda11.4
cache_from:
- - ${DOCKER_CACHE_REGISTRY}/build.ubuntu_tensorrt_cu111:latest
+ - ${DOCKER_CACHE_REGISTRY}/build.ubuntu_tensorrt_cu114:latest
ubuntu_gpu_cu111:
image: ${DOCKER_CACHE_REGISTRY}/build.ubuntu_gpu_cu111:latest
build:
@@ -115,7 +116,7 @@ services:
dockerfile: Dockerfile.build.ubuntu
target: gpu
args:
- BASE_IMAGE: nvidia/cuda:11.1-cudnn8-devel-ubuntu20.04
+ BASE_IMAGE: nvidia/cuda:11.1.1-cudnn8-devel-ubuntu20.04
cache_from:
- ${DOCKER_CACHE_REGISTRY}/build.ubuntu_gpu_cu111:latest
###################################################################################################
diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh
index 185913254a..04105d488d 100755
--- a/ci/docker/runtime_functions.sh
+++ b/ci/docker/runtime_functions.sh
@@ -28,6 +28,14 @@ CI_CMAKE_CUDA10_ARCH="5.2 7.5"
# compute capabilities for CI instances supported by CUDA >= 11.1 (i.e. p3, g4, g5)
CI_CMAKE_CUDA_ARCH="5.2 7.5 8.6"
+# On newer nvidia cuda containers, these environment variables
+# are prefixed with NV_, so provide compatibility
+if [ ! -z "$NV_CUDNN_VERSION" ]; then
+ if [ -z "$CUDNN_VERSION" ]; then
+ export CUDNN_VERSION=$NV_CUDNN_VERSION
+ fi
+fi
+
clean_repo() {
set -ex
git clean -xfd
@@ -548,6 +556,9 @@ build_ubuntu_gpu_tensorrt() {
export CC=gcc-7
export CXX=g++-7
export ONNX_NAMESPACE=onnx
+ export PYBIN=$(which python3)
+ PYVERFULL=$($PYBIN -V | awk '{print $2}')
+ export PYVER=${PYVERFULL%.*}
# Build ONNX
pushd .
@@ -556,7 +567,7 @@ build_ubuntu_gpu_tensorrt() {
rm -rf build
mkdir -p build
cd build
- cmake -DCMAKE_CXX_FLAGS=-I/usr/include/python${PYVER} -DBUILD_SHARED_LIBS=ON ..
+ cmake -DPYTHON_EXECUTABLE=$PYBIN -DCMAKE_CXX_FLAGS=-I/usr/include/python${PYVER} -DBUILD_SHARED_LIBS=ON ..
make -j$(nproc)
export LIBRARY_PATH=`pwd`:`pwd`/onnx/:$LIBRARY_PATH
export CPLUS_INCLUDE_PATH=`pwd`:$CPLUS_INCLUDE_PATH
@@ -566,12 +577,12 @@ build_ubuntu_gpu_tensorrt() {
# Build ONNX-TensorRT
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib
- export CPLUS_INCLUDE_PATH=${CPLUS_INCLUDE_PATH}:/usr/local/cuda-10.2/targets/x86_64-linux/include/
+ export CPLUS_INCLUDE_PATH=${CPLUS_INCLUDE_PATH}:/usr/local/cuda/targets/x86_64-linux/include/
pushd .
cd 3rdparty/onnx-tensorrt/
mkdir -p build
cd build
- cmake -DONNX_NAMESPACE=$ONNX_NAMESPACE ..
+ cmake -DPYTHON_EXECUTABLE=$PYBIN -DONNX_NAMESPACE=$ONNX_NAMESPACE ..
make -j$(nproc)
export LIBRARY_PATH=`pwd`:$LIBRARY_PATH
popd
@@ -585,6 +596,7 @@ build_ubuntu_gpu_tensorrt() {
-DUSE_CUDNN=1 \
-DUSE_OPENCV=1 \
-DUSE_TENSORRT=1 \
+ -DUSE_INT64_TENSOR_SIZE=1 \
-DUSE_OPENMP=0 \
-DUSE_BLAS=Open \
-DUSE_ONEDNN=0 \
diff --git a/ci/jenkins/Jenkins_steps.groovy b/ci/jenkins/Jenkins_steps.groovy
index aec2b6564f..81dfe4e8f3 100644
--- a/ci/jenkins/Jenkins_steps.groovy
+++ b/ci/jenkins/Jenkins_steps.groovy
@@ -263,7 +263,7 @@ def compile_unix_tensorrt_gpu(lib_name) {
ws('workspace/build-tensorrt') {
timeout(time: max_time, unit: 'MINUTES') {
utils.init_git()
- utils.docker_run('ubuntu_tensorrt_cu111', 'build_ubuntu_gpu_tensorrt', false)
+ utils.docker_run('ubuntu_tensorrt_cu114', 'build_ubuntu_gpu_tensorrt', false)
utils.pack_lib(lib_name, mx_tensorrt_lib)
}
}
diff --git a/src/operator/subgraph/tensorrt/onnx_to_tensorrt.cc b/src/operator/subgraph/tensorrt/onnx_to_tensorrt.cc
index 25efcb67a8..24e945dae1 100644
--- a/src/operator/subgraph/tensorrt/onnx_to_tensorrt.cc
+++ b/src/operator/subgraph/tensorrt/onnx_to_tensorrt.cc
@@ -103,17 +103,32 @@ onnxToTrtCtx(const std::string& onnx_model,
}
throw dmlc::Error("Cannot parse ONNX into TensorRT Engine");
}
+#if NV_TENSORRT_MAJOR >= 8
+ auto trt_config = InferObject(trt_builder->createBuilderConfig());
+#endif
if (dmlc::GetEnv("MXNET_TENSORRT_USE_FP16", true)) {
if (trt_builder->platformHasFastFp16()) {
+#if NV_TENSORRT_MAJOR >= 8
+ trt_config->setFlag(nvinfer1::BuilderFlag::kFP16);
+#else
trt_builder->setFp16Mode(true);
+#endif
} else {
LOG(WARNING) << "TensorRT can't use fp16 on this platform";
}
}
trt_builder->setMaxBatchSize(max_batch_size);
+#if NV_TENSORRT_MAJOR >= 8
+ trt_config->setMaxWorkspaceSize(max_workspace_size);
+ if (debug_builder) {
+ trt_config->setFlag(nvinfer1::BuilderFlag::kDEBUG);
+ }
+ auto trt_engine = InferObject(trt_builder->buildEngineWithConfig(*trt_network, *trt_config));
+#else
trt_builder->setMaxWorkspaceSize(max_workspace_size);
trt_builder->setDebugSync(debug_builder);
auto trt_engine = InferObject(trt_builder->buildCudaEngine(*trt_network));
+#endif
return std::make_tuple(std::move(trt_engine), std::move(trt_parser), std::move(trt_logger));
}
diff --git a/src/operator/subgraph/tensorrt/onnx_to_tensorrt.h b/src/operator/subgraph/tensorrt/onnx_to_tensorrt.h
index 834b20a441..5b93faafdd 100644
--- a/src/operator/subgraph/tensorrt/onnx_to_tensorrt.h
+++ b/src/operator/subgraph/tensorrt/onnx_to_tensorrt.h
@@ -68,7 +68,7 @@ class TRT_Logger : public nvinfer1::ILogger {
public:
TRT_Logger(Severity verbosity = Severity::kWARNING, std::ostream& ostream = std::cout) // NOLINT
: _verbosity(verbosity), _ostream(&ostream) {}
- void log(Severity severity, const char* msg) override {
+ void log(Severity severity, const char* msg) noexcept override {
if (severity <= _verbosity) {
time_t rawtime = std::time(0);
char buf[256];