You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by ma...@apache.org on 2020/11/14 06:48:07 UTC
[incubator-mxnet] branch master updated: Cuda 11 build fixes
(#19530)
This is an automated email from the ASF dual-hosted git repository.
manuseth pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/master by this push:
new 3bf556d Cuda 11 build fixes (#19530)
3bf556d is described below
commit 3bf556dedcbe37af9e24b02bc18ebcb4e5c0283c
Author: Joe Evans <gi...@250hacks.net>
AuthorDate: Fri Nov 13 22:45:56 2020 -0800
Cuda 11 build fixes (#19530)
* Don't use namespace for pow() function, since it is built into cuda math library, and cast the second argument so it will find an acceptable form.
* Properly cast exponent.
* Remove thrust library override and use default from cuda 11.0.
* Fix lint.
* Fix lint.
Co-authored-by: Joe Evans <jo...@amazon.com>
---
ci/build_windows.py | 17 +----------------
src/operator/contrib/multi_lamb.cu | 8 ++++----
src/operator/contrib/multi_lans.cu | 8 ++++----
3 files changed, 9 insertions(+), 24 deletions(-)
diff --git a/ci/build_windows.py b/ci/build_windows.py
index f6626d6..f184922 100755
--- a/ci/build_windows.py
+++ b/ci/build_windows.py
@@ -151,20 +151,6 @@ def windows_build(args):
mxnet_root = get_mxnet_root()
logging.info("Found MXNet root: {}".format(mxnet_root))
- if 'GPU' in args.flavour:
- # Get Thrust version to be shipped in Cuda 11, due to flakyness of
- # older Thrust versions with MSVC 19 compiler
- with remember_cwd():
- tmpdirname = tempfile.mkdtemp()
- os.chdir(tmpdirname)
- r = requests.get('https://github.com/thrust/thrust/archive/1.9.8.zip', allow_redirects=True)
- with open('thrust.zip', 'wb') as f:
- f.write(r.content)
- with zipfile.ZipFile('thrust.zip', 'r') as zip_ref:
- zip_ref.extractall('.')
- thrust_path = os.path.join(tmpdirname, "thrust-1.9.8")
-
-
# cuda thrust / CUB + VS 2019 is flaky: try multiple times if fail
MAXIMUM_TRY = 5
build_try = 0
@@ -178,8 +164,7 @@ def windows_build(args):
os.chdir(path)
env = os.environ.copy()
if 'GPU' in args.flavour:
- env["CXXFLAGS"] = '/FS /MD /O2 /Ob2 /I {}'.format(thrust_path)
- env["CUDAFLAGS"] = '-I {}'.format(thrust_path)
+ env["CXXFLAGS"] = '/FS /MD /O2 /Ob2'
cmd = "\"{}\" && cmake -GNinja {} {}".format(args.vcvars,
CMAKE_FLAGS[args.flavour],
mxnet_root)
diff --git a/src/operator/contrib/multi_lamb.cu b/src/operator/contrib/multi_lamb.cu
index 6415bfb..0a55b89 100644
--- a/src/operator/contrib/multi_lamb.cu
+++ b/src/operator/contrib/multi_lamb.cu
@@ -50,10 +50,10 @@ __global__ void KernelStep1(const MultiLAMBKernelParam<DType, MPDType> kernel_pa
MPDType biascorrection1, biascorrection2;
if (bias_correction) {
- biascorrection1 = 1.0 -
- static_cast<MPDType>(std::pow(beta1, kernel_params.step_count[tensor_id]));
- biascorrection2 = 1.0 -
- static_cast<MPDType>(std::pow(beta2, kernel_params.step_count[tensor_id]));
+ biascorrection1 = 1.0 - static_cast<MPDType>(
+ pow(beta1, static_cast<float>(kernel_params.step_count[tensor_id])));
+ biascorrection2 = 1.0 - static_cast<MPDType>(
+ pow(beta2, static_cast<float>(kernel_params.step_count[tensor_id])));
} else {
biascorrection1 = static_cast<MPDType>(1.0);
biascorrection2 = static_cast<MPDType>(1.0);
diff --git a/src/operator/contrib/multi_lans.cu b/src/operator/contrib/multi_lans.cu
index 64de721..2a7acb6 100644
--- a/src/operator/contrib/multi_lans.cu
+++ b/src/operator/contrib/multi_lans.cu
@@ -53,10 +53,10 @@ __global__ void KernelStep1(const MultiLANSKernelParam<DType, MPDType> kernel_pa
MPDType biascorrection1, biascorrection2;
- biascorrection1 = 1.0 -
- static_cast<MPDType>(std::pow(beta1, kernel_params.step_count[tensor_id]));
- biascorrection2 = 1.0 -
- static_cast<MPDType>(std::pow(beta2, kernel_params.step_count[tensor_id]));
+ biascorrection1 = 1.0 - static_cast<MPDType>(
+ pow(beta1, static_cast<float>(kernel_params.step_count[tensor_id])));
+ biascorrection2 = 1.0 - static_cast<MPDType>(
+ pow(beta2, static_cast<float>(kernel_params.step_count[tensor_id])));
MPDType r_weight[ILP_LAMB];
MPDType r_grad[ILP_LAMB];