Posted to commits@mxnet.apache.org by jx...@apache.org on 2017/11/10 23:37:24 UTC
[incubator-mxnet] branch master updated: support for lapack functions with mkl (#8577)
This is an automated email from the ASF dual-hosted git repository.
jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/master by this push:
new 00ddcf4 support for lapack functions with mkl (#8577)
00ddcf4 is described below
commit 00ddcf44299a1f88c8d6448f30cf002c8942a2fa
Author: moin <as...@yahoo.de>
AuthorDate: Sat Nov 11 00:37:17 2017 +0100
support for lapack functions with mkl (#8577)
---
 make/config.mk              |  6 ++++
 make/osx.mk                 |  6 ++++
 src/operator/c_lapack_api.h | 81 ++++++++++++++++++++++++++++++++++++++++++---
 src/operator/linalg_impl.h  | 22 ++++++------
 4 files changed, 100 insertions(+), 15 deletions(-)
diff --git a/make/config.mk b/make/config.mk
index d47d4d6..a4774f0 100644
--- a/make/config.mk
+++ b/make/config.mk
@@ -105,6 +105,12 @@ USE_LAPACK = 1
# path to lapack library in case of a non-standard installation
USE_LAPACK_PATH =
+# by default, disable lapack when using MKL
+# switch on when there is a full installation of MKL available (not just MKL2017/MKL_ML)
+ifeq ($(USE_BLAS), mkl)
+USE_LAPACK = 0
+endif
+
# add path to intel library, you may need it for MKL, if you did not add the path
# to environment variable
USE_INTEL_PATH = NONE
diff --git a/make/osx.mk b/make/osx.mk
index d9ce6f2..b17b04c 100644
--- a/make/osx.mk
+++ b/make/osx.mk
@@ -67,6 +67,12 @@ USE_BLAS = apple
# only effective when compiled with blas versions openblas/apple/atlas/mkl
USE_LAPACK = 1
+# by default, disable lapack when using MKL
+# switch on when there is a full installation of MKL available (not just MKL2017/MKL_ML)
+ifeq ($(USE_BLAS), mkl)
+USE_LAPACK = 0
+endif
+
# add path to intel library, you may need it for MKL, if you did not add the path
# to environment variable
USE_INTEL_PATH = NONE
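Context for the new default: the MKL2017/MKL_ML subset ships only the BLAS-level routines, not the full LAPACKE interface, so USE_LAPACK is switched off whenever USE_BLAS = mkl unless the user opts back in. With USE_LAPACK = 0 the build sets MXNET_USE_LAPACK to 0 and c_lapack_api.h compiles runtime-fatal stubs instead of real bindings. A minimal C++ sketch of that guard pattern (my_potrf and the message text are illustrative, not the actual mxnet symbols):

#include <cstdio>
#include <cstdlib>

#define MXNET_USE_LAPACK 0  // what USE_LAPACK = 0 in config.mk amounts to

#if MXNET_USE_LAPACK
// a USE_LAPACK = 1 build would bind to the real lapack implementation here
int my_potrf(float* a, int n);
#else
// compilable stub, analogous to the MXNET_LAPACK_CWRAPPER stubs in
// c_lapack_api.h: fail loudly at runtime rather than at link time
int my_potrf(float*, int) {
  std::fprintf(stderr,
               "lapack functionality not available, rebuild with USE_LAPACK=1\n");
  std::abort();
}
#endif

int main() {
  float a[4] = {4.f, 2.f, 2.f, 3.f};
  return my_potrf(a, 2);  // aborts in this USE_LAPACK = 0 build
}

With a full MKL installation, the user can set USE_LAPACK = 1 again, as the comment above the ifeq block suggests, to get the real implementations.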
diff --git a/src/operator/c_lapack_api.h b/src/operator/c_lapack_api.h
index 53b0bf2..293c3f2 100644
--- a/src/operator/c_lapack_api.h
+++ b/src/operator/c_lapack_api.h
@@ -70,6 +70,9 @@
using namespace mshadow;
+// These Fortran declarations would clash with MKL's Fortran layer headers.
+#if MSHADOW_USE_MKL == 0
+
extern "C" {
// Fortran signatures
@@ -118,15 +121,14 @@ extern "C" {
MXNET_LAPACK_FSIG_SYEVD(dsyevd, double)
}
-#define MXNET_LAPACK_ROW_MAJOR 101
-#define MXNET_LAPACK_COL_MAJOR 102
+#endif // MSHADOW_USE_MKL == 0
+
#define CHECK_LAPACK_UPLO(a) \
CHECK(a == 'U' || a == 'L') << "neither L nor U specified as triangle in lapack call";
inline char loup(char uplo, bool invert) { return invert ? (uplo == 'U' ? 'L' : 'U') : uplo; }
-
/*!
* \brief Transpose matrix data in memory
*
@@ -160,7 +162,75 @@ inline void flip<cpu, double>(int m, int n,
}
-#if MXNET_USE_LAPACK
+#if (MSHADOW_USE_MKL && MXNET_USE_LAPACK)
+
+ // We interface with MKL's C interface (LAPACKE),
+ // as this is the preferred way.
+ #include <mkl_lapacke.h>
+
+ #define MXNET_LAPACK_ROW_MAJOR LAPACK_ROW_MAJOR
+ #define MXNET_LAPACK_COL_MAJOR LAPACK_COL_MAJOR
+
+ // These functions already have a matching signature.
+ #define MXNET_LAPACK_spotrf LAPACKE_spotrf
+ #define MXNET_LAPACK_dpotrf LAPACKE_dpotrf
+ #define MXNET_LAPACK_spotri LAPACKE_spotri
+ #define MXNET_LAPACK_dpotri LAPACKE_dpotri
+ #define mxnet_lapack_sposv LAPACKE_sposv
+ #define mxnet_lapack_dposv LAPACKE_dposv
+
+ // The following functions differ in signature from the
+ // MXNET_LAPACK signature and have to be wrapped.
+ #define MXNET_LAPACK_CWRAP_GELQF(prefix, dtype) \
+ inline int MXNET_LAPACK_##prefix##gelqf(int matrix_layout, int m, int n, \
+ dtype *a, int lda, dtype* tau, \
+ dtype* work, int lwork) { \
+ if (lwork != -1) { \
+ return LAPACKE_##prefix##gelqf(matrix_layout, m, n, a, lda, tau); \
+ } \
+ *work = 0; \
+ return 0; \
+ }
+ MXNET_LAPACK_CWRAP_GELQF(s, float)
+ MXNET_LAPACK_CWRAP_GELQF(d, double)
+
+ #define MXNET_LAPACK_CWRAP_ORGLQ(prefix, dtype) \
+ inline int MXNET_LAPACK_##prefix##orglq(int matrix_layout, int m, int n, \
+ dtype *a, int lda, dtype* tau, \
+ dtype* work, int lwork) { \
+ if (lwork != -1) { \
+ return LAPACKE_##prefix##orglq(matrix_layout, m, n, m, a, lda, tau); \
+ } \
+ *work = 0; \
+ return 0; \
+ }
+ MXNET_LAPACK_CWRAP_ORGLQ(s, float)
+ MXNET_LAPACK_CWRAP_ORGLQ(d, double)
+
+ // This has to be called internally in COL_MAJOR format even when matrix_layout
+ // is row-major as otherwise the eigenvectors would be returned as cols in a
+ // row-major matrix layout (see MKL documentation).
+ // We also have to allocate at least one DType element as workspace,
+ // because the calling code assumes that the workspace has at least that size.
+ #define MXNET_LAPACK_CWRAP_SYEVD(prefix, dtype) \
+ inline int MXNET_LAPACK_##prefix##syevd(int matrix_layout, char uplo, int n, dtype *a, \
+ int lda, dtype *w, dtype *work, int lwork, \
+ int *iwork, int liwork) { \
+ if (lwork != -1) { \
+ char o(loup(uplo, (matrix_layout == MXNET_LAPACK_ROW_MAJOR))); \
+ return LAPACKE_##prefix##syevd(LAPACK_COL_MAJOR, 'V', o, n, a, lda, w); \
+ } \
+ *work = 1; \
+ *iwork = 0; \
+ return 0; \
+ }
+ MXNET_LAPACK_CWRAP_SYEVD(s, float)
+ MXNET_LAPACK_CWRAP_SYEVD(d, double)
+
+#elif MXNET_USE_LAPACK
+
+ #define MXNET_LAPACK_ROW_MAJOR 101
+ #define MXNET_LAPACK_COL_MAJOR 102
// These functions can be called with either row- or col-major format.
#define MXNET_LAPACK_CWRAPPER1(func, dtype) \
@@ -271,6 +341,9 @@ inline void flip<cpu, double>(int m, int n,
" Ensure that lapack library is installed and build with USE_LAPACK=1 to get lapack" \
" functionalities.")
+ #define MXNET_LAPACK_ROW_MAJOR 101
+ #define MXNET_LAPACK_COL_MAJOR 102
+
// Define compilable stubs.
#define MXNET_LAPACK_CWRAPPER1(func, dtype) \
inline int MXNET_LAPACK_##func(int matrix_layout, char uplo, int n, dtype* a, int lda) { \
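For context, the gelqf/orglq/syevd wrappers above exist because mxnet's calling code uses LAPACK's two-phase workspace-query convention: a first call with lwork == -1 only reports the required workspace size in work[0], and a second call performs the actual computation. MKL's LAPACKE functions allocate their own workspace internally, which is why the wrappers answer the query with 0 (or 1 for syevd) and ignore the buffer on the real call. A self-contained sketch of that calling pattern; the stub below merely stands in for the wrapper generated by MXNET_LAPACK_CWRAP_GELQF:

#include <vector>

// Stand-in for the wrapped LAPACKE call; same signature as the
// MXNET_LAPACK_sgelqf generated in c_lapack_api.h.
int MXNET_LAPACK_sgelqf(int matrix_layout, int m, int n, float* a, int lda,
                        float* tau, float* work, int lwork) {
  if (lwork == -1) {  // workspace query: MKL manages its own workspace,
    *work = 0;        // so just report that none is needed
    return 0;
  }
  (void)matrix_layout; (void)m; (void)n; (void)a; (void)lda; (void)tau;
  return 0;           // the real wrapper forwards to LAPACKE_sgelqf here
}

int main() {
  const int m = 2, n = 3;  // LQ factorization requires m <= n
  std::vector<float> a(m * n, 1.0f), tau(m);
  float query = 0.0f;
  // phase 1: lwork == -1, the required workspace size comes back in `query`
  MXNET_LAPACK_sgelqf(101 /* row major */, m, n, a.data(), n, tau.data(),
                      &query, -1);
  // phase 2: allocate at least one element and run the actual factorization
  std::vector<float> work(query > 0.0f ? static_cast<int>(query) : 1);
  return MXNET_LAPACK_sgelqf(101, m, n, a.data(), n, tau.data(),
                             work.data(), static_cast<int>(work.size()));
}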
diff --git a/src/operator/linalg_impl.h b/src/operator/linalg_impl.h
index b1b35cf..b3e6573 100644
--- a/src/operator/linalg_impl.h
+++ b/src/operator/linalg_impl.h
@@ -56,7 +56,7 @@ inline void check_gemm(const Tensor<xpu, 2, DType>& A, const Tensor<xpu, 2, DTyp
<< "Non compatible matrix dimensions between inputs A and B for gemm";
}
-#if MSHADOW_USE_CBLAS == 1
+#if (MSHADOW_USE_CBLAS == 1 || MSHADOW_USE_MKL == 1)
#define LINALG_CPU_GEMM(fname, DType) \
template<> inline \
@@ -98,7 +98,7 @@ void linalg_batch_gemm<cpu, DType>(const Tensor<cpu, 3, DType>& A, const Tensor<
LOG(FATAL) << "linalg_batch_gemm not implemented by mxnet for cpu, needs cblas!"; \
}
-#endif // MSHADOW_USE_CBLAS == 1
+#endif // MSHADOW_USE_CBLAS == 1 || MSHADOW_USE_MKL == 1
LINALG_CPU_GEMM(sgemm, float)
LINALG_CPU_GEMM(dgemm, double)
@@ -253,7 +253,7 @@ inline void check_trsm(const Tensor<xpu, 2, DType>& A, const Tensor<xpu, 2, DTyp
<< "Non compatible matrix dimensions between inputs A and B for trsm";
}
-#if MSHADOW_USE_CBLAS == 1
+#if (MSHADOW_USE_CBLAS == 1 || MSHADOW_USE_MKL == 1)
#define LINALG_CPU_TRSM(fname, DType) \
template<> inline \
@@ -292,7 +292,7 @@ void linalg_batch_trsm<cpu, DType>(const Tensor<cpu, 3, DType>& A, const Tensor<
LOG(FATAL) << "linalg_batch_trsm not implemented, needs cblas!"; \
}
-#endif // MSHADOW_USE_CBLAS == 1
+#endif // MSHADOW_USE_CBLAS == 1 || MSHADOW_USE_MKL == 1
LINALG_CPU_TRSM(strsm, float)
LINALG_CPU_TRSM(dtrsm, double)
@@ -389,7 +389,7 @@ inline void linalg_gemm(const Tensor<xpu, 2, DType>& A,
}
}
-#if MSHADOW_USE_CBLAS == 0
+#if (MSHADOW_USE_CBLAS == 0 && MSHADOW_USE_MKL == 0)
// A template for a cpu linalg_gemm implementation using mshadow::dot()
#define LINALG_CPU_GEMM_NO_CBLAS(DType) \
@@ -443,7 +443,7 @@ void linalg_gemm<cpu, DType>(const Tensor<cpu, 2, DType>& A, \
LINALG_CPU_GEMM_NO_CBLAS(float)
LINALG_CPU_GEMM_NO_CBLAS(double)
-#endif // (MSHADOW_USE_CBLAS == 0)
+#endif // (MSHADOW_USE_CBLAS == 0 && MSHADOW_USE_MKL == 0)
//////////////////////////////// TRMM ////////////////////////////////////////////
@@ -463,7 +463,7 @@ inline void check_trmm(const Tensor<xpu, 2, DType>& A, const Tensor<xpu, 2, DTyp
<< "Non compatible matrix dimensions between inputs A and B for trmm";
}
-#if MSHADOW_USE_CBLAS == 1
+#if (MSHADOW_USE_CBLAS == 1 || MSHADOW_USE_MKL == 1)
#define LINALG_CPU_TRMM(fname, DType) \
template<> inline \
@@ -485,7 +485,7 @@ void linalg_trmm<cpu, DType>(const Tensor<cpu, 2, DType>& A, const Tensor<cpu, 2
LOG(FATAL) << "linalg_trmm not implemented, needs cblas!"; \
}
-#endif // MSHADOW_USE_CBLAS == 1
+#endif // MSHADOW_USE_CBLAS == 1 || MSHADOW_USE_MKL == 1
#define LINALG_XPU_BATCH_TRMM(xpu, DType) \
template<> inline \
@@ -735,7 +735,7 @@ void check_syrk(const Tensor<xpu, 2, DType>& A, const Tensor<xpu, 2, DType>& B,
<< "Non compatible matrix dimensions between inputs A and B for syrk";
}
-#if MSHADOW_USE_CBLAS == 1
+#if (MSHADOW_USE_CBLAS == 1 || MSHADOW_USE_MKL == 1)
#define LINALG_CPU_SYRK(fname, DType) \
template<> inline \
@@ -758,7 +758,7 @@ void linalg_syrk<cpu, DType>(const Tensor<cpu, 2, DType>& A, \
LOG(FATAL) << "linalg_syrk not implemented by mxnet for cpu, needs cblas!"; \
}
-#endif // MSHADOW_USE_CBLAS == 1
+#endif // MSHADOW_USE_CBLAS == 1 || MSHADOW_USE_MKL == 1
#define LINALG_XPU_BATCH_SYRK(xpu, DType) \
template<> inline \
@@ -811,7 +811,7 @@ void check_gelqf(const Tensor<xpu, 2, DType>& A,
// Any checking that helps user debug potential problems.
CHECK_LE(A.size(0), A.size(1))
<< "A must have num(rows) <= num(columns)";
- CHECK_LT(A.size(0), work.size(0))
+ CHECK_LE(A.size(0), work.size(0))
<< "Size of work is too small";
}
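The linalg_impl.h hunks all apply the same idea: MKL exposes the cblas interface as well, so every CPU path previously guarded by MSHADOW_USE_CBLAS alone now also compiles when MSHADOW_USE_MKL is set, and the mshadow::dot() fallback for gemm is built only when neither is available. A compressed sketch of that compile-time selection (function name and messages are illustrative):

#include <cstdio>

#define MSHADOW_USE_CBLAS 0
#define MSHADOW_USE_MKL 1  // e.g. a USE_BLAS = mkl build

#if (MSHADOW_USE_CBLAS == 1 || MSHADOW_USE_MKL == 1)
// BLAS-backed path; the real code forwards to cblas_sgemm & friends,
// which MKL provides just like a standalone CBLAS does
void cpu_gemm() { std::printf("using the cblas/MKL gemm path\n"); }
#else
// compiled only when no cblas interface exists at all; the real code
// falls back to mshadow::dot() for gemm and LOG(FATAL)s for the rest
void cpu_gemm() { std::printf("using the mshadow::dot fallback\n"); }
#endif

int main() { cpu_gemm(); }

The final hunk is independent of the MKL guards: check_gelqf now accepts a workspace of exactly A.size(0) elements (CHECK_LE instead of CHECK_LT), where the previous check rejected the exact size.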
--