You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemds.apache.org by mb...@apache.org on 2021/06/18 23:27:38 UTC

[systemds] branch master updated: [SYSTEMDS-3033] Fix native BLAS tsmm right (nnz compute, lda param)

This is an automated email from the ASF dual-hosted git repository.

mboehm7 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/master by this push:
     new effb11b  [SYSTEMDS-3033] Fix native BLAS tsmm right (nnz compute, lda param)
effb11b is described below

commit effb11b9fb4c4e0a0297463a088243a5005a452e
Author: Matthias Boehm <mb...@gmail.com>
AuthorDate: Sat Jun 19 01:26:52 2021 +0200

    [SYSTEMDS-3033] Fix native BLAS tsmm right (nnz compute, lda param)
    
    This patch fixes issues with the rather uncommon native BLAS tsmm-right
    (e.g., in dist()), where invalid parameters led to incorrect results and
    corrupted nnz (and thus index out of bounds exceptions on dense to
    sparse conversion). Furthermore, the nnz computation ran over the size
    of the allocated output array, causing occasional segmentation faults.
    This patch fixes the issues, which also makes the common tsmm-left
    slightly faster (less nnz computation).
---
 src/main/cpp/lib/libsystemds_mkl-Linux-x86_64.so   | Bin 39384 -> 39384 bytes
 .../cpp/lib/libsystemds_openblas-Linux-x86_64.so   | Bin 43144 -> 43144 bytes
 src/main/cpp/libmatrixmult.cpp                     |   2 +-
 src/main/cpp/systemds.cpp                          |  42 +++++++++++----------
 4 files changed, 23 insertions(+), 21 deletions(-)

diff --git a/src/main/cpp/lib/libsystemds_mkl-Linux-x86_64.so b/src/main/cpp/lib/libsystemds_mkl-Linux-x86_64.so
index f6b495a..a677b94 100644
Binary files a/src/main/cpp/lib/libsystemds_mkl-Linux-x86_64.so and b/src/main/cpp/lib/libsystemds_mkl-Linux-x86_64.so differ
diff --git a/src/main/cpp/lib/libsystemds_openblas-Linux-x86_64.so b/src/main/cpp/lib/libsystemds_openblas-Linux-x86_64.so
index e953e7f..227443e 100644
Binary files a/src/main/cpp/lib/libsystemds_openblas-Linux-x86_64.so and b/src/main/cpp/lib/libsystemds_openblas-Linux-x86_64.so differ
diff --git a/src/main/cpp/libmatrixmult.cpp b/src/main/cpp/libmatrixmult.cpp
index a8ace72..cb387ef 100644
--- a/src/main/cpp/libmatrixmult.cpp
+++ b/src/main/cpp/libmatrixmult.cpp
@@ -54,6 +54,6 @@ void tsmm(double *m1Ptr, double *retPtr, int m1rlen, int m1clen, bool leftTrans,
     int n = leftTrans ? m1clen : m1rlen;
     int k = leftTrans ? m1rlen : m1clen;
     cblas_dsyrk(CblasRowMajor, CblasUpper,
-      leftTrans ? CblasTrans : CblasNoTrans, n, k, 1, m1Ptr, n, 0, retPtr, n);
+      leftTrans ? CblasTrans : CblasNoTrans, n, k, 1, m1Ptr, m1clen, 0, retPtr, n);
   }
 }
diff --git a/src/main/cpp/systemds.cpp b/src/main/cpp/systemds.cpp
index 47b6e8f..bed1d42 100644
--- a/src/main/cpp/systemds.cpp
+++ b/src/main/cpp/systemds.cpp
@@ -95,10 +95,12 @@ JNIEXPORT jlong JNICALL Java_org_apache_sysds_utils_NativeHelper_tsmm
   double* m1Ptr = GET_DOUBLE_ARRAY(env, m1, numThreads);
   double* retPtr = GET_DOUBLE_ARRAY(env, ret, numThreads);
   if(m1Ptr == NULL || retPtr == NULL)
-  	return -1;
+    return -1;
 
   tsmm(m1Ptr, retPtr, (int)m1rlen, (int)m1clen, (bool)leftTrans, (int)numThreads);
-  size_t nnz = computeNNZ<double>(retPtr, m1rlen * m1clen);
+
+  int n = leftTrans ? m1clen : m1rlen;
+  size_t nnz = computeNNZ<double>(retPtr, n * n);
 
   RELEASE_INPUT_ARRAY(env, m1, m1Ptr, numThreads);
   RELEASE_ARRAY(env, ret, retPtr, numThreads);
@@ -201,27 +203,27 @@ JNIEXPORT jlong JNICALL Java_org_apache_sysds_utils_NativeHelper_sconv2dBiasAddD
     return -1;
   
   size_t nnz = sconv2dBiasAddDense(inputPtr, biasPtr, filterPtr, retPtr, (int) N, (int) C, (int) H, (int) W, (int) K,
-		(int) R, (int) S, (int) stride_h, (int) stride_w, (int) pad_h, (int) pad_w, (int) P,
+    (int) R, (int) S, (int) stride_h, (int) stride_w, (int) pad_h, (int) pad_w, (int) P,
 		(int) Q, true, (int) numThreads);
 
   return static_cast<jlong>(nnz);
 }
 
 JNIEXPORT jlong JNICALL Java_org_apache_sysds_utils_NativeHelper_conv2dBackwardDataDense(
-		JNIEnv* env, jclass, jdoubleArray filter, jdoubleArray dout,
-		jdoubleArray ret, jint N, jint C, jint H, jint W, jint K, jint R, jint S,
-		jint stride_h, jint stride_w, jint pad_h, jint pad_w, jint P, jint Q, jint numThreads) {
-  
+    JNIEnv* env, jclass, jdoubleArray filter, jdoubleArray dout,
+    jdoubleArray ret, jint N, jint C, jint H, jint W, jint K, jint R, jint S,
+    jint stride_h, jint stride_w, jint pad_h, jint pad_w, jint P, jint Q, jint numThreads) {
+
   double* filterPtr = GET_DOUBLE_ARRAY(env, filter, numThreads);
   double* doutPtr = GET_DOUBLE_ARRAY(env, dout, numThreads);
   double* retPtr = GET_DOUBLE_ARRAY(env, ret, numThreads);
   if(doutPtr == NULL || filterPtr == NULL || retPtr == NULL)
-  	return -1;
-  
+    return -1;
+
   size_t nnz = conv2dBackwardDataDense(filterPtr, doutPtr, retPtr, (int) N, (int) C, (int) H, (int) W, (int) K,
-		(int) R, (int) S, (int) stride_h, (int) stride_w, (int) pad_h, (int) pad_w,
-		(int) P, (int) Q, (int) numThreads);
-  
+    (int) R, (int) S, (int) stride_h, (int) stride_w, (int) pad_h, (int) pad_w,
+    (int) P, (int) Q, (int) numThreads);
+
   RELEASE_INPUT_ARRAY(env, filter, filterPtr, numThreads);
   RELEASE_INPUT_ARRAY(env, dout, doutPtr, numThreads);
   RELEASE_ARRAY(env, ret, retPtr, numThreads);
@@ -229,19 +231,19 @@ JNIEXPORT jlong JNICALL Java_org_apache_sysds_utils_NativeHelper_conv2dBackwardD
 }
 
 JNIEXPORT jlong JNICALL Java_org_apache_sysds_utils_NativeHelper_conv2dBackwardFilterDense(
-		JNIEnv* env, jclass, jdoubleArray input, jdoubleArray dout,
-		jdoubleArray ret, jint N, jint C, jint H, jint W, jint K, jint R, jint S,
-		jint stride_h, jint stride_w, jint pad_h, jint pad_w, jint P, jint Q, jint numThreads) {
+    JNIEnv* env, jclass, jdoubleArray input, jdoubleArray dout,
+    jdoubleArray ret, jint N, jint C, jint H, jint W, jint K, jint R, jint S,
+    jint stride_h, jint stride_w, jint pad_h, jint pad_w, jint P, jint Q, jint numThreads) {
   double* inputPtr = GET_DOUBLE_ARRAY(env, input, numThreads);
   double* doutPtr = GET_DOUBLE_ARRAY(env, dout, numThreads);
   double* retPtr = GET_DOUBLE_ARRAY(env, ret, numThreads);
   if(doutPtr == NULL || inputPtr == NULL || retPtr == NULL)
-  	return -1;
-  
+    return -1;
+
   size_t nnz = conv2dBackwardFilterDense(inputPtr, doutPtr, retPtr, (int)N, (int) C, (int) H, (int) W, (int) K, (int) R,
-		(int) S, (int) stride_h, (int) stride_w, (int) pad_h, (int) pad_w, (int) P,
-		(int) Q, (int) numThreads);
-  
+    (int) S, (int) stride_h, (int) stride_w, (int) pad_h, (int) pad_w, (int) P,
+    (int) Q, (int) numThreads);
+
   RELEASE_INPUT_ARRAY(env, input, inputPtr, numThreads);
   RELEASE_INPUT_ARRAY(env, dout, doutPtr, numThreads);
   RELEASE_ARRAY(env, ret, retPtr, numThreads);