You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by ni...@apache.org on 2017/05/09 03:57:09 UTC
incubator-systemml git commit: [SYSTEMML-540] [MINOR] Added
additional performance counters for native invocation and improved
SGDNesterov
Repository: incubator-systemml
Updated Branches:
refs/heads/master b8de68b74 -> 5489c665d
[SYSTEMML-540] [MINOR] Added additional performance counters for native invocation and improved SGDNesterov
Closes #485.
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/5489c665
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/5489c665
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/5489c665
Branch: refs/heads/master
Commit: 5489c665dcb434d88c165492557ef14ad284b69f
Parents: b8de68b
Author: Niketan Pansare <np...@us.ibm.com>
Authored: Mon May 8 20:55:24 2017 -0700
Committer: Niketan Pansare <np...@us.ibm.com>
Committed: Mon May 8 20:56:11 2017 -0700
----------------------------------------------------------------------
.../cpp/lib/libsystemml_mkl-Linux-x86_64.so | Bin 27408 -> 31824 bytes
.../lib/libsystemml_openblas-Linux-x86_64.so | Bin 27416 -> 27520 bytes
src/main/cpp/libmatrixdnn.cpp | 54 ++++++++++------
src/main/cpp/libmatrixdnn.h | 6 +-
src/main/cpp/systemml.cpp | 32 +++++-----
src/main/cpp/systemml.h | 29 ++++-----
.../sysml/runtime/matrix/data/LibMatrixDNN.java | 24 ++++---
.../runtime/matrix/data/LibMatrixNative.java | 63 +++++++++++++------
.../sysml/runtime/matrix/data/MatrixBlock.java | 14 +++++
.../apache/sysml/udf/lib/SGDNesterovUpdate.java | 13 ++--
.../org/apache/sysml/utils/NativeHelper.java | 9 +--
.../java/org/apache/sysml/utils/Statistics.java | 45 +++++++++++--
.../org/apache/sysml/api/dl/Caffe2DML.scala | 2 +-
.../org/apache/sysml/api/dl/CaffeSolver.scala | 9 ++-
14 files changed, 199 insertions(+), 101 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/5489c665/src/main/cpp/lib/libsystemml_mkl-Linux-x86_64.so
----------------------------------------------------------------------
diff --git a/src/main/cpp/lib/libsystemml_mkl-Linux-x86_64.so b/src/main/cpp/lib/libsystemml_mkl-Linux-x86_64.so
index 0a6427a..a740930 100755
Binary files a/src/main/cpp/lib/libsystemml_mkl-Linux-x86_64.so and b/src/main/cpp/lib/libsystemml_mkl-Linux-x86_64.so differ
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/5489c665/src/main/cpp/lib/libsystemml_openblas-Linux-x86_64.so
----------------------------------------------------------------------
diff --git a/src/main/cpp/lib/libsystemml_openblas-Linux-x86_64.so b/src/main/cpp/lib/libsystemml_openblas-Linux-x86_64.so
index ffdcd5a..dfd1ecb 100755
Binary files a/src/main/cpp/lib/libsystemml_openblas-Linux-x86_64.so and b/src/main/cpp/lib/libsystemml_openblas-Linux-x86_64.so differ
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/5489c665/src/main/cpp/libmatrixdnn.cpp
----------------------------------------------------------------------
diff --git a/src/main/cpp/libmatrixdnn.cpp b/src/main/cpp/libmatrixdnn.cpp
index a521804..ed02042 100644
--- a/src/main/cpp/libmatrixdnn.cpp
+++ b/src/main/cpp/libmatrixdnn.cpp
@@ -27,6 +27,14 @@
#include <cstring>
#include "omp.h"
+int computeNNZ(double* arr, int limit) {
+ int nnz = 0;
+ #pragma omp parallel for reduction(+: nnz)
+ for(int i=0; i<limit; i++)
+ nnz += (arr[i]!=0) ? 1 : 0;
+ return nnz;
+}
+
void rotate180(double* inputArray, double* outputArray, int N, int C, int H, int W,
int K, int R, int S, int stride_h, int stride_w, int pad_h,
int pad_w, int P, int Q) {
@@ -124,7 +132,7 @@ void im2col(double* inputArray, double* outputArray, int N, int C, int H, int W,
}
-void conv2dBackwardFilterDense(double* inputPtr, double* doutPtr, double* retPtr, int N, int C, int H, int W, int K, int R, int S,
+int conv2dBackwardFilterDense(double* inputPtr, double* doutPtr, double* retPtr, int N, int C, int H, int W, int K, int R, int S,
int stride_h, int stride_w, int pad_h, int pad_w, int P, int Q, int numThreads) {
// First step: Avoids oversubscription and other openmp/internal blas threading issues
setNumThreadsForBLAS(1);
@@ -150,7 +158,8 @@ void conv2dBackwardFilterDense(double* inputPtr, double* doutPtr, double* retPtr
#pragma omp parallel for num_threads(numOpenMPThreads)
for (int n = 0; n < N; n++) {
- double* loweredMat = loweredMatArrays + numIm2ColElem*omp_get_thread_num();
+ int threadID = omp_get_thread_num();
+ double* loweredMat = loweredMatArrays + numIm2ColElem*threadID;
// Step 1: Perform im2col
im2col(inputPtr + n * CHW, loweredMat, 1, C, H, W, K,
@@ -158,18 +167,22 @@ void conv2dBackwardFilterDense(double* inputPtr, double* doutPtr, double* retPtr
P, Q);
// Step 2: Rotate dout
- double* rotatedDoutPtr = rotatedDoutPtrArrays + numRotatedElem*omp_get_thread_num();
+ double* rotatedDoutPtr = rotatedDoutPtrArrays + numRotatedElem*threadID;
rotate180(doutPtr + n * KPQ, rotatedDoutPtr, 1, C, H, W, K,
R, S, stride_h, stride_w, pad_h, pad_w,
P, Q);
- // Multiply to get CRS X K
- double* temp1 = temp + numTempElem*omp_get_thread_num();
- // Step 3: loweredMat (CRS X PQ) %*% rotated_dout (PQ X K)
- matmult(loweredMat, rotatedDoutPtr, temp1, C * R * S, P * Q, K, 1);
-
+ // Multiply to get tmp1 = CRS X K
+ double* temp1 = temp + numTempElem*threadID;
+ // Step 3: temp1 = alpha * (loweredMat (CRS X PQ) %*% rotated_dout (PQ X K)) + beta*temp1
+ int m1rlen = C * R * S; int m1clen = P * Q; int m2clen = K;
+ double* m1Ptr = loweredMat; double* m2Ptr = rotatedDoutPtr; double alpha = 1; double beta = 1;
+ cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, m1rlen, m2clen, m1clen, alpha, m1Ptr, m1clen, m2Ptr, m2clen, beta, temp1, m2clen);
} // end omp parallel for
+ delete [] loweredMatArrays;
+ delete [] rotatedDoutPtrArrays;
+
// Inplace transpose addition
int numRow = CRS;
for(int t = 0; t < numOpenMPThreads; t++) {
@@ -184,11 +197,10 @@ void conv2dBackwardFilterDense(double* inputPtr, double* doutPtr, double* retPtr
}
delete [] temp;
- delete [] loweredMatArrays;
- delete [] rotatedDoutPtrArrays;
+ return computeNNZ(retPtr, K*CRS);
}
-void conv2dBackwardDataDense(double* filterPtr, double* doutPtr, double* retPtr, int N, int C, int H, int W, int K, int R, int S,
+int conv2dBackwardDataDense(double* filterPtr, double* doutPtr, double* retPtr, int N, int C, int H, int W, int K, int R, int S,
int stride_h, int stride_w, int pad_h, int pad_w, int P, int Q, int numThreads) {
// First step: Avoids oversubscription and other openmp/internal blas threading issues
setNumThreadsForBLAS(1);
@@ -207,27 +219,28 @@ void conv2dBackwardDataDense(double* filterPtr, double* doutPtr, double* retPtr,
#pragma omp parallel for num_threads(numOpenMPThreads)
for (int n = 0; n < N; n++) {
+ int threadID = omp_get_thread_num();
// Step 1: Rotate dout
- double* rotatedDoutPtr = rotatedDoutPtrArrays + numRotatedElem*omp_get_thread_num();
+ double* rotatedDoutPtr = rotatedDoutPtrArrays + numRotatedElem*threadID;
rotate180(doutPtr + n * KPQ, rotatedDoutPtr, 1, C, H, W, K,
R, S, stride_h, stride_w, pad_h, pad_w,
P, Q);
// Step 2: t(rotatedDout (PQ X K) %*% filter (K X CRS))
- double* col2imInput = col2imInputArrays + numCol2ImElem*omp_get_thread_num();
+ double* col2imInput = col2imInputArrays + numCol2ImElem*threadID;
matmult(rotatedDoutPtr, filterPtr, col2imInput,
PQ, K, CRS, 1);
// Step 3: Perform col2im
- col2im(col2imInput, retPtr + n * CHW, 1, C, H, W, K,
+ double* outputArr = retPtr + n * CHW;
+ col2im(col2imInput, outputArr, 1, C, H, W, K,
R, S, stride_h, stride_w, pad_h, pad_w,
P, Q);
-
} // end omp parallel for
delete [] rotatedDoutPtrArrays;
delete [] col2imInputArrays;
-
+ return computeNNZ(retPtr, N*CHW);
}
void conv2dSparse(int apos, int alen, int* aix, double* avals, double* filterPtr, double* retPtr, int N, int C, int H, int W,
@@ -290,7 +303,8 @@ void conv2dBackwardFilterSparseDense(int apos, int alen, int* aix, double* avals
delete [] temp1;
}
-void conv2dBiasAddDense(double* inputPtr, double* biasPtr, double* filterPtr, double* retPtr, int N, int C, int H, int W, int K, int R, int S,
+
+int conv2dBiasAddDense(double* inputPtr, double* biasPtr, double* filterPtr, double* retPtr, int N, int C, int H, int W, int K, int R, int S,
int stride_h, int stride_w, int pad_h, int pad_w, int P, int Q, bool addBias, int numThreads) {
// First step: Avoids oversubscription and other openmp/internal blas threading issues
setNumThreadsForBLAS(1);
@@ -306,7 +320,8 @@ void conv2dBiasAddDense(double* inputPtr, double* biasPtr, double* filterPtr, do
#pragma omp parallel for num_threads(numOpenMPThreads)
for (int n = 0; n < N; n++) {
- double* loweredMat = loweredMatArrays + numIm2ColElem*omp_get_thread_num();
+ int threadID = omp_get_thread_num();
+ double* loweredMat = loweredMatArrays + numIm2ColElem*threadID;
// Step 1: Perform im2col
im2col(inputPtr + n * CHW, loweredMat, 1, C, H, W, K,
@@ -318,8 +333,8 @@ void conv2dBiasAddDense(double* inputPtr, double* biasPtr, double* filterPtr, do
C * R * S, P * Q, 1);
// Step 3: Add bias
+ double* outputArr = retPtr + n*KPQ;
if(addBias) {
- double* outputArr = retPtr + n*KPQ;
int index = 0;
for(int k = 0; k < K; k++) {
for(int pq = 0; pq < PQ; pq++, index++) {
@@ -330,4 +345,5 @@ void conv2dBiasAddDense(double* inputPtr, double* biasPtr, double* filterPtr, do
} // end omp parallel for
delete [] loweredMatArrays;
+ return computeNNZ(retPtr, N*KPQ);
}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/5489c665/src/main/cpp/libmatrixdnn.h
----------------------------------------------------------------------
diff --git a/src/main/cpp/libmatrixdnn.h b/src/main/cpp/libmatrixdnn.h
index bf6c113..86e7b24 100644
--- a/src/main/cpp/libmatrixdnn.h
+++ b/src/main/cpp/libmatrixdnn.h
@@ -20,13 +20,13 @@
#ifndef _libmatrixdnn_h
#define _libmatrixdnn_h
-void conv2dBackwardFilterDense(double* inputPtr, double* doutPtr, double* retPtr, int N, int C, int H, int W, int K, int R, int S,
+int conv2dBackwardFilterDense(double* inputPtr, double* doutPtr, double* retPtr, int N, int C, int H, int W, int K, int R, int S,
int stride_h, int stride_w, int pad_h, int pad_w, int P, int Q, int numThreads);
-void conv2dBackwardDataDense(double* filterPtr, double* doutPtr, double* retPtr, int N, int C, int H, int W, int K, int R, int S,
+int conv2dBackwardDataDense(double* filterPtr, double* doutPtr, double* retPtr, int N, int C, int H, int W, int K, int R, int S,
int stride_h, int stride_w, int pad_h, int pad_w, int P, int Q, int numThreads);
-void conv2dBiasAddDense(double* inputPtr, double* biasPtr, double* filterPtr, double* retPtr, int N, int C, int H, int W, int K, int R, int S,
+int conv2dBiasAddDense(double* inputPtr, double* biasPtr, double* filterPtr, double* retPtr, int N, int C, int H, int W, int K, int R, int S,
int stride_h, int stride_w, int pad_h, int pad_w, int P, int Q, bool addBias, int numThreads);
void conv2dSparse(int apos, int alen, int* aix, double* avals, double* filter, double* ret, int N, int C, int H, int W,
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/5489c665/src/main/cpp/systemml.cpp
----------------------------------------------------------------------
diff --git a/src/main/cpp/systemml.cpp b/src/main/cpp/systemml.cpp
index 41ce0bc..34ae248 100644
--- a/src/main/cpp/systemml.cpp
+++ b/src/main/cpp/systemml.cpp
@@ -144,7 +144,7 @@ JNIEXPORT jboolean JNICALL Java_org_apache_sysml_utils_NativeHelper_conv2dBackwa
return (jboolean) true;
}
-JNIEXPORT jboolean JNICALL Java_org_apache_sysml_utils_NativeHelper_conv2dDense(
+JNIEXPORT jint JNICALL Java_org_apache_sysml_utils_NativeHelper_conv2dDense(
JNIEnv* env, jclass, jdoubleArray input, jdoubleArray filter,
jdoubleArray ret, jint N, jint C, jint H, jint W, jint K, jint R, jint S,
jint stride_h, jint stride_w, jint pad_h, jint pad_w, jint P, jint Q, jint numThreads) {
@@ -152,18 +152,18 @@ JNIEXPORT jboolean JNICALL Java_org_apache_sysml_utils_NativeHelper_conv2dDense(
double* filterPtr = GET_DOUBLE_ARRAY(env, filter, numThreads);
double* retPtr = GET_DOUBLE_ARRAY(env, ret, numThreads);
if(inputPtr == NULL || filterPtr == NULL || retPtr == NULL)
- return (jboolean) false;
+ return (jint) -1;
- conv2dBiasAddDense(inputPtr, 0, filterPtr, retPtr, (int) N, (int) C, (int) H, (int) W, (int) K, (int) R, (int) S,
+ int nnz = conv2dBiasAddDense(inputPtr, 0, filterPtr, retPtr, (int) N, (int) C, (int) H, (int) W, (int) K, (int) R, (int) S,
(int) stride_h, (int) stride_w, (int) pad_h, (int) pad_w, (int) P, (int) Q, false, (int) numThreads);
RELEASE_INPUT_DOUBLE_ARRAY(env, input, inputPtr, numThreads);
RELEASE_INPUT_DOUBLE_ARRAY(env, filter, filterPtr, numThreads);
RELEASE_DOUBLE_ARRAY(env, ret, retPtr, numThreads);
- return (jboolean) true;
+ return (jint) nnz;
}
-JNIEXPORT jboolean JNICALL Java_org_apache_sysml_utils_NativeHelper_conv2dBiasAddDense(
+JNIEXPORT jint JNICALL Java_org_apache_sysml_utils_NativeHelper_conv2dBiasAddDense(
JNIEnv* env, jclass, jdoubleArray input, jdoubleArray bias, jdoubleArray filter,
jdoubleArray ret, jint N, jint C, jint H, jint W, jint K, jint R, jint S,
jint stride_h, jint stride_w, jint pad_h, jint pad_w, jint P, jint Q, jint numThreads) {
@@ -173,19 +173,19 @@ JNIEXPORT jboolean JNICALL Java_org_apache_sysml_utils_NativeHelper_conv2dBiasAd
double* filterPtr = GET_DOUBLE_ARRAY(env, filter, numThreads);
double* retPtr = GET_DOUBLE_ARRAY(env, ret, numThreads);
if(inputPtr == NULL || biasPtr == NULL || filterPtr == NULL || retPtr == NULL)
- return (jboolean) false;
+ return (jint) -1;
- conv2dBiasAddDense(inputPtr, biasPtr, filterPtr, retPtr, (int) N, (int) C, (int) H, (int) W, (int) K, (int) R, (int) S,
+ int nnz = conv2dBiasAddDense(inputPtr, biasPtr, filterPtr, retPtr, (int) N, (int) C, (int) H, (int) W, (int) K, (int) R, (int) S,
(int) stride_h, (int) stride_w, (int) pad_h, (int) pad_w, (int) P, (int) Q, true, (int) numThreads);
RELEASE_INPUT_DOUBLE_ARRAY(env, input, inputPtr, numThreads);
RELEASE_INPUT_DOUBLE_ARRAY(env, bias, biasPtr, numThreads);
RELEASE_INPUT_DOUBLE_ARRAY(env, filter, filterPtr, numThreads);
RELEASE_DOUBLE_ARRAY(env, ret, retPtr, numThreads);
- return (jboolean) true;
+ return (jint) nnz;
}
-JNIEXPORT jboolean JNICALL Java_org_apache_sysml_utils_NativeHelper_conv2dBackwardDataDense(
+JNIEXPORT jint JNICALL Java_org_apache_sysml_utils_NativeHelper_conv2dBackwardDataDense(
JNIEnv* env, jclass, jdoubleArray filter, jdoubleArray dout,
jdoubleArray ret, jint N, jint C, jint H, jint W, jint K, jint R, jint S,
jint stride_h, jint stride_w, jint pad_h, jint pad_w, jint P, jint Q, jint numThreads) {
@@ -194,18 +194,18 @@ JNIEXPORT jboolean JNICALL Java_org_apache_sysml_utils_NativeHelper_conv2dBackwa
double* doutPtr = GET_DOUBLE_ARRAY(env, dout, numThreads);
double* retPtr = GET_DOUBLE_ARRAY(env, ret, numThreads);
if(doutPtr == NULL || filterPtr == NULL || retPtr == NULL)
- return (jboolean) false;
+ return (jint) -1;
- conv2dBackwardDataDense(filterPtr, doutPtr, retPtr, (int) N, (int) C, (int) H, (int) W, (int) K, (int) R, (int) S,
+ int nnz = conv2dBackwardDataDense(filterPtr, doutPtr, retPtr, (int) N, (int) C, (int) H, (int) W, (int) K, (int) R, (int) S,
(int) stride_h, (int) stride_w, (int) pad_h, (int) pad_w, (int) P, (int) Q, (int) numThreads);
RELEASE_INPUT_DOUBLE_ARRAY(env, filter, filterPtr, numThreads);
RELEASE_INPUT_DOUBLE_ARRAY(env, dout, doutPtr, numThreads);
RELEASE_DOUBLE_ARRAY(env, ret, retPtr, numThreads);
- return (jboolean) true;
+ return (jint) nnz;
}
-JNIEXPORT jboolean JNICALL Java_org_apache_sysml_utils_NativeHelper_conv2dBackwardFilterDense(
+JNIEXPORT jint JNICALL Java_org_apache_sysml_utils_NativeHelper_conv2dBackwardFilterDense(
JNIEnv* env, jclass, jdoubleArray input, jdoubleArray dout,
jdoubleArray ret, jint N, jint C, jint H, jint W, jint K, jint R, jint S,
jint stride_h, jint stride_w, jint pad_h, jint pad_w, jint P, jint Q, jint numThreads) {
@@ -213,13 +213,13 @@ JNIEXPORT jboolean JNICALL Java_org_apache_sysml_utils_NativeHelper_conv2dBackwa
double* doutPtr = GET_DOUBLE_ARRAY(env, dout, numThreads);
double* retPtr = GET_DOUBLE_ARRAY(env, ret, numThreads);
if(doutPtr == NULL || inputPtr == NULL || retPtr == NULL)
- return (jboolean) false;
+ return (jint) -1;
- conv2dBackwardFilterDense(inputPtr, doutPtr, retPtr, (int) N, (int) C, (int) H, (int) W, (int) K, (int) R, (int) S,
+ int nnz = conv2dBackwardFilterDense(inputPtr, doutPtr, retPtr, (int) N, (int) C, (int) H, (int) W, (int) K, (int) R, (int) S,
(int) stride_h, (int) stride_w, (int) pad_h, (int) pad_w, (int) P, (int) Q, (int) numThreads);
RELEASE_INPUT_DOUBLE_ARRAY(env, input, inputPtr, numThreads);
RELEASE_INPUT_DOUBLE_ARRAY(env, dout, doutPtr, numThreads);
RELEASE_DOUBLE_ARRAY(env, ret, retPtr, numThreads);
- return (jboolean) true;
+ return (jint) nnz;
}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/5489c665/src/main/cpp/systemml.h
----------------------------------------------------------------------
diff --git a/src/main/cpp/systemml.h b/src/main/cpp/systemml.h
index ac36495..f6f5cd2 100644
--- a/src/main/cpp/systemml.h
+++ b/src/main/cpp/systemml.h
@@ -45,49 +45,49 @@ JNIEXPORT jboolean JNICALL Java_org_apache_sysml_utils_NativeHelper_tsmm
/*
* Class: org_apache_sysml_utils_NativeHelper
* Method: conv2dDense
- * Signature: ([D[D[DIIIIIIIIIIIIII)Z
+ * Signature: ([D[D[DIIIIIIIIIIIIII)I
*/
-JNIEXPORT jboolean JNICALL Java_org_apache_sysml_utils_NativeHelper_conv2dDense
+JNIEXPORT jint JNICALL Java_org_apache_sysml_utils_NativeHelper_conv2dDense
(JNIEnv *, jclass, jdoubleArray, jdoubleArray, jdoubleArray, jint, jint, jint, jint, jint, jint, jint, jint, jint, jint, jint, jint, jint, jint);
/*
* Class: org_apache_sysml_utils_NativeHelper
* Method: conv2dBiasAddDense
- * Signature: ([D[D[D[DIIIIIIIIIIIIII)Z
+ * Signature: ([D[D[D[DIIIIIIIIIIIIII)I
*/
-JNIEXPORT jboolean JNICALL Java_org_apache_sysml_utils_NativeHelper_conv2dBiasAddDense
+JNIEXPORT jint JNICALL Java_org_apache_sysml_utils_NativeHelper_conv2dBiasAddDense
(JNIEnv *, jclass, jdoubleArray, jdoubleArray, jdoubleArray, jdoubleArray, jint, jint, jint, jint, jint, jint, jint, jint, jint, jint, jint, jint, jint, jint);
/*
* Class: org_apache_sysml_utils_NativeHelper
- * Method: conv2dBackwardDataDense
- * Signature: ([D[D[DIIIIIIIIIIIIII)Z
+ * Method: conv2dBackwardFilterDense
+ * Signature: ([D[D[DIIIIIIIIIIIIII)I
*/
-JNIEXPORT jboolean JNICALL Java_org_apache_sysml_utils_NativeHelper_conv2dBackwardDataDense
+JNIEXPORT jint JNICALL Java_org_apache_sysml_utils_NativeHelper_conv2dBackwardFilterDense
(JNIEnv *, jclass, jdoubleArray, jdoubleArray, jdoubleArray, jint, jint, jint, jint, jint, jint, jint, jint, jint, jint, jint, jint, jint, jint);
/*
* Class: org_apache_sysml_utils_NativeHelper
- * Method: conv2dBackwardFilterDense
- * Signature: ([D[D[DIIIIIIIIIIIIII)Z
+ * Method: conv2dBackwardDataDense
+ * Signature: ([D[D[DIIIIIIIIIIIIII)I
*/
-JNIEXPORT jboolean JNICALL Java_org_apache_sysml_utils_NativeHelper_conv2dBackwardFilterDense
+JNIEXPORT jint JNICALL Java_org_apache_sysml_utils_NativeHelper_conv2dBackwardDataDense
(JNIEnv *, jclass, jdoubleArray, jdoubleArray, jdoubleArray, jint, jint, jint, jint, jint, jint, jint, jint, jint, jint, jint, jint, jint, jint);
/*
* Class: org_apache_sysml_utils_NativeHelper
- * Method: conv2dSparse
+ * Method: conv2dBackwardFilterSparseDense
* Signature: (II[I[D[D[DIIIIIIIIIIIIII)Z
*/
-JNIEXPORT jboolean JNICALL Java_org_apache_sysml_utils_NativeHelper_conv2dSparse
+JNIEXPORT jboolean JNICALL Java_org_apache_sysml_utils_NativeHelper_conv2dBackwardFilterSparseDense
(JNIEnv *, jclass, jint, jint, jintArray, jdoubleArray, jdoubleArray, jdoubleArray, jint, jint, jint, jint, jint, jint, jint, jint, jint, jint, jint, jint, jint, jint);
/*
* Class: org_apache_sysml_utils_NativeHelper
- * Method: conv2dBackwardFilterSparse
+ * Method: conv2dSparse
* Signature: (II[I[D[D[DIIIIIIIIIIIIII)Z
*/
-JNIEXPORT jboolean JNICALL Java_org_apache_sysml_utils_NativeHelper_conv2dBackwardFilterSparseDense
+JNIEXPORT jboolean JNICALL Java_org_apache_sysml_utils_NativeHelper_conv2dSparse
(JNIEnv *, jclass, jint, jint, jintArray, jdoubleArray, jdoubleArray, jdoubleArray, jint, jint, jint, jint, jint, jint, jint, jint, jint, jint, jint, jint, jint, jint);
/*
@@ -103,4 +103,5 @@ JNIEXPORT void JNICALL Java_org_apache_sysml_utils_NativeHelper_setMaxNumThreads
#endif
#endif
+
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/5489c665/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
index e4d3ba2..ab82697 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
@@ -84,7 +84,7 @@ public class LibMatrixDNN {
private static AtomicLong loopedConvBwdDataCol2ImTime = new AtomicLong(0);
public static void appendStatistics(StringBuilder sb) {
- if(DMLScript.STATISTICS && DISPLAY_STATISTICS && (conv2dDenseCount.get() != 0 || conv2dSparseCount.get() != 0)) {
+ if(DMLScript.STATISTICS && DISPLAY_STATISTICS) {
sb.append("LibMatrixDNN dense count (conv/bwdF/bwdD/im2col/maxBwd):\t"
+ conv2dDenseCount.get() + "/"
+ conv2dBwdFilterDenseCount.get() + "/"
@@ -97,15 +97,13 @@ public class LibMatrixDNN {
+ conv2dBwdDataSparseCount.get() + "/"
+ im2colSparseCount.get() + "/"
+ maxPoolBwdSparseCount.get() + ".\n");
- if(loopedConvMatMultTime.get() != 0 || loopedConvIm2ColTime.get() != 0) {
- sb.append("LibMatrixDNN conv(im2col/matmult), bwdF (im2col/matmult), bwdD (col2im/matmult) time:\t" +
- String.format("%.3f", loopedConvIm2ColTime.get()*1e-9) + "/" +
- String.format("%.3f", loopedConvMatMultTime.get()*1e-9) + "/" +
- String.format("%.3f", loopedConvBwdFilterIm2ColTime.get()*1e-9) + "/" +
- String.format("%.3f", loopedConvBwdFilterMatMultTime.get()*1e-9) + "/" +
- String.format("%.3f", loopedConvBwdDataCol2ImTime.get()*1e-9) + "/" +
- String.format("%.3f", loopedConvBwdDataMatMultTime.get()*1e-9) + " sec.\n");
- }
+ sb.append("LibMatrixDNN conv(im2col/matmult), bwdF (im2col/matmult), bwdD (col2im/matmult) time:\t" +
+ String.format("%.3f", loopedConvIm2ColTime.get()*1e-9) + "/" +
+ String.format("%.3f", loopedConvMatMultTime.get()*1e-9) + "/" +
+ String.format("%.3f", loopedConvBwdFilterIm2ColTime.get()*1e-9) + "/" +
+ String.format("%.3f", loopedConvBwdFilterMatMultTime.get()*1e-9) + "/" +
+ String.format("%.3f", loopedConvBwdDataCol2ImTime.get()*1e-9) + "/" +
+ String.format("%.3f", loopedConvBwdDataMatMultTime.get()*1e-9) + " sec.\n");
}
}
public static void resetStatistics() {
@@ -158,7 +156,7 @@ public class LibMatrixDNN {
params.bias.sparseToDense(); // Since bias is extremely small array
if(isEligibleForConv2dSparse(params))
- Statistics.numNativeLibMatrixDNNCalls.increment();
+ Statistics.numNativeSparseConv2dCalls.increment();
runConvTask(TaskType.LoopedIm2ColConv2d, params);
@@ -179,7 +177,7 @@ public class LibMatrixDNN {
checkInputsConv2dBackwardData(filter, dout, outputBlock, params);
if(isEligibleForConv2dBackwardDataDense(params))
- Statistics.numNativeLibMatrixDNNCalls.increment();
+ Statistics.numNativeSparseConv2dBwdDataCalls.increment();
runConvTask(TaskType.LoopedIm2ColConv2dBwdData, params);
@@ -200,7 +198,7 @@ public class LibMatrixDNN {
checkInputsConv2dBackwardFilter(input, dout, outputBlock, params);
if(isEligibleForConv2dBackwardFilterSparseDense(params))
- Statistics.numNativeLibMatrixDNNCalls.increment();
+ Statistics.numNativeSparseConv2dBwdFilterCalls.increment();
runConvTask(TaskType.LoopedIm2ColConv2dBwdFilter, params);
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/5489c665/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixNative.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixNative.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixNative.java
index 4b12596..524218d 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixNative.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixNative.java
@@ -18,6 +18,7 @@
*/
package org.apache.sysml.runtime.matrix.data;
+import org.apache.sysml.api.DMLScript;
import org.apache.sysml.hops.OptimizerUtils;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.utils.NativeHelper;
@@ -60,9 +61,13 @@ public class LibMatrixNative {
!isMatMultMemoryBound(m1.rlen, m1.clen, m2.clen) && !m1.isInSparseFormat() && !m2.isInSparseFormat()) {
ret.sparse = false;
ret.allocateDenseBlock();
+ long start = DMLScript.STATISTICS ? System.nanoTime() : 0;
if (NativeHelper.matrixMultDenseDense(m1.denseBlock, m2.denseBlock,
ret.denseBlock, m1.getNumRows(), m1.getNumColumns(), m2.getNumColumns(), k)) {
- Statistics.numNativeLibMatrixMultCalls.increment();
+ if(DMLScript.STATISTICS) {
+ Statistics.nativeLibMatrixMultTime += System.nanoTime() - start;
+ Statistics.numNativeLibMatrixMultCalls.increment();
+ }
ret.recomputeNonZeros();
// post-processing (nnz maintained in parallel)
if(examSparsity)
@@ -94,12 +99,17 @@ public class LibMatrixNative {
if(NativeHelper.isNativeLibraryLoaded() && !input.isInSparseFormat() && !filter.isInSparseFormat()) {
setNumThreads(params);
if(params.bias == null) {
- if(NativeHelper.conv2dDense(input.denseBlock, filter.denseBlock, outputBlock.denseBlock, params.N, params.C, params.H, params.W,
+ long start = DMLScript.STATISTICS ? System.nanoTime() : 0;
+ int nnz = NativeHelper.conv2dDense(input.denseBlock, filter.denseBlock, outputBlock.denseBlock, params.N, params.C, params.H, params.W,
params.K, params.R, params.S, params.stride_h, params.stride_w, params.pad_h, params.pad_w,
- params.P, params.Q, params.numThreads)) {
- Statistics.numNativeLibMatrixDNNCalls.increment();
+ params.P, params.Q, params.numThreads);
+ if(nnz != -1) {
+ if(DMLScript.STATISTICS) {
+ Statistics.nativeConv2dTime += System.nanoTime() - start;
+ Statistics.numNativeConv2dCalls.increment();
+ }
// post-processing: maintain nnz
- outputBlock.recomputeNonZeros();
+ outputBlock.setNonZeros(nnz);
return;
}
else {
@@ -110,13 +120,18 @@ public class LibMatrixNative {
else {
if(params.bias.isInSparseFormat())
params.bias.sparseToDense(); // Bias matrix is usually extremely small
- if(NativeHelper.conv2dBiasAddDense(input.denseBlock, params.bias.denseBlock, filter.denseBlock, outputBlock.denseBlock,
+ long start = DMLScript.STATISTICS ? System.nanoTime() : 0;
+ int nnz = NativeHelper.conv2dBiasAddDense(input.denseBlock, params.bias.denseBlock, filter.denseBlock, outputBlock.denseBlock,
params.N, params.C, params.H, params.W,
params.K, params.R, params.S, params.stride_h, params.stride_w, params.pad_h, params.pad_w,
- params.P, params.Q, params.numThreads)) {
- Statistics.numNativeLibMatrixDNNCalls.increment();
+ params.P, params.Q, params.numThreads);
+ if(nnz != -1) {
+ if(DMLScript.STATISTICS) {
+ Statistics.nativeConv2dTime += System.nanoTime() - start;
+ Statistics.numNativeConv2dCalls.increment();
+ }
// post-processing: maintain nnz
- outputBlock.recomputeNonZeros();
+ outputBlock.setNonZeros(nnz);
return;
}
else {
@@ -150,12 +165,17 @@ public class LibMatrixNative {
params.numThreads = params.numThreads <= 0 ? NativeHelper.getMaxNumThreads() : params.numThreads;
if(NativeHelper.isNativeLibraryLoaded() && !dout.isInSparseFormat() && !input.isInSparseFormat()) {
setNumThreads(params);
- if(NativeHelper.conv2dBackwardFilterDense(input.denseBlock, dout.denseBlock, outputBlock.denseBlock, params.N, params.C, params.H, params.W,
- params.K, params.R, params.S, params.stride_h, params.stride_w, params.pad_h, params.pad_w,
- params.P, params.Q, params.numThreads)) {
- Statistics.numNativeLibMatrixDNNCalls.increment();
+ long start = DMLScript.STATISTICS ? System.nanoTime() : 0;
+ int nnz = NativeHelper.conv2dBackwardFilterDense(input.denseBlock, dout.denseBlock, outputBlock.denseBlock, params.N, params.C, params.H, params.W,
+ params.K, params.R, params.S, params.stride_h, params.stride_w, params.pad_h, params.pad_w,
+ params.P, params.Q, params.numThreads);
+ if(nnz != -1) {
+ if(DMLScript.STATISTICS) {
+ Statistics.nativeConv2dBwdFilterTime += System.nanoTime() - start;
+ Statistics.numNativeConv2dBwdFilterCalls.increment();
+ }
// post-processing: maintain nnz
- outputBlock.recomputeNonZeros();
+ outputBlock.setNonZeros(nnz);
return;
}
else {
@@ -181,12 +201,17 @@ public class LibMatrixNative {
params.numThreads = params.numThreads <= 0 ? NativeHelper.getMaxNumThreads() : params.numThreads;
if(NativeHelper.isNativeLibraryLoaded() && !dout.isInSparseFormat() && !filter.isInSparseFormat()) {
setNumThreads(params);
- if(NativeHelper.conv2dBackwardDataDense(filter.denseBlock, dout.denseBlock, outputBlock.denseBlock, params.N, params.C, params.H, params.W,
- params.K, params.R, params.S, params.stride_h, params.stride_w, params.pad_h, params.pad_w,
- params.P, params.Q, params.numThreads)) {
- Statistics.numNativeLibMatrixDNNCalls.increment();
+ long start = DMLScript.STATISTICS ? System.nanoTime() : 0;
+ int nnz = NativeHelper.conv2dBackwardDataDense(filter.denseBlock, dout.denseBlock, outputBlock.denseBlock, params.N, params.C, params.H, params.W,
+ params.K, params.R, params.S, params.stride_h, params.stride_w, params.pad_h, params.pad_w,
+ params.P, params.Q, params.numThreads);
+ if(nnz != -1) {
+ if(DMLScript.STATISTICS) {
+ Statistics.nativeConv2dBwdDataTime += System.nanoTime() - start;
+ Statistics.numNativeConv2dBwdDataCalls.increment();
+ }
// post-processing: maintain nnz
- outputBlock.recomputeNonZeros();
+ outputBlock.setNonZeros(nnz);
return;
}
else {
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/5489c665/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
index ff6a007..233350a 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
@@ -34,6 +34,7 @@ import java.util.stream.LongStream;
import org.apache.commons.math3.random.Well1024a;
import org.apache.hadoop.io.DataInputBuffer;
+import org.apache.sysml.api.DMLScript;
import org.apache.sysml.conf.ConfigurationManager;
import org.apache.sysml.hops.Hop.OpOp2;
import org.apache.sysml.hops.OptimizerUtils;
@@ -86,6 +87,7 @@ import org.apache.sysml.runtime.util.FastBufferedDataOutputStream;
import org.apache.sysml.runtime.util.IndexRange;
import org.apache.sysml.runtime.util.UtilFunctions;
import org.apache.sysml.utils.NativeHelper;
+import org.apache.sysml.utils.Statistics;
@@ -104,6 +106,8 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab
//basic header (int rlen, int clen, byte type)
public static final int HEADER_SIZE = 9;
+ private static final boolean DISPLAY_STATISTICS = false; // Developer flag to measure performance overhead of various functions in this class
+
public enum BlockType{
EMPTY_BLOCK,
ULTRA_SPARSE_BLOCK, //ultra sparse representation, in-mem same as sparse
@@ -336,6 +340,7 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab
allocateDenseBlock();
}
+ @SuppressWarnings("unused")
public void allocateDenseBlock(boolean clearNNZ)
throws RuntimeException
{
@@ -350,7 +355,9 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab
//allocate block if non-existing or too small (guaranteed to be 0-initialized),
if(denseBlock == null || denseBlock.length < limit) {
+ long start = DISPLAY_STATISTICS && DMLScript.STATISTICS ? System.nanoTime() : 0;
denseBlock = new double[(int)limit];
+ Statistics.allocateDoubleArrTime += DISPLAY_STATISTICS && DMLScript.STATISTICS ? (System.nanoTime() - start) : 0;
}
//clear nnz if necessary
@@ -986,9 +993,11 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab
*
* @throws DMLRuntimeException if DMLRuntimeException occurs
*/
+ @SuppressWarnings("unused")
public void examSparsity()
throws DMLRuntimeException
{
+ long start = DISPLAY_STATISTICS && DMLScript.STATISTICS ? System.nanoTime() : 0;
//determine target representation
boolean sparseDst = evalSparseFormatInMemory();
@@ -1002,6 +1011,8 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab
sparseToDense();
else if( !sparse && sparseDst )
denseToSparse();
+
+ Statistics.examSparsityTime += DISPLAY_STATISTICS && DMLScript.STATISTICS ? (System.nanoTime() - start) : 0;
}
/**
@@ -1141,6 +1152,7 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab
* of the entire matrix block.
*
*/
+ @SuppressWarnings("unused")
public void recomputeNonZeros()
{
if( sparse && sparseBlock!=null ) //SPARSE (max long)
@@ -1150,12 +1162,14 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab
}
else if( !sparse && denseBlock!=null ) //DENSE (max int)
{
+ long start = DISPLAY_STATISTICS && DMLScript.STATISTICS ? System.nanoTime() : 0;
double[] a = denseBlock;
final int limit=rlen*clen;
int nnz = 0;
for(int i=0; i<limit; i++)
nnz += (a[i]!=0) ? 1 : 0;
nonZeros = nnz;
+ Statistics.recomputeNNZTime += DISPLAY_STATISTICS && DMLScript.STATISTICS ? (System.nanoTime() - start) : 0;
}
}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/5489c665/src/main/java/org/apache/sysml/udf/lib/SGDNesterovUpdate.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/udf/lib/SGDNesterovUpdate.java b/src/main/java/org/apache/sysml/udf/lib/SGDNesterovUpdate.java
index 9c027d3..a88f230 100644
--- a/src/main/java/org/apache/sysml/udf/lib/SGDNesterovUpdate.java
+++ b/src/main/java/org/apache/sysml/udf/lib/SGDNesterovUpdate.java
@@ -39,7 +39,7 @@ import org.apache.sysml.udf.Matrix.ValueType;
* Assumption: the input batch fits in CP (which is also the assumption of most deep learning systems).
*
* Usage:
- * update_nesterov = externalFunction(matrix[double] X, matrix[double] dX, double lr, double mu, matrix[double] v) return (matrix[double] X, matrix[double] v) implemented in (classname="org.apache.sysml.udf.lib.SGDNesterovUpdate",exectype="mem");
+ * update_nesterov = externalFunction(matrix[double] X, matrix[double] dX, double lr, double mu, matrix[double] v, double lambda) return (matrix[double] X, matrix[double] v) implemented in (classname="org.apache.sysml.udf.lib.SGDNesterovUpdate",exectype="mem");
 * [X, v] = update_nesterov(X, dX, lr, mu, v, lambda);
*
*
@@ -81,16 +81,19 @@ public class SGDNesterovUpdate extends PackageFunction {
double mu = Double.parseDouble(((Scalar)getFunctionInput(3)).getValue());
MatrixBlock v = ((Matrix) getFunctionInput(4)).getMatrixObject().acquireRead();
- // v = mu * v - lr * dX
+ double lambda = Double.parseDouble(((Scalar)getFunctionInput(5)).getValue());
+
+ // v = mu * v - lr * dX - lr*lambda*X
updatedV = new Matrix( "tmp_" + rand.nextLong(), v.getNumRows(), v.getNumColumns(), ValueType.Double );
MatrixBlock updatedVMB = allocateDenseMatrixBlock(updatedV);
double [] updatedVData = updatedVMB.getDenseBlock();
- if(isDense(v) && isDense(dX)) {
+ if(isDense(v) && isDense(dX) && isDense(X)) {
double [] vArr = v.getDenseBlock();
double [] dXArr = dX.getDenseBlock();
+ double [] XArr = X.getDenseBlock();
int nnz = 0;
for(int i = 0; i < updatedVData.length; i++) {
- updatedVData[i] = mu*vArr[i] - lr*dXArr[i];
+ updatedVData[i] = mu*vArr[i] - lr*dXArr[i] - lr*lambda*XArr[i];
nnz += (updatedVData[i]!=0) ? 1 : 0;
}
updatedVMB.setNonZeros(nnz);
@@ -98,8 +101,10 @@ public class SGDNesterovUpdate extends PackageFunction {
else {
multiplyByConstant(v, mu, updatedVData);
multiplyByConstant(dX, -lr, updatedVData);
+ multiplyByConstant(X, -lr*lambda, updatedVData);
updatedVMB.recomputeNonZeros();
}
+
updatedV.setMatrixDoubleArray(updatedVMB, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo);
// X = X - mu * v_prev + (1 + mu) * v
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/5489c665/src/main/java/org/apache/sysml/utils/NativeHelper.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/utils/NativeHelper.java b/src/main/java/org/apache/sysml/utils/NativeHelper.java
index 129824b..fe5e085 100644
--- a/src/main/java/org/apache/sysml/utils/NativeHelper.java
+++ b/src/main/java/org/apache/sysml/utils/NativeHelper.java
@@ -254,17 +254,18 @@ public class NativeHelper {
// K = number of filters, R = filter height, S = filter width
// TODO: case not handled: sparse filters (which will only be executed in Java). Since filters are relatively smaller, this is a low priority.
+ // Returns -1 on failure; otherwise returns the number of nonzeros
// Called by ConvolutionCPInstruction if both input and filter are dense
- public static native boolean conv2dDense(double [] input, double [] filter, double [] ret, int N, int C, int H, int W,
+ public static native int conv2dDense(double [] input, double [] filter, double [] ret, int N, int C, int H, int W,
int K, int R, int S, int stride_h, int stride_w, int pad_h, int pad_w, int P, int Q, int numThreads);
- public static native boolean conv2dBiasAddDense(double [] input, double [] bias, double [] filter, double [] ret, int N, int C, int H, int W,
+ public static native int conv2dBiasAddDense(double [] input, double [] bias, double [] filter, double [] ret, int N, int C, int H, int W,
int K, int R, int S, int stride_h, int stride_w, int pad_h, int pad_w, int P, int Q, int numThreads);
// Called by ConvolutionCPInstruction if both input and filter are dense
- public static native boolean conv2dBackwardFilterDense(double [] input, double [] dout, double [] ret, int N, int C, int H, int W,
+ public static native int conv2dBackwardFilterDense(double [] input, double [] dout, double [] ret, int N, int C, int H, int W,
int K, int R, int S, int stride_h, int stride_w, int pad_h, int pad_w, int P, int Q, int numThreads);
// If both filter and dout are dense, then called by ConvolutionCPInstruction
// Else, called by LibMatrixDNN's thread if filter is dense. dout[n] is converted to dense if sparse.
- public static native boolean conv2dBackwardDataDense(double [] filter, double [] dout, double [] ret, int N, int C, int H, int W,
+ public static native int conv2dBackwardDataDense(double [] filter, double [] dout, double [] ret, int N, int C, int H, int W,
int K, int R, int S, int stride_h, int stride_w, int pad_h, int pad_w, int P, int Q, int numThreads);
// Currently only supported with numThreads = 1 and sparse input
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/5489c665/src/main/java/org/apache/sysml/utils/Statistics.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/utils/Statistics.java b/src/main/java/org/apache/sysml/utils/Statistics.java
index 97888cb..49633d8 100644
--- a/src/main/java/org/apache/sysml/utils/Statistics.java
+++ b/src/main/java/org/apache/sysml/utils/Statistics.java
@@ -114,7 +114,21 @@ public class Statistics
private static LongAdder numNativeFailures = new LongAdder();
public static LongAdder numNativeLibMatrixMultCalls = new LongAdder();
- public static LongAdder numNativeLibMatrixDNNCalls = new LongAdder();
+ public static LongAdder numNativeConv2dCalls = new LongAdder();
+ public static LongAdder numNativeConv2dBwdDataCalls = new LongAdder();
+ public static LongAdder numNativeConv2dBwdFilterCalls = new LongAdder();
+ public static LongAdder numNativeSparseConv2dCalls = new LongAdder();
+ public static LongAdder numNativeSparseConv2dBwdFilterCalls = new LongAdder();
+ public static LongAdder numNativeSparseConv2dBwdDataCalls = new LongAdder();
+ public static long nativeLibMatrixMultTime = 0;
+ public static long nativeConv2dTime = 0;
+ public static long nativeConv2dBwdDataTime = 0;
+ public static long nativeConv2dBwdFilterTime = 0;
+
+ public static long recomputeNNZTime = 0;
+ public static long examSparsityTime = 0;
+ public static long allocateDoubleArrTime = 0;
+
public static void incrementNativeFailuresCounter() {
numNativeFailures.increment();
// This is very rare and I am not sure it is possible at all. Our initial experiments never encountered this case.
@@ -378,8 +392,17 @@ public class Statistics
GPUStatistics.reset();
numNativeLibMatrixMultCalls.reset();
- numNativeLibMatrixDNNCalls.reset();
+ numNativeSparseConv2dCalls.reset();
+ numNativeSparseConv2dBwdDataCalls.reset();
+ numNativeSparseConv2dBwdFilterCalls.reset();
+ numNativeConv2dCalls.reset();
+ numNativeConv2dBwdDataCalls.reset();
+ numNativeConv2dBwdFilterCalls.reset();
numNativeFailures.reset();
+ nativeLibMatrixMultTime = 0;
+ nativeConv2dTime = 0;
+ nativeConv2dBwdFilterTime = 0;
+ nativeConv2dBwdDataTime = 0;
LibMatrixDNN.resetStatistics();
}
@@ -635,11 +658,23 @@ public class Statistics
//show extended caching/compilation statistics
if( DMLScript.STATISTICS )
{
- if(NativeHelper.blasType != null && (numNativeLibMatrixMultCalls.longValue() > 0 ||
- numNativeLibMatrixDNNCalls.longValue() > 0)) {
+ if(NativeHelper.blasType != null) {
String blas = NativeHelper.blasType != null ? NativeHelper.blasType : "";
- sb.append("Native " + blas + " calls (LibMatrixMult/LibMatrixDNN):\t" + numNativeLibMatrixMultCalls.longValue() + "/" + numNativeLibMatrixDNNCalls.longValue() + ".\n");
+ sb.append("Native " + blas + " calls (dense mult/conv/bwdF/bwdD):\t" + numNativeLibMatrixMultCalls.longValue() + "/" +
+ numNativeConv2dCalls.longValue() + "/" + numNativeConv2dBwdFilterCalls.longValue()
+ + "/" + numNativeConv2dBwdDataCalls.longValue() + ".\n");
+ sb.append("Native " + blas + " calls (sparse conv/bwdF/bwdD):\t" +
+ numNativeSparseConv2dCalls.longValue() + "/" + numNativeSparseConv2dBwdFilterCalls.longValue()
+ + "/" + numNativeSparseConv2dBwdDataCalls.longValue() + ".\n");
+ sb.append("Native " + blas + " times (dense mult/conv/bwdF/bwdD):\t" + String.format("%.3f", nativeLibMatrixMultTime*1e-9) + "/" +
+ String.format("%.3f", nativeConv2dTime*1e-9) + "/" + String.format("%.3f", nativeConv2dBwdFilterTime*1e-9) + "/" +
+ String.format("%.3f", nativeConv2dBwdDataTime*1e-9) + ".\n");
+ }
+ if(recomputeNNZTime != 0 || examSparsityTime != 0 || allocateDoubleArrTime != 0) {
+ sb.append("MatrixBlock times (recomputeNNZ/examSparsity/allocateDoubleArr):\t" + String.format("%.3f", recomputeNNZTime*1e-9) + "/" +
+ String.format("%.3f", examSparsityTime*1e-9) + "/" + String.format("%.3f", allocateDoubleArrTime*1e-9) + ".\n");
}
+
sb.append("Cache hits (Mem, WB, FS, HDFS):\t" + CacheStatistics.displayHits() + ".\n");
sb.append("Cache writes (WB, FS, HDFS):\t" + CacheStatistics.displayWrites() + ".\n");
sb.append("Cache times (ACQr/m, RLS, EXP):\t" + CacheStatistics.displayTime() + " sec.\n");
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/5489c665/src/main/scala/org/apache/sysml/api/dl/Caffe2DML.scala
----------------------------------------------------------------------
diff --git a/src/main/scala/org/apache/sysml/api/dl/Caffe2DML.scala b/src/main/scala/org/apache/sysml/api/dl/Caffe2DML.scala
index 377ebf3..f7f85c3 100644
--- a/src/main/scala/org/apache/sysml/api/dl/Caffe2DML.scala
+++ b/src/main/scala/org/apache/sysml/api/dl/Caffe2DML.scala
@@ -291,7 +291,7 @@ class Caffe2DML(val sc: SparkContext, val solverParam:Caffe.SolverParameter,
appendVisualizationHeaders(dmlScript, numTabs)
if(Caffe2DML.USE_NESTEROV_UDF) {
- tabDMLScript(dmlScript, numTabs).append("update_nesterov = externalFunction(matrix[double] X, matrix[double] dX, double lr, double mu, matrix[double] v) return (matrix[double] X, matrix[double] v) implemented in (classname=\"org.apache.sysml.udf.lib.SGDNesterovUpdate\",exectype=\"mem\"); \n")
+ tabDMLScript(dmlScript, numTabs).append("update_nesterov = externalFunction(matrix[double] X, matrix[double] dX, double lr, double mu, matrix[double] v, double lambda) return (matrix[double] X, matrix[double] v) implemented in (classname=\"org.apache.sysml.udf.lib.SGDNesterovUpdate\",exectype=\"mem\"); \n")
}
// Read and convert to one-hot encoding
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/5489c665/src/main/scala/org/apache/sysml/api/dl/CaffeSolver.scala
----------------------------------------------------------------------
diff --git a/src/main/scala/org/apache/sysml/api/dl/CaffeSolver.scala b/src/main/scala/org/apache/sysml/api/dl/CaffeSolver.scala
index ae3d21d..0620e44 100644
--- a/src/main/scala/org/apache/sysml/api/dl/CaffeSolver.scala
+++ b/src/main/scala/org/apache/sysml/api/dl/CaffeSolver.scala
@@ -144,12 +144,15 @@ class AdaGrad(lambda:Double=5e-04, epsilon:Double=1e-6) extends CaffeSolver {
*/
class Nesterov(lambda:Double=5e-04, momentum:Double=0.9) extends CaffeSolver {
def update(dmlScript:StringBuilder, layer:CaffeLayer):Unit = {
- l2reg_update(lambda, dmlScript, layer)
val fn = if(Caffe2DML.USE_NESTEROV_UDF) "update_nesterov" else "sgd_nesterov::update"
+ val lastParameter = if(Caffe2DML.USE_NESTEROV_UDF) (", " + lambda) else ""
+ if(!Caffe2DML.USE_NESTEROV_UDF) {
+ l2reg_update(lambda, dmlScript, layer)
+ }
if(layer.shouldUpdateWeight) dmlScript.append("\t").append("["+ commaSep(layer.weight, layer.weight+"_v") + "] " +
- "= " + fn + "(" + commaSep(layer.weight, layer.dWeight, getWeightLr(layer), momentum.toString, layer.weight+"_v") + ")\n")
+ "= " + fn + "(" + commaSep(layer.weight, layer.dWeight, getWeightLr(layer), momentum.toString, layer.weight+"_v") + lastParameter + ")\n")
if(layer.shouldUpdateBias) dmlScript.append("\t").append("["+ commaSep(layer.bias, layer.bias+"_v") + "] " +
- "= " + fn + "(" + commaSep(layer.bias, layer.dBias, getBiasLr(layer), momentum.toString, layer.bias+"_v") + ")\n")
+ "= " + fn + "(" + commaSep(layer.bias, layer.dBias, getBiasLr(layer), momentum.toString, layer.bias+"_v") + lastParameter + ")\n")
}
def init(dmlScript:StringBuilder, layer:CaffeLayer):Unit = {
if(layer.shouldUpdateWeight) dmlScript.append(layer.weight+"_v = sgd_nesterov::init(" + layer.weight + ")\n")