You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by ni...@apache.org on 2018/08/27 20:09:57 UTC
[2/5] systemml git commit: [SYSTEMML-1325] Cleanup static variables
in DMLScript
http://git-wip-us.apache.org/repos/asf/systemml/blob/ae268a9e/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
index 217acd6..d02a875 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
@@ -28,6 +28,7 @@ import static jcuda.runtime.cudaMemcpyKind.cudaMemcpyDeviceToHost;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.sysml.api.DMLScript;
+import org.apache.sysml.conf.ConfigurationManager;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.controlprogram.caching.MatrixObject;
import org.apache.sysml.runtime.controlprogram.context.ExecutionContext;
@@ -333,11 +334,11 @@ public class LibMatrixCUDA {
Pointer outputPointer = getDensePointer(gCtx, outputBlock, instName);
long t1=0;
- if (DMLScript.FINEGRAINED_STATISTICS) t1 = System.nanoTime();
+ if (ConfigurationManager.isFinegrainedStatistics()) t1 = System.nanoTime();
getCudaKernels(gCtx).launchKernel("relu_backward",
ExecutionConfig.getConfigForSimpleMatrixOperations(toInt(rows), toInt(cols)),
imagePointer, doutPointer, outputPointer, toInt(rows), toInt(cols));
- if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_RELU_BACKWARD_KERNEL, System.nanoTime() - t1);
+ if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_RELU_BACKWARD_KERNEL, System.nanoTime() - t1);
}
@@ -367,7 +368,7 @@ public class LibMatrixCUDA {
Pointer tmp = gCtx.allocate(instName, cols*sizeOfDataType);
reduceCol(gCtx, instName, "reduce_col_sum", imagePointer, tmp, N, cols);
reduceRow(gCtx, instName, "reduce_row_sum", tmp, outputPointer, toInt(C), toInt(HW));
- gCtx.cudaFreeHelper(instName, tmp, DMLScript.EAGER_CUDA_FREE);
+ gCtx.cudaFreeHelper(instName, tmp, gCtx.EAGER_CUDA_FREE);
}
/**
@@ -402,11 +403,11 @@ public class LibMatrixCUDA {
Pointer biasPointer = bias.getGPUObject(gCtx).getDensePointer();
Pointer outputPointer = outputBlock.getGPUObject(gCtx).getDensePointer();
long t1 = 0;
- if (DMLScript.FINEGRAINED_STATISTICS) t1 = System.nanoTime();
+ if (ConfigurationManager.isFinegrainedStatistics()) t1 = System.nanoTime();
getCudaKernels(gCtx).launchKernel("bias_multiply",
ExecutionConfig.getConfigForSimpleMatrixOperations(toInt(rows), toInt(cols)),
imagePointer, biasPointer, outputPointer, toInt(rows), toInt(cols), toInt(PQ));
- if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_BIAS_ADD_LIB, System.nanoTime() - t1);
+ if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_BIAS_ADD_LIB, System.nanoTime() - t1);
}
@@ -454,11 +455,11 @@ public class LibMatrixCUDA {
}
int PQ = cols / k;
long t1 = 0;
- if (DMLScript.FINEGRAINED_STATISTICS) t1 = System.nanoTime();
+ if (ConfigurationManager.isFinegrainedStatistics()) t1 = System.nanoTime();
getCudaKernels(gCtx).launchKernel("bias_add",
ExecutionConfig.getConfigForSimpleMatrixOperations(rows, cols),
image, bias, output, rows, cols, PQ);
- if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_BIAS_ADD_LIB, System.nanoTime() - t1);
+ if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_BIAS_ADD_LIB, System.nanoTime() - t1);
}
@@ -525,13 +526,13 @@ public class LibMatrixCUDA {
long t0=0, t1=0;
- if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
+ if (ConfigurationManager.isFinegrainedStatistics()) t0 = System.nanoTime();
cudaSupportFunctions.cublassyrk(getCublasHandle(gCtx), cublasFillMode.CUBLAS_FILL_MODE_LOWER,transa, m, k, one(), A, lda, zero(), C, ldc);
- if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_SYRK_LIB, System.nanoTime() - t0);
+ if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_SYRK_LIB, System.nanoTime() - t0);
- if (DMLScript.FINEGRAINED_STATISTICS) t1 = System.nanoTime();
+ if (ConfigurationManager.isFinegrainedStatistics()) t1 = System.nanoTime();
copyUpperToLowerTriangle(gCtx, instName, output);
- if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_UPPER_TO_LOWER_TRIANGLE_KERNEL, System.nanoTime() - t1);
+ if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_UPPER_TO_LOWER_TRIANGLE_KERNEL, System.nanoTime() - t1);
}
/**
@@ -740,7 +741,7 @@ public class LibMatrixCUDA {
default:
throw new DMLRuntimeException("Internal Error - Unsupported reduction direction for summation squared");
}
- gCtx.cudaFreeHelper(instName, tmp, DMLScript.EAGER_CUDA_FREE);
+ gCtx.cudaFreeHelper(instName, tmp, gCtx.EAGER_CUDA_FREE);
break;
}
case OP_MEAN:{
@@ -853,7 +854,7 @@ public class LibMatrixCUDA {
ScalarOperator divideOp = new RightScalarOperator(Divide.getDivideFnObject(), clen - 1);
matrixScalarOp(gCtx, instName, tmpRow, clen - 1, rlen, 1, out, divideOp);
- gCtx.cudaFreeHelper(instName, tmpRow, DMLScript.EAGER_CUDA_FREE);
+ gCtx.cudaFreeHelper(instName, tmpRow, gCtx.EAGER_CUDA_FREE);
break;
}
@@ -871,15 +872,15 @@ public class LibMatrixCUDA {
ScalarOperator divideOp = new RightScalarOperator(Divide.getDivideFnObject(), rlen - 1);
matrixScalarOp(gCtx, instName, tmpCol, rlen - 1, 1, clen, out, divideOp);
- gCtx.cudaFreeHelper(instName, tmpCol, DMLScript.EAGER_CUDA_FREE);
+ gCtx.cudaFreeHelper(instName, tmpCol, gCtx.EAGER_CUDA_FREE);
break;
}
default:
throw new DMLRuntimeException("Internal Error - Unsupported reduction direction for variance");
}
- gCtx.cudaFreeHelper(instName, tmp, DMLScript.EAGER_CUDA_FREE);
- gCtx.cudaFreeHelper(instName, tmp2, DMLScript.EAGER_CUDA_FREE);
+ gCtx.cudaFreeHelper(instName, tmp, gCtx.EAGER_CUDA_FREE);
+ gCtx.cudaFreeHelper(instName, tmp2, gCtx.EAGER_CUDA_FREE);
break;
}
case OP_MAXINDEX : {
@@ -938,24 +939,24 @@ public class LibMatrixCUDA {
long t1=0,t2=0;
- if (DMLScript.FINEGRAINED_STATISTICS) t1 = System.nanoTime();
+ if (ConfigurationManager.isFinegrainedStatistics()) t1 = System.nanoTime();
getCudaKernels(gCtx).launchKernel(kernelFunction, new ExecutionConfig(blocks, threads, sharedMem), in, tempOut, n);
//cudaDeviceSynchronize;
- if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_REDUCE_ALL_KERNEL, System.nanoTime() - t1);
+ if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_REDUCE_ALL_KERNEL, System.nanoTime() - t1);
int s = blocks;
while (s > 1) {
tmp = getKernelParamsForReduceAll(gCtx, s);
blocks = tmp[0]; threads = tmp[1]; sharedMem = tmp[2];
- if (DMLScript.FINEGRAINED_STATISTICS) t2 = System.nanoTime();
+ if (ConfigurationManager.isFinegrainedStatistics()) t2 = System.nanoTime();
getCudaKernels(gCtx).launchKernel(kernelFunction, new ExecutionConfig(blocks, threads, sharedMem),
tempOut, tempOut, s);
- if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_REDUCE_ALL_KERNEL, System.nanoTime() - t2);
+ if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_REDUCE_ALL_KERNEL, System.nanoTime() - t2);
s = (s + (threads*2-1)) / (threads*2);
}
double[] result = {-1f};
cudaSupportFunctions.deviceToHost(gCtx, tempOut, result, instName, false);
- gCtx.cudaFreeHelper(instName, tempOut, DMLScript.EAGER_CUDA_FREE);
+ gCtx.cudaFreeHelper(instName, tempOut, gCtx.EAGER_CUDA_FREE);
return result[0];
}
@@ -978,11 +979,11 @@ public class LibMatrixCUDA {
int blocks = tmp[0], threads = tmp[1], sharedMem = tmp[2];
long t0=0;
- if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
+ if (ConfigurationManager.isFinegrainedStatistics()) t0 = System.nanoTime();
getCudaKernels(gCtx).launchKernel(kernelFunction, new ExecutionConfig(blocks, threads, sharedMem),
in, out, rows, cols);
//cudaDeviceSynchronize;
- if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_REDUCE_ROW_KERNEL, System.nanoTime() - t0);
+ if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_REDUCE_ROW_KERNEL, System.nanoTime() - t0);
}
@@ -1005,11 +1006,11 @@ public class LibMatrixCUDA {
int blocks = tmp[0], threads = tmp[1], sharedMem = tmp[2];
long t0=0;
- if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
+ if (ConfigurationManager.isFinegrainedStatistics()) t0 = System.nanoTime();
getCudaKernels(gCtx).launchKernel(kernelFunction, new ExecutionConfig(blocks, threads, sharedMem),
in, out, rows, cols);
//cudaDeviceSynchronize;
- if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_REDUCE_COL_KERNEL, System.nanoTime() - t0);
+ if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_REDUCE_COL_KERNEL, System.nanoTime() - t0);
}
/**
@@ -1329,11 +1330,11 @@ public class LibMatrixCUDA {
int isLeftScalar = (op instanceof LeftScalarOperator) ? 1 : 0;
int size = rlenA * clenA;
long t0=0;
- if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
+ if (ConfigurationManager.isFinegrainedStatistics()) t0 = System.nanoTime();
getCudaKernels(gCtx).launchKernel("matrix_scalar_op",
ExecutionConfig.getConfigForSimpleVectorOperations(size),
a, scalar, c, size, getBinaryOp(op.fn), isLeftScalar);
- if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_MATRIX_SCALAR_OP_KERNEL, System.nanoTime() - t0);
+ if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_MATRIX_SCALAR_OP_KERNEL, System.nanoTime() - t0);
}
/**
@@ -1431,11 +1432,11 @@ public class LibMatrixCUDA {
LOG.trace("GPU : matrix_matrix_cellwise_op" + ", GPUContext=" + gCtx);
}
long t0=0;
- if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
+ if (ConfigurationManager.isFinegrainedStatistics()) t0 = System.nanoTime();
getCudaKernels(gCtx).launchKernel("matrix_matrix_cellwise_op",
ExecutionConfig.getConfigForSimpleMatrixOperations(maxRlen, maxClen),
a, b, c, maxRlen, maxClen, vecStatusA, vecStatusB, getBinaryOp(op.fn));
- if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_MATRIX_MATRIX_CELLWISE_OP_KERNEL, System.nanoTime() - t0);
+ if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_MATRIX_MATRIX_CELLWISE_OP_KERNEL, System.nanoTime() - t0);
}
/**
@@ -1525,11 +1526,11 @@ public class LibMatrixCUDA {
int rlen = toInt(out.getNumRows());
int clen = toInt(out.getNumColumns());
long t0 = 0;
- if (DMLScript.FINEGRAINED_STATISTICS)
+ if (ConfigurationManager.isFinegrainedStatistics())
t0 = System.nanoTime();
int size = rlen * clen;
getCudaKernels(gCtx).launchKernel("fill", ExecutionConfig.getConfigForSimpleVectorOperations(size), A, constant, size);
- if (DMLScript.FINEGRAINED_STATISTICS)
+ if (ConfigurationManager.isFinegrainedStatistics())
GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_FILL_KERNEL, System.nanoTime() - t0);
}
}
@@ -1544,10 +1545,10 @@ public class LibMatrixCUDA {
*/
public static void deviceCopy(String instName, Pointer src, Pointer dest, int rlen, int clen) {
long t0=0;
- if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
+ if (ConfigurationManager.isFinegrainedStatistics()) t0 = System.nanoTime();
int size = rlen * clen * sizeOfDataType;
cudaMemcpy(dest, src, size, cudaMemcpyDeviceToDevice);
- if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_DEVICE_TO_DEVICE, System.nanoTime() - t0);
+ if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_DEVICE_TO_DEVICE, System.nanoTime() - t0);
}
/**
@@ -1629,19 +1630,19 @@ public class LibMatrixCUDA {
// Invoke cuSparse when either are in sparse format
// Perform sparse-sparse dgeam
if (!isInSparseFormat(gCtx, in1)) {
- if (DMLScript.FINEGRAINED_STATISTICS)
+ if (ConfigurationManager.isFinegrainedStatistics())
t0 = System.nanoTime();
in1.getGPUObject(gCtx).denseToSparse();
- if (DMLScript.FINEGRAINED_STATISTICS)
+ if (ConfigurationManager.isFinegrainedStatistics())
GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_DENSE_TO_SPARSE,
System.nanoTime() - t0);
}
CSRPointer A = in1.getGPUObject(gCtx).getJcudaSparseMatrixPtr();
if (!isInSparseFormat(gCtx, in2)) {
- if (DMLScript.FINEGRAINED_STATISTICS)
+ if (ConfigurationManager.isFinegrainedStatistics())
t0 = System.nanoTime();
in2.getGPUObject(gCtx).denseToSparse();
- if (DMLScript.FINEGRAINED_STATISTICS)
+ if (ConfigurationManager.isFinegrainedStatistics())
GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_DENSE_TO_SPARSE,
System.nanoTime() - t0);
}
@@ -1664,21 +1665,21 @@ public class LibMatrixCUDA {
"Transpose in cusparseDcsrgeam not supported for sparse matrices on GPU");
}
- if (DMLScript.FINEGRAINED_STATISTICS)
+ if (ConfigurationManager.isFinegrainedStatistics())
t1 = System.nanoTime();
CSRPointer C = CSRPointer.allocateForDgeam(gCtx, getCusparseHandle(gCtx), A, B, m, n);
- if (DMLScript.FINEGRAINED_STATISTICS)
+ if (ConfigurationManager.isFinegrainedStatistics())
GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_SPARSE_ALLOCATE_LIB,
System.nanoTime() - t1);
out.getGPUObject(gCtx).setSparseMatrixCudaPointer(C);
//long sizeOfC = CSRPointer.estimateSize(C.nnz, out.getNumRows());
- if (DMLScript.FINEGRAINED_STATISTICS)
+ if (ConfigurationManager.isFinegrainedStatistics())
t0 = System.nanoTime();
cudaSupportFunctions.cusparsecsrgeam(getCusparseHandle(gCtx), m, n, alphaPtr, A.descr, toInt(A.nnz), A.val, A.rowPtr, A.colInd, betaPtr,
B.descr, toInt(B.nnz), B.val, B.rowPtr, B.colInd, C.descr, C.val, C.rowPtr, C.colInd);
//cudaDeviceSynchronize;
- if (DMLScript.FINEGRAINED_STATISTICS)
+ if (ConfigurationManager.isFinegrainedStatistics())
GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_SPARSE_DGEAM_LIB,
System.nanoTime() - t0);
}
@@ -1705,9 +1706,9 @@ public class LibMatrixCUDA {
getDenseMatrixOutputForGPUInstruction(ec, instName, outputName, outRLen, outCLen); // Allocated the dense output matrix
Pointer C = getDensePointer(gCtx, out, instName);
- if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
+ if (ConfigurationManager.isFinegrainedStatistics()) t0 = System.nanoTime();
cudaSupportFunctions.cublasgeam(getCublasHandle(gCtx), transa, transb, m, n, alphaPtr, A, lda, betaPtr, B, ldb, C, ldc);
- if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_DENSE_DGEAM_LIB, System.nanoTime() - t0);
+ if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_DENSE_DGEAM_LIB, System.nanoTime() - t0);
}
}
@@ -1736,9 +1737,9 @@ public class LibMatrixCUDA {
int m = toInt(numRowsA);
int n = lda;
int ldc = m;
- if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
+ if (ConfigurationManager.isFinegrainedStatistics()) t0 = System.nanoTime();
cudaSupportFunctions.cublasgeam(getCublasHandle(gCtx), CUBLAS_OP_T, CUBLAS_OP_T, m, n, one(), A, lda, zero(), A, ldb, C, ldc);
- if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_DENSE_DGEAM_LIB, System.nanoTime() - t0);
+ if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_DENSE_DGEAM_LIB, System.nanoTime() - t0);
}
@@ -1846,7 +1847,7 @@ public class LibMatrixCUDA {
*/
protected static void sliceDenseDense(GPUContext gCtx, String instName, Pointer inPointer, Pointer outPointer,
int rl, int ru, int cl, int cu, int inClen) {
- long t0 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
+ long t0 = ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
long retClen = cu - cl + 1;
if (inClen == retClen) {
cudaMemcpy(outPointer, inPointer.withByteOffset(rl * inClen * sizeOfDataType), (ru - rl + 1) * inClen
@@ -1856,7 +1857,7 @@ public class LibMatrixCUDA {
getCudaKernels(gCtx).launchKernel("slice_dense_dense", ExecutionConfig.getConfigForSimpleVectorOperations(toInt(retRlen*retClen)),
inPointer, outPointer, rl, ru, cl, cu, inClen, retRlen, retClen);
}
- if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_RIX_DENSE_OP, System.nanoTime() - t0);
+ if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_RIX_DENSE_OP, System.nanoTime() - t0);
}
/**
@@ -1879,7 +1880,7 @@ public class LibMatrixCUDA {
if(size == 0) return;
int retRlen = ru - rl + 1;
- long t0 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
+ long t0 = ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
int retClen = cu - cl + 1;
String kernel = null; String timer = null;
@@ -1901,7 +1902,7 @@ public class LibMatrixCUDA {
// We can generalize this later to output sparse matrix.
getCudaKernels(gCtx).launchKernel(kernel, ExecutionConfig.getConfigForSimpleVectorOperations(size),
inPointer.val, inPointer.rowPtr, inPointer.colInd, outPointer, rl, ru, cl, cu, retClen);
- if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, timer, System.nanoTime() - t0);
+ if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, timer, System.nanoTime() - t0);
}
/**
@@ -1946,11 +1947,11 @@ public class LibMatrixCUDA {
int maxRows = toInt(Math.max(rowsA, rowsB));
int maxCols = toInt(Math.max(colsA, colsB));
- if (DMLScript.FINEGRAINED_STATISTICS) t1 = System.nanoTime();
+ if (ConfigurationManager.isFinegrainedStatistics()) t1 = System.nanoTime();
getCudaKernels(gCtx)
.launchKernel("cbind", ExecutionConfig.getConfigForSimpleMatrixOperations(maxRows, maxCols), A, B, C,
rowsA, colsA, rowsB, colsB);
- if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_CBIND_KERNEL, System.nanoTime() - t1);
+ if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_CBIND_KERNEL, System.nanoTime() - t1);
}
@@ -1981,11 +1982,11 @@ public class LibMatrixCUDA {
int maxRows = Math.max(rowsA, rowsB);
int maxCols = Math.max(colsA, colsB);
- if (DMLScript.FINEGRAINED_STATISTICS) t1 = System.nanoTime();
+ if (ConfigurationManager.isFinegrainedStatistics()) t1 = System.nanoTime();
getCudaKernels(gCtx)
.launchKernel("rbind", ExecutionConfig.getConfigForSimpleMatrixOperations(maxRows, maxCols), A, B, C,
rowsA, colsA, rowsB, colsB);
- if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_RBIND_KERNEL, System.nanoTime() - t1);
+ if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_RBIND_KERNEL, System.nanoTime() - t1);
}
@@ -2315,10 +2316,10 @@ public class LibMatrixCUDA {
Pointer output = getDensePointer(gCtx, out, instName);
Pointer input = getDensePointer(gCtx, in1, instName);
int size = toInt(in1.getNumColumns() * in1.getNumRows());
- if (DMLScript.FINEGRAINED_STATISTICS) t1 = System.nanoTime();
+ if (ConfigurationManager.isFinegrainedStatistics()) t1 = System.nanoTime();
getCudaKernels(gCtx).launchKernel(kernel, ExecutionConfig.getConfigForSimpleVectorOperations(size),
input, output, size);
- if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, kernelTimer, System.nanoTime() - t1);
+ if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, kernelTimer, System.nanoTime() - t1);
}
}
@@ -2356,13 +2357,13 @@ public class LibMatrixCUDA {
// becomes
// C <- A
// C <- alpha*B + C
- if (DMLScript.FINEGRAINED_STATISTICS) t1 = System.nanoTime();
+ if (ConfigurationManager.isFinegrainedStatistics()) t1 = System.nanoTime();
cudaMemcpy(C, A, n*((long)sizeOfDataType), cudaMemcpyDeviceToDevice);
- if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_DEVICE_TO_DEVICE, System.nanoTime() - t1);
+ if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_DEVICE_TO_DEVICE, System.nanoTime() - t1);
- if (DMLScript.FINEGRAINED_STATISTICS) t2 = System.nanoTime();
+ if (ConfigurationManager.isFinegrainedStatistics()) t2 = System.nanoTime();
cudaSupportFunctions.cublasaxpy(getCublasHandle(gCtx), toInt(n), alphaPtr, B, 1, C, 1);
- if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_DAXPY_LIB, System.nanoTime() - t2);
+ if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_DAXPY_LIB, System.nanoTime() - t2);
}
else {
if(LOG.isTraceEnabled()) {
@@ -2372,12 +2373,12 @@ public class LibMatrixCUDA {
// Matrix-Vector daxpy
// Note: Vector-Matrix operation is not supported
// daxpy_matrix_vector(double* A, double* B, double alpha, double* ret, int rlenA, int clenA, int rlenB, int clenB)
- if (DMLScript.FINEGRAINED_STATISTICS) t1 = System.nanoTime();
+ if (ConfigurationManager.isFinegrainedStatistics()) t1 = System.nanoTime();
int rlenA = toInt(in1.getNumRows()); int clenA = toInt(in1.getNumColumns());
int rlenB = toInt(in2.getNumRows()); int clenB = toInt(in2.getNumColumns());
getCudaKernels(gCtx).launchKernel("daxpy_matrix_vector", ExecutionConfig.getConfigForSimpleMatrixOperations(rlenA, clenA),
A, B, constant, C, rlenA, clenA, rlenB, clenB);
- if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_DAXPY_MV_KERNEL, System.nanoTime() - t1);
+ if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_DAXPY_MV_KERNEL, System.nanoTime() - t1);
}
}
@@ -2424,20 +2425,20 @@ public class LibMatrixCUDA {
// convert dense matrices to row major
// Operation in cuSolver and cuBlas are for column major dense matrices
// and are destructive to the original input
- if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
+ if (ConfigurationManager.isFinegrainedStatistics()) t0 = System.nanoTime();
GPUObject ATobj = (GPUObject) Aobj.clone();
- if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_OBJECT_CLONE, System.nanoTime() - t0);
- if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
+ if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_OBJECT_CLONE, System.nanoTime() - t0);
+ if (ConfigurationManager.isFinegrainedStatistics()) t0 = System.nanoTime();
ATobj.denseRowMajorToColumnMajor();
- if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_ROW_TO_COLUMN_MAJOR, System.nanoTime() - t0);
+ if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_ROW_TO_COLUMN_MAJOR, System.nanoTime() - t0);
Pointer A = ATobj.getDensePointer();
- if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
+ if (ConfigurationManager.isFinegrainedStatistics()) t0 = System.nanoTime();
GPUObject bTobj = (GPUObject) bobj.clone();
- if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_OBJECT_CLONE, System.nanoTime() - t0);
- if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
+ if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_OBJECT_CLONE, System.nanoTime() - t0);
+ if (ConfigurationManager.isFinegrainedStatistics()) t0 = System.nanoTime();
bTobj.denseRowMajorToColumnMajor();
- if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_ROW_TO_COLUMN_MAJOR, System.nanoTime() - t0);
+ if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_ROW_TO_COLUMN_MAJOR, System.nanoTime() - t0);
Pointer b = bTobj.getDensePointer();
@@ -2446,18 +2447,18 @@ public class LibMatrixCUDA {
// http://docs.nvidia.com/cuda/cusolver/#ormqr-example1
// step 3: query working space of geqrf and ormqr
- if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
+ if (ConfigurationManager.isFinegrainedStatistics()) t0 = System.nanoTime();
int[] lwork = {0};
cudaSupportFunctions.cusolverDngeqrf_bufferSize(gCtx.getCusolverDnHandle(), m, n, A, m, lwork);
- if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_QR_BUFFER, System.nanoTime() - t0);
+ if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_QR_BUFFER, System.nanoTime() - t0);
// step 4: compute QR factorization
Pointer work = gCtx.allocate(instName, lwork[0] * sizeOfDataType);
Pointer tau = gCtx.allocate(instName, m * sizeOfDataType);
Pointer devInfo = gCtx.allocate(instName, Sizeof.INT);
- if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
+ if (ConfigurationManager.isFinegrainedStatistics()) t0 = System.nanoTime();
cudaSupportFunctions.cusolverDngeqrf(gCtx.getCusolverDnHandle(), m, n, A, m, tau, work, lwork[0], devInfo);
- if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_QR, System.nanoTime() - t0);
+ if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_QR, System.nanoTime() - t0);
int[] qrError = {-1};
cudaMemcpy(Pointer.to(qrError), devInfo, Sizeof.INT, cudaMemcpyDeviceToHost);
@@ -2466,34 +2467,34 @@ public class LibMatrixCUDA {
}
// step 5: compute Q^T*B
- if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
+ if (ConfigurationManager.isFinegrainedStatistics()) t0 = System.nanoTime();
cudaSupportFunctions.cusolverDnormqr(gCtx.getCusolverDnHandle(), cublasSideMode.CUBLAS_SIDE_LEFT, cublasOperation.CUBLAS_OP_T, m, 1, n, A, m, tau, b, m, work, lwork[0], devInfo);
- if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_ORMQR, System.nanoTime() - t0);
+ if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_ORMQR, System.nanoTime() - t0);
cudaMemcpy(Pointer.to(qrError), devInfo, Sizeof.INT, cudaMemcpyDeviceToHost);
if (qrError[0] != 0) {
throw new DMLRuntimeException("GPU : Error in call to ormqr (to compute Q^T*B after QR factorization) as part of solve, argument " + qrError[0] + " was wrong");
}
// step 6: compute x = R \ Q^T*B
- if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
+ if (ConfigurationManager.isFinegrainedStatistics()) t0 = System.nanoTime();
cudaSupportFunctions.cublastrsm(gCtx.getCublasHandle(),
cublasSideMode.CUBLAS_SIDE_LEFT, cublasFillMode.CUBLAS_FILL_MODE_UPPER, cublasOperation.CUBLAS_OP_N, cublasDiagType.CUBLAS_DIAG_NON_UNIT,
n, 1, dataTypePointerTo(1.0), A, m, b, m);
- if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_TRSM, System.nanoTime() - t0);
+ if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_TRSM, System.nanoTime() - t0);
- if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
+ if (ConfigurationManager.isFinegrainedStatistics()) t0 = System.nanoTime();
bTobj.denseColumnMajorToRowMajor();
- if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_COLUMN_TO_ROW_MAJOR, System.nanoTime() - t0);
+ if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_COLUMN_TO_ROW_MAJOR, System.nanoTime() - t0);
// TODO : Find a way to assign bTobj directly to the output and set the correct flags so as to not crash
// There is an avoidable copy happening here
MatrixObject out = getDenseMatrixOutputForGPUInstruction(ec, instName, outputName, in1.getNumColumns(), 1);
cudaMemcpy(out.getGPUObject(gCtx).getDensePointer(), bTobj.getDensePointer(), n * 1 * sizeOfDataType, cudaMemcpyDeviceToDevice);
- gCtx.cudaFreeHelper(instName, work, DMLScript.EAGER_CUDA_FREE);
- gCtx.cudaFreeHelper(instName, tau, DMLScript.EAGER_CUDA_FREE);
- ATobj.clearData(instName, DMLScript.EAGER_CUDA_FREE);
- bTobj.clearData(instName, DMLScript.EAGER_CUDA_FREE);
+ gCtx.cudaFreeHelper(instName, work, gCtx.EAGER_CUDA_FREE);
+ gCtx.cudaFreeHelper(instName, tau, gCtx.EAGER_CUDA_FREE);
+ ATobj.clearData(instName, gCtx.EAGER_CUDA_FREE);
+ bTobj.clearData(instName, gCtx.EAGER_CUDA_FREE);
//debugPrintMatrix(b, n, 1);
}
@@ -2514,10 +2515,10 @@ public class LibMatrixCUDA {
*/
public static MatrixObject getDenseMatrixOutputForGPUInstruction(ExecutionContext ec, String instName, String name, long numRows, long numCols) {
long t0=0;
- if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
+ if (ConfigurationManager.isFinegrainedStatistics()) t0 = System.nanoTime();
Pair<MatrixObject, Boolean> mb = ec.getDenseMatrixOutputForGPUInstruction(name, numRows, numCols);
if (mb.getValue())
- if (DMLScript.FINEGRAINED_STATISTICS)
+ if (ConfigurationManager.isFinegrainedStatistics())
GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_ALLOCATE_DENSE_OUTPUT, System.nanoTime() - t0);
return mb.getKey();
}
@@ -2535,10 +2536,10 @@ public class LibMatrixCUDA {
*/
private static MatrixObject getSparseMatrixOutputForGPUInstruction(ExecutionContext ec, long numRows, long numCols, long nnz, String instName, String name) {
long t0=0;
- if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
+ if (ConfigurationManager.isFinegrainedStatistics()) t0 = System.nanoTime();
Pair<MatrixObject, Boolean> mb = ec.getSparseMatrixOutputForGPUInstruction(name, numRows, numCols, nnz);
if (mb.getValue())
- if (DMLScript.FINEGRAINED_STATISTICS)
+ if (ConfigurationManager.isFinegrainedStatistics())
GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_ALLOCATE_SPARSE_OUTPUT, System.nanoTime() - t0);
return mb.getKey();
}
http://git-wip-us.apache.org/repos/asf/systemml/blob/ae268a9e/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNN.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNN.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNN.java
index c6abbfe..d3b5984 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNN.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNN.java
@@ -49,7 +49,7 @@ import jcuda.jcudnn.cudnnTensorDescriptor;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
-import org.apache.sysml.api.DMLScript;
+import org.apache.sysml.conf.ConfigurationManager;
import org.apache.sysml.hops.OptimizerUtils;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.controlprogram.caching.MatrixObject;
@@ -136,7 +136,7 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
private static Pointer denseIm2col(GPUContext gCtx, String instName, MatrixObject image, boolean isSparseImage, long N, long C, long H, long W,
int R, int S, int pad_h, int pad_w, int stride_h, int stride_w, int P, int Q) {
Pointer im2colPointer = null;
- long t1 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
+ long t1 = ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
if(isSparseImage) {
CSRPointer inPointer = getSparsePointer(gCtx, image, instName);
if(inPointer.nnz < 0) {
@@ -147,7 +147,7 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
getCudaKernels(gCtx).launchKernel("sparse_dense_im2col", ExecutionConfig.getConfigForSimpleVectorOperations(toInt(inPointer.nnz)),
inPointer.val, inPointer.rowPtr, inPointer.colInd, im2colPointer, inPointer.nnz, N,
C*H*W, H*W, W, R, S, P, Q, P*Q, R*S, N*P*Q, stride_h, stride_w, pad_h, pad_w);
- if (DMLScript.FINEGRAINED_STATISTICS)
+ if (ConfigurationManager.isFinegrainedStatistics())
GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_SPARSE_IM2COL_KERNEL, System.nanoTime() - t1);
}
else
@@ -159,7 +159,7 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
getCudaKernels(gCtx).launchKernel("dense_dense_im2col", ExecutionConfig.getConfigForSimpleVectorOperations(toInt(N*C*H*W)),
imagePointer, im2colPointer, N*C*H*W,
C*H*W, H*W, W, R, S, P, Q, P*Q, R*S, N*P*Q, stride_h, stride_w, pad_h, pad_w);
- if (DMLScript.FINEGRAINED_STATISTICS)
+ if (ConfigurationManager.isFinegrainedStatistics())
GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_DENSE_IM2COL_KERNEL, System.nanoTime() - t1);
}
return im2colPointer;
@@ -220,16 +220,16 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
CSRPointer filterPointer = filter.getGPUObject(gCtx).getJcudaSparseMatrixPtr();
Pointer matmultOutputPointer = gCtx.allocate(instName, NKPQ*sizeOfDataType);
LibMatrixCuMatMult.sparseDenseMatMult(gCtx, instName, matmultOutputPointer, filterPointer, im2colPointer, K, CRS, CRS, NPQ, K, NPQ, false, false);
- gCtx.cudaFreeHelper(instName, im2colPointer, DMLScript.EAGER_CUDA_FREE);
+ gCtx.cudaFreeHelper(instName, im2colPointer, gCtx.EAGER_CUDA_FREE);
// Perform reorg_knpq a reorg operation of matmultOutputPointer matrix with dimensions [K, NPQ]
// and return a matrix dstPointer with dimensions [N, KPQ]
- long t1 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
+ long t1 = ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
getCudaKernels(gCtx).launchKernel("reorg_knpq", ExecutionConfig.getConfigForSimpleVectorOperations(toInt(NKPQ)),
matmultOutputPointer, dstPointer, NKPQ, NPQ, KPQ, P*Q);
- if (DMLScript.FINEGRAINED_STATISTICS)
+ if (ConfigurationManager.isFinegrainedStatistics())
GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_DENSE_REORG_KNPQ_KERNEL, System.nanoTime() - t1);
- gCtx.cudaFreeHelper(instName, matmultOutputPointer, DMLScript.EAGER_CUDA_FREE);
+ gCtx.cudaFreeHelper(instName, matmultOutputPointer, gCtx.EAGER_CUDA_FREE);
}
else {
// Filter and output are accounted as dense in the memory estimation for conv2d
@@ -357,13 +357,13 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
}
try {
long t1 = 0;
- if (DMLScript.FINEGRAINED_STATISTICS) t1 = System.nanoTime();
+ if (ConfigurationManager.isFinegrainedStatistics()) t1 = System.nanoTime();
int status = cudnnConvolutionForward(getCudnnHandle(gCtx), one(),
algo.nchwTensorDesc, image,
algo.filterDesc, filter,
algo.convDesc, algo.algo, algo.workSpace, algo.sizeInBytes, zero(),
algo.nkpqTensorDesc, output);
- if (DMLScript.FINEGRAINED_STATISTICS)
+ if (ConfigurationManager.isFinegrainedStatistics())
GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_CONVOLUTION_FORWARD_LIB, System.nanoTime() - t1);
if (status != cudnnStatus.CUDNN_STATUS_SUCCESS) {
throw new DMLRuntimeException("Could not executed cudnnConvolutionForward: " + cudnnStatus.stringFor(status));
@@ -438,9 +438,9 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
// Perform one-input conv2dBackwardFilter
Pointer tempdwPointer = gCtx.allocate(instName, KCRS*sizeOfDataType);
for(int n = 0; n < N; n++) {
- long t0 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
+ long t0 = ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
cudaMemset(tempdwPointer, 0, KCRS*sizeOfDataType);
- if(DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_SET_ZERO, System.nanoTime() - t0);
+ if(ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_SET_ZERO, System.nanoTime() - t0);
// Perform one-input conv2dBackwardFilter
cudnnConv2dBackwardFilter(gCtx, instName, imgFetcher.getNthRow(n), doutFetcher.getNthRow(n), tempdwPointer, algo);
getCudaKernels(gCtx).launchKernel("inplace_add",
@@ -475,10 +475,10 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
LOG.trace("GPU : conv2dBackwardFilter" + ", GPUContext=" + gCtx);
}
try {
- long t1 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
+ long t1 = ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
int status = cudnnConvolutionBackwardFilter(getCudnnHandle(gCtx), one(), algo.nchwTensorDesc, imagePointer,
algo.nkpqTensorDesc, doutPointer, algo.convDesc, algo.algo, algo.workSpace, algo.sizeInBytes, zero(), algo.filterDesc, dwPointer);
- if (DMLScript.FINEGRAINED_STATISTICS)
+ if (ConfigurationManager.isFinegrainedStatistics())
GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_CONVOLUTION_BACKWARD_FILTER_LIB, System.nanoTime() - t1);
if (status != jcuda.jcudnn.cudnnStatus.CUDNN_STATUS_SUCCESS) {
throw new DMLRuntimeException("Could not executed cudnnConvolutionBackwardFilter: " + jcuda.jcudnn.cudnnStatus.stringFor(status));
@@ -578,10 +578,10 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
LOG.trace("GPU : conv2dBackwardData" + ", GPUContext=" + gCtx);
}
try {
- long t1 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
+ long t1 = ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
int status = cudnnConvolutionBackwardData(getCudnnHandle(gCtx), one(), algo.filterDesc, w,
algo.nkpqTensorDesc, dy, algo.convDesc, algo.algo, algo.workSpace, algo.sizeInBytes, zero(), algo.nchwTensorDesc, dx);
- if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_CONVOLUTION_BACKWARD_DATA_LIB, System.nanoTime() - t1);
+ if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_CONVOLUTION_BACKWARD_DATA_LIB, System.nanoTime() - t1);
if(status != jcuda.jcudnn.cudnnStatus.CUDNN_STATUS_SUCCESS) {
throw new DMLRuntimeException("Could not executed cudnnConvolutionBackwardData: " + jcuda.jcudnn.cudnnStatus.stringFor(status));
@@ -653,11 +653,11 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
LibMatrixCuDNNPoolingDescriptors.cudnnPoolingDescriptors(gCtx, instName, N, C, H, W, K, R, S,
pad_h, pad_w, stride_h, stride_w, P, Q, poolingType)) {
long t1=0,t2=0;
- if (DMLScript.FINEGRAINED_STATISTICS) t1 = System.nanoTime();
- if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_CUDNN_INIT, System.nanoTime() - t1);
- if (DMLScript.FINEGRAINED_STATISTICS) t2 = System.nanoTime();
+ if (ConfigurationManager.isFinegrainedStatistics()) t1 = System.nanoTime();
+ if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_CUDNN_INIT, System.nanoTime() - t1);
+ if (ConfigurationManager.isFinegrainedStatistics()) t2 = System.nanoTime();
int status = cudnnPoolingForward(getCudnnHandle(gCtx), desc.poolingDesc, one(), desc.xDesc, x, zero(), desc.yDesc, y);
- if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_MAXPOOLING_FORWARD_LIB, System.nanoTime() - t2);
+ if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_MAXPOOLING_FORWARD_LIB, System.nanoTime() - t2);
if(status != jcuda.jcudnn.cudnnStatus.CUDNN_STATUS_SUCCESS) {
throw new DMLRuntimeException("Could not executed cudnnPoolingForward: " + jcuda.jcudnn.cudnnStatus.stringFor(status));
}
@@ -752,20 +752,20 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
long t1=0, t2=0, t3=0;
int status;
if(!isMaxPoolOutputProvided) {
- if (DMLScript.FINEGRAINED_STATISTICS) t1 = System.nanoTime();
+ if (ConfigurationManager.isFinegrainedStatistics()) t1 = System.nanoTime();
long numBytes = N*C*P*Q*sizeOfDataType;
y = gCtx.allocate(instName, numBytes);
- if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_CUDNN_INIT, System.nanoTime() - t1);
- if (DMLScript.FINEGRAINED_STATISTICS) t2 = System.nanoTime();
+ if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_CUDNN_INIT, System.nanoTime() - t1);
+ if (ConfigurationManager.isFinegrainedStatistics()) t2 = System.nanoTime();
status = cudnnPoolingForward(getCudnnHandle(gCtx), desc.poolingDesc, one(), desc.xDesc, x, zero(), desc.yDesc, y);
- if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_MAXPOOLING_FORWARD_LIB, System.nanoTime() - t2);
+ if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_MAXPOOLING_FORWARD_LIB, System.nanoTime() - t2);
if(status != jcuda.jcudnn.cudnnStatus.CUDNN_STATUS_SUCCESS) {
throw new DMLRuntimeException("Could not executed cudnnPoolingForward before cudnnPoolingBackward: " + jcuda.jcudnn.cudnnStatus.stringFor(status));
}
}
- if (DMLScript.FINEGRAINED_STATISTICS) t3 = System.nanoTime();
+ if (ConfigurationManager.isFinegrainedStatistics()) t3 = System.nanoTime();
status = cudnnPoolingBackward(getCudnnHandle(gCtx), desc.poolingDesc, one(), desc.yDesc, y, desc.dyDesc, dy, desc.xDesc, x, zero(), desc.dxDesc, dx);
- if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_MAXPOOLING_BACKWARD_LIB, System.nanoTime() - t3);
+ if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_MAXPOOLING_BACKWARD_LIB, System.nanoTime() - t3);
if(status != jcuda.jcudnn.cudnnStatus.CUDNN_STATUS_SUCCESS) {
throw new DMLRuntimeException("Could not executed cudnnPoolingBackward: " + jcuda.jcudnn.cudnnStatus.stringFor(status));
@@ -775,10 +775,10 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
}
finally {
long t4=0;
- if (DMLScript.FINEGRAINED_STATISTICS) t4 = System.nanoTime();
+ if (ConfigurationManager.isFinegrainedStatistics()) t4 = System.nanoTime();
if(!isMaxPoolOutputProvided)
- gCtx.cudaFreeHelper(instName, y, DMLScript.EAGER_CUDA_FREE);
- if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_CUDNN_CLEANUP, System.nanoTime() - t4);
+ gCtx.cudaFreeHelper(instName, y, gCtx.EAGER_CUDA_FREE);
+ if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_CUDNN_CLEANUP, System.nanoTime() - t4);
}
}
@@ -795,18 +795,18 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
cudnnCreateActivationDescriptor(activationDescriptor);
double dummy = -1;
cudnnSetActivationDescriptor(activationDescriptor, CUDNN_ACTIVATION_RELU, CUDNN_PROPAGATE_NAN, dummy);
- if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
+ if (ConfigurationManager.isFinegrainedStatistics()) t0 = System.nanoTime();
cudnnActivationForward(getCudnnHandle(gCtx), activationDescriptor,
one(), srcTensorDesc, srcData,
zero(), dstTensorDesc, dstData);
- if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_ACTIVATION_FORWARD_LIB, System.nanoTime() - t0);
+ if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_ACTIVATION_FORWARD_LIB, System.nanoTime() - t0);
} catch (CudaException e) {
throw new DMLRuntimeException("Error in conv2d in GPUContext " + gCtx.toString() + " from Thread " + Thread.currentThread().toString(), e);
}
finally {
long t1=0;
- if (DMLScript.FINEGRAINED_STATISTICS) t1 = System.nanoTime();
- if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_CUDNN_CLEANUP, System.nanoTime() - t1);
+ if (ConfigurationManager.isFinegrainedStatistics()) t1 = System.nanoTime();
+ if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_CUDNN_CLEANUP, System.nanoTime() - t1);
}
}
@@ -831,11 +831,11 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
}
// Invokes relu(double* A, double* ret, int rlen, int clen)
Pointer srcData = getDensePointerForCuDNN(gCtx, in, instName); // TODO: FIXME: Add sparse kernel support for relu
- if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
+ if (ConfigurationManager.isFinegrainedStatistics()) t0 = System.nanoTime();
getCudaKernels(gCtx).launchKernel("relu",
ExecutionConfig.getConfigForSimpleMatrixOperations(toInt(N), toInt(CHW)),
srcData, dstData, toInt(N), toInt(CHW));
- if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_RELU_KERNEL, System.nanoTime() - t0);
+ if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_RELU_KERNEL, System.nanoTime() - t0);
}
else {
cudnnTensorDescriptor tensorDescriptor = new cudnnTensorDescriptor();
@@ -910,13 +910,13 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
}
if(return_sequences) {
- gCtx.cudaFreeHelper(instName, hyPointer, DMLScript.EAGER_CUDA_FREE);
+ gCtx.cudaFreeHelper(instName, hyPointer, gCtx.EAGER_CUDA_FREE);
Pointer sysmlYPointer = getDenseOutputPointer(ec, gCtx, instName, outputName, N, T*M);
LibMatrixCUDA.getCudaKernels(gCtx).launchKernel("prepare_lstm_output",
ExecutionConfig.getConfigForSimpleVectorOperations(N*T*M),
sysmlYPointer, cudnnYPointer, N, T, M, N*T*M);
}
- gCtx.cudaFreeHelper(instName, cudnnYPointer, DMLScript.EAGER_CUDA_FREE);
+ gCtx.cudaFreeHelper(instName, cudnnYPointer, gCtx.EAGER_CUDA_FREE);
}
public static void lstmBackward(ExecutionContext ec, GPUContext gCtx, String instName,
@@ -966,7 +966,7 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
// ----------------------
algo.workSpace, algo.sizeInBytes,
algo.reserveSpace, algo.reserveSpaceSizeInBytes);
- gCtx.cudaFreeHelper(instName, dy, DMLScript.EAGER_CUDA_FREE);
+ gCtx.cudaFreeHelper(instName, dy, gCtx.EAGER_CUDA_FREE);
ec.releaseMatrixInputForGPUInstruction(dcyName);
ec.releaseMatrixOutputForGPUInstruction(dhxName);
ec.releaseMatrixOutputForGPUInstruction(dcxName);
@@ -976,7 +976,7 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
ExecutionConfig.getConfigForSimpleVectorOperations(N*T*D),
smlDx, cudnnDx, N, D, T*D, N*T*D);
ec.releaseMatrixOutputForGPUInstruction(dxName);
- gCtx.cudaFreeHelper(instName, cudnnDx, DMLScript.EAGER_CUDA_FREE);
+ gCtx.cudaFreeHelper(instName, cudnnDx, gCtx.EAGER_CUDA_FREE);
// -------------------------------------------------------------------------------------------
Pointer cudnnDwPointer = gCtx.allocate(instName, (D+M+2)*(4*M)*LibMatrixCUDA.sizeOfDataType);
@@ -991,12 +991,12 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
ExecutionConfig.getConfigForSimpleVectorOperations((D+M+2)*(4*M)),
getDenseOutputPointer(ec, gCtx, instName, dwName, D+M, 4*M),
getDenseOutputPointer(ec, gCtx, instName, dbName, 1, 4*M), cudnnDwPointer, D, M);
- gCtx.cudaFreeHelper(instName, cudnnDwPointer, DMLScript.EAGER_CUDA_FREE);
+ gCtx.cudaFreeHelper(instName, cudnnDwPointer, gCtx.EAGER_CUDA_FREE);
ec.releaseMatrixOutputForGPUInstruction(dwName);
ec.releaseMatrixOutputForGPUInstruction(dbName);
// -------------------------------------------------------------------------------------------
- gCtx.cudaFreeHelper(instName, yPointer, DMLScript.EAGER_CUDA_FREE);
+ gCtx.cudaFreeHelper(instName, yPointer, gCtx.EAGER_CUDA_FREE);
}
}
http://git-wip-us.apache.org/repos/asf/systemml/blob/ae268a9e/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNConvolutionAlgorithm.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNConvolutionAlgorithm.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNConvolutionAlgorithm.java
index f70b453..dbad80c 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNConvolutionAlgorithm.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNConvolutionAlgorithm.java
@@ -19,7 +19,7 @@
package org.apache.sysml.runtime.matrix.data;
-import org.apache.sysml.api.DMLScript;
+import org.apache.sysml.conf.ConfigurationManager;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.instructions.gpu.GPUInstruction;
import org.apache.sysml.runtime.instructions.gpu.context.GPUContext;
@@ -86,7 +86,7 @@ public class LibMatrixCuDNNConvolutionAlgorithm implements java.lang.AutoCloseab
@Override
public void close() {
long t3 = 0;
- if (DMLScript.FINEGRAINED_STATISTICS) t3 = System.nanoTime();
+ if (ConfigurationManager.isFinegrainedStatistics()) t3 = System.nanoTime();
if(nchwTensorDesc != null)
cudnnDestroyTensorDescriptor(nchwTensorDesc);
if(nkpqTensorDesc != null)
@@ -97,12 +97,12 @@ public class LibMatrixCuDNNConvolutionAlgorithm implements java.lang.AutoCloseab
cudnnDestroyConvolutionDescriptor(convDesc);
if(sizeInBytes != 0) {
try {
- gCtx.cudaFreeHelper(instName, workSpace, DMLScript.EAGER_CUDA_FREE);
+ gCtx.cudaFreeHelper(instName, workSpace, gCtx.EAGER_CUDA_FREE);
} catch (DMLRuntimeException e) {
throw new RuntimeException(e);
}
}
- if(DMLScript.FINEGRAINED_STATISTICS)
+ if(ConfigurationManager.isFinegrainedStatistics())
GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_CUDNN_CLEANUP, System.nanoTime() - t3);
}
@@ -130,7 +130,7 @@ public class LibMatrixCuDNNConvolutionAlgorithm implements java.lang.AutoCloseab
public static LibMatrixCuDNNConvolutionAlgorithm cudnnGetConvolutionForwardAlgorithm(
GPUContext gCtx, String instName, int N, int C, int H, int W, int K, int R, int S,
int pad_h, int pad_w, int stride_h, int stride_w, int P, int Q, long workspaceLimit) {
- long t1 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
+ long t1 = ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
LibMatrixCuDNNConvolutionAlgorithm ret = new LibMatrixCuDNNConvolutionAlgorithm(gCtx, instName, N, C, H, W, K, R, S,
pad_h, pad_w, stride_h, stride_w, P, Q);
int[] algos = {-1};
@@ -144,7 +144,7 @@ public class LibMatrixCuDNNConvolutionAlgorithm implements java.lang.AutoCloseab
ret.workSpace = gCtx.allocate(instName, sizeInBytesArray[0]);
ret.sizeInBytes = sizeInBytesArray[0];
ret.algo = algos[0];
- if (DMLScript.FINEGRAINED_STATISTICS)
+ if (ConfigurationManager.isFinegrainedStatistics())
GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_CUDNN_INIT, System.nanoTime() - t1);
return ret;
}
@@ -173,7 +173,7 @@ public class LibMatrixCuDNNConvolutionAlgorithm implements java.lang.AutoCloseab
public static LibMatrixCuDNNConvolutionAlgorithm cudnnGetConvolutionBackwardFilterAlgorithm(
GPUContext gCtx, String instName, int N, int C, int H, int W, int K, int R, int S,
int pad_h, int pad_w, int stride_h, int stride_w, int P, int Q, long workspaceLimit) {
- long t1 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
+ long t1 = ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
LibMatrixCuDNNConvolutionAlgorithm ret = new LibMatrixCuDNNConvolutionAlgorithm(gCtx, instName, N, C, H, W, K, R, S,
pad_h, pad_w, stride_h, stride_w, P, Q);
@@ -190,7 +190,7 @@ public class LibMatrixCuDNNConvolutionAlgorithm implements java.lang.AutoCloseab
ret.sizeInBytes = sizeInBytesArray[0];
ret.algo = algos[0];
- if (DMLScript.FINEGRAINED_STATISTICS)
+ if (ConfigurationManager.isFinegrainedStatistics())
GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_CUDNN_INIT, System.nanoTime() - t1);
return ret;
}
@@ -229,7 +229,7 @@ public class LibMatrixCuDNNConvolutionAlgorithm implements java.lang.AutoCloseab
ret.algo = jcuda.jcudnn.cudnnConvolutionBwdDataAlgo.CUDNN_CONVOLUTION_BWD_DATA_ALGO_0;
}
else {
- long t1 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
+ long t1 = ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
int[] algos = {-1};
long sizeInBytesArray[] = {Math.min(workspaceLimit, MAX_WORKSPACE_LIMIT_BYTES)};
jcuda.jcudnn.JCudnn.cudnnGetConvolutionBackwardDataAlgorithm(
@@ -242,7 +242,7 @@ public class LibMatrixCuDNNConvolutionAlgorithm implements java.lang.AutoCloseab
ret.workSpace = gCtx.allocate(instName, sizeInBytesArray[0]);
ret.sizeInBytes = sizeInBytesArray[0];
ret.algo = algos[0];
- if (DMLScript.FINEGRAINED_STATISTICS)
+ if (ConfigurationManager.isFinegrainedStatistics())
GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_CUDNN_INIT, System.nanoTime() - t1);
}
return ret;
http://git-wip-us.apache.org/repos/asf/systemml/blob/ae268a9e/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNInputRowFetcher.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNInputRowFetcher.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNInputRowFetcher.java
index 0130aa6..81a703d 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNInputRowFetcher.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNInputRowFetcher.java
@@ -21,7 +21,7 @@ package org.apache.sysml.runtime.matrix.data;
import static jcuda.runtime.JCuda.cudaMemset;
import jcuda.Pointer;
-import org.apache.sysml.api.DMLScript;
+import org.apache.sysml.conf.ConfigurationManager;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.controlprogram.caching.MatrixObject;
import org.apache.sysml.runtime.instructions.gpu.GPUInstruction;
@@ -59,10 +59,10 @@ public class LibMatrixCuDNNInputRowFetcher extends LibMatrixCUDA implements java
public Pointer getNthRow(int n) {
if(isInputInSparseFormat) {
jcuda.runtime.JCuda.cudaDeviceSynchronize();
- long t0 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
+ long t0 = ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
cudaMemset(outPointer, 0, numColumns*sizeOfDataType);
jcuda.runtime.JCuda.cudaDeviceSynchronize();
- if(DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_SET_ZERO, System.nanoTime() - t0);
+ if(ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_SET_ZERO, System.nanoTime() - t0);
LibMatrixCUDA.sliceSparseDense(gCtx, instName, (CSRPointer)inPointer, outPointer, n, n, 0, LibMatrixCUDA.toInt(numColumns-1), numColumns);
}
else {
http://git-wip-us.apache.org/repos/asf/systemml/blob/ae268a9e/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNRnnAlgorithm.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNRnnAlgorithm.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNRnnAlgorithm.java
index 8ebc4e0..7b2c601 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNRnnAlgorithm.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNRnnAlgorithm.java
@@ -32,7 +32,6 @@ import static jcuda.jcudnn.cudnnRNNInputMode.CUDNN_LINEAR_INPUT;
import static jcuda.jcudnn.cudnnDirectionMode.CUDNN_UNIDIRECTIONAL;
import static jcuda.jcudnn.cudnnRNNAlgo.CUDNN_RNN_ALGO_STANDARD;
-import org.apache.sysml.api.DMLScript;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.controlprogram.context.ExecutionContext;
import org.apache.sysml.runtime.instructions.gpu.context.GPUContext;
@@ -301,7 +300,7 @@ public class LibMatrixCuDNNRnnAlgorithm implements java.lang.AutoCloseable {
}
if(sizeInBytes != 0) {
try {
- gCtx.cudaFreeHelper(instName, workSpace, DMLScript.EAGER_CUDA_FREE);
+ gCtx.cudaFreeHelper(instName, workSpace, gCtx.EAGER_CUDA_FREE);
} catch (DMLRuntimeException e) {
throw new RuntimeException(e);
}
@@ -309,7 +308,7 @@ public class LibMatrixCuDNNRnnAlgorithm implements java.lang.AutoCloseable {
workSpace = null;
if(reserveSpaceSizeInBytes != 0) {
try {
- gCtx.cudaFreeHelper(instName, reserveSpace, DMLScript.EAGER_CUDA_FREE);
+ gCtx.cudaFreeHelper(instName, reserveSpace, gCtx.EAGER_CUDA_FREE);
} catch (DMLRuntimeException e) {
throw new RuntimeException(e);
}
@@ -317,7 +316,7 @@ public class LibMatrixCuDNNRnnAlgorithm implements java.lang.AutoCloseable {
reserveSpace = null;
if(dropOutSizeInBytes != 0) {
try {
- gCtx.cudaFreeHelper(instName, dropOutStateSpace, DMLScript.EAGER_CUDA_FREE);
+ gCtx.cudaFreeHelper(instName, dropOutStateSpace, gCtx.EAGER_CUDA_FREE);
} catch (DMLRuntimeException e) {
throw new RuntimeException(e);
}
http://git-wip-us.apache.org/repos/asf/systemml/blob/ae268a9e/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuMatMult.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuMatMult.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuMatMult.java
index 18739a8..9833456 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuMatMult.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuMatMult.java
@@ -26,7 +26,7 @@ import jcuda.Pointer;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
-import org.apache.sysml.api.DMLScript;
+import org.apache.sysml.conf.ConfigurationManager;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.controlprogram.caching.MatrixObject;
import org.apache.sysml.runtime.controlprogram.context.ExecutionContext;
@@ -161,19 +161,19 @@ public class LibMatrixCuMatMult extends LibMatrixCUDA {
// and output
CSRPointer A = left.getGPUObject(gCtx).getJcudaSparseMatrixPtr();
CSRPointer B = right.getGPUObject(gCtx).getJcudaSparseMatrixPtr();
- long t0 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
+ long t0 = ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
CSRPointer C = CSRPointer.allocateForMatrixMultiply(gCtx, getCusparseHandle(gCtx), A, transa, B, transb,
params.m, params.n, params.k);
- if (DMLScript.FINEGRAINED_STATISTICS)
+ if (ConfigurationManager.isFinegrainedStatistics())
GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_SPARSE_ALLOCATE_LIB,
System.nanoTime() - t0);
// Step 3: Invoke the kernel
- long t1 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
+ long t1 = ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
cudaSupportFunctions.cusparsecsrgemm(getCusparseHandle(gCtx), transa, transb, params.m, params.n, params.k, A.descr,
(int) A.nnz, A.val, A.rowPtr, A.colInd, B.descr, (int) B.nnz, B.val, B.rowPtr, B.colInd, C.descr,
C.val, C.rowPtr, C.colInd);
- if (DMLScript.FINEGRAINED_STATISTICS)
+ if (ConfigurationManager.isFinegrainedStatistics())
GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_SPARSE_MATRIX_SPARSE_MATRIX_LIB,
System.nanoTime() - t1);
output.getGPUObject(gCtx).setSparseMatrixCudaPointer(C);
@@ -279,14 +279,14 @@ public class LibMatrixCuMatMult extends LibMatrixCUDA {
denseSparseMatMult(getCusparseHandle(gCtx), instName, output, B, A, params);
if (outRLen != 1 && outCLen != 1) {
// Transpose: C = t(output)
- long t0 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
+ long t0 = ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
cudaSupportFunctions.cublasgeam(gCtx.getCublasHandle(), cublasOperation.CUBLAS_OP_T, cublasOperation.CUBLAS_OP_T,
toInt(outCLen), toInt(outRLen), one(), output, toInt(outRLen), zero(), new Pointer(),
toInt(outRLen), C, toInt(outCLen));
- if (!DMLScript.EAGER_CUDA_FREE)
+ if (!gCtx.EAGER_CUDA_FREE)
JCuda.cudaDeviceSynchronize();
- gCtx.cudaFreeHelper(instName, output, DMLScript.EAGER_CUDA_FREE);
- if (DMLScript.FINEGRAINED_STATISTICS)
+ gCtx.cudaFreeHelper(instName, output, gCtx.EAGER_CUDA_FREE);
+ if (ConfigurationManager.isFinegrainedStatistics())
GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_TRANSPOSE_LIB, System.nanoTime()
- t0);
}
@@ -312,7 +312,7 @@ public class LibMatrixCuMatMult extends LibMatrixCUDA {
*/
private static void denseSparseMatMult(cusparseHandle handle, String instName, Pointer C, Pointer A, CSRPointer B,
CuMatMultParameters param) {
- long t0 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
+ long t0 = ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
String kernel = GPUInstruction.MISC_TIMER_SPARSE_MATRIX_DENSE_MATRIX_LIB;
// Ignoring sparse vector dense matrix multiplication and dot product
boolean isVector = (param.leftNumRows == 1 && !param.isLeftTransposed)
@@ -336,7 +336,7 @@ public class LibMatrixCuMatMult extends LibMatrixCUDA {
cudaSupportFunctions.cusparsecsrmm2(handle, transa, transb, m, param.n, k, toInt(B.nnz), one(), B.descr, B.val,
B.rowPtr, B.colInd, A, param.ldb, zero(), C, param.ldc);
}
- if (DMLScript.FINEGRAINED_STATISTICS)
+ if (ConfigurationManager.isFinegrainedStatistics())
GPUStatistics.maintainCPMiscTimes(instName, kernel, System.nanoTime() - t0);
}
@@ -361,7 +361,7 @@ public class LibMatrixCuMatMult extends LibMatrixCUDA {
*/
private static void denseDenseMatMult(cublasHandle handle, String instName, Pointer C, Pointer A, Pointer B,
CuMatMultParameters param) {
- long t0 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
+ long t0 = ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
String kernel = null;
param.rowToColumnMajor();
param.validate();
@@ -403,7 +403,7 @@ public class LibMatrixCuMatMult extends LibMatrixCUDA {
zero(), C, param.ldc);
kernel = GPUInstruction.MISC_TIMER_DENSE_MATRIX_DENSE_MATRIX_LIB;
}
- if (DMLScript.FINEGRAINED_STATISTICS)
+ if (ConfigurationManager.isFinegrainedStatistics())
GPUStatistics.maintainCPMiscTimes(instName, kernel, System.nanoTime() - t0);
}
http://git-wip-us.apache.org/repos/asf/systemml/blob/ae268a9e/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
index 0c6f41a..4569dbe 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
@@ -28,7 +28,7 @@ import java.util.concurrent.atomic.AtomicLong;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
-import org.apache.sysml.api.DMLScript;
+import org.apache.sysml.conf.ConfigurationManager;
import org.apache.sysml.hops.OptimizerUtils;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.functionobjects.KahanPlus;
@@ -91,7 +91,7 @@ public class LibMatrixDNN {
static AtomicLong loopedConvBwdDataCol2ImTime = new AtomicLong(0);
public static void appendStatistics(StringBuilder sb) {
- if(DMLScript.FINEGRAINED_STATISTICS) {
+ if(ConfigurationManager.isFinegrainedStatistics()) {
sb.append("LibMatrixDNN dense count (conv/bwdF/bwdD/im2col/maxBwd):\t"
+ conv2dDenseCount.get() + "/"
+ conv2dBwdFilterDenseCount.get() + "/"
@@ -238,7 +238,7 @@ public class LibMatrixDNN {
throw new DMLRuntimeException("Incorrect dout dimensions in pooling_backward:" + input.getNumRows() + " " + input.getNumColumns() + " " + params.N + " " + params.K*params.P*params.Q);
}
- if(DMLScript.FINEGRAINED_STATISTICS) {
+ if(ConfigurationManager.isFinegrainedStatistics()) {
boolean isSparse = (poolType == PoolingType.MAX) ? (input.isInSparseFormat() || dout.isInSparseFormat()) : dout.isInSparseFormat();
if(isSparse)
maxPoolBwdSparseCount.addAndGet(1);
@@ -780,7 +780,7 @@ public class LibMatrixDNN {
if(params.stride_h <= 0 || params.stride_w <= 0)
throw new DMLRuntimeException("Only positive strides supported:" + params.stride_h + ", " + params.stride_w);
- if(DMLScript.FINEGRAINED_STATISTICS) {
+ if(ConfigurationManager.isFinegrainedStatistics()) {
if(filter.isInSparseFormat() || dout.isInSparseFormat()) {
conv2dBwdDataSparseCount.addAndGet(1);
}
@@ -805,7 +805,7 @@ public class LibMatrixDNN {
if(params.stride_h <= 0 || params.stride_w <= 0)
throw new DMLRuntimeException("Only positive strides supported:" + params.stride_h + ", " + params.stride_w);
- if(DMLScript.FINEGRAINED_STATISTICS) {
+ if(ConfigurationManager.isFinegrainedStatistics()) {
if(input.isInSparseFormat() || dout.isInSparseFormat()) {
conv2dBwdFilterSparseCount.addAndGet(1);
}
@@ -831,7 +831,7 @@ public class LibMatrixDNN {
if(params.stride_h <= 0 || params.stride_w <= 0)
throw new DMLRuntimeException("Only positive strides supported:" + params.stride_h + ", " + params.stride_w);
- if(DMLScript.FINEGRAINED_STATISTICS) {
+ if(ConfigurationManager.isFinegrainedStatistics()) {
if(input.isInSparseFormat() || filter.isInSparseFormat()) {
conv2dSparseCount.addAndGet(1);
}
http://git-wip-us.apache.org/repos/asf/systemml/blob/ae268a9e/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNConv2d.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNConv2d.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNConv2d.java
index b938a0a..982949f 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNConv2d.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNConv2d.java
@@ -22,7 +22,7 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.concurrent.Callable;
-import org.apache.sysml.api.DMLScript;
+import org.apache.sysml.conf.ConfigurationManager;
import org.apache.sysml.hops.OptimizerUtils;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.matrix.data.LibMatrixDNNRotate180.Rotate180Worker;
@@ -172,16 +172,16 @@ public class LibMatrixDNNConv2d
MatrixBlock outMM = new MatrixBlock(K, PQ, _params.output.sparse);
long time1 = 0; long time2 = 0;
for(int n = _rl; n < _ru; n++) {
- long t1 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
+ long t1 = ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
LibMatrixDNNIm2Col.im2col(_params.input1, outIm2col, n, _params, false);
- long t2 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
+ long t2 = ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
// filter %*% _im2ColOutBlock => matMultOutBlock
outMM.reset(outMM.rlen, outMM.clen, _params.output.sparse);
LibMatrixDNNHelper.singleThreadedMatMult(_params.input2, outIm2col, outMM, false, true, _params);
- long t3 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
+ long t3 = ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
- if(DMLScript.FINEGRAINED_STATISTICS) {
+ if(ConfigurationManager.isFinegrainedStatistics()) {
time1 += t2 - t1;
time2 += t3 - t2;
}
@@ -195,7 +195,7 @@ public class LibMatrixDNNConv2d
_params.bias.getDenseBlockValues(), K, PQ);
}
- if(DMLScript.FINEGRAINED_STATISTICS) {
+ if(ConfigurationManager.isFinegrainedStatistics()) {
LibMatrixDNN.loopedConvIm2ColTime.addAndGet(time1);
LibMatrixDNN.loopedConvMatMultTime.addAndGet(time2);
}
@@ -416,20 +416,20 @@ public class LibMatrixDNNConv2d
// rotate180(dout[n,]) => dout_reshaped
rotate180Worker.execute(n, 0);
// dout_reshaped %*% filter => temp
- long t1 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
+ long t1 = ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
outMM.reset(PQ, CRS, false);
LibMatrixDNNHelper.singleThreadedMatMult(outRotate, filter, outMM, !outRotate.sparse, false, _params);
- long t2 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
+ long t2 = ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
// col2im(temp) => output[n,]
LibMatrixDNNIm2Col.col2imOverSingleImage(n, outMM, _params);
- long t3 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
+ long t3 = ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
- if(DMLScript.FINEGRAINED_STATISTICS) {
+ if(ConfigurationManager.isFinegrainedStatistics()) {
time1 += t2 - t1;
time2 += t3 - t2;
}
}
- if(DMLScript.FINEGRAINED_STATISTICS) {
+ if(ConfigurationManager.isFinegrainedStatistics()) {
LibMatrixDNN.loopedConvBwdDataMatMultTime.addAndGet(time1);
LibMatrixDNN.loopedConvBwdDataCol2ImTime.addAndGet(time2);
}
@@ -512,24 +512,24 @@ public class LibMatrixDNNConv2d
rotate180Worker.execute(n, 0);
// im2col(input) => _im2ColOutBlock
- long t1 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
+ long t1 = ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
LibMatrixDNNIm2Col.im2col(_params.input1, im2ColOutBlock, n, _params, false);
- long t2 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
+ long t2 = ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
outMM.reset(CRS, K, false);
LibMatrixDNNHelper.singleThreadedMatMult(im2ColOutBlock, outRotate, outMM, !im2ColOutBlock.sparse, !outRotate.sparse, _params);
- long t3 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
+ long t3 = ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
if( !outMM.isEmptyBlock() ) //accumulate row results
LibMatrixMult.vectAdd(outMM.getDenseBlockValues(), partRet, 0, 0, K*CRS);
- if(DMLScript.FINEGRAINED_STATISTICS) {
+ if(ConfigurationManager.isFinegrainedStatistics()) {
time1 += t2 - t1;
time2 += t3 - t2;
}
}
inplaceTransAdd(partRet, _params);
- if(DMLScript.FINEGRAINED_STATISTICS) {
+ if(ConfigurationManager.isFinegrainedStatistics()) {
LibMatrixDNN.loopedConvBwdFilterIm2ColTime.addAndGet(time1);
LibMatrixDNN.loopedConvBwdFilterMatMultTime.addAndGet(time2);
}
@@ -562,27 +562,27 @@ public class LibMatrixDNNConv2d
rotate180Worker.execute(n, 0);
// im2col(input) => _im2ColOutBlock
- long t1 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
+ long t1 = ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
LibMatrixDNNIm2Col.im2col(_params.input1, im2ColOutBlock, n, _params, true);
- long t2 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
+ long t2 = ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
outMM.reset(K, CRS, false);
//Timing time = new Timing(true);
LibMatrixDNNHelper.singleThreadedMatMult(outRotate, im2ColOutBlock,
outMM, !outRotate.sparse, !im2ColOutBlock.sparse, _params);
- long t3 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
+ long t3 = ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
if( !outMM.isEmptyBlock() ) //accumulate row results
LibMatrixMult.vectAdd(outMM.getDenseBlockValues(), partRet, 0, 0, K*CRS);
- if(DMLScript.FINEGRAINED_STATISTICS) {
+ if(ConfigurationManager.isFinegrainedStatistics()) {
time1 += t2 - t1;
time2 += t3 - t2;
}
}
//no need to transpose because t(t(out)) cancel out
inplaceAdd(partRet, _params);
- if(DMLScript.FINEGRAINED_STATISTICS) {
+ if(ConfigurationManager.isFinegrainedStatistics()) {
LibMatrixDNN.loopedConvBwdFilterIm2ColTime.addAndGet(time1);
LibMatrixDNN.loopedConvBwdFilterMatMultTime.addAndGet(time2);
}
http://git-wip-us.apache.org/repos/asf/systemml/blob/ae268a9e/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixNative.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixNative.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixNative.java
index aa5ba86..2cb64c2 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixNative.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixNative.java
@@ -26,7 +26,6 @@ import java.util.stream.IntStream;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
-import org.apache.sysml.api.DMLScript;
import org.apache.sysml.conf.ConfigurationManager;
import org.apache.sysml.conf.DMLConfig;
import org.apache.sysml.hops.OptimizerUtils;
@@ -87,7 +86,7 @@ public class LibMatrixNative
{
ret.sparse = false;
ret.allocateDenseBlock();
- long start = DMLScript.STATISTICS ? System.nanoTime() : 0;
+ long start = ConfigurationManager.isStatistics() ? System.nanoTime() : 0;
boolean rccode = false;
if( isSinglePrecision() ) {
FloatBuffer fin1 = toFloatBuffer(m1.getDenseBlockValues(), inBuff, true);
@@ -102,7 +101,7 @@ public class LibMatrixNative
ret.getDenseBlockValues(), m1.getNumRows(), m1.getNumColumns(), m2.getNumColumns(), k);
}
if (rccode) {
- if(DMLScript.STATISTICS) {
+ if(ConfigurationManager.isStatistics()) {
Statistics.nativeLibMatrixMultTime += System.nanoTime() - start;
Statistics.numNativeLibMatrixMultCalls.increment();
}
@@ -158,7 +157,7 @@ public class LibMatrixNative
params.numThreads = params.numThreads <= 0 ? NativeHelper.getMaxNumThreads() : params.numThreads;
if(NativeHelper.isNativeLibraryLoaded() && !input.isInSparseFormat() && !filter.isInSparseFormat()) {
setNumThreads(params);
- long start = DMLScript.STATISTICS ? System.nanoTime() : 0;
+ long start = ConfigurationManager.isStatistics() ? System.nanoTime() : 0;
int nnz = 0;
if(params.bias == null) {
nnz = NativeHelper.conv2dDense(input.getDenseBlockValues(), filter.getDenseBlockValues(),
@@ -195,7 +194,7 @@ public class LibMatrixNative
}
//post processing and error handling
if(nnz != -1) {
- if(DMLScript.STATISTICS) {
+ if(ConfigurationManager.isStatistics()) {
Statistics.nativeConv2dTime += System.nanoTime() - start;
Statistics.numNativeConv2dCalls.increment();
}
@@ -234,13 +233,13 @@ public class LibMatrixNative
params.numThreads = params.numThreads <= 0 ? NativeHelper.getMaxNumThreads() : params.numThreads;
if(NativeHelper.isNativeLibraryLoaded() && !dout.isInSparseFormat() && !input.isInSparseFormat()) {
setNumThreads(params);
- long start = DMLScript.STATISTICS ? System.nanoTime() : 0;
+ long start = ConfigurationManager.isStatistics() ? System.nanoTime() : 0;
int nnz = NativeHelper.conv2dBackwardFilterDense(input.getDenseBlockValues(), dout.getDenseBlockValues(),
outputBlock.getDenseBlockValues(), params.N, params.C, params.H, params.W,
params.K, params.R, params.S, params.stride_h, params.stride_w, params.pad_h, params.pad_w,
params.P, params.Q, params.numThreads);
if(nnz != -1) {
- if(DMLScript.STATISTICS) {
+ if(ConfigurationManager.isStatistics()) {
Statistics.nativeConv2dBwdFilterTime += System.nanoTime() - start;
Statistics.numNativeConv2dBwdFilterCalls.increment();
}
@@ -270,13 +269,13 @@ public class LibMatrixNative
params.numThreads = params.numThreads <= 0 ? NativeHelper.getMaxNumThreads() : params.numThreads;
if(NativeHelper.isNativeLibraryLoaded() && !dout.isInSparseFormat() && !filter.isInSparseFormat()) {
setNumThreads(params);
- long start = DMLScript.STATISTICS ? System.nanoTime() : 0;
+ long start = ConfigurationManager.isStatistics() ? System.nanoTime() : 0;
int nnz = NativeHelper.conv2dBackwardDataDense(filter.getDenseBlockValues(), dout.getDenseBlockValues(),
outputBlock.getDenseBlockValues(), params.N, params.C, params.H, params.W,
params.K, params.R, params.S, params.stride_h, params.stride_w, params.pad_h, params.pad_w,
params.P, params.Q, params.numThreads);
if(nnz != -1) {
- if(DMLScript.STATISTICS) {
+ if(ConfigurationManager.isStatistics()) {
Statistics.nativeConv2dBwdDataTime += System.nanoTime() - start;
Statistics.numNativeConv2dBwdDataCalls.increment();
}
http://git-wip-us.apache.org/repos/asf/systemml/blob/ae268a9e/src/main/java/org/apache/sysml/runtime/matrix/data/SinglePrecisionCudaSupportFunctions.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/SinglePrecisionCudaSupportFunctions.java b/src/main/java/org/apache/sysml/runtime/matrix/data/SinglePrecisionCudaSupportFunctions.java
index 942b56b..044e943 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/SinglePrecisionCudaSupportFunctions.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/SinglePrecisionCudaSupportFunctions.java
@@ -29,7 +29,7 @@ import java.util.stream.IntStream;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
-import org.apache.sysml.api.DMLScript;
+import org.apache.sysml.conf.ConfigurationManager;
import org.apache.sysml.runtime.instructions.gpu.GPUInstruction;
import org.apache.sysml.runtime.instructions.gpu.context.GPUContext;
import org.apache.sysml.utils.GPUStatistics;
@@ -168,7 +168,7 @@ public class SinglePrecisionCudaSupportFunctions implements CudaSupportFunctions
@Override
public void deviceToHost(GPUContext gCtx, Pointer src, double[] dest, String instName, boolean isEviction) {
- long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
+ long t0 = ConfigurationManager.isStatistics() ? System.nanoTime() : 0;
// We invoke transfer matrix from device to host in two cases:
// 1. During eviction of unlocked matrices
// 2. During acquireHostRead
@@ -182,7 +182,7 @@ public class SinglePrecisionCudaSupportFunctions implements CudaSupportFunctions
Pointer deviceDoubleData = gCtx.allocate(instName, ((long)dest.length)*Sizeof.DOUBLE);
LibMatrixCUDA.float2double(gCtx, src, deviceDoubleData, dest.length);
cudaMemcpy(Pointer.to(dest), deviceDoubleData, ((long)dest.length)*Sizeof.DOUBLE, cudaMemcpyDeviceToHost);
- gCtx.cudaFreeHelper(instName, deviceDoubleData, DMLScript.EAGER_CUDA_FREE);
+ gCtx.cudaFreeHelper(instName, deviceDoubleData, gCtx.EAGER_CUDA_FREE);
}
else {
LOG.debug("Potential OOM: Allocated additional space on host in deviceToHost");
@@ -190,11 +190,11 @@ public class SinglePrecisionCudaSupportFunctions implements CudaSupportFunctions
cudaMemcpy(Pointer.to(floatData), src, ((long)dest.length)*Sizeof.FLOAT, cudaMemcpyDeviceToHost);
LibMatrixNative.fromFloatBuffer(floatData, dest);
}
- if(DMLScript.STATISTICS) {
+ if(ConfigurationManager.isStatistics()) {
long totalTime = System.nanoTime() - t0;
GPUStatistics.cudaFloat2DoubleTime.add(totalTime);
GPUStatistics.cudaFloat2DoubleCount.add(1);
- if(DMLScript.FINEGRAINED_STATISTICS && instName != null)
+ if(ConfigurationManager.isFinegrainedStatistics() && instName != null)
GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_DEVICE_TO_HOST, totalTime);
}
}
@@ -203,12 +203,12 @@ public class SinglePrecisionCudaSupportFunctions implements CudaSupportFunctions
public void hostToDevice(GPUContext gCtx, double[] src, Pointer dest, String instName) {
LOG.debug("Potential OOM: Allocated additional space in hostToDevice");
// TODO: Perform conversion on GPU using double2float and float2double kernels
- long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
+ long t0 = ConfigurationManager.isStatistics() ? System.nanoTime() : 0;
if(PERFORM_CONVERSION_ON_DEVICE) {
Pointer deviceDoubleData = gCtx.allocate(instName, ((long)src.length)*Sizeof.DOUBLE);
cudaMemcpy(deviceDoubleData, Pointer.to(src), ((long)src.length)*Sizeof.DOUBLE, cudaMemcpyHostToDevice);
LibMatrixCUDA.double2float(gCtx, deviceDoubleData, dest, src.length);
- gCtx.cudaFreeHelper(instName, deviceDoubleData, DMLScript.EAGER_CUDA_FREE);
+ gCtx.cudaFreeHelper(instName, deviceDoubleData, gCtx.EAGER_CUDA_FREE);
}
else {
FloatBuffer floatData = ByteBuffer.allocateDirect(Sizeof.FLOAT*src.length).order(ByteOrder.nativeOrder()).asFloatBuffer();
@@ -216,11 +216,11 @@ public class SinglePrecisionCudaSupportFunctions implements CudaSupportFunctions
cudaMemcpy(dest, Pointer.to(floatData), ((long)src.length)*Sizeof.FLOAT, cudaMemcpyHostToDevice);
}
- if(DMLScript.STATISTICS) {
+ if(ConfigurationManager.isStatistics()) {
long totalTime = System.nanoTime() - t0;
GPUStatistics.cudaDouble2FloatTime.add(totalTime);
GPUStatistics.cudaDouble2FloatCount.add(1);
- if(DMLScript.FINEGRAINED_STATISTICS && instName != null)
+ if(ConfigurationManager.isFinegrainedStatistics() && instName != null)
GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_HOST_TO_DEVICE, totalTime);
}
}
http://git-wip-us.apache.org/repos/asf/systemml/blob/ae268a9e/src/main/java/org/apache/sysml/runtime/util/ProgramConverter.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/util/ProgramConverter.java b/src/main/java/org/apache/sysml/runtime/util/ProgramConverter.java
index 21e6bd3..8289c30 100644
--- a/src/main/java/org/apache/sysml/runtime/util/ProgramConverter.java
+++ b/src/main/java/org/apache/sysml/runtime/util/ProgramConverter.java
@@ -688,7 +688,7 @@ public class ProgramConverter
builder.append(NEWLINE);
//handle additional configurations
- builder.append(CONF_STATS + "=" + DMLScript.STATISTICS);
+ builder.append(CONF_STATS + "=" + ConfigurationManager.isStatistics());
builder.append(COMPONENTS_DELIM);
builder.append(NEWLINE);
@@ -753,7 +753,7 @@ public class ProgramConverter
sb.append( NEWLINE );
//handle additional configurations
- sb.append( CONF_STATS + "=" + DMLScript.STATISTICS );
+ sb.append( CONF_STATS + "=" + ConfigurationManager.isStatistics() );
sb.append( COMPONENTS_DELIM );
sb.append( NEWLINE );
@@ -1727,7 +1727,7 @@ public class ProgramConverter
private static void parseAndSetAdditionalConfigurations(String conf) {
String[] statsFlag = conf.split("=");
- DMLScript.STATISTICS = Boolean.parseBoolean(statsFlag[1]);
+ ConfigurationManager.setStatistics(Boolean.parseBoolean(statsFlag[1]));
}
//////////
http://git-wip-us.apache.org/repos/asf/systemml/blob/ae268a9e/src/main/java/org/apache/sysml/utils/GPUStatistics.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/utils/GPUStatistics.java b/src/main/java/org/apache/sysml/utils/GPUStatistics.java
index fcbc4c4..e748057 100644
--- a/src/main/java/org/apache/sysml/utils/GPUStatistics.java
+++ b/src/main/java/org/apache/sysml/utils/GPUStatistics.java
@@ -28,7 +28,7 @@ import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.LongAdder;
-import org.apache.sysml.api.DMLScript;
+import org.apache.sysml.conf.ConfigurationManager;
/**
* Measures performance numbers when GPU mode is enabled
@@ -155,7 +155,7 @@ public class GPUStatistics {
*/
public synchronized static void maintainCPMiscTimes( String instructionName, String miscTimer, long timeNanos, long incrementCount)
{
- if (!(DMLScript.FINEGRAINED_STATISTICS))
+ if (!(ConfigurationManager.isFinegrainedStatistics()))
return;
HashMap<String, Long> miscTimesMap = _cpInstMiscTime.get(instructionName);