You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by ni...@apache.org on 2018/08/27 20:09:57 UTC

[2/5] systemml git commit: [SYSTEMML-1325] Cleanup static variables in DMLScript

http://git-wip-us.apache.org/repos/asf/systemml/blob/ae268a9e/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
index 217acd6..d02a875 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
@@ -28,6 +28,7 @@ import static jcuda.runtime.cudaMemcpyKind.cudaMemcpyDeviceToHost;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.sysml.api.DMLScript;
+import org.apache.sysml.conf.ConfigurationManager;
 import org.apache.sysml.runtime.DMLRuntimeException;
 import org.apache.sysml.runtime.controlprogram.caching.MatrixObject;
 import org.apache.sysml.runtime.controlprogram.context.ExecutionContext;
@@ -333,11 +334,11 @@ public class LibMatrixCUDA {
 		Pointer outputPointer = getDensePointer(gCtx, outputBlock, instName);
 
 		long t1=0;
-		if (DMLScript.FINEGRAINED_STATISTICS) t1 = System.nanoTime();
+		if (ConfigurationManager.isFinegrainedStatistics()) t1 = System.nanoTime();
 		getCudaKernels(gCtx).launchKernel("relu_backward",
 				ExecutionConfig.getConfigForSimpleMatrixOperations(toInt(rows), toInt(cols)),
 				imagePointer, doutPointer, outputPointer, toInt(rows), toInt(cols));
-		if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_RELU_BACKWARD_KERNEL, System.nanoTime() - t1);
+		if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_RELU_BACKWARD_KERNEL, System.nanoTime() - t1);
 
 	}
 	
@@ -367,7 +368,7 @@ public class LibMatrixCUDA {
 		Pointer tmp = gCtx.allocate(instName, cols*sizeOfDataType);
 		reduceCol(gCtx, instName, "reduce_col_sum", imagePointer, tmp, N, cols);
 		reduceRow(gCtx, instName, "reduce_row_sum", tmp, outputPointer, toInt(C), toInt(HW));
-		gCtx.cudaFreeHelper(instName, tmp, DMLScript.EAGER_CUDA_FREE);
+		gCtx.cudaFreeHelper(instName, tmp, gCtx.EAGER_CUDA_FREE);
 	}
 
 	/**
@@ -402,11 +403,11 @@ public class LibMatrixCUDA {
 		Pointer biasPointer = bias.getGPUObject(gCtx).getDensePointer();
 		Pointer outputPointer = outputBlock.getGPUObject(gCtx).getDensePointer();
 		long t1 = 0;
-		if (DMLScript.FINEGRAINED_STATISTICS) t1 = System.nanoTime();
+		if (ConfigurationManager.isFinegrainedStatistics()) t1 = System.nanoTime();
 		getCudaKernels(gCtx).launchKernel("bias_multiply",
 				ExecutionConfig.getConfigForSimpleMatrixOperations(toInt(rows), toInt(cols)),
 				imagePointer, biasPointer, outputPointer, toInt(rows), toInt(cols), toInt(PQ));
-		if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_BIAS_ADD_LIB, System.nanoTime() - t1);
+		if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_BIAS_ADD_LIB, System.nanoTime() - t1);
 
 	}
 
@@ -454,11 +455,11 @@ public class LibMatrixCUDA {
 		}
 		int PQ = cols / k;
 		long t1 = 0;
-		if (DMLScript.FINEGRAINED_STATISTICS) t1 = System.nanoTime();
+		if (ConfigurationManager.isFinegrainedStatistics()) t1 = System.nanoTime();
 		getCudaKernels(gCtx).launchKernel("bias_add",
 				ExecutionConfig.getConfigForSimpleMatrixOperations(rows, cols),
 				image, bias, output, rows, cols, PQ);
-		if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_BIAS_ADD_LIB, System.nanoTime() - t1);
+		if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_BIAS_ADD_LIB, System.nanoTime() - t1);
 	}
 	
 
@@ -525,13 +526,13 @@ public class LibMatrixCUDA {
 
 		long t0=0, t1=0;
 
-		if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
+		if (ConfigurationManager.isFinegrainedStatistics()) t0 = System.nanoTime();
 		cudaSupportFunctions.cublassyrk(getCublasHandle(gCtx), cublasFillMode.CUBLAS_FILL_MODE_LOWER,transa, m, k, one(), A, lda, zero(), C, ldc);
-		if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_SYRK_LIB, System.nanoTime() - t0);
+		if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_SYRK_LIB, System.nanoTime() - t0);
 
-		if (DMLScript.FINEGRAINED_STATISTICS) t1 = System.nanoTime();
+		if (ConfigurationManager.isFinegrainedStatistics()) t1 = System.nanoTime();
 		copyUpperToLowerTriangle(gCtx, instName, output);
-		if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_UPPER_TO_LOWER_TRIANGLE_KERNEL, System.nanoTime() - t1);
+		if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_UPPER_TO_LOWER_TRIANGLE_KERNEL, System.nanoTime() - t1);
 	}
 
 	/**
@@ -740,7 +741,7 @@ public class LibMatrixCUDA {
 			default:
 				throw new DMLRuntimeException("Internal Error - Unsupported reduction direction for summation squared");
 			}
-			gCtx.cudaFreeHelper(instName, tmp, DMLScript.EAGER_CUDA_FREE);
+			gCtx.cudaFreeHelper(instName, tmp, gCtx.EAGER_CUDA_FREE);
 			break;
 		}
 		case OP_MEAN:{
@@ -853,7 +854,7 @@ public class LibMatrixCUDA {
 				ScalarOperator divideOp = new RightScalarOperator(Divide.getDivideFnObject(), clen - 1);
 				matrixScalarOp(gCtx, instName, tmpRow, clen - 1, rlen, 1, out, divideOp);
 
-				gCtx.cudaFreeHelper(instName, tmpRow, DMLScript.EAGER_CUDA_FREE);
+				gCtx.cudaFreeHelper(instName, tmpRow, gCtx.EAGER_CUDA_FREE);
 
 				break;
 			}
@@ -871,15 +872,15 @@ public class LibMatrixCUDA {
 				ScalarOperator divideOp = new RightScalarOperator(Divide.getDivideFnObject(), rlen - 1);
 				matrixScalarOp(gCtx, instName, tmpCol, rlen - 1, 1, clen, out, divideOp);
 
-				gCtx.cudaFreeHelper(instName, tmpCol, DMLScript.EAGER_CUDA_FREE);
+				gCtx.cudaFreeHelper(instName, tmpCol, gCtx.EAGER_CUDA_FREE);
 
 				break;
 			}
 			default:
 				throw new DMLRuntimeException("Internal Error - Unsupported reduction direction for variance");
 			}
-			gCtx.cudaFreeHelper(instName, tmp, DMLScript.EAGER_CUDA_FREE);
-			gCtx.cudaFreeHelper(instName, tmp2, DMLScript.EAGER_CUDA_FREE);
+			gCtx.cudaFreeHelper(instName, tmp, gCtx.EAGER_CUDA_FREE);
+			gCtx.cudaFreeHelper(instName, tmp2, gCtx.EAGER_CUDA_FREE);
 			break;
 		}
 		case OP_MAXINDEX : {
@@ -938,24 +939,24 @@ public class LibMatrixCUDA {
 
 		long t1=0,t2=0;
 
-		if (DMLScript.FINEGRAINED_STATISTICS) t1 = System.nanoTime();
+		if (ConfigurationManager.isFinegrainedStatistics()) t1 = System.nanoTime();
 		getCudaKernels(gCtx).launchKernel(kernelFunction, new ExecutionConfig(blocks, threads, sharedMem), in, tempOut, n);
 		//cudaDeviceSynchronize;
-		if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_REDUCE_ALL_KERNEL, System.nanoTime() - t1);
+		if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_REDUCE_ALL_KERNEL, System.nanoTime() - t1);
 
 		int s = blocks;
 		while (s > 1) {
 			tmp = getKernelParamsForReduceAll(gCtx, s);
 			blocks = tmp[0]; threads = tmp[1]; sharedMem = tmp[2];
-			if (DMLScript.FINEGRAINED_STATISTICS) t2 = System.nanoTime();
+			if (ConfigurationManager.isFinegrainedStatistics()) t2 = System.nanoTime();
 			getCudaKernels(gCtx).launchKernel(kernelFunction, new ExecutionConfig(blocks, threads, sharedMem),
 					tempOut, tempOut, s);
-			if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_REDUCE_ALL_KERNEL, System.nanoTime() - t2);
+			if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_REDUCE_ALL_KERNEL, System.nanoTime() - t2);
 			s = (s + (threads*2-1)) / (threads*2);
 		}
 		double[] result = {-1f};
 		cudaSupportFunctions.deviceToHost(gCtx, tempOut, result, instName, false);
-		gCtx.cudaFreeHelper(instName, tempOut, DMLScript.EAGER_CUDA_FREE);
+		gCtx.cudaFreeHelper(instName, tempOut, gCtx.EAGER_CUDA_FREE);
 		return result[0];
 	}
 
@@ -978,11 +979,11 @@ public class LibMatrixCUDA {
 		int blocks = tmp[0], threads = tmp[1], sharedMem = tmp[2];
 
 		long t0=0;
-		if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
+		if (ConfigurationManager.isFinegrainedStatistics()) t0 = System.nanoTime();
 		getCudaKernels(gCtx).launchKernel(kernelFunction, new ExecutionConfig(blocks, threads, sharedMem),
 				in, out, rows, cols);
 		//cudaDeviceSynchronize;
-		if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_REDUCE_ROW_KERNEL, System.nanoTime() - t0);
+		if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_REDUCE_ROW_KERNEL, System.nanoTime() - t0);
 
 	}
 
@@ -1005,11 +1006,11 @@ public class LibMatrixCUDA {
 		int blocks = tmp[0], threads = tmp[1], sharedMem = tmp[2];
 
 		long t0=0;
-		if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
+		if (ConfigurationManager.isFinegrainedStatistics()) t0 = System.nanoTime();
 		getCudaKernels(gCtx).launchKernel(kernelFunction, new ExecutionConfig(blocks, threads, sharedMem),
 				in, out, rows, cols);
 		//cudaDeviceSynchronize;
-		if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_REDUCE_COL_KERNEL, System.nanoTime() - t0);
+		if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_REDUCE_COL_KERNEL, System.nanoTime() - t0);
 	}
 
 	/**
@@ -1329,11 +1330,11 @@ public class LibMatrixCUDA {
 		int isLeftScalar = (op instanceof LeftScalarOperator) ? 1 : 0;
 		int size = rlenA * clenA;
 		long t0=0;
-		if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
+		if (ConfigurationManager.isFinegrainedStatistics()) t0 = System.nanoTime();
 		getCudaKernels(gCtx).launchKernel("matrix_scalar_op",
 				ExecutionConfig.getConfigForSimpleVectorOperations(size),
 				a, scalar, c, size, getBinaryOp(op.fn), isLeftScalar);
-		if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_MATRIX_SCALAR_OP_KERNEL, System.nanoTime() - t0);
+		if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_MATRIX_SCALAR_OP_KERNEL, System.nanoTime() - t0);
 	}
 
 	/**
@@ -1431,11 +1432,11 @@ public class LibMatrixCUDA {
 			LOG.trace("GPU : matrix_matrix_cellwise_op" + ", GPUContext=" + gCtx);
 		}
 		long t0=0;
-		if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
+		if (ConfigurationManager.isFinegrainedStatistics()) t0 = System.nanoTime();
 		getCudaKernels(gCtx).launchKernel("matrix_matrix_cellwise_op",
 				ExecutionConfig.getConfigForSimpleMatrixOperations(maxRlen, maxClen),
 				a, b, c, maxRlen, maxClen, vecStatusA, vecStatusB, getBinaryOp(op.fn));
-		if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_MATRIX_MATRIX_CELLWISE_OP_KERNEL, System.nanoTime() - t0);
+		if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_MATRIX_MATRIX_CELLWISE_OP_KERNEL, System.nanoTime() - t0);
 	}
 
 	/**
@@ -1525,11 +1526,11 @@ public class LibMatrixCUDA {
 			int rlen = toInt(out.getNumRows());
 			int clen = toInt(out.getNumColumns());
 			long t0 = 0;
-			if (DMLScript.FINEGRAINED_STATISTICS)
+			if (ConfigurationManager.isFinegrainedStatistics())
 				t0 = System.nanoTime();
 			int size = rlen * clen;
 			getCudaKernels(gCtx).launchKernel("fill", ExecutionConfig.getConfigForSimpleVectorOperations(size), A, constant, size);
-			if (DMLScript.FINEGRAINED_STATISTICS)
+			if (ConfigurationManager.isFinegrainedStatistics())
 				GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_FILL_KERNEL, System.nanoTime() - t0);
 		}
 	}
@@ -1544,10 +1545,10 @@ public class LibMatrixCUDA {
 	 */
 	public static void deviceCopy(String instName, Pointer src, Pointer dest, int rlen, int clen) {
 		long t0=0;
-		if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
+		if (ConfigurationManager.isFinegrainedStatistics()) t0 = System.nanoTime();
 		int size = rlen * clen * sizeOfDataType;
 		cudaMemcpy(dest, src, size, cudaMemcpyDeviceToDevice);
-		if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_DEVICE_TO_DEVICE, System.nanoTime() - t0);
+		if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_DEVICE_TO_DEVICE, System.nanoTime() - t0);
 	}
 
 	/**
@@ -1629,19 +1630,19 @@ public class LibMatrixCUDA {
 			// Invoke cuSparse when either are in sparse format
 			// Perform sparse-sparse dgeam
 			if (!isInSparseFormat(gCtx, in1)) {
-				if (DMLScript.FINEGRAINED_STATISTICS)
+				if (ConfigurationManager.isFinegrainedStatistics())
 					t0 = System.nanoTime();
 				in1.getGPUObject(gCtx).denseToSparse();
-				if (DMLScript.FINEGRAINED_STATISTICS)
+				if (ConfigurationManager.isFinegrainedStatistics())
 					GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_DENSE_TO_SPARSE,
 							System.nanoTime() - t0);
 			}
 			CSRPointer A = in1.getGPUObject(gCtx).getJcudaSparseMatrixPtr();
 			if (!isInSparseFormat(gCtx, in2)) {
-				if (DMLScript.FINEGRAINED_STATISTICS)
+				if (ConfigurationManager.isFinegrainedStatistics())
 					t0 = System.nanoTime();
 				in2.getGPUObject(gCtx).denseToSparse();
-				if (DMLScript.FINEGRAINED_STATISTICS)
+				if (ConfigurationManager.isFinegrainedStatistics())
 					GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_DENSE_TO_SPARSE,
 							System.nanoTime() - t0);
 			}
@@ -1664,21 +1665,21 @@ public class LibMatrixCUDA {
 							"Transpose in cusparseDcsrgeam not supported for sparse matrices on GPU");
 				}
 
-				if (DMLScript.FINEGRAINED_STATISTICS)
+				if (ConfigurationManager.isFinegrainedStatistics())
 					t1 = System.nanoTime();
 				CSRPointer C = CSRPointer.allocateForDgeam(gCtx, getCusparseHandle(gCtx), A, B, m, n);
-				if (DMLScript.FINEGRAINED_STATISTICS)
+				if (ConfigurationManager.isFinegrainedStatistics())
 					GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_SPARSE_ALLOCATE_LIB,
 							System.nanoTime() - t1);
 
 				out.getGPUObject(gCtx).setSparseMatrixCudaPointer(C);
 				//long sizeOfC = CSRPointer.estimateSize(C.nnz, out.getNumRows());
-				if (DMLScript.FINEGRAINED_STATISTICS)
+				if (ConfigurationManager.isFinegrainedStatistics())
 					t0 = System.nanoTime();
 				cudaSupportFunctions.cusparsecsrgeam(getCusparseHandle(gCtx), m, n, alphaPtr, A.descr, toInt(A.nnz), A.val, A.rowPtr, A.colInd, betaPtr,
 						B.descr, toInt(B.nnz), B.val, B.rowPtr, B.colInd, C.descr, C.val, C.rowPtr, C.colInd);
 				//cudaDeviceSynchronize;
-				if (DMLScript.FINEGRAINED_STATISTICS)
+				if (ConfigurationManager.isFinegrainedStatistics())
 					GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_SPARSE_DGEAM_LIB,
 							System.nanoTime() - t0);
 			}
@@ -1705,9 +1706,9 @@ public class LibMatrixCUDA {
 			getDenseMatrixOutputForGPUInstruction(ec, instName, outputName, outRLen, outCLen);	// Allocated the dense output matrix
 			Pointer C = getDensePointer(gCtx, out, instName);
 
-			if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
+			if (ConfigurationManager.isFinegrainedStatistics()) t0 = System.nanoTime();
 			cudaSupportFunctions.cublasgeam(getCublasHandle(gCtx), transa, transb, m, n, alphaPtr, A, lda, betaPtr, B, ldb, C, ldc);
-			if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_DENSE_DGEAM_LIB, System.nanoTime() - t0);
+			if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_DENSE_DGEAM_LIB, System.nanoTime() - t0);
 		}
 	}
 	
@@ -1736,9 +1737,9 @@ public class LibMatrixCUDA {
 		int m = toInt(numRowsA);
 		int n = lda;
 		int ldc = m;
-		if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
+		if (ConfigurationManager.isFinegrainedStatistics()) t0 = System.nanoTime();
 		cudaSupportFunctions.cublasgeam(getCublasHandle(gCtx), CUBLAS_OP_T, CUBLAS_OP_T, m, n, one(), A, lda, zero(), A, ldb, C, ldc);
-		if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_DENSE_DGEAM_LIB, System.nanoTime() - t0);
+		if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_DENSE_DGEAM_LIB, System.nanoTime() - t0);
 	}
 
 
@@ -1846,7 +1847,7 @@ public class LibMatrixCUDA {
 	 */
 	protected static void sliceDenseDense(GPUContext gCtx, String instName, Pointer inPointer, Pointer outPointer, 
 			int rl, int ru, int cl, int cu, int inClen) {
-		long t0 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
+		long t0 = ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
 		long retClen = cu - cl + 1;
 		if (inClen == retClen) {
 			cudaMemcpy(outPointer, inPointer.withByteOffset(rl * inClen * sizeOfDataType), (ru - rl + 1) * inClen
@@ -1856,7 +1857,7 @@ public class LibMatrixCUDA {
 			getCudaKernels(gCtx).launchKernel("slice_dense_dense", ExecutionConfig.getConfigForSimpleVectorOperations(toInt(retRlen*retClen)),
 					inPointer, outPointer, rl, ru, cl, cu, inClen,  retRlen, retClen);
 		}
-		if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_RIX_DENSE_OP, System.nanoTime() - t0);
+		if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_RIX_DENSE_OP, System.nanoTime() - t0);
 	}
 	
 	/**
@@ -1879,7 +1880,7 @@ public class LibMatrixCUDA {
 		if(size == 0) return;
 		
 		int retRlen = ru - rl + 1;
-		long t0 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
+		long t0 = ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
 		int retClen = cu - cl + 1;
 		
 		String kernel = null; String timer = null;
@@ -1901,7 +1902,7 @@ public class LibMatrixCUDA {
 		// We can generalize this later to output sparse matrix.
 		getCudaKernels(gCtx).launchKernel(kernel, ExecutionConfig.getConfigForSimpleVectorOperations(size),
 				inPointer.val, inPointer.rowPtr, inPointer.colInd, outPointer, rl, ru, cl, cu, retClen);
-		if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, timer, System.nanoTime() - t0);
+		if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, timer, System.nanoTime() - t0);
 	}
 	
 	/**
@@ -1946,11 +1947,11 @@ public class LibMatrixCUDA {
 		int maxRows = toInt(Math.max(rowsA, rowsB));
 		int maxCols = toInt(Math.max(colsA, colsB));
 
-		if (DMLScript.FINEGRAINED_STATISTICS) t1 = System.nanoTime();
+		if (ConfigurationManager.isFinegrainedStatistics()) t1 = System.nanoTime();
 		getCudaKernels(gCtx)
 		.launchKernel("cbind", ExecutionConfig.getConfigForSimpleMatrixOperations(maxRows, maxCols), A, B, C,
 				rowsA, colsA, rowsB, colsB);
-		if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_CBIND_KERNEL, System.nanoTime() - t1);
+		if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_CBIND_KERNEL, System.nanoTime() - t1);
 
 	}
 
@@ -1981,11 +1982,11 @@ public class LibMatrixCUDA {
 		int maxRows = Math.max(rowsA, rowsB);
 		int maxCols = Math.max(colsA, colsB);
 
-		if (DMLScript.FINEGRAINED_STATISTICS) t1 = System.nanoTime();
+		if (ConfigurationManager.isFinegrainedStatistics()) t1 = System.nanoTime();
 		getCudaKernels(gCtx)
 		.launchKernel("rbind", ExecutionConfig.getConfigForSimpleMatrixOperations(maxRows, maxCols), A, B, C,
 				rowsA, colsA, rowsB, colsB);
-		if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_RBIND_KERNEL, System.nanoTime() - t1);
+		if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_RBIND_KERNEL, System.nanoTime() - t1);
 
 	}
 
@@ -2315,10 +2316,10 @@ public class LibMatrixCUDA {
 			Pointer output = getDensePointer(gCtx, out, instName);
 			Pointer input = getDensePointer(gCtx, in1, instName);
 			int size = toInt(in1.getNumColumns() * in1.getNumRows());
-			if (DMLScript.FINEGRAINED_STATISTICS) t1 = System.nanoTime();
+			if (ConfigurationManager.isFinegrainedStatistics()) t1 = System.nanoTime();
 			getCudaKernels(gCtx).launchKernel(kernel, ExecutionConfig.getConfigForSimpleVectorOperations(size),
 					input, output, size);
-			if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, kernelTimer, System.nanoTime() - t1);
+			if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, kernelTimer, System.nanoTime() - t1);
 		}
 	}
 
@@ -2356,13 +2357,13 @@ public class LibMatrixCUDA {
 			// becomes
 			// C <- A
 			// C <- alpha*B + C
-			if (DMLScript.FINEGRAINED_STATISTICS) t1 = System.nanoTime();
+			if (ConfigurationManager.isFinegrainedStatistics()) t1 = System.nanoTime();
 			cudaMemcpy(C, A, n*((long)sizeOfDataType), cudaMemcpyDeviceToDevice);
-			if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_DEVICE_TO_DEVICE, System.nanoTime() - t1);
+			if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_DEVICE_TO_DEVICE, System.nanoTime() - t1);
 
-			if (DMLScript.FINEGRAINED_STATISTICS) t2 = System.nanoTime();
+			if (ConfigurationManager.isFinegrainedStatistics()) t2 = System.nanoTime();
 			cudaSupportFunctions.cublasaxpy(getCublasHandle(gCtx), toInt(n), alphaPtr, B, 1, C, 1);
-			if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_DAXPY_LIB, System.nanoTime() - t2);
+			if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_DAXPY_LIB, System.nanoTime() - t2);
 		}
 		else {
 			if(LOG.isTraceEnabled()) {
@@ -2372,12 +2373,12 @@ public class LibMatrixCUDA {
 			// Matrix-Vector daxpy
 			// Note: Vector-Matrix operation is not supported
 			// daxpy_matrix_vector(double* A,  double* B, double alpha, double* ret, int rlenA, int clenA, int rlenB, int clenB)
-			if (DMLScript.FINEGRAINED_STATISTICS) t1 = System.nanoTime();
+			if (ConfigurationManager.isFinegrainedStatistics()) t1 = System.nanoTime();
 			int rlenA = toInt(in1.getNumRows()); int clenA =  toInt(in1.getNumColumns());
 			int rlenB = toInt(in2.getNumRows()); int clenB =  toInt(in2.getNumColumns());
 			getCudaKernels(gCtx).launchKernel("daxpy_matrix_vector", ExecutionConfig.getConfigForSimpleMatrixOperations(rlenA, clenA),
 					A, B, constant, C, rlenA, clenA, rlenB, clenB);
-			if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_DAXPY_MV_KERNEL, System.nanoTime() - t1);
+			if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_DAXPY_MV_KERNEL, System.nanoTime() - t1);
 		}
 	}
 
@@ -2424,20 +2425,20 @@ public class LibMatrixCUDA {
 		// convert dense matrices to row major
 		// Operation in cuSolver and cuBlas are for column major dense matrices
 		// and are destructive to the original input
-		if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
+		if (ConfigurationManager.isFinegrainedStatistics()) t0 = System.nanoTime();
 		GPUObject ATobj = (GPUObject) Aobj.clone();
-		if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_OBJECT_CLONE, System.nanoTime() - t0);
-		if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
+		if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_OBJECT_CLONE, System.nanoTime() - t0);
+		if (ConfigurationManager.isFinegrainedStatistics()) t0 = System.nanoTime();
 		ATobj.denseRowMajorToColumnMajor();
-		if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_ROW_TO_COLUMN_MAJOR, System.nanoTime() - t0);
+		if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_ROW_TO_COLUMN_MAJOR, System.nanoTime() - t0);
 		Pointer A = ATobj.getDensePointer();
 
-		if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
+		if (ConfigurationManager.isFinegrainedStatistics()) t0 = System.nanoTime();
 		GPUObject bTobj = (GPUObject) bobj.clone();
-		if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_OBJECT_CLONE, System.nanoTime() - t0);
-		if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
+		if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_OBJECT_CLONE, System.nanoTime() - t0);
+		if (ConfigurationManager.isFinegrainedStatistics()) t0 = System.nanoTime();
 		bTobj.denseRowMajorToColumnMajor();
-		if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_ROW_TO_COLUMN_MAJOR, System.nanoTime() - t0);
+		if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_ROW_TO_COLUMN_MAJOR, System.nanoTime() - t0);
 
 
 		Pointer b = bTobj.getDensePointer();
@@ -2446,18 +2447,18 @@ public class LibMatrixCUDA {
 		// http://docs.nvidia.com/cuda/cusolver/#ormqr-example1
 
 		// step 3: query working space of geqrf and ormqr
-		if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
+		if (ConfigurationManager.isFinegrainedStatistics()) t0 = System.nanoTime();
 		int[] lwork = {0};
 		cudaSupportFunctions.cusolverDngeqrf_bufferSize(gCtx.getCusolverDnHandle(), m, n, A, m, lwork);
-		if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_QR_BUFFER, System.nanoTime() - t0);
+		if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_QR_BUFFER, System.nanoTime() - t0);
 
 		// step 4: compute QR factorization
 		Pointer work = gCtx.allocate(instName, lwork[0] * sizeOfDataType);
 		Pointer tau = gCtx.allocate(instName, m * sizeOfDataType);
 		Pointer devInfo = gCtx.allocate(instName, Sizeof.INT);
-		if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
+		if (ConfigurationManager.isFinegrainedStatistics()) t0 = System.nanoTime();
 		cudaSupportFunctions.cusolverDngeqrf(gCtx.getCusolverDnHandle(), m, n, A, m, tau, work, lwork[0], devInfo);
-		if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_QR, System.nanoTime() - t0);
+		if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_QR, System.nanoTime() - t0);
 
 		int[] qrError = {-1};
 		cudaMemcpy(Pointer.to(qrError), devInfo, Sizeof.INT, cudaMemcpyDeviceToHost);
@@ -2466,34 +2467,34 @@ public class LibMatrixCUDA {
 		}
 
 		// step 5: compute Q^T*B
-		if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
+		if (ConfigurationManager.isFinegrainedStatistics()) t0 = System.nanoTime();
 		cudaSupportFunctions.cusolverDnormqr(gCtx.getCusolverDnHandle(), cublasSideMode.CUBLAS_SIDE_LEFT, cublasOperation.CUBLAS_OP_T, m, 1, n, A, m, tau, b, m, work, lwork[0], devInfo);
-		if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_ORMQR, System.nanoTime() - t0);
+		if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_ORMQR, System.nanoTime() - t0);
 		cudaMemcpy(Pointer.to(qrError), devInfo, Sizeof.INT, cudaMemcpyDeviceToHost);
 		if (qrError[0] != 0) {
 			throw new DMLRuntimeException("GPU : Error in call to ormqr (to compuete Q^T*B after QR factorization) as part of solve, argument " + qrError[0] + " was wrong");
 		}
 
 		// step 6: compute x = R \ Q^T*B
-		if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
+		if (ConfigurationManager.isFinegrainedStatistics()) t0 = System.nanoTime();
 		cudaSupportFunctions.cublastrsm(gCtx.getCublasHandle(),
 			cublasSideMode.CUBLAS_SIDE_LEFT, cublasFillMode.CUBLAS_FILL_MODE_UPPER, cublasOperation.CUBLAS_OP_N, cublasDiagType.CUBLAS_DIAG_NON_UNIT,
 			n, 1, dataTypePointerTo(1.0), A, m, b, m);
-		if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_TRSM, System.nanoTime() - t0);
+		if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_TRSM, System.nanoTime() - t0);
 
-		if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
+		if (ConfigurationManager.isFinegrainedStatistics()) t0 = System.nanoTime();
 		bTobj.denseColumnMajorToRowMajor();
-		if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_COLUMN_TO_ROW_MAJOR, System.nanoTime() - t0);
+		if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_COLUMN_TO_ROW_MAJOR, System.nanoTime() - t0);
 
 		// TODO  : Find a way to assign bTobj directly to the output and set the correct flags so as to not crash
 		// There is an avoidable copy happening here
 		MatrixObject out = getDenseMatrixOutputForGPUInstruction(ec, instName, outputName, in1.getNumColumns(), 1);
 		cudaMemcpy(out.getGPUObject(gCtx).getDensePointer(), bTobj.getDensePointer(), n * 1 * sizeOfDataType, cudaMemcpyDeviceToDevice);
 
-		gCtx.cudaFreeHelper(instName, work, DMLScript.EAGER_CUDA_FREE);
-		gCtx.cudaFreeHelper(instName, tau, DMLScript.EAGER_CUDA_FREE);
-		ATobj.clearData(instName, DMLScript.EAGER_CUDA_FREE);
-		bTobj.clearData(instName, DMLScript.EAGER_CUDA_FREE);
+		gCtx.cudaFreeHelper(instName, work, gCtx.EAGER_CUDA_FREE);
+		gCtx.cudaFreeHelper(instName, tau, gCtx.EAGER_CUDA_FREE);
+		ATobj.clearData(instName, gCtx.EAGER_CUDA_FREE);
+		bTobj.clearData(instName, gCtx.EAGER_CUDA_FREE);
 
 		//debugPrintMatrix(b, n, 1);
     }
@@ -2514,10 +2515,10 @@ public class LibMatrixCUDA {
 	 */
 	public static MatrixObject getDenseMatrixOutputForGPUInstruction(ExecutionContext ec, String instName, String name, long numRows, long numCols) {
 		long t0=0;
-		if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
+		if (ConfigurationManager.isFinegrainedStatistics()) t0 = System.nanoTime();
 		Pair<MatrixObject, Boolean> mb = ec.getDenseMatrixOutputForGPUInstruction(name, numRows, numCols);
 		if (mb.getValue())
-			if (DMLScript.FINEGRAINED_STATISTICS)
+			if (ConfigurationManager.isFinegrainedStatistics())
 				GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_ALLOCATE_DENSE_OUTPUT, System.nanoTime() - t0);
 		return mb.getKey();
 	}
@@ -2535,10 +2536,10 @@ public class LibMatrixCUDA {
 	 */
 	private static MatrixObject getSparseMatrixOutputForGPUInstruction(ExecutionContext ec, long numRows, long numCols, long nnz, String instName, String name) {
 		long t0=0;
-		if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
+		if (ConfigurationManager.isFinegrainedStatistics()) t0 = System.nanoTime();
 		Pair<MatrixObject, Boolean> mb = ec.getSparseMatrixOutputForGPUInstruction(name, numRows, numCols, nnz);
 		if (mb.getValue())
-			if (DMLScript.FINEGRAINED_STATISTICS)
+			if (ConfigurationManager.isFinegrainedStatistics())
 				GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_ALLOCATE_SPARSE_OUTPUT, System.nanoTime() - t0);
 		return mb.getKey();
 	}

http://git-wip-us.apache.org/repos/asf/systemml/blob/ae268a9e/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNN.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNN.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNN.java
index c6abbfe..d3b5984 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNN.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNN.java
@@ -49,7 +49,7 @@ import jcuda.jcudnn.cudnnTensorDescriptor;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.sysml.api.DMLScript;
+import org.apache.sysml.conf.ConfigurationManager;
 import org.apache.sysml.hops.OptimizerUtils;
 import org.apache.sysml.runtime.DMLRuntimeException;
 import org.apache.sysml.runtime.controlprogram.caching.MatrixObject;
@@ -136,7 +136,7 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
 	private static Pointer denseIm2col(GPUContext gCtx, String instName, MatrixObject image, boolean isSparseImage, long N, long C, long H, long W,
 			int R, int S, int pad_h, int pad_w, int stride_h, int stride_w, int P, int Q) {
 		Pointer im2colPointer = null;
-		long t1 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
+		long t1 = ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
 		if(isSparseImage) {
 			CSRPointer inPointer = getSparsePointer(gCtx, image, instName);
 			if(inPointer.nnz < 0) {
@@ -147,7 +147,7 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
 				getCudaKernels(gCtx).launchKernel("sparse_dense_im2col", ExecutionConfig.getConfigForSimpleVectorOperations(toInt(inPointer.nnz)), 
 						inPointer.val, inPointer.rowPtr, inPointer.colInd, im2colPointer, inPointer.nnz, N, 
 						C*H*W, H*W, W, R, S, P, Q, P*Q, R*S, N*P*Q, stride_h, stride_w, pad_h, pad_w);
-				if (DMLScript.FINEGRAINED_STATISTICS)
+				if (ConfigurationManager.isFinegrainedStatistics())
 					GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_SPARSE_IM2COL_KERNEL, System.nanoTime() - t1);
 			}
 			else
@@ -159,7 +159,7 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
 			getCudaKernels(gCtx).launchKernel("dense_dense_im2col", ExecutionConfig.getConfigForSimpleVectorOperations(toInt(N*C*H*W)), 
 					imagePointer, im2colPointer, N*C*H*W, 
 					C*H*W, H*W, W, R, S, P, Q, P*Q, R*S, N*P*Q, stride_h, stride_w, pad_h, pad_w);
-			if (DMLScript.FINEGRAINED_STATISTICS)
+			if (ConfigurationManager.isFinegrainedStatistics())
 				GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_DENSE_IM2COL_KERNEL, System.nanoTime() - t1);
 		}
 		return im2colPointer;
@@ -220,16 +220,16 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
 				CSRPointer filterPointer = filter.getGPUObject(gCtx).getJcudaSparseMatrixPtr();
 				Pointer matmultOutputPointer = gCtx.allocate(instName, NKPQ*sizeOfDataType);
 				LibMatrixCuMatMult.sparseDenseMatMult(gCtx, instName, matmultOutputPointer, filterPointer, im2colPointer, K, CRS, CRS, NPQ, K, NPQ, false, false);
-				gCtx.cudaFreeHelper(instName, im2colPointer, DMLScript.EAGER_CUDA_FREE);
+				gCtx.cudaFreeHelper(instName, im2colPointer, gCtx.EAGER_CUDA_FREE);
 				
 				// Perform reorg_knpq a reorg operation of matmultOutputPointer matrix with dimensions [K, NPQ]
 				// and return a matrix dstPointer with dimensions [N, KPQ]
-				long t1 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
+				long t1 = ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
 				getCudaKernels(gCtx).launchKernel("reorg_knpq", ExecutionConfig.getConfigForSimpleVectorOperations(toInt(NKPQ)), 
 						matmultOutputPointer, dstPointer, NKPQ, NPQ, KPQ, P*Q);
-				if (DMLScript.FINEGRAINED_STATISTICS)
+				if (ConfigurationManager.isFinegrainedStatistics())
 					GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_DENSE_REORG_KNPQ_KERNEL, System.nanoTime() - t1);
-				gCtx.cudaFreeHelper(instName, matmultOutputPointer, DMLScript.EAGER_CUDA_FREE);
+				gCtx.cudaFreeHelper(instName, matmultOutputPointer, gCtx.EAGER_CUDA_FREE);
 			}
 			else {
 				// Filter and output are accounted as dense in the memory estimation for conv2d
@@ -357,13 +357,13 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
 		}
 		try {
 			long t1 = 0;
-			if (DMLScript.FINEGRAINED_STATISTICS) t1 = System.nanoTime();
+			if (ConfigurationManager.isFinegrainedStatistics()) t1 = System.nanoTime();
 			int status = cudnnConvolutionForward(getCudnnHandle(gCtx), one(),
 					algo.nchwTensorDesc, image,
 					algo.filterDesc, filter,
 					algo.convDesc, algo.algo, algo.workSpace, algo.sizeInBytes, zero(),
 					algo.nkpqTensorDesc, output);
-			if (DMLScript.FINEGRAINED_STATISTICS)
+			if (ConfigurationManager.isFinegrainedStatistics())
 				GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_CONVOLUTION_FORWARD_LIB, System.nanoTime() - t1);
 			if (status != cudnnStatus.CUDNN_STATUS_SUCCESS) {
 				throw new DMLRuntimeException("Could not executed cudnnConvolutionForward: " + cudnnStatus.stringFor(status));
@@ -438,9 +438,9 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
 						// Perform one-input conv2dBackwardFilter
 						Pointer tempdwPointer = gCtx.allocate(instName, KCRS*sizeOfDataType);
 						for(int n = 0; n < N; n++) {
-							long t0 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
+							long t0 = ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
 							cudaMemset(tempdwPointer, 0, KCRS*sizeOfDataType);
-							if(DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_SET_ZERO, System.nanoTime() - t0);
+							if(ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_SET_ZERO, System.nanoTime() - t0);
 							// Perform one-input conv2dBackwardFilter
 							cudnnConv2dBackwardFilter(gCtx, instName, imgFetcher.getNthRow(n), doutFetcher.getNthRow(n), tempdwPointer, algo);
 							getCudaKernels(gCtx).launchKernel("inplace_add",
@@ -475,10 +475,10 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
 			LOG.trace("GPU : conv2dBackwardFilter" + ", GPUContext=" + gCtx);
 		}
 		try {
-			long t1 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
+			long t1 = ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
 			int status = cudnnConvolutionBackwardFilter(getCudnnHandle(gCtx), one(), algo.nchwTensorDesc, imagePointer,
 					algo.nkpqTensorDesc, doutPointer, algo.convDesc, algo.algo, algo.workSpace, algo.sizeInBytes, zero(), algo.filterDesc, dwPointer);
-			if (DMLScript.FINEGRAINED_STATISTICS)
+			if (ConfigurationManager.isFinegrainedStatistics())
 				GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_CONVOLUTION_BACKWARD_FILTER_LIB, System.nanoTime() - t1);
 			if (status != jcuda.jcudnn.cudnnStatus.CUDNN_STATUS_SUCCESS) {
 				throw new DMLRuntimeException("Could not executed cudnnConvolutionBackwardFilter: " + jcuda.jcudnn.cudnnStatus.stringFor(status));
@@ -578,10 +578,10 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
 			LOG.trace("GPU : conv2dBackwardData" + ", GPUContext=" + gCtx);
 		}
 		try {
-			long t1 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
+			long t1 = ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
 			int status = cudnnConvolutionBackwardData(getCudnnHandle(gCtx), one(), algo.filterDesc, w,
 					algo.nkpqTensorDesc, dy, algo.convDesc, algo.algo, algo.workSpace, algo.sizeInBytes, zero(), algo.nchwTensorDesc, dx);
-			if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_CONVOLUTION_BACKWARD_DATA_LIB, System.nanoTime() - t1);
+			if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_CONVOLUTION_BACKWARD_DATA_LIB, System.nanoTime() - t1);
 
 			if(status != jcuda.jcudnn.cudnnStatus.CUDNN_STATUS_SUCCESS) {
 				throw new DMLRuntimeException("Could not executed cudnnConvolutionBackwardData: " + jcuda.jcudnn.cudnnStatus.stringFor(status));
@@ -653,11 +653,11 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
 				LibMatrixCuDNNPoolingDescriptors.cudnnPoolingDescriptors(gCtx, instName, N, C, H, W, K, R, S, 
 						pad_h, pad_w, stride_h, stride_w, P, Q, poolingType)) {
 			long t1=0,t2=0;
-			if (DMLScript.FINEGRAINED_STATISTICS) t1 = System.nanoTime();
-			if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_CUDNN_INIT, System.nanoTime() - t1);
-			if (DMLScript.FINEGRAINED_STATISTICS) t2 = System.nanoTime();
+			if (ConfigurationManager.isFinegrainedStatistics()) t1 = System.nanoTime();
+			if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_CUDNN_INIT, System.nanoTime() - t1);
+			if (ConfigurationManager.isFinegrainedStatistics()) t2 = System.nanoTime();
 			int status = cudnnPoolingForward(getCudnnHandle(gCtx), desc.poolingDesc, one(), desc.xDesc, x, zero(), desc.yDesc, y);
-			if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_MAXPOOLING_FORWARD_LIB, System.nanoTime() - t2);
+			if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_MAXPOOLING_FORWARD_LIB, System.nanoTime() - t2);
 			if(status != jcuda.jcudnn.cudnnStatus.CUDNN_STATUS_SUCCESS) {
 				throw new DMLRuntimeException("Could not executed cudnnPoolingForward: " + jcuda.jcudnn.cudnnStatus.stringFor(status));
 			}
@@ -752,20 +752,20 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
 			long t1=0, t2=0, t3=0;
 			int status;
 			if(!isMaxPoolOutputProvided) {
-				if (DMLScript.FINEGRAINED_STATISTICS) t1 = System.nanoTime();
+				if (ConfigurationManager.isFinegrainedStatistics()) t1 = System.nanoTime();
 				long numBytes = N*C*P*Q*sizeOfDataType;
 				y = gCtx.allocate(instName, numBytes);
-				if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_CUDNN_INIT, System.nanoTime() - t1);
-				if (DMLScript.FINEGRAINED_STATISTICS) t2 = System.nanoTime();
+				if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_CUDNN_INIT, System.nanoTime() - t1);
+				if (ConfigurationManager.isFinegrainedStatistics()) t2 = System.nanoTime();
 				status = cudnnPoolingForward(getCudnnHandle(gCtx), desc.poolingDesc, one(), desc.xDesc, x, zero(), desc.yDesc, y);
-				if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_MAXPOOLING_FORWARD_LIB, System.nanoTime() - t2);
+				if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_MAXPOOLING_FORWARD_LIB, System.nanoTime() - t2);
 				if(status != jcuda.jcudnn.cudnnStatus.CUDNN_STATUS_SUCCESS) {
 					throw new DMLRuntimeException("Could not executed cudnnPoolingForward before cudnnPoolingBackward: " + jcuda.jcudnn.cudnnStatus.stringFor(status));
 				}
 			}
-			if (DMLScript.FINEGRAINED_STATISTICS) t3 = System.nanoTime();
+			if (ConfigurationManager.isFinegrainedStatistics()) t3 = System.nanoTime();
 			status = cudnnPoolingBackward(getCudnnHandle(gCtx), desc.poolingDesc, one(), desc.yDesc, y, desc.dyDesc, dy, desc.xDesc, x, zero(), desc.dxDesc, dx);
-			if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_MAXPOOLING_BACKWARD_LIB, System.nanoTime() - t3);
+			if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_MAXPOOLING_BACKWARD_LIB, System.nanoTime() - t3);
 
 			if(status != jcuda.jcudnn.cudnnStatus.CUDNN_STATUS_SUCCESS) {
 				throw new DMLRuntimeException("Could not executed cudnnPoolingBackward: " + jcuda.jcudnn.cudnnStatus.stringFor(status));
@@ -775,10 +775,10 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
 		}
 		finally {
 			long t4=0;
-			if (DMLScript.FINEGRAINED_STATISTICS) t4 = System.nanoTime();
+			if (ConfigurationManager.isFinegrainedStatistics()) t4 = System.nanoTime();
 			if(!isMaxPoolOutputProvided)
-				gCtx.cudaFreeHelper(instName, y, DMLScript.EAGER_CUDA_FREE);
-			if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_CUDNN_CLEANUP, System.nanoTime() - t4);
+				gCtx.cudaFreeHelper(instName, y, gCtx.EAGER_CUDA_FREE);
+			if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_CUDNN_CLEANUP, System.nanoTime() - t4);
 		}
 	}
 
@@ -795,18 +795,18 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
 			cudnnCreateActivationDescriptor(activationDescriptor);
 			double dummy = -1;
 			cudnnSetActivationDescriptor(activationDescriptor, CUDNN_ACTIVATION_RELU, CUDNN_PROPAGATE_NAN, dummy);
-			if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
+			if (ConfigurationManager.isFinegrainedStatistics()) t0 = System.nanoTime();
 			cudnnActivationForward(getCudnnHandle(gCtx), activationDescriptor,
 					one(), srcTensorDesc, srcData,
 					zero(), dstTensorDesc, dstData);
-			if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_ACTIVATION_FORWARD_LIB, System.nanoTime() - t0);
+			if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_ACTIVATION_FORWARD_LIB, System.nanoTime() - t0);
 		} catch (CudaException e) {
 			throw new DMLRuntimeException("Error in conv2d in GPUContext " + gCtx.toString() + " from Thread " + Thread.currentThread().toString(), e);
 		}
 		finally {
 			long t1=0;
-			if (DMLScript.FINEGRAINED_STATISTICS) t1 = System.nanoTime();
-			if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_CUDNN_CLEANUP, System.nanoTime() - t1);
+			if (ConfigurationManager.isFinegrainedStatistics()) t1 = System.nanoTime();
+			if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_CUDNN_CLEANUP, System.nanoTime() - t1);
 		}
 	}
 
@@ -831,11 +831,11 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
 			}
 			// Invokes relu(double* A,  double* ret, int rlen, int clen)
 			Pointer srcData = getDensePointerForCuDNN(gCtx, in, instName); // TODO: FIXME: Add sparse kernel support for relu
-			if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
+			if (ConfigurationManager.isFinegrainedStatistics()) t0 = System.nanoTime();
 			getCudaKernels(gCtx).launchKernel("relu",
 					ExecutionConfig.getConfigForSimpleMatrixOperations(toInt(N), toInt(CHW)),
 					srcData, dstData, toInt(N), toInt(CHW));
-			if (DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_RELU_KERNEL, System.nanoTime() - t0);
+			if (ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_RELU_KERNEL, System.nanoTime() - t0);
 		}
 		else {
 			cudnnTensorDescriptor tensorDescriptor = new cudnnTensorDescriptor();
@@ -910,13 +910,13 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
 		}
 		
 		if(return_sequences) {
-			gCtx.cudaFreeHelper(instName, hyPointer, DMLScript.EAGER_CUDA_FREE);
+			gCtx.cudaFreeHelper(instName, hyPointer, gCtx.EAGER_CUDA_FREE);
 			Pointer sysmlYPointer = getDenseOutputPointer(ec, gCtx, instName, outputName, N, T*M);
 			LibMatrixCUDA.getCudaKernels(gCtx).launchKernel("prepare_lstm_output",
 					ExecutionConfig.getConfigForSimpleVectorOperations(N*T*M),
 					sysmlYPointer, cudnnYPointer, N, T, M, N*T*M);
 		}
-		gCtx.cudaFreeHelper(instName, cudnnYPointer, DMLScript.EAGER_CUDA_FREE);
+		gCtx.cudaFreeHelper(instName, cudnnYPointer, gCtx.EAGER_CUDA_FREE);
 	}
 	
 	public static void lstmBackward(ExecutionContext ec, GPUContext gCtx, String instName,
@@ -966,7 +966,7 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
 					// ----------------------
 					algo.workSpace, algo.sizeInBytes, 
 					algo.reserveSpace, algo.reserveSpaceSizeInBytes);
-			gCtx.cudaFreeHelper(instName, dy, DMLScript.EAGER_CUDA_FREE);
+			gCtx.cudaFreeHelper(instName, dy, gCtx.EAGER_CUDA_FREE);
 			ec.releaseMatrixInputForGPUInstruction(dcyName);
 			ec.releaseMatrixOutputForGPUInstruction(dhxName);
 			ec.releaseMatrixOutputForGPUInstruction(dcxName);
@@ -976,7 +976,7 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
 					ExecutionConfig.getConfigForSimpleVectorOperations(N*T*D),
 					smlDx, cudnnDx, N, D, T*D, N*T*D);
 			ec.releaseMatrixOutputForGPUInstruction(dxName);
-			gCtx.cudaFreeHelper(instName, cudnnDx, DMLScript.EAGER_CUDA_FREE);
+			gCtx.cudaFreeHelper(instName, cudnnDx, gCtx.EAGER_CUDA_FREE);
 			
 			// -------------------------------------------------------------------------------------------
 			Pointer cudnnDwPointer = gCtx.allocate(instName, (D+M+2)*(4*M)*LibMatrixCUDA.sizeOfDataType);
@@ -991,12 +991,12 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
 					ExecutionConfig.getConfigForSimpleVectorOperations((D+M+2)*(4*M)),
 					getDenseOutputPointer(ec, gCtx, instName, dwName, D+M, 4*M), 
 					getDenseOutputPointer(ec, gCtx, instName, dbName, 1, 4*M), cudnnDwPointer, D, M);
-			gCtx.cudaFreeHelper(instName, cudnnDwPointer, DMLScript.EAGER_CUDA_FREE);
+			gCtx.cudaFreeHelper(instName, cudnnDwPointer, gCtx.EAGER_CUDA_FREE);
 			ec.releaseMatrixOutputForGPUInstruction(dwName);
 			ec.releaseMatrixOutputForGPUInstruction(dbName);
 			// -------------------------------------------------------------------------------------------
 			
-			gCtx.cudaFreeHelper(instName, yPointer, DMLScript.EAGER_CUDA_FREE);
+			gCtx.cudaFreeHelper(instName, yPointer, gCtx.EAGER_CUDA_FREE);
 		}
 	}
 	

http://git-wip-us.apache.org/repos/asf/systemml/blob/ae268a9e/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNConvolutionAlgorithm.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNConvolutionAlgorithm.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNConvolutionAlgorithm.java
index f70b453..dbad80c 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNConvolutionAlgorithm.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNConvolutionAlgorithm.java
@@ -19,7 +19,7 @@
 
 package org.apache.sysml.runtime.matrix.data;
 
-import org.apache.sysml.api.DMLScript;
+import org.apache.sysml.conf.ConfigurationManager;
 import org.apache.sysml.runtime.DMLRuntimeException;
 import org.apache.sysml.runtime.instructions.gpu.GPUInstruction;
 import org.apache.sysml.runtime.instructions.gpu.context.GPUContext;
@@ -86,7 +86,7 @@ public class LibMatrixCuDNNConvolutionAlgorithm implements java.lang.AutoCloseab
 	@Override
 	public void close() {
 		long t3 = 0;
-		if (DMLScript.FINEGRAINED_STATISTICS) t3 = System.nanoTime();
+		if (ConfigurationManager.isFinegrainedStatistics()) t3 = System.nanoTime();
 		if(nchwTensorDesc != null)
 			cudnnDestroyTensorDescriptor(nchwTensorDesc);
 		if(nkpqTensorDesc != null)
@@ -97,12 +97,12 @@ public class LibMatrixCuDNNConvolutionAlgorithm implements java.lang.AutoCloseab
 			cudnnDestroyConvolutionDescriptor(convDesc);
 		if(sizeInBytes != 0) {
 			try {
-				gCtx.cudaFreeHelper(instName, workSpace, DMLScript.EAGER_CUDA_FREE);
+				gCtx.cudaFreeHelper(instName, workSpace, gCtx.EAGER_CUDA_FREE);
 			} catch (DMLRuntimeException e) {
 				throw new RuntimeException(e);
 			}
 		}
-		if(DMLScript.FINEGRAINED_STATISTICS)
+		if(ConfigurationManager.isFinegrainedStatistics())
 			GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_CUDNN_CLEANUP, System.nanoTime() - t3);
 	}
 	
@@ -130,7 +130,7 @@ public class LibMatrixCuDNNConvolutionAlgorithm implements java.lang.AutoCloseab
 	public static LibMatrixCuDNNConvolutionAlgorithm cudnnGetConvolutionForwardAlgorithm(
 			GPUContext gCtx, String instName, int N, int C, int H, int W, int K, int R, int S, 
 			int pad_h, int pad_w, int stride_h, int stride_w, int P, int Q, long workspaceLimit) {
-		long t1 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
+		long t1 = ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
 		LibMatrixCuDNNConvolutionAlgorithm ret = new LibMatrixCuDNNConvolutionAlgorithm(gCtx, instName, N, C, H, W, K, R, S, 
 				pad_h, pad_w, stride_h, stride_w, P, Q);
 		int[] algos = {-1};
@@ -144,7 +144,7 @@ public class LibMatrixCuDNNConvolutionAlgorithm implements java.lang.AutoCloseab
 			ret.workSpace = gCtx.allocate(instName, sizeInBytesArray[0]);
 		ret.sizeInBytes = sizeInBytesArray[0];
 		ret.algo = algos[0];
-		if (DMLScript.FINEGRAINED_STATISTICS)
+		if (ConfigurationManager.isFinegrainedStatistics())
 			GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_CUDNN_INIT, System.nanoTime() - t1);
 		return ret;
 	}
@@ -173,7 +173,7 @@ public class LibMatrixCuDNNConvolutionAlgorithm implements java.lang.AutoCloseab
 	public static LibMatrixCuDNNConvolutionAlgorithm cudnnGetConvolutionBackwardFilterAlgorithm(
 			GPUContext gCtx, String instName, int N, int C, int H, int W, int K, int R, int S, 
 			int pad_h, int pad_w, int stride_h, int stride_w, int P, int Q, long workspaceLimit) {
-		long t1 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
+		long t1 = ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
 		LibMatrixCuDNNConvolutionAlgorithm ret = new LibMatrixCuDNNConvolutionAlgorithm(gCtx, instName, N, C, H, W, K, R, S, 
 				pad_h, pad_w, stride_h, stride_w, P, Q);
 		
@@ -190,7 +190,7 @@ public class LibMatrixCuDNNConvolutionAlgorithm implements java.lang.AutoCloseab
 		ret.sizeInBytes = sizeInBytesArray[0];
 		ret.algo = algos[0];
 		
-		if (DMLScript.FINEGRAINED_STATISTICS)
+		if (ConfigurationManager.isFinegrainedStatistics())
 			GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_CUDNN_INIT, System.nanoTime() - t1);
 		return ret;
 	}
@@ -229,7 +229,7 @@ public class LibMatrixCuDNNConvolutionAlgorithm implements java.lang.AutoCloseab
 			ret.algo = jcuda.jcudnn.cudnnConvolutionBwdDataAlgo.CUDNN_CONVOLUTION_BWD_DATA_ALGO_0;
 		}
 		else {
-			long t1 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
+			long t1 = ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
 			int[] algos = {-1};
 			long sizeInBytesArray[] = {Math.min(workspaceLimit, MAX_WORKSPACE_LIMIT_BYTES)};
 			jcuda.jcudnn.JCudnn.cudnnGetConvolutionBackwardDataAlgorithm(
@@ -242,7 +242,7 @@ public class LibMatrixCuDNNConvolutionAlgorithm implements java.lang.AutoCloseab
 				ret.workSpace = gCtx.allocate(instName, sizeInBytesArray[0]);
 			ret.sizeInBytes = sizeInBytesArray[0];
 			ret.algo = algos[0];
-			if (DMLScript.FINEGRAINED_STATISTICS)
+			if (ConfigurationManager.isFinegrainedStatistics())
 				GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_CUDNN_INIT, System.nanoTime() - t1);
 		}
 		return ret;

http://git-wip-us.apache.org/repos/asf/systemml/blob/ae268a9e/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNInputRowFetcher.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNInputRowFetcher.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNInputRowFetcher.java
index 0130aa6..81a703d 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNInputRowFetcher.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNInputRowFetcher.java
@@ -21,7 +21,7 @@ package org.apache.sysml.runtime.matrix.data;
 import static jcuda.runtime.JCuda.cudaMemset;
 import jcuda.Pointer;
 
-import org.apache.sysml.api.DMLScript;
+import org.apache.sysml.conf.ConfigurationManager;
 import org.apache.sysml.runtime.DMLRuntimeException;
 import org.apache.sysml.runtime.controlprogram.caching.MatrixObject;
 import org.apache.sysml.runtime.instructions.gpu.GPUInstruction;
@@ -59,10 +59,10 @@ public class LibMatrixCuDNNInputRowFetcher extends LibMatrixCUDA implements java
 	public Pointer getNthRow(int n) {
 		if(isInputInSparseFormat) {
 			jcuda.runtime.JCuda.cudaDeviceSynchronize();
-			long t0 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
+			long t0 = ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
 			cudaMemset(outPointer, 0, numColumns*sizeOfDataType);
 			jcuda.runtime.JCuda.cudaDeviceSynchronize();
-			if(DMLScript.FINEGRAINED_STATISTICS) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_SET_ZERO, System.nanoTime() - t0);
+			if(ConfigurationManager.isFinegrainedStatistics()) GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_SET_ZERO, System.nanoTime() - t0);
 			LibMatrixCUDA.sliceSparseDense(gCtx, instName, (CSRPointer)inPointer, outPointer, n, n, 0, LibMatrixCUDA.toInt(numColumns-1), numColumns);
 		}
 		else {

http://git-wip-us.apache.org/repos/asf/systemml/blob/ae268a9e/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNRnnAlgorithm.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNRnnAlgorithm.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNRnnAlgorithm.java
index 8ebc4e0..7b2c601 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNRnnAlgorithm.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNRnnAlgorithm.java
@@ -32,7 +32,6 @@ import static jcuda.jcudnn.cudnnRNNInputMode.CUDNN_LINEAR_INPUT;
 import static jcuda.jcudnn.cudnnDirectionMode.CUDNN_UNIDIRECTIONAL;
 import static jcuda.jcudnn.cudnnRNNAlgo.CUDNN_RNN_ALGO_STANDARD;
 
-import org.apache.sysml.api.DMLScript;
 import org.apache.sysml.runtime.DMLRuntimeException;
 import org.apache.sysml.runtime.controlprogram.context.ExecutionContext;
 import org.apache.sysml.runtime.instructions.gpu.context.GPUContext;
@@ -301,7 +300,7 @@ public class LibMatrixCuDNNRnnAlgorithm implements java.lang.AutoCloseable {
 		}
 		if(sizeInBytes != 0) {
 			try {
-				gCtx.cudaFreeHelper(instName, workSpace, DMLScript.EAGER_CUDA_FREE);
+				gCtx.cudaFreeHelper(instName, workSpace, gCtx.EAGER_CUDA_FREE);
 			} catch (DMLRuntimeException e) {
 				throw new RuntimeException(e);
 			}
@@ -309,7 +308,7 @@ public class LibMatrixCuDNNRnnAlgorithm implements java.lang.AutoCloseable {
 		workSpace = null;
 		if(reserveSpaceSizeInBytes != 0) {
 			try {
-				gCtx.cudaFreeHelper(instName, reserveSpace, DMLScript.EAGER_CUDA_FREE);
+				gCtx.cudaFreeHelper(instName, reserveSpace, gCtx.EAGER_CUDA_FREE);
 			} catch (DMLRuntimeException e) {
 				throw new RuntimeException(e);
 			}
@@ -317,7 +316,7 @@ public class LibMatrixCuDNNRnnAlgorithm implements java.lang.AutoCloseable {
 		reserveSpace = null;
 		if(dropOutSizeInBytes != 0) {
 			try {
-				gCtx.cudaFreeHelper(instName, dropOutStateSpace, DMLScript.EAGER_CUDA_FREE);
+				gCtx.cudaFreeHelper(instName, dropOutStateSpace, gCtx.EAGER_CUDA_FREE);
 			} catch (DMLRuntimeException e) {
 				throw new RuntimeException(e);
 			}

http://git-wip-us.apache.org/repos/asf/systemml/blob/ae268a9e/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuMatMult.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuMatMult.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuMatMult.java
index 18739a8..9833456 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuMatMult.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuMatMult.java
@@ -26,7 +26,7 @@ import jcuda.Pointer;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.sysml.api.DMLScript;
+import org.apache.sysml.conf.ConfigurationManager;
 import org.apache.sysml.runtime.DMLRuntimeException;
 import org.apache.sysml.runtime.controlprogram.caching.MatrixObject;
 import org.apache.sysml.runtime.controlprogram.context.ExecutionContext;
@@ -161,19 +161,19 @@ public class LibMatrixCuMatMult extends LibMatrixCUDA {
 			// and output
 			CSRPointer A = left.getGPUObject(gCtx).getJcudaSparseMatrixPtr();
 			CSRPointer B = right.getGPUObject(gCtx).getJcudaSparseMatrixPtr();
-			long t0 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
+			long t0 = ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
 			CSRPointer C = CSRPointer.allocateForMatrixMultiply(gCtx, getCusparseHandle(gCtx), A, transa, B, transb,
 					params.m, params.n, params.k);
-			if (DMLScript.FINEGRAINED_STATISTICS)
+			if (ConfigurationManager.isFinegrainedStatistics())
 				GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_SPARSE_ALLOCATE_LIB,
 						System.nanoTime() - t0);
 
 			// Step 3: Invoke the kernel
-			long t1 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
+			long t1 = ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
 			cudaSupportFunctions.cusparsecsrgemm(getCusparseHandle(gCtx), transa, transb, params.m, params.n, params.k, A.descr,
 					(int) A.nnz, A.val, A.rowPtr, A.colInd, B.descr, (int) B.nnz, B.val, B.rowPtr, B.colInd, C.descr,
 					C.val, C.rowPtr, C.colInd);
-			if (DMLScript.FINEGRAINED_STATISTICS)
+			if (ConfigurationManager.isFinegrainedStatistics())
 				GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_SPARSE_MATRIX_SPARSE_MATRIX_LIB,
 						System.nanoTime() - t1);
 			output.getGPUObject(gCtx).setSparseMatrixCudaPointer(C);
@@ -279,14 +279,14 @@ public class LibMatrixCuMatMult extends LibMatrixCUDA {
 		denseSparseMatMult(getCusparseHandle(gCtx), instName, output, B, A, params);
 		if (outRLen != 1 && outCLen != 1) {
 			// Transpose: C = t(output)
-			long t0 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
+			long t0 = ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
 			cudaSupportFunctions.cublasgeam(gCtx.getCublasHandle(), cublasOperation.CUBLAS_OP_T, cublasOperation.CUBLAS_OP_T,
 					toInt(outCLen), toInt(outRLen), one(), output, toInt(outRLen), zero(), new Pointer(),
 					toInt(outRLen), C, toInt(outCLen));
-			if (!DMLScript.EAGER_CUDA_FREE)
+			if (!gCtx.EAGER_CUDA_FREE)
 				JCuda.cudaDeviceSynchronize();
-			gCtx.cudaFreeHelper(instName, output, DMLScript.EAGER_CUDA_FREE);
-			if (DMLScript.FINEGRAINED_STATISTICS)
+			gCtx.cudaFreeHelper(instName, output, gCtx.EAGER_CUDA_FREE);
+			if (ConfigurationManager.isFinegrainedStatistics())
 				GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_TRANSPOSE_LIB, System.nanoTime()
 						- t0);
 		}
@@ -312,7 +312,7 @@ public class LibMatrixCuMatMult extends LibMatrixCUDA {
 	 */
 	private static void denseSparseMatMult(cusparseHandle handle, String instName, Pointer C, Pointer A, CSRPointer B,
 			CuMatMultParameters param) {
-		long t0 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
+		long t0 = ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
 		String kernel = GPUInstruction.MISC_TIMER_SPARSE_MATRIX_DENSE_MATRIX_LIB;
 		// Ignoring sparse vector dense matrix multiplication and dot product
 		boolean isVector = (param.leftNumRows == 1 && !param.isLeftTransposed)
@@ -336,7 +336,7 @@ public class LibMatrixCuMatMult extends LibMatrixCUDA {
 			cudaSupportFunctions.cusparsecsrmm2(handle, transa, transb, m, param.n, k, toInt(B.nnz), one(), B.descr, B.val,
 					B.rowPtr, B.colInd, A, param.ldb, zero(), C, param.ldc);
 		}
-		if (DMLScript.FINEGRAINED_STATISTICS)
+		if (ConfigurationManager.isFinegrainedStatistics())
 			GPUStatistics.maintainCPMiscTimes(instName, kernel, System.nanoTime() - t0);
 	}
 
@@ -361,7 +361,7 @@ public class LibMatrixCuMatMult extends LibMatrixCUDA {
 	 */
 	private static void denseDenseMatMult(cublasHandle handle, String instName, Pointer C, Pointer A, Pointer B,
 			CuMatMultParameters param) {
-		long t0 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
+		long t0 = ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
 		String kernel = null;
 		param.rowToColumnMajor();
 		param.validate();
@@ -403,7 +403,7 @@ public class LibMatrixCuMatMult extends LibMatrixCUDA {
 					zero(), C, param.ldc);
 			kernel = GPUInstruction.MISC_TIMER_DENSE_MATRIX_DENSE_MATRIX_LIB;
 		}
-		if (DMLScript.FINEGRAINED_STATISTICS)
+		if (ConfigurationManager.isFinegrainedStatistics())
 			GPUStatistics.maintainCPMiscTimes(instName, kernel, System.nanoTime() - t0);
 	}
 

http://git-wip-us.apache.org/repos/asf/systemml/blob/ae268a9e/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
index 0c6f41a..4569dbe 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
@@ -28,7 +28,7 @@ import java.util.concurrent.atomic.AtomicLong;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.sysml.api.DMLScript;
+import org.apache.sysml.conf.ConfigurationManager;
 import org.apache.sysml.hops.OptimizerUtils;
 import org.apache.sysml.runtime.DMLRuntimeException;
 import org.apache.sysml.runtime.functionobjects.KahanPlus;
@@ -91,7 +91,7 @@ public class LibMatrixDNN {
 	static AtomicLong loopedConvBwdDataCol2ImTime = new AtomicLong(0);
 	
 	public static void appendStatistics(StringBuilder sb) {
-		if(DMLScript.FINEGRAINED_STATISTICS) {
+		if(ConfigurationManager.isFinegrainedStatistics()) {
 			sb.append("LibMatrixDNN dense count (conv/bwdF/bwdD/im2col/maxBwd):\t" 
 					+ conv2dDenseCount.get() + "/"
 					+ conv2dBwdFilterDenseCount.get() + "/"
@@ -238,7 +238,7 @@ public class LibMatrixDNN {
 			throw new DMLRuntimeException("Incorrect dout dimensions in pooling_backward:" + input.getNumRows() + " " + input.getNumColumns() + " " + params.N + " " + params.K*params.P*params.Q);
 		}
 		
-		if(DMLScript.FINEGRAINED_STATISTICS) {
+		if(ConfigurationManager.isFinegrainedStatistics()) {
 			boolean isSparse = (poolType == PoolingType.MAX) ? (input.isInSparseFormat() || dout.isInSparseFormat()) : dout.isInSparseFormat();
 			if(isSparse)
 				maxPoolBwdSparseCount.addAndGet(1);
@@ -780,7 +780,7 @@ public class LibMatrixDNN {
 		if(params.stride_h <= 0 || params.stride_w <= 0) 
 			throw new DMLRuntimeException("Only positive strides supported:" + params.stride_h + ", " + params.stride_w);
 		
-		if(DMLScript.FINEGRAINED_STATISTICS) {
+		if(ConfigurationManager.isFinegrainedStatistics()) {
 			if(filter.isInSparseFormat() || dout.isInSparseFormat()) {
 				conv2dBwdDataSparseCount.addAndGet(1);
 			}
@@ -805,7 +805,7 @@ public class LibMatrixDNN {
 		if(params.stride_h <= 0 || params.stride_w <= 0) 
 			throw new DMLRuntimeException("Only positive strides supported:" + params.stride_h + ", " + params.stride_w);
 		
-		if(DMLScript.FINEGRAINED_STATISTICS) {
+		if(ConfigurationManager.isFinegrainedStatistics()) {
 			if(input.isInSparseFormat() || dout.isInSparseFormat()) {
 				conv2dBwdFilterSparseCount.addAndGet(1);
 			}
@@ -831,7 +831,7 @@ public class LibMatrixDNN {
 		if(params.stride_h <= 0 || params.stride_w <= 0) 
 			throw new DMLRuntimeException("Only positive strides supported:" + params.stride_h + ", " + params.stride_w);
 		
-		if(DMLScript.FINEGRAINED_STATISTICS) {
+		if(ConfigurationManager.isFinegrainedStatistics()) {
 			if(input.isInSparseFormat() || filter.isInSparseFormat()) {
 				conv2dSparseCount.addAndGet(1);
 			}

http://git-wip-us.apache.org/repos/asf/systemml/blob/ae268a9e/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNConv2d.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNConv2d.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNConv2d.java
index b938a0a..982949f 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNConv2d.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNConv2d.java
@@ -22,7 +22,7 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.concurrent.Callable;
 
-import org.apache.sysml.api.DMLScript;
+import org.apache.sysml.conf.ConfigurationManager;
 import org.apache.sysml.hops.OptimizerUtils;
 import org.apache.sysml.runtime.DMLRuntimeException;
 import org.apache.sysml.runtime.matrix.data.LibMatrixDNNRotate180.Rotate180Worker;
@@ -172,16 +172,16 @@ public class LibMatrixDNNConv2d
 			MatrixBlock outMM = new MatrixBlock(K, PQ, _params.output.sparse);
 			long time1 = 0; long time2 = 0;
 			for(int n = _rl; n < _ru; n++)  {
-				long t1 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
+				long t1 = ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
 				LibMatrixDNNIm2Col.im2col(_params.input1, outIm2col, n, _params, false);
-				long t2 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
+				long t2 = ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
 				
 				// filter %*% _im2ColOutBlock => matMultOutBlock
 				outMM.reset(outMM.rlen, outMM.clen, _params.output.sparse);
 				LibMatrixDNNHelper.singleThreadedMatMult(_params.input2, outIm2col, outMM, false, true, _params);
-				long t3 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
+				long t3 = ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
 				
-				if(DMLScript.FINEGRAINED_STATISTICS) {
+				if(ConfigurationManager.isFinegrainedStatistics()) {
 					time1 += t2 - t1;
 					time2 += t3 - t2;
 				}
@@ -195,7 +195,7 @@ public class LibMatrixDNNConv2d
 						_params.bias.getDenseBlockValues(), K, PQ);
 			}
 			
-			if(DMLScript.FINEGRAINED_STATISTICS) {
+			if(ConfigurationManager.isFinegrainedStatistics()) {
 				LibMatrixDNN.loopedConvIm2ColTime.addAndGet(time1);
 				LibMatrixDNN.loopedConvMatMultTime.addAndGet(time2);
 			}
@@ -416,20 +416,20 @@ public class LibMatrixDNNConv2d
 				// rotate180(dout[n,]) => dout_reshaped
 				rotate180Worker.execute(n, 0);
 				// dout_reshaped %*% filter => temp
-				long t1 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
+				long t1 = ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
 				outMM.reset(PQ, CRS, false);
 				LibMatrixDNNHelper.singleThreadedMatMult(outRotate, filter, outMM, !outRotate.sparse, false, _params);
-				long t2 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
+				long t2 = ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
 				// col2im(temp) => output[n,] 
 				LibMatrixDNNIm2Col.col2imOverSingleImage(n, outMM, _params);
-				long t3 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
+				long t3 = ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
 				
-				if(DMLScript.FINEGRAINED_STATISTICS) {
+				if(ConfigurationManager.isFinegrainedStatistics()) {
 					time1 += t2 - t1;
 					time2 += t3 - t2;
 				}
 			}
-			if(DMLScript.FINEGRAINED_STATISTICS) {
+			if(ConfigurationManager.isFinegrainedStatistics()) {
 				LibMatrixDNN.loopedConvBwdDataMatMultTime.addAndGet(time1);
 				LibMatrixDNN.loopedConvBwdDataCol2ImTime.addAndGet(time2);
 			}
@@ -512,24 +512,24 @@ public class LibMatrixDNNConv2d
 				rotate180Worker.execute(n, 0);
 				
 				// im2col(input) => _im2ColOutBlock
-				long t1 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
+				long t1 = ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
 				LibMatrixDNNIm2Col.im2col(_params.input1, im2ColOutBlock, n, _params, false);
-				long t2 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
+				long t2 = ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
 				
 				outMM.reset(CRS, K, false);
 				LibMatrixDNNHelper.singleThreadedMatMult(im2ColOutBlock, outRotate, outMM, !im2ColOutBlock.sparse, !outRotate.sparse, _params);
-				long t3 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
+				long t3 = ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
 				
 				if( !outMM.isEmptyBlock() ) //accumulate row results
 					LibMatrixMult.vectAdd(outMM.getDenseBlockValues(), partRet, 0, 0, K*CRS);
 				
-				if(DMLScript.FINEGRAINED_STATISTICS) {
+				if(ConfigurationManager.isFinegrainedStatistics()) {
 					time1 += t2 - t1;
 					time2 += t3 - t2;
 				}
 			}
 			inplaceTransAdd(partRet, _params);
-			if(DMLScript.FINEGRAINED_STATISTICS) {
+			if(ConfigurationManager.isFinegrainedStatistics()) {
 				LibMatrixDNN.loopedConvBwdFilterIm2ColTime.addAndGet(time1);
 				LibMatrixDNN.loopedConvBwdFilterMatMultTime.addAndGet(time2);
 			}
@@ -562,27 +562,27 @@ public class LibMatrixDNNConv2d
 				rotate180Worker.execute(n, 0);
 				
 				// im2col(input) => _im2ColOutBlock
-				long t1 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
+				long t1 = ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
 				LibMatrixDNNIm2Col.im2col(_params.input1, im2ColOutBlock, n, _params, true);
-				long t2 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
+				long t2 = ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
 				
 				outMM.reset(K, CRS, false);
 				//Timing time = new Timing(true);
 				LibMatrixDNNHelper.singleThreadedMatMult(outRotate, im2ColOutBlock, 
 					outMM, !outRotate.sparse, !im2ColOutBlock.sparse, _params);
-				long t3 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
+				long t3 = ConfigurationManager.isFinegrainedStatistics() ? System.nanoTime() : 0;
 				
 				if( !outMM.isEmptyBlock() ) //accumulate row results
 					LibMatrixMult.vectAdd(outMM.getDenseBlockValues(), partRet, 0, 0, K*CRS);
 				
-				if(DMLScript.FINEGRAINED_STATISTICS) {
+				if(ConfigurationManager.isFinegrainedStatistics()) {
 					time1 += t2 - t1;
 					time2 += t3 - t2;
 				}
 			}
 			//no need to transpose because t(t(out)) cancel out
 			inplaceAdd(partRet, _params);
-			if(DMLScript.FINEGRAINED_STATISTICS) {
+			if(ConfigurationManager.isFinegrainedStatistics()) {
 				LibMatrixDNN.loopedConvBwdFilterIm2ColTime.addAndGet(time1);
 				LibMatrixDNN.loopedConvBwdFilterMatMultTime.addAndGet(time2);
 			}

http://git-wip-us.apache.org/repos/asf/systemml/blob/ae268a9e/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixNative.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixNative.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixNative.java
index aa5ba86..2cb64c2 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixNative.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixNative.java
@@ -26,7 +26,6 @@ import java.util.stream.IntStream;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.sysml.api.DMLScript;
 import org.apache.sysml.conf.ConfigurationManager;
 import org.apache.sysml.conf.DMLConfig;
 import org.apache.sysml.hops.OptimizerUtils;
@@ -87,7 +86,7 @@ public class LibMatrixNative
 		{
 			ret.sparse = false;
 			ret.allocateDenseBlock();
-			long start = DMLScript.STATISTICS ? System.nanoTime() : 0;
+			long start = ConfigurationManager.isStatistics() ? System.nanoTime() : 0;
 			boolean rccode = false;
 			if( isSinglePrecision() ) {
 				FloatBuffer fin1 = toFloatBuffer(m1.getDenseBlockValues(), inBuff, true);
@@ -102,7 +101,7 @@ public class LibMatrixNative
 					ret.getDenseBlockValues(), m1.getNumRows(), m1.getNumColumns(), m2.getNumColumns(), k);
 			}
 			if (rccode) {
-				if(DMLScript.STATISTICS) {
+				if(ConfigurationManager.isStatistics()) {
 					Statistics.nativeLibMatrixMultTime += System.nanoTime() - start;
 					Statistics.numNativeLibMatrixMultCalls.increment();
 				}
@@ -158,7 +157,7 @@ public class LibMatrixNative
 		params.numThreads = params.numThreads <= 0 ? NativeHelper.getMaxNumThreads() : params.numThreads;
 		if(NativeHelper.isNativeLibraryLoaded() && !input.isInSparseFormat() && !filter.isInSparseFormat()) {
 			setNumThreads(params);
-			long start = DMLScript.STATISTICS ? System.nanoTime() : 0;
+			long start = ConfigurationManager.isStatistics() ? System.nanoTime() : 0;
 			int nnz = 0;
 			if(params.bias == null) {
 				nnz = NativeHelper.conv2dDense(input.getDenseBlockValues(), filter.getDenseBlockValues(),
@@ -195,7 +194,7 @@ public class LibMatrixNative
 			}
 			//post processing and error handling
 			if(nnz != -1) {
-				if(DMLScript.STATISTICS) {
+				if(ConfigurationManager.isStatistics()) {
 					Statistics.nativeConv2dTime += System.nanoTime() - start;
 					Statistics.numNativeConv2dCalls.increment();
 				}
@@ -234,13 +233,13 @@ public class LibMatrixNative
 		params.numThreads = params.numThreads <= 0 ? NativeHelper.getMaxNumThreads() : params.numThreads;
 		if(NativeHelper.isNativeLibraryLoaded() && !dout.isInSparseFormat() && !input.isInSparseFormat()) {
 			setNumThreads(params);
-			long start = DMLScript.STATISTICS ? System.nanoTime() : 0;
+			long start = ConfigurationManager.isStatistics() ? System.nanoTime() : 0;
 			int nnz = NativeHelper.conv2dBackwardFilterDense(input.getDenseBlockValues(), dout.getDenseBlockValues(),
 					outputBlock.getDenseBlockValues(), params.N, params.C, params.H, params.W, 
 					params.K, params.R, params.S, params.stride_h, params.stride_w, params.pad_h, params.pad_w, 
 					params.P, params.Q, params.numThreads);
 			if(nnz != -1) {
-				if(DMLScript.STATISTICS) {
+				if(ConfigurationManager.isStatistics()) {
 					Statistics.nativeConv2dBwdFilterTime += System.nanoTime() - start;
 					Statistics.numNativeConv2dBwdFilterCalls.increment();
 				}
@@ -270,13 +269,13 @@ public class LibMatrixNative
 		params.numThreads = params.numThreads <= 0 ? NativeHelper.getMaxNumThreads() : params.numThreads;
 		if(NativeHelper.isNativeLibraryLoaded() && !dout.isInSparseFormat() && !filter.isInSparseFormat()) {
 			setNumThreads(params);
-			long start = DMLScript.STATISTICS ? System.nanoTime() : 0;
+			long start = ConfigurationManager.isStatistics() ? System.nanoTime() : 0;
 			int nnz = NativeHelper.conv2dBackwardDataDense(filter.getDenseBlockValues(), dout.getDenseBlockValues(),
 					outputBlock.getDenseBlockValues(), params.N, params.C, params.H, params.W, 
 					params.K, params.R, params.S, params.stride_h, params.stride_w, params.pad_h, params.pad_w, 
 					params.P, params.Q, params.numThreads);
 			if(nnz != -1) {
-				if(DMLScript.STATISTICS) {
+				if(ConfigurationManager.isStatistics()) {
 					Statistics.nativeConv2dBwdDataTime += System.nanoTime() - start;
 					Statistics.numNativeConv2dBwdDataCalls.increment();
 				}

http://git-wip-us.apache.org/repos/asf/systemml/blob/ae268a9e/src/main/java/org/apache/sysml/runtime/matrix/data/SinglePrecisionCudaSupportFunctions.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/SinglePrecisionCudaSupportFunctions.java b/src/main/java/org/apache/sysml/runtime/matrix/data/SinglePrecisionCudaSupportFunctions.java
index 942b56b..044e943 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/SinglePrecisionCudaSupportFunctions.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/SinglePrecisionCudaSupportFunctions.java
@@ -29,7 +29,7 @@ import java.util.stream.IntStream;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.sysml.api.DMLScript;
+import org.apache.sysml.conf.ConfigurationManager;
 import org.apache.sysml.runtime.instructions.gpu.GPUInstruction;
 import org.apache.sysml.runtime.instructions.gpu.context.GPUContext;
 import org.apache.sysml.utils.GPUStatistics;
@@ -168,7 +168,7 @@ public class SinglePrecisionCudaSupportFunctions implements CudaSupportFunctions
 	
 	@Override
 	public void deviceToHost(GPUContext gCtx, Pointer src, double[] dest, String instName, boolean isEviction) {
-		long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
+		long t0 = ConfigurationManager.isStatistics() ? System.nanoTime() : 0;
 		// We invoke transfer matrix from device to host in two cases:
 		// 1. During eviction of unlocked matrices
 		// 2. During acquireHostRead
@@ -182,7 +182,7 @@ public class SinglePrecisionCudaSupportFunctions implements CudaSupportFunctions
 			Pointer deviceDoubleData = gCtx.allocate(instName, ((long)dest.length)*Sizeof.DOUBLE);
 			LibMatrixCUDA.float2double(gCtx, src, deviceDoubleData, dest.length);
 			cudaMemcpy(Pointer.to(dest), deviceDoubleData, ((long)dest.length)*Sizeof.DOUBLE, cudaMemcpyDeviceToHost);
-			gCtx.cudaFreeHelper(instName, deviceDoubleData, DMLScript.EAGER_CUDA_FREE);
+			gCtx.cudaFreeHelper(instName, deviceDoubleData, gCtx.EAGER_CUDA_FREE);
 		}
 		else {
 			LOG.debug("Potential OOM: Allocated additional space on host in deviceToHost");
@@ -190,11 +190,11 @@ public class SinglePrecisionCudaSupportFunctions implements CudaSupportFunctions
 			cudaMemcpy(Pointer.to(floatData), src, ((long)dest.length)*Sizeof.FLOAT, cudaMemcpyDeviceToHost);
 			LibMatrixNative.fromFloatBuffer(floatData, dest);
 		}
-		if(DMLScript.STATISTICS) {
+		if(ConfigurationManager.isStatistics()) {
 			long totalTime = System.nanoTime() - t0;
 			GPUStatistics.cudaFloat2DoubleTime.add(totalTime);
 			GPUStatistics.cudaFloat2DoubleCount.add(1);
-			if(DMLScript.FINEGRAINED_STATISTICS && instName != null) 
+			if(ConfigurationManager.isFinegrainedStatistics() && instName != null) 
 				GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_DEVICE_TO_HOST, totalTime);
 		}
 	}
@@ -203,12 +203,12 @@ public class SinglePrecisionCudaSupportFunctions implements CudaSupportFunctions
 	public void hostToDevice(GPUContext gCtx, double[] src, Pointer dest, String instName) {
 		LOG.debug("Potential OOM: Allocated additional space in hostToDevice");
 		// TODO: Perform conversion on GPU using double2float and float2double kernels
-		long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
+		long t0 = ConfigurationManager.isStatistics() ? System.nanoTime() : 0;
 		if(PERFORM_CONVERSION_ON_DEVICE) {
 			Pointer deviceDoubleData = gCtx.allocate(instName, ((long)src.length)*Sizeof.DOUBLE);
 			cudaMemcpy(deviceDoubleData, Pointer.to(src), ((long)src.length)*Sizeof.DOUBLE, cudaMemcpyHostToDevice);
 			LibMatrixCUDA.double2float(gCtx, deviceDoubleData, dest, src.length);
-			gCtx.cudaFreeHelper(instName, deviceDoubleData, DMLScript.EAGER_CUDA_FREE);
+			gCtx.cudaFreeHelper(instName, deviceDoubleData, gCtx.EAGER_CUDA_FREE);
 		}
 		else {
 			FloatBuffer floatData = ByteBuffer.allocateDirect(Sizeof.FLOAT*src.length).order(ByteOrder.nativeOrder()).asFloatBuffer();
@@ -216,11 +216,11 @@ public class SinglePrecisionCudaSupportFunctions implements CudaSupportFunctions
 			cudaMemcpy(dest, Pointer.to(floatData), ((long)src.length)*Sizeof.FLOAT, cudaMemcpyHostToDevice);
 		}
 		
-		if(DMLScript.STATISTICS) {
+		if(ConfigurationManager.isStatistics()) {
 			long totalTime = System.nanoTime() - t0;
 			GPUStatistics.cudaDouble2FloatTime.add(totalTime);
 			GPUStatistics.cudaDouble2FloatCount.add(1);
-			if(DMLScript.FINEGRAINED_STATISTICS && instName != null) 
+			if(ConfigurationManager.isFinegrainedStatistics() && instName != null) 
 				GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_HOST_TO_DEVICE, totalTime);
 		}
 	}

http://git-wip-us.apache.org/repos/asf/systemml/blob/ae268a9e/src/main/java/org/apache/sysml/runtime/util/ProgramConverter.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/util/ProgramConverter.java b/src/main/java/org/apache/sysml/runtime/util/ProgramConverter.java
index 21e6bd3..8289c30 100644
--- a/src/main/java/org/apache/sysml/runtime/util/ProgramConverter.java
+++ b/src/main/java/org/apache/sysml/runtime/util/ProgramConverter.java
@@ -688,7 +688,7 @@ public class ProgramConverter
 		builder.append(NEWLINE);
 
 		//handle additional configurations
-		builder.append(CONF_STATS + "=" + DMLScript.STATISTICS);
+		builder.append(CONF_STATS + "=" + ConfigurationManager.isStatistics());
 		builder.append(COMPONENTS_DELIM);
 		builder.append(NEWLINE);
 
@@ -753,7 +753,7 @@ public class ProgramConverter
 		sb.append( NEWLINE );
 		
 		//handle additional configurations
-		sb.append( CONF_STATS + "=" + DMLScript.STATISTICS );
+		sb.append( CONF_STATS + "=" + ConfigurationManager.isStatistics() );
 		sb.append( COMPONENTS_DELIM );
 		sb.append( NEWLINE );
 		
@@ -1727,7 +1727,7 @@ public class ProgramConverter
 	
 	private static void parseAndSetAdditionalConfigurations(String conf) {
 		String[] statsFlag = conf.split("=");
-		DMLScript.STATISTICS = Boolean.parseBoolean(statsFlag[1]);
+		ConfigurationManager.setStatistics(Boolean.parseBoolean(statsFlag[1]));
 	}
 
 	//////////

http://git-wip-us.apache.org/repos/asf/systemml/blob/ae268a9e/src/main/java/org/apache/sysml/utils/GPUStatistics.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/utils/GPUStatistics.java b/src/main/java/org/apache/sysml/utils/GPUStatistics.java
index fcbc4c4..e748057 100644
--- a/src/main/java/org/apache/sysml/utils/GPUStatistics.java
+++ b/src/main/java/org/apache/sysml/utils/GPUStatistics.java
@@ -28,7 +28,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.concurrent.atomic.LongAdder;
 
-import org.apache.sysml.api.DMLScript;
+import org.apache.sysml.conf.ConfigurationManager;
 
 /**
  * Measures performance numbers when GPU mode is enabled
@@ -155,7 +155,7 @@ public class GPUStatistics {
 	 */
 	public synchronized static void maintainCPMiscTimes( String instructionName, String miscTimer, long timeNanos, long incrementCount)
 	{
-		if (!(DMLScript.FINEGRAINED_STATISTICS))
+		if (!(ConfigurationManager.isFinegrainedStatistics()))
 			return;
 
 		HashMap<String, Long> miscTimesMap = _cpInstMiscTime.get(instructionName);