You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by ni...@apache.org on 2016/07/09 16:35:41 UTC

incubator-systemml git commit: [SYSTEMML-769] Minor improvement for dense-dense conv2d and added statistics method for performance debugging

Repository: incubator-systemml
Updated Branches:
  refs/heads/master 2ebf885a6 -> ab45af17c


[SYSTEMML-769] Minor improvement for dense-dense conv2d and added
statistics method for performance debugging

Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/ab45af17
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/ab45af17
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/ab45af17

Branch: refs/heads/master
Commit: ab45af17c3ff54a77262a318c5d0be084384b8f7
Parents: 2ebf885
Author: Niketan Pansare <np...@us.ibm.com>
Authored: Sat Jul 9 09:33:52 2016 -0700
Committer: Niketan Pansare <np...@us.ibm.com>
Committed: Sat Jul 9 09:33:52 2016 -0700

----------------------------------------------------------------------
 .../sysml/runtime/matrix/data/LibMatrixDNN.java | 144 ++++++++++++-------
 1 file changed, 94 insertions(+), 50 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ab45af17/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
index 26e2b8b..3014b49 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
@@ -32,11 +32,11 @@ import java.util.concurrent.atomic.AtomicLong;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.sysml.api.DMLScript;
 import org.apache.sysml.hops.OptimizerUtils;
 import org.apache.sysml.runtime.DMLRuntimeException;
 import org.apache.sysml.runtime.util.ConvolutionUtils;
 
-
 public class LibMatrixDNN {
 	
 	protected static final Log LOG =  LogFactory.getLog(LibMatrixDNN.class.getName());
@@ -77,6 +77,44 @@ public class LibMatrixDNN {
 		int maxCommonIndexS;
 	}
 	
+	private static AtomicLong conv2dSparseCount = new AtomicLong(0);
+	private static AtomicLong conv2dDenseCount = new AtomicLong(0);
+	private static AtomicLong conv2dBwdFilterSparseCount = new AtomicLong(0);
+	private static AtomicLong conv2dBwdFilterDenseCount = new AtomicLong(0);
+	private static AtomicLong conv2dBwdDataSparseCount = new AtomicLong(0);
+	private static AtomicLong conv2dBwdDataDenseCount = new AtomicLong(0);
+	private static AtomicLong im2colSparseCount = new AtomicLong(0);
+	private static AtomicLong im2colDenseCount = new AtomicLong(0);
+	private static AtomicLong maxPoolBwdSparseCount = new AtomicLong(0);
+	private static AtomicLong maxPoolBwdDenseCount = new AtomicLong(0);
+	public static void appendStatistics(StringBuilder sb) {
+		sb.append("LibMatrixDNN dense count (conv/bwdF/bwdD/im2col/maxBwd):\t" 
+				+ conv2dDenseCount.get() + "/"
+				+ conv2dBwdFilterDenseCount.get() + "/"
+				+ conv2dBwdDataDenseCount.get() + "/"
+				+ im2colDenseCount.get() + "/"
+				+ maxPoolBwdDenseCount.get() + ".\n");
+		sb.append("LibMatrixDNN sparse count (conv/bwdF/bwdD/im2col/maxBwd):\t" 
+				+ conv2dSparseCount.get() + "/"
+				+ conv2dBwdFilterSparseCount.get() + "/"
+				+ conv2dBwdDataSparseCount.get() + "/"
+				+ im2colSparseCount.get() + "/"
+				+ maxPoolBwdSparseCount.get() + ".\n");
+	}
+	public static void resetStatistics() {
+		conv2dDenseCount.set(0);
+		conv2dBwdFilterDenseCount.set(0);
+		conv2dBwdDataDenseCount.set(0);
+		im2colDenseCount.set(0);
+		maxPoolBwdDenseCount.set(0);
+		
+		conv2dSparseCount.set(0);
+		conv2dBwdFilterSparseCount.set(0);
+		conv2dBwdDataSparseCount.set(0);
+		im2colSparseCount.set(0);
+		maxPoolBwdSparseCount.set(0);
+	}
+	
 	public static class ConvolutionParameters {
 		public int N; public int C; public int H; public int W;
 		public int K; public int R; public int S; public int stride_h; public int stride_w; public int pad_h; public int pad_w;
@@ -169,6 +207,15 @@ public class LibMatrixDNN {
 			throw new DMLRuntimeException("Only positive strides supported");
 		}
 		
+		if(DMLScript.STATISTICS) {
+			if(input.isInSparseFormat() || dout.isInSparseFormat()) {
+				conv2dBwdFilterSparseCount.addAndGet(1);
+			}
+			else {
+				conv2dBwdFilterDenseCount.addAndGet(1);
+			}
+		}
+		
 		int constrainedNumThreads = OptimizerUtils.getConstrainedNumThreads(params.numThreads);
 		if(!ALLOW_MULTI_THREADED_OPS || constrainedNumThreads <= 1) {
 			for (int c = 0; c < params.C; c++) {
@@ -366,6 +413,15 @@ public class LibMatrixDNN {
 			throw new DMLRuntimeException("Incorrect input to conv2d");
 		}
 		
+		if(DMLScript.STATISTICS) {
+			if(input.isInSparseFormat() || filter.isInSparseFormat()) {
+				conv2dSparseCount.addAndGet(1);
+			}
+			else {
+				conv2dDenseCount.addAndGet(1);
+			}
+		}
+		
 		params.tmpData = new TemporaryConvolutionData();
 		if(input.isInSparseFormat()) {
 			params.tmpData.minIndexArrR = new int[params.H];
@@ -433,6 +489,15 @@ public class LibMatrixDNN {
 		if(dout.getNumColumns() != params.C*params.P*params.Q || dout.getNumRows() != params.N) {
 			throw new DMLRuntimeException("Incorrect dout dimensions in maxpooling_backward:" + input.getNumRows() + " " + input.getNumColumns() + " " + params.N + " " + params.K*params.P*params.Q);
 		}
+		
+		if(DMLScript.STATISTICS) {
+			if(input.isInSparseFormat() || dout.isInSparseFormat()) {
+				maxPoolBwdSparseCount.addAndGet(1);
+			}
+			else {
+				maxPoolBwdDenseCount.addAndGet(1);
+			}
+		}
 
 		int constrainedNumThreads = OptimizerUtils.getConstrainedNumThreads(params.numThreads);
 		if(!ALLOW_MULTI_THREADED_OPS || constrainedNumThreads <= 1) {
@@ -455,41 +520,10 @@ public class LibMatrixDNN {
 		int [] minIndexArrS = params.tmpData.minIndexArrS;
 		int [] maxIndexArrS = params.tmpData.maxIndexArrS;
 		
-		int minCommonIndexS = params.tmpData.minCommonIndexS;
-		int maxCommonIndexS = params.tmpData.maxCommonIndexS;
-		
+		final int minCommonIndexS = params.tmpData.minCommonIndexS;
+		final int maxCommonIndexS = params.tmpData.maxCommonIndexS;
 		
-		int minS = 0;
-		if(params.S >= 4) {
-			minS = params.S - params.S % 4;
-			for (int n = n1; n < n2; n++) {
-				for (int c = 0; c < params.C; c++) {
-					for (int r = 0; r < params.R; r++) {
-						final int filterOffset = k*params.C*params.R*params.S + c*params.R*params.S + r*params.S;
-						for (int p = minIndexArrR[r]; p < maxIndexArrR[r]; p++) {
-							final int h = p*params.stride_h + r - params.pad_h;
-							final int inputOffSet = n*params.C*params.H*params.W + c*params.H*params.W + h*params.W - params.pad_w;
-							final int outputOffset = n*params.K*params.P*params.Q + k*params.P*params.Q + p*params.Q;
-							// ------------------------------------------------------------------------
-							// Efficient striding with vectorization
-							for (int q = minCommonIndexS; q < maxCommonIndexS; q++) {
-								final int wOffset = inputOffSet + q*params.stride_w;
-								final int outOffsetWithQ = outputOffset + q;
-								for (int s = 0; s < minS; s += 4) {
-									final int inOffsetWithS = wOffset + s;
-									final int filterOffsetWithS = filterOffset + s;
-									outputArray[outOffsetWithQ] += inputArray[inOffsetWithS]*filterArray[filterOffsetWithS]
-											+ inputArray[inOffsetWithS+1]*filterArray[filterOffsetWithS+1]
-											+ inputArray[inOffsetWithS+2]*filterArray[filterOffsetWithS+2]
-											+ inputArray[inOffsetWithS+3]*filterArray[filterOffsetWithS+3];
-								}
-							}
-							// ------------------------------------------------------------------------
-						}
-					}
-				}
-			}
-		}
+		final int minS = (params.S >= 4) ? (params.S - params.S % 4) : 0;
 		
 		for (int n = n1; n < n2; n++) {
 			for (int c = 0; c < params.C; c++) {
@@ -499,28 +533,28 @@ public class LibMatrixDNN {
 						final int h = p*params.stride_h + r - params.pad_h;
 						final int inputOffSet = n*params.C*params.H*params.W + c*params.H*params.W + h*params.W - params.pad_w;
 						final int outputOffset = n*params.K*params.P*params.Q + k*params.P*params.Q + p*params.Q;
-						// ------------------------------------------------------------------------
-						// Efficient striding
+						
 						for (int q = minCommonIndexS; q < maxCommonIndexS; q++) {
 							final int wOffset = inputOffSet + q*params.stride_w;
+							// ------------------------------------------------------------------------
+							// Efficient striding with vectorization
+							final int outOffsetWithQ = outputOffset + q;
+							for (int s = 0; s < minS; s += 4) {
+								final int inOffsetWithS = wOffset + s;
+								final int filterOffsetWithS = filterOffset + s;
+								outputArray[outOffsetWithQ] += inputArray[inOffsetWithS]*filterArray[filterOffsetWithS]
+										+ inputArray[inOffsetWithS+1]*filterArray[filterOffsetWithS+1]
+										+ inputArray[inOffsetWithS+2]*filterArray[filterOffsetWithS+2]
+										+ inputArray[inOffsetWithS+3]*filterArray[filterOffsetWithS+3];
+							}
+							// ------------------------------------------------------------------------
+							// Efficient striding without vectorization
 							for (int s = minS; s < params.S; s++) {
 								outputArray[outputOffset + q] += inputArray[wOffset + s]*filterArray[filterOffset + s];
 							}
+							// ------------------------------------------------------------------------
 						}
 						// ------------------------------------------------------------------------
-					}
-				}
-			}
-			
-			
-			for (int c = 0; c < params.C; c++) {
-				for (int r = 0; r < params.R; r++) {
-					final int filterOffset = k*params.C*params.R*params.S + c*params.R*params.S + r*params.S;
-					for (int p = minIndexArrR[r]; p < maxIndexArrR[r]; p++) {
-						final int h = p*params.stride_h + r - params.pad_h;
-						final int inputOffSet = n*params.C*params.H*params.W + c*params.H*params.W + h*params.W - params.pad_w;
-						final int outputOffset = n*params.K*params.P*params.Q + k*params.P*params.Q + p*params.Q;
-						// ------------------------------------------------------------------------
 						// Inefficient striding
 						for (int s = 0; s < params.S; s++) {
 							for (int q = minIndexArrS[s]; q < minCommonIndexS; q++) {
@@ -1032,6 +1066,16 @@ public class LibMatrixDNN {
 		params.output = outputBlock;
 		
 		params.outputNNZ.set(0);
+		
+		if(DMLScript.STATISTICS) {
+			if(input.isInSparseFormat()) {
+				im2colSparseCount.addAndGet(1);
+			}
+			else {
+				im2colDenseCount.addAndGet(1);
+			}
+		}
+		
 		int constrainedNumThreads = OptimizerUtils.getConstrainedNumThreads(params.numThreads);
 		if(!ALLOW_MULTI_THREADED_OPS || constrainedNumThreads <= 1) {
 			for (int n = 0; n < params.N; n++) { // Do following for all images