You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by ni...@apache.org on 2016/07/09 16:35:41 UTC
incubator-systemml git commit: [SYSTEMML-769] Minor improvement for
dense-dense conv2d and added statistics method for performance debugging
Repository: incubator-systemml
Updated Branches:
refs/heads/master 2ebf885a6 -> ab45af17c
[SYSTEMML-769] Minor improvement for dense-dense conv2d and added
statistics method for performance debugging
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/ab45af17
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/ab45af17
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/ab45af17
Branch: refs/heads/master
Commit: ab45af17c3ff54a77262a318c5d0be084384b8f7
Parents: 2ebf885
Author: Niketan Pansare <np...@us.ibm.com>
Authored: Sat Jul 9 09:33:52 2016 -0700
Committer: Niketan Pansare <np...@us.ibm.com>
Committed: Sat Jul 9 09:33:52 2016 -0700
----------------------------------------------------------------------
.../sysml/runtime/matrix/data/LibMatrixDNN.java | 144 ++++++++++++-------
1 file changed, 94 insertions(+), 50 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ab45af17/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
index 26e2b8b..3014b49 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
@@ -32,11 +32,11 @@ import java.util.concurrent.atomic.AtomicLong;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.sysml.api.DMLScript;
import org.apache.sysml.hops.OptimizerUtils;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.util.ConvolutionUtils;
-
public class LibMatrixDNN {
protected static final Log LOG = LogFactory.getLog(LibMatrixDNN.class.getName());
@@ -77,6 +77,44 @@ public class LibMatrixDNN {
int maxCommonIndexS;
}
+ private static AtomicLong conv2dSparseCount = new AtomicLong(0);
+ private static AtomicLong conv2dDenseCount = new AtomicLong(0);
+ private static AtomicLong conv2dBwdFilterSparseCount = new AtomicLong(0);
+ private static AtomicLong conv2dBwdFilterDenseCount = new AtomicLong(0);
+ private static AtomicLong conv2dBwdDataSparseCount = new AtomicLong(0);
+ private static AtomicLong conv2dBwdDataDenseCount = new AtomicLong(0);
+ private static AtomicLong im2colSparseCount = new AtomicLong(0);
+ private static AtomicLong im2colDenseCount = new AtomicLong(0);
+ private static AtomicLong maxPoolBwdSparseCount = new AtomicLong(0);
+ private static AtomicLong maxPoolBwdDenseCount = new AtomicLong(0);
+ public static void appendStatistics(StringBuilder sb) {
+ sb.append("LibMatrixDNN dense count (conv/bwdF/bwdD/im2col/maxBwd):\t"
+ + conv2dDenseCount.get() + "/"
+ + conv2dBwdFilterDenseCount.get() + "/"
+ + conv2dBwdDataDenseCount.get() + "/"
+ + im2colDenseCount.get() + "/"
+ + maxPoolBwdDenseCount.get() + ".\n");
+ sb.append("LibMatrixDNN sparse count (conv/bwdF/bwdD/im2col/maxBwd):\t"
+ + conv2dSparseCount.get() + "/"
+ + conv2dBwdFilterSparseCount.get() + "/"
+ + conv2dBwdDataSparseCount.get() + "/"
+ + im2colSparseCount.get() + "/"
+ + maxPoolBwdSparseCount.get() + ".\n");
+ }
+ public static void resetStatistics() {
+ conv2dDenseCount.set(0);
+ conv2dBwdFilterDenseCount.set(0);
+ conv2dBwdDataDenseCount.set(0);
+ im2colDenseCount.set(0);
+ maxPoolBwdDenseCount.set(0);
+
+ conv2dSparseCount.set(0);
+ conv2dBwdFilterSparseCount.set(0);
+ conv2dBwdDataSparseCount.set(0);
+ im2colSparseCount.set(0);
+ maxPoolBwdSparseCount.set(0);
+ }
+
public static class ConvolutionParameters {
public int N; public int C; public int H; public int W;
public int K; public int R; public int S; public int stride_h; public int stride_w; public int pad_h; public int pad_w;
@@ -169,6 +207,15 @@ public class LibMatrixDNN {
throw new DMLRuntimeException("Only positive strides supported");
}
+ if(DMLScript.STATISTICS) {
+ if(input.isInSparseFormat() || dout.isInSparseFormat()) {
+ conv2dBwdFilterSparseCount.addAndGet(1);
+ }
+ else {
+ conv2dBwdFilterDenseCount.addAndGet(1);
+ }
+ }
+
int constrainedNumThreads = OptimizerUtils.getConstrainedNumThreads(params.numThreads);
if(!ALLOW_MULTI_THREADED_OPS || constrainedNumThreads <= 1) {
for (int c = 0; c < params.C; c++) {
@@ -366,6 +413,15 @@ public class LibMatrixDNN {
throw new DMLRuntimeException("Incorrect input to conv2d");
}
+ if(DMLScript.STATISTICS) {
+ if(input.isInSparseFormat() || filter.isInSparseFormat()) {
+ conv2dSparseCount.addAndGet(1);
+ }
+ else {
+ conv2dDenseCount.addAndGet(1);
+ }
+ }
+
params.tmpData = new TemporaryConvolutionData();
if(input.isInSparseFormat()) {
params.tmpData.minIndexArrR = new int[params.H];
@@ -433,6 +489,15 @@ public class LibMatrixDNN {
if(dout.getNumColumns() != params.C*params.P*params.Q || dout.getNumRows() != params.N) {
throw new DMLRuntimeException("Incorrect dout dimensions in maxpooling_backward:" + input.getNumRows() + " " + input.getNumColumns() + " " + params.N + " " + params.K*params.P*params.Q);
}
+
+ if(DMLScript.STATISTICS) {
+ if(input.isInSparseFormat() || dout.isInSparseFormat()) {
+ maxPoolBwdSparseCount.addAndGet(1);
+ }
+ else {
+ maxPoolBwdDenseCount.addAndGet(1);
+ }
+ }
int constrainedNumThreads = OptimizerUtils.getConstrainedNumThreads(params.numThreads);
if(!ALLOW_MULTI_THREADED_OPS || constrainedNumThreads <= 1) {
@@ -455,41 +520,10 @@ public class LibMatrixDNN {
int [] minIndexArrS = params.tmpData.minIndexArrS;
int [] maxIndexArrS = params.tmpData.maxIndexArrS;
- int minCommonIndexS = params.tmpData.minCommonIndexS;
- int maxCommonIndexS = params.tmpData.maxCommonIndexS;
-
+ final int minCommonIndexS = params.tmpData.minCommonIndexS;
+ final int maxCommonIndexS = params.tmpData.maxCommonIndexS;
- int minS = 0;
- if(params.S >= 4) {
- minS = params.S - params.S % 4;
- for (int n = n1; n < n2; n++) {
- for (int c = 0; c < params.C; c++) {
- for (int r = 0; r < params.R; r++) {
- final int filterOffset = k*params.C*params.R*params.S + c*params.R*params.S + r*params.S;
- for (int p = minIndexArrR[r]; p < maxIndexArrR[r]; p++) {
- final int h = p*params.stride_h + r - params.pad_h;
- final int inputOffSet = n*params.C*params.H*params.W + c*params.H*params.W + h*params.W - params.pad_w;
- final int outputOffset = n*params.K*params.P*params.Q + k*params.P*params.Q + p*params.Q;
- // ------------------------------------------------------------------------
- // Efficient striding with vectorization
- for (int q = minCommonIndexS; q < maxCommonIndexS; q++) {
- final int wOffset = inputOffSet + q*params.stride_w;
- final int outOffsetWithQ = outputOffset + q;
- for (int s = 0; s < minS; s += 4) {
- final int inOffsetWithS = wOffset + s;
- final int filterOffsetWithS = filterOffset + s;
- outputArray[outOffsetWithQ] += inputArray[inOffsetWithS]*filterArray[filterOffsetWithS]
- + inputArray[inOffsetWithS+1]*filterArray[filterOffsetWithS+1]
- + inputArray[inOffsetWithS+2]*filterArray[filterOffsetWithS+2]
- + inputArray[inOffsetWithS+3]*filterArray[filterOffsetWithS+3];
- }
- }
- // ------------------------------------------------------------------------
- }
- }
- }
- }
- }
+ final int minS = (params.S >= 4) ? (params.S - params.S % 4) : 0;
for (int n = n1; n < n2; n++) {
for (int c = 0; c < params.C; c++) {
@@ -499,28 +533,28 @@ public class LibMatrixDNN {
final int h = p*params.stride_h + r - params.pad_h;
final int inputOffSet = n*params.C*params.H*params.W + c*params.H*params.W + h*params.W - params.pad_w;
final int outputOffset = n*params.K*params.P*params.Q + k*params.P*params.Q + p*params.Q;
- // ------------------------------------------------------------------------
- // Efficient striding
+
for (int q = minCommonIndexS; q < maxCommonIndexS; q++) {
final int wOffset = inputOffSet + q*params.stride_w;
+ // ------------------------------------------------------------------------
+ // Efficient striding with vectorization
+ final int outOffsetWithQ = outputOffset + q;
+ for (int s = 0; s < minS; s += 4) {
+ final int inOffsetWithS = wOffset + s;
+ final int filterOffsetWithS = filterOffset + s;
+ outputArray[outOffsetWithQ] += inputArray[inOffsetWithS]*filterArray[filterOffsetWithS]
+ + inputArray[inOffsetWithS+1]*filterArray[filterOffsetWithS+1]
+ + inputArray[inOffsetWithS+2]*filterArray[filterOffsetWithS+2]
+ + inputArray[inOffsetWithS+3]*filterArray[filterOffsetWithS+3];
+ }
+ // ------------------------------------------------------------------------
+ // Efficient striding without vectorization
for (int s = minS; s < params.S; s++) {
outputArray[outputOffset + q] += inputArray[wOffset + s]*filterArray[filterOffset + s];
}
+ // ------------------------------------------------------------------------
}
// ------------------------------------------------------------------------
- }
- }
- }
-
-
- for (int c = 0; c < params.C; c++) {
- for (int r = 0; r < params.R; r++) {
- final int filterOffset = k*params.C*params.R*params.S + c*params.R*params.S + r*params.S;
- for (int p = minIndexArrR[r]; p < maxIndexArrR[r]; p++) {
- final int h = p*params.stride_h + r - params.pad_h;
- final int inputOffSet = n*params.C*params.H*params.W + c*params.H*params.W + h*params.W - params.pad_w;
- final int outputOffset = n*params.K*params.P*params.Q + k*params.P*params.Q + p*params.Q;
- // ------------------------------------------------------------------------
// Inefficient striding
for (int s = 0; s < params.S; s++) {
for (int q = minIndexArrS[s]; q < minCommonIndexS; q++) {
@@ -1032,6 +1066,16 @@ public class LibMatrixDNN {
params.output = outputBlock;
params.outputNNZ.set(0);
+
+ if(DMLScript.STATISTICS) {
+ if(input.isInSparseFormat()) {
+ im2colSparseCount.addAndGet(1);
+ }
+ else {
+ im2colDenseCount.addAndGet(1);
+ }
+ }
+
int constrainedNumThreads = OptimizerUtils.getConstrainedNumThreads(params.numThreads);
if(!ALLOW_MULTI_THREADED_OPS || constrainedNumThreads <= 1) {
for (int n = 0; n < params.N; n++) { // Do following for all images