You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2017/02/10 07:02:35 UTC
[2/3] incubator-systemml git commit: [SYSTEMML-1078] Fix missing nnz
maintenance conv2d ops, incl cleanups
[SYSTEMML-1078] Fix missing nnz maintenance conv2d ops, incl cleanups
This patch extends all conv2d operations with (so far unoptimized)
maintenance of the number of non-zeros (nnz), in order to prevent side
effects with update-in-place and other operations that maintain nnz
incrementally.
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/d0b23d60
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/d0b23d60
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/d0b23d60
Branch: refs/heads/master
Commit: d0b23d607998e0bebd3d9d051faf748dd5530ce8
Parents: 827cdba
Author: Matthias Boehm <mb...@gmail.com>
Authored: Fri Feb 10 05:50:52 2017 +0100
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Fri Feb 10 07:55:52 2017 +0100
----------------------------------------------------------------------
.../runtime/controlprogram/ProgramBlock.java | 4 +-
.../cp/ConvolutionCPInstruction.java | 90 ++++---
.../sysml/runtime/matrix/data/LibMatrixDNN.java | 233 ++++++++++---------
3 files changed, 171 insertions(+), 156 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d0b23d60/src/main/java/org/apache/sysml/runtime/controlprogram/ProgramBlock.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/ProgramBlock.java b/src/main/java/org/apache/sysml/runtime/controlprogram/ProgramBlock.java
index eb504ca..739b1cf 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/ProgramBlock.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/ProgramBlock.java
@@ -400,10 +400,10 @@ public class ProgramBlock
if( nnz1 != nnz2 )
throw new DMLRuntimeException("Matrix nnz meta data was incorrect: ("+varname+", actual="+nnz1+", expected="+nnz2+", inst="+lastInst+")");
-
if( sparse1 != sparse2 )
- throw new DMLRuntimeException("Matrix was in wrong data representation: ("+varname+", actual="+sparse1+", expected="+sparse2+", nnz="+nnz1+", inst="+lastInst+")");
+ throw new DMLRuntimeException("Matrix was in wrong data representation: ("+varname+", actual="+sparse1+", expected="+sparse2 +
+ ", nrow="+mb.getNumRows()+", ncol="+mb.getNumColumns()+", nnz="+nnz1+", inst="+lastInst+")");
}
}
}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d0b23d60/src/main/java/org/apache/sysml/runtime/instructions/cp/ConvolutionCPInstruction.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/cp/ConvolutionCPInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/cp/ConvolutionCPInstruction.java
index ed0b548..3513201 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/cp/ConvolutionCPInstruction.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/cp/ConvolutionCPInstruction.java
@@ -21,8 +21,6 @@ package org.apache.sysml.runtime.instructions.cp;
import java.util.ArrayList;
import java.util.Arrays;
-import org.apache.sysml.parser.Expression.DataType;
-import org.apache.sysml.parser.Expression.ValueType;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.controlprogram.context.ExecutionContext;
import org.apache.sysml.runtime.functionobjects.SwapIndex;
@@ -33,8 +31,8 @@ import org.apache.sysml.runtime.matrix.data.MatrixBlock;
import org.apache.sysml.runtime.matrix.operators.ReorgOperator;
import org.apache.sysml.runtime.util.ConvolutionUtils;
-public class ConvolutionCPInstruction extends UnaryCPInstruction {
-
+public class ConvolutionCPInstruction extends UnaryCPInstruction
+{
private CPOperand _in2;
private CPOperand _in3;
private ArrayList<CPOperand> _input_shape;
@@ -101,8 +99,6 @@ public class ConvolutionCPInstruction extends UnaryCPInstruction {
public static ConvolutionCPInstruction parseInstruction(String str)
throws DMLRuntimeException {
- CPOperand in = new CPOperand("", ValueType.UNKNOWN, DataType.UNKNOWN);
- CPOperand out = new CPOperand("", ValueType.UNKNOWN, DataType.UNKNOWN);
String[] parts = InstructionUtils.getInstructionPartsWithValueType(str);
String opcode = parts[0];
@@ -111,8 +107,8 @@ public class ConvolutionCPInstruction extends UnaryCPInstruction {
// stride1, stride2, padding1, padding2
// input_shape1, input_shape2, input_shape3, input_shape4,
// filter_shape1, filter_shape2, filter_shape3, filter_shape4, k
- in.split(parts[1]);
- out.split(parts[14]);
+ CPOperand in = new CPOperand(parts[1]);
+ CPOperand out = new CPOperand(parts[14]);
ArrayList<CPOperand> stride = new ArrayList<CPOperand>();
ArrayList<CPOperand> padding = new ArrayList<CPOperand>();
@@ -143,10 +139,9 @@ public class ConvolutionCPInstruction extends UnaryCPInstruction {
// dout, stride1, stride2, padding1, padding2
// input_shape1, input_shape2, input_shape3, input_shape4,
// filter_shape1, filter_shape2, filter_shape3, filter_shape4, k
- in.split(parts[1]);
- CPOperand in2 = new CPOperand("", ValueType.UNKNOWN, DataType.UNKNOWN);
- in2.split(parts[2]);
- out.split(parts[15]);
+ CPOperand in = new CPOperand(parts[1]);
+ CPOperand in2 = new CPOperand(parts[2]);
+ CPOperand out = new CPOperand(parts[15]);
ArrayList<CPOperand> stride = new ArrayList<CPOperand>();
ArrayList<CPOperand> padding = new ArrayList<CPOperand>();
@@ -174,12 +169,10 @@ public class ConvolutionCPInstruction extends UnaryCPInstruction {
// dout, stride1, stride2, padding1, padding2
// input_shape1, input_shape2, input_shape3, input_shape4,
// filter_shape1, filter_shape2, filter_shape3, filter_shape4, k
- in.split(parts[1]);
- CPOperand in2 = new CPOperand("", ValueType.UNKNOWN, DataType.UNKNOWN);
- in2.split(parts[2]);
- CPOperand in3 = new CPOperand("", ValueType.UNKNOWN, DataType.UNKNOWN);
- in3.split(parts[3]);
- out.split(parts[16]);
+ CPOperand in = new CPOperand(parts[1]);
+ CPOperand in2 = new CPOperand(parts[2]);
+ CPOperand in3 = new CPOperand(parts[3]);
+ CPOperand out = new CPOperand(parts[16]);
ArrayList<CPOperand> stride = new ArrayList<CPOperand>();
ArrayList<CPOperand> padding = new ArrayList<CPOperand>();
@@ -204,10 +197,9 @@ public class ConvolutionCPInstruction extends UnaryCPInstruction {
}
else if (opcode.equalsIgnoreCase("bias_add") || opcode.equals("relu_backward")) {
InstructionUtils.checkNumFields(parts, 4);
- in.split(parts[1]);
- CPOperand in2 = new CPOperand("", ValueType.UNKNOWN, DataType.UNKNOWN);
- in2.split(parts[2]);
- out.split(parts[3]);
+ CPOperand in = new CPOperand(parts[1]);
+ CPOperand in2 = new CPOperand(parts[2]);
+ CPOperand out = new CPOperand(parts[3]);
int k = Integer.parseInt(parts[4]);
return new ConvolutionCPInstruction(in, in2, out, opcode, str, k);
}
@@ -216,24 +208,23 @@ public class ConvolutionCPInstruction extends UnaryCPInstruction {
}
}
- private int getScalarInput(ExecutionContext ec, ArrayList<CPOperand> aL,
- int index) throws DMLRuntimeException {
+ private int getScalarInput(ExecutionContext ec, ArrayList<CPOperand> aL, int index)
+ throws DMLRuntimeException {
return (int) ec.getScalarInput(aL.get(index).getName(),
aL.get(index).getValueType(), aL.get(index).isLiteral())
.getLongValue();
}
+ @SuppressWarnings("unused")
public void processReluBackwardInstruction(ExecutionContext ec) throws DMLRuntimeException {
// (X > 0) * dout
- MatrixBlock outputBlock = null;
MatrixBlock input = ec.getMatrixInput(input1.getName());
MatrixBlock dout = ec.getMatrixInput(_in2.getName());
+ MatrixBlock outputBlock = new MatrixBlock(input.getNumRows(), input.getNumColumns(),
+ LibMatrixDNN.SUPPORTS_SPARSE_OUTPUTS && (input.isInSparseFormat() || dout.isInSparseFormat()));
- if(input.isEmptyBlock() || dout.isEmptyBlock()) {
- outputBlock = new MatrixBlock(input.getNumRows(), input.getNumColumns(), true, 0);
- }
- else {
- outputBlock = getDenseOutputBlock(ec, input.getNumRows(), input.getNumColumns());
+ if( !input.isEmptyBlock() && !dout.isEmptyBlock() ) {
+ outputBlock.allocateDenseOrSparseBlock();
LibMatrixDNN.reluBackward(input, dout, outputBlock, _numThreads);
}
@@ -244,24 +235,24 @@ public class ConvolutionCPInstruction extends UnaryCPInstruction {
}
public void processBiasAddInstruction(ExecutionContext ec) throws DMLRuntimeException {
- MatrixBlock outputBlock = null;
MatrixBlock input = ec.getMatrixInput(input1.getName());
MatrixBlock bias = ec.getMatrixInput(_in2.getName());
+ MatrixBlock outputBlock = null;
if(bias.getNumColumns() != 1) {
throw new DMLRuntimeException("Expected the number of columns of bias matrix to be 1, but found " + bias.getNumColumns());
}
if(input.isEmptyBlock() && bias.isEmptyBlock()) {
- outputBlock = new MatrixBlock(input.getNumRows(), input.getNumColumns(), true, 0);
+ outputBlock = new MatrixBlock(input.getNumRows(), input.getNumColumns(), true);
}
else if(bias.isEmptyBlock()) {
- outputBlock = new MatrixBlock(input.getNumRows(), input.getNumColumns(), input.isInSparseFormat());
- outputBlock.copy(input);
+ outputBlock = new MatrixBlock(input);
}
else {
// As we always fill the output first with bias
- outputBlock = getDenseOutputBlock(ec, input.getNumRows(), input.getNumColumns());
+ outputBlock = new MatrixBlock(input.getNumRows(), input.getNumColumns(), false);
+ outputBlock.allocateDenseBlock();
LibMatrixDNN.biasAdd(input, bias, outputBlock, _numThreads);
}
@@ -307,10 +298,10 @@ public class ConvolutionCPInstruction extends UnaryCPInstruction {
ConvolutionParameters params = new ConvolutionParameters(N, C, H, W, K, R, S, stride_h, stride_w, pad_h, pad_w, _numThreads);
if (instOpcode.equalsIgnoreCase("maxpooling") || instOpcode.equalsIgnoreCase("relu_maxpooling")) {
if(matBlock.isEmptyBlock()) {
- outputBlock = new MatrixBlock(N, C*P*Q, true, 0);
+ outputBlock = new MatrixBlock(N, C*P*Q, true);
}
else {
- outputBlock = getDenseOutputBlock(ec, N, C*P*Q);
+ outputBlock = getDenseOutputBlock(N, C*P*Q);
if(instOpcode.equalsIgnoreCase("maxpooling"))
Arrays.fill(outputBlock.getDenseBlock(), -Double.MAX_VALUE);
LibMatrixDNN.maxpooling(matBlock, outputBlock, params);
@@ -319,10 +310,10 @@ public class ConvolutionCPInstruction extends UnaryCPInstruction {
else if (instOpcode.equalsIgnoreCase("maxpooling_backward")) {
MatrixBlock dout = ec.getMatrixInput(_in2.getName());
if(matBlock.isEmptyBlock() || dout.isEmptyBlock()) {
- outputBlock = new MatrixBlock(N, C*H*W, true, 0);
+ outputBlock = new MatrixBlock(N, C*H*W, true);
}
else {
- outputBlock = getDenseOutputBlock(ec, N, C*H*W);
+ outputBlock = getDenseOutputBlock(N, C*H*W);
LibMatrixDNN.maxpoolingBackward(matBlock, dout, outputBlock, params);
}
ec.releaseMatrixInput(_in2.getName());
@@ -330,10 +321,10 @@ public class ConvolutionCPInstruction extends UnaryCPInstruction {
else if (instOpcode.equalsIgnoreCase("conv2d")) {
MatrixBlock filter = ec.getMatrixInput(_in2.getName());
if(filter.isEmptyBlock() || matBlock.isEmptyBlock()) {
- outputBlock = new MatrixBlock(N, K*P*Q, true, 0);
+ outputBlock = new MatrixBlock(N, K*P*Q, true);
}
else {
- outputBlock = getDenseOutputBlock(ec, N, K*P*Q);
+ outputBlock = getDenseOutputBlock(N, K*P*Q);
LibMatrixDNN.conv2d(matBlock, filter, outputBlock, params);
}
ec.releaseMatrixInput(_in2.getName());
@@ -342,10 +333,10 @@ public class ConvolutionCPInstruction extends UnaryCPInstruction {
MatrixBlock filter = ec.getMatrixInput(_in3.getName());
MatrixBlock bias = ec.getMatrixInput(_in2.getName());
if((filter.isEmptyBlock() || matBlock.isEmptyBlock()) && bias.isEmptyBlock()) {
- outputBlock = new MatrixBlock(N, K*P*Q, true, 0);
+ outputBlock = new MatrixBlock(N, K*P*Q, true);
}
else {
- outputBlock = getDenseOutputBlock(ec, N, K*P*Q);
+ outputBlock = getDenseOutputBlock(N, K*P*Q);
if(!bias.isEmptyBlock())
params.bias = bias;
LibMatrixDNN.conv2d(matBlock, filter, outputBlock, params);
@@ -356,10 +347,10 @@ public class ConvolutionCPInstruction extends UnaryCPInstruction {
else if (instOpcode.equalsIgnoreCase("conv2d_backward_filter")) {
MatrixBlock dout = ec.getMatrixInput(_in2.getName());
if(dout.isEmptyBlock() || matBlock.isEmptyBlock()) {
- outputBlock = new MatrixBlock(K, C*R*S, true, 0);
+ outputBlock = new MatrixBlock(K, C*R*S, true);
}
else {
- outputBlock = getDenseOutputBlock(ec, K, C*R*S);
+ outputBlock = getDenseOutputBlock(K, C*R*S);
LibMatrixDNN.conv2dBackwardFilter(matBlock, dout, outputBlock, params);
}
ec.releaseMatrixInput(_in2.getName());
@@ -367,10 +358,10 @@ public class ConvolutionCPInstruction extends UnaryCPInstruction {
else if (instOpcode.equalsIgnoreCase("conv2d_backward_data")) {
MatrixBlock dout = ec.getMatrixInput(_in2.getName());
if(dout.isEmptyBlock() || matBlock.isEmptyBlock()) {
- outputBlock = new MatrixBlock(N, C * H * W, true, 0);
+ outputBlock = new MatrixBlock(N, C * H * W, true);
}
else {
- outputBlock = getDenseOutputBlock(ec, N, C * H * W);
+ outputBlock = getDenseOutputBlock(N, C * H * W);
LibMatrixDNN.conv2dBackwardData(matBlock, dout, outputBlock, params);
}
ec.releaseMatrixInput(_in2.getName());
@@ -384,10 +375,9 @@ public class ConvolutionCPInstruction extends UnaryCPInstruction {
ec.setMatrixOutput(getOutputVariableName(), outputBlock);
}
- private MatrixBlock getDenseOutputBlock(ExecutionContext ec, int numRows, int numCols) throws DMLRuntimeException {
- MatrixBlock outputBlock = new MatrixBlock(numRows, numCols, false, numRows * numCols);
+ private MatrixBlock getDenseOutputBlock(int numRows, int numCols) throws DMLRuntimeException {
+ MatrixBlock outputBlock = new MatrixBlock(numRows, numCols, false);
outputBlock.allocateDenseBlock();
- outputBlock.setNonZeros(-1);
return outputBlock;
}
}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d0b23d60/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
index 9207171..29e59bd 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
@@ -24,7 +24,6 @@ import java.util.Iterator;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentLinkedQueue;
-import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
@@ -48,13 +47,14 @@ import org.apache.sysml.runtime.DMLRuntimeException;
public class LibMatrixDNN {
protected static final Log LOG = LogFactory.getLog(LibMatrixDNN.class.getName());
- // ------------------------------------------------------------------------------------------------
- // Useful flags for performance testing:
- private static boolean DISPLAY_STATISTICS = false;
- private static final boolean ALLOW_MULTI_THREADED_OPS = true;
- // ------------------------------------------------------------------------------------------------
- enum TaskType {
+ //library configurations and external contracts
+ public static final boolean SUPPORTS_SPARSE_OUTPUTS = false; //operations able to handle sparse outputs
+ private static final boolean DISPLAY_STATISTICS = false; //conv2d summaries in stats output
+ private static final boolean ALLOW_MULTI_THREADED_OPS = true; //enable multi-threading in cp
+ private static final int NUM_TASK_FACTOR = 2; //number of tasks is vcores scaled by this factor
+
+ private enum TaskType {
MaxPooling_Forward, MaxPooling_Backward,
// Alternate approaches that we tried but the performance was unsatisfactory be included: direct, non-looped im2col
LoopedIm2ColConv2d, LoopedIm2ColConv2dBwdFilter, LoopedIm2ColConv2dBwdData,
@@ -79,6 +79,7 @@ public class LibMatrixDNN {
private static AtomicLong loopedConvBwdDataMatMultTime = new AtomicLong(0);
private static AtomicLong loopedConvBwdDataCol2ImTime = new AtomicLong(0);
+ @SuppressWarnings("unused")
public static void appendStatistics(StringBuilder sb) {
if(DMLScript.STATISTICS && DISPLAY_STATISTICS && (conv2dDenseCount.get() != 0 || conv2dSparseCount.get() != 0)) {
sb.append("LibMatrixDNN dense count (conv/bwdF/bwdD/im2col/maxBwd):\t"
@@ -135,6 +136,7 @@ public class LibMatrixDNN {
* @param params convolution parameters
* @throws DMLRuntimeException if DMLRuntimeException occurs
*/
+ @SuppressWarnings("unused")
public static void conv2dBackwardData(MatrixBlock filter, MatrixBlock dout, MatrixBlock outputBlock, ConvolutionParameters params) throws DMLRuntimeException {
params.input1 = filter;
params.input2 = dout;
@@ -157,6 +159,9 @@ public class LibMatrixDNN {
}
runConvTask(TaskType.LoopedIm2ColConv2dBwdData, params);
+
+ //post-processing: maintain nnz
+ outputBlock.recomputeNonZeros();
}
/**
@@ -168,6 +173,7 @@ public class LibMatrixDNN {
* @param params convolution parameters
* @throws DMLRuntimeException if DMLRuntimeException occurs
*/
+ @SuppressWarnings("unused")
public static void conv2dBackwardFilter(MatrixBlock input, MatrixBlock dout, MatrixBlock outputBlock, ConvolutionParameters params) throws DMLRuntimeException {
params.input1 = input;
params.input2 = dout;
@@ -190,6 +196,9 @@ public class LibMatrixDNN {
}
runConvTask(TaskType.LoopedIm2ColConv2dBwdFilter, params);
+
+ //post-processing: maintain nnz
+ outputBlock.recomputeNonZeros();
}
/**
@@ -259,6 +268,7 @@ public class LibMatrixDNN {
}
}
+ @SuppressWarnings("unused")
private static void doLoopedIm2ColConv2dBwdData(int n, MatrixBlock dout_reshaped, ConvolutionParameters params) throws DMLRuntimeException {
MatrixBlock filter = params.input1;
MatrixBlock dout = params.input2;
@@ -277,6 +287,7 @@ public class LibMatrixDNN {
}
}
+ @SuppressWarnings("unused")
private static MatrixBlock doLoopedIm2ColConv2dBwdFilter(int n,
MatrixBlock im2ColOutBlock, MatrixBlock dout_reshaped, MatrixBlock partialRetBlock, ConvolutionParameters params) throws DMLRuntimeException {
long t1 = DMLScript.STATISTICS && DISPLAY_STATISTICS ? System.nanoTime() : 0;
@@ -306,6 +317,7 @@ public class LibMatrixDNN {
ret[2] = j % W;
}
+ @SuppressWarnings("unused")
public static void conv2d(MatrixBlock input, MatrixBlock filter, MatrixBlock outputBlock, ConvolutionParameters params) throws DMLRuntimeException {
params.input1 = input;
params.input2 = filter;
@@ -333,8 +345,12 @@ public class LibMatrixDNN {
}
runConvTask(TaskType.LoopedIm2ColConv2d, params);
+
+ //post-processing: maintain nnz
+ outputBlock.recomputeNonZeros();
}
+ @SuppressWarnings("unused")
private static void doLoopedIm2ColConv2d(int n, MatrixBlock im2ColOutBlock, ConvolutionParameters params) throws DMLRuntimeException {
long t1 = DMLScript.STATISTICS && DISPLAY_STATISTICS ? System.nanoTime() : 0;
doIm2col(n, im2ColOutBlock, params);
@@ -372,6 +388,9 @@ public class LibMatrixDNN {
System.arraycopy(matMultOutBlock.denseBlock, 0, params.output.denseBlock, destPos, length);
}
// -----------------------------------------------------------------------------
+
+ //post-processing: maintain nnz
+ params.output.recomputeNonZeros();
}
/**
@@ -383,6 +402,7 @@ public class LibMatrixDNN {
* @param params convolution parameters
* @throws DMLRuntimeException if DMLRuntimeException occurs
*/
+ @SuppressWarnings("unused")
public static void maxpoolingBackward(MatrixBlock input, MatrixBlock dout, MatrixBlock outputBlock, ConvolutionParameters params) throws DMLRuntimeException {
params.input1 = input;
params.input2 = dout;
@@ -409,6 +429,9 @@ public class LibMatrixDNN {
fillIndexesArray(params);
runConvTask(TaskType.MaxPooling_Backward, params);
+
+ //post-processing: maintain nnz
+ outputBlock.recomputeNonZeros();
}
private static void fillIndexesArray(ConvolutionParameters params) {
@@ -611,10 +634,13 @@ public class LibMatrixDNN {
throw new DMLRuntimeException("Incorrect dimensions for relu_backward:" +
input.getNumRows() + " != " + dout.getNumRows() + " || " + input.getNumColumns() + " != " + dout.getNumColumns());
}
+
runConvTask(TaskType.ReluBackward, params);
+
+ //note: no post-processing as nnz maintained per task
}
- private static void doReluBackward(int n, ConvolutionParameters params) throws DMLRuntimeException {
+ private static long doReluBackward(ConvolutionParameters params, int rl, int ru) throws DMLRuntimeException {
// (X > 0) * dout
double [] outputArray = params.output.getDenseBlock();
int numOutCols = params.input1.getNumColumns();
@@ -622,14 +648,14 @@ public class LibMatrixDNN {
if(!params.input1.isInSparseFormat() && !params.input2.isInSparseFormat()) {
double [] inputArr = params.input1.getDenseBlock();
double [] doutArr = params.input2.getDenseBlock();
- for(int i = n*numOutCols; i < (n+1)*numOutCols; i++) {
+ for(int i = rl*numOutCols; i < ru*numOutCols; i++) {
outputArray[i] = inputArr[i] > 0 ? doutArr[i] : 0;
}
}
else {
// Perform (X > 0)
if(params.input1.isInSparseFormat()) {
- Iterator<IJV> iter = params.input1.sparseBlock.getIterator(n, n+1);
+ Iterator<IJV> iter = params.input1.sparseBlock.getIterator(rl, ru);
while(iter.hasNext()) {
IJV ijv = iter.next();
int i = ijv.getI();
@@ -639,13 +665,13 @@ public class LibMatrixDNN {
}
else {
double [] inputArr = params.input1.getDenseBlock();
- for(int i = n*numOutCols; i < (n+1)*numOutCols; i++) {
+ for(int i = rl*numOutCols; i < ru*numOutCols; i++) {
outputArray[i] = inputArr[i] > 0 ? 1 : 0;
}
}
// Then perform (X > 0) * dout
if(params.input2.isInSparseFormat()) {
- Iterator<IJV> iter = params.input2.sparseBlock.getIterator(n, n+1);
+ Iterator<IJV> iter = params.input2.sparseBlock.getIterator(rl, ru);
while(iter.hasNext()) {
IJV ijv = iter.next();
int i = ijv.getI();
@@ -655,11 +681,14 @@ public class LibMatrixDNN {
}
else {
double [] doutArr = params.input2.getDenseBlock();
- for(int i = n*numOutCols; i < (n+1)*numOutCols; i++) {
+ for(int i = rl*numOutCols; i < ru*numOutCols; i++) {
outputArray[i] *= doutArr[i];
}
}
}
+
+ //post-processing: maintain nnz
+ return params.output.recomputeNonZeros(rl, ru-1, 0, numOutCols-1);
}
@@ -704,9 +733,12 @@ public class LibMatrixDNN {
else {
runConvTask(TaskType.BiasAdd, params);
}
+
+ //post-processing: maintain nnz
+ params.output.recomputeNonZeros();
}
- private static void doBiasAdd(int n1, int n2, ConvolutionParameters params) throws DMLRuntimeException {
+ private static void doBiasAdd(ConvolutionParameters params, int rl, int ru) throws DMLRuntimeException {
double [] outputArray = params.output.getDenseBlock();
int PQ = params.C;
int numOutCols = params.input1.getNumColumns();
@@ -715,8 +747,8 @@ public class LibMatrixDNN {
double [] inputArr = params.input1.getDenseBlock();
double [] biasArr = params.input2.getDenseBlock();
int K = params.K;
- int index = n1*K*PQ;
- for(int n = n1; n < n2; n++) {
+ int index = rl*K*PQ;
+ for(int n = rl; n < ru; n++) {
for(int k = 0; k < K; k++) {
for(int pq = 0; pq < PQ; pq++, index++) {
outputArray[index] = inputArr[index] + biasArr[k];
@@ -725,9 +757,9 @@ public class LibMatrixDNN {
}
}
else {
- fillBias(params.input2, outputArray, n1, n2, params.N, params.K, PQ);
+ fillBias(params.input2, outputArray, rl, ru, params.N, params.K, PQ);
if(params.input1.isInSparseFormat()) {
- Iterator<IJV> iter = params.input1.sparseBlock.getIterator(n1, n2);
+ Iterator<IJV> iter = params.input1.sparseBlock.getIterator(rl, ru);
while(iter.hasNext()) {
IJV ijv = iter.next();
int i = ijv.getI();
@@ -737,7 +769,7 @@ public class LibMatrixDNN {
}
else {
double [] inputArr = params.input1.getDenseBlock();
- for(int i = n1*numOutCols; i < n2*numOutCols; i++) {
+ for(int i = rl*numOutCols; i < ru*numOutCols; i++) {
outputArray[i] += inputArr[i];
}
}
@@ -780,6 +812,9 @@ public class LibMatrixDNN {
fillIndexesArray(params);
runConvTask(TaskType.MaxPooling_Forward, params);
+
+ //post-processing: maintain nnz
+ outputBlock.recomputeNonZeros();
}
private static void doPooling(int n, ConvolutionParameters params) throws DMLRuntimeException {
@@ -872,75 +907,63 @@ public class LibMatrixDNN {
for(int i = 0; i < poolSize; i++) {
if(type == TaskType.LoopedIm2ColConv2d || type == TaskType.LoopedIm2ColConv2dBwdFilter) {
MatrixBlock im2ColOutBlock = new MatrixBlock(params.C*params.R*params.S, params.P*params.Q, false);
- im2ColOutBlock.allocateDenseBlock(true);
+ im2ColOutBlock.allocateDenseBlock();
im2ColOutBlocks.add(im2ColOutBlock);
}
if(type == TaskType.LoopedIm2ColConv2dBwdFilter) {
MatrixBlock partialRetBlock = new MatrixBlock(params.C*params.R*params.S, params.K, false);
- partialRetBlock.allocateDenseBlock(true);
+ partialRetBlock.allocateDenseBlock();
partialRetBlocks.add(partialRetBlock);
}
if(type == TaskType.LoopedIm2ColConv2dBwdData || type == TaskType.LoopedIm2ColConv2dBwdFilter) {
MatrixBlock doutReshapedBlock = new MatrixBlock(params.P*params.Q, params.K, false);
- doutReshapedBlock.allocateDenseBlock(true);
+ doutReshapedBlock.allocateDenseBlock();
doutReshapedBlocks.add(doutReshapedBlock);
}
}
}
// Methods to execute convolution-related tasks using multiple threads.
private static void runConvTask(TaskType type, ConvolutionParameters params) throws DMLRuntimeException {
- int constrainedNumThreads = OptimizerUtils.getConstrainedNumThreads(params.numThreads);
+ int k = OptimizerUtils.getConstrainedNumThreads(params.numThreads);
ConcurrentLinkedQueue<MatrixBlock> im2ColOutBlocks = new ConcurrentLinkedQueue<MatrixBlock>();
ConcurrentLinkedQueue<MatrixBlock> doutReshapedBlocks = new ConcurrentLinkedQueue<MatrixBlock>();
ConcurrentLinkedQueue<MatrixBlock> partialRetBlocks = new ConcurrentLinkedQueue<MatrixBlock>();
- if (ALLOW_MULTI_THREADED_OPS && params.isOutputThreadSafe() && constrainedNumThreads > 1) {
- int poolSize = Math.min(constrainedNumThreads, params.N);
+
+ if (ALLOW_MULTI_THREADED_OPS && params.isOutputThreadSafe() && k > 1) {
+ int poolSize = Math.min(k, params.N);
addMatrixBlocks(poolSize, type, params, im2ColOutBlocks, doutReshapedBlocks, partialRetBlocks);
+
ArrayList<ConvTask> tasks = new ArrayList<ConvTask>();
- int NSize = params.N - poolSize;
- if(NSize >= constrainedNumThreads) {
- for(int n = 0; n < params.N; n++)
- tasks.add(new ConvTask(n, n+1, type, params, im2ColOutBlocks, doutReshapedBlocks, partialRetBlocks));
- }
- else {
- int numNTasks = (int) Math.ceil(((double) NSize) / constrainedNumThreads);
- for (int n = 0; n < NSize; n += numNTasks) {
- tasks.add(new ConvTask(n, Math.min(NSize, n+numNTasks), type, params, im2ColOutBlocks, doutReshapedBlocks, partialRetBlocks));
- }
- for (int n = NSize; n < params.N; n++)
- tasks.add(new ConvTask(n, n+1, type, params, im2ColOutBlocks, doutReshapedBlocks, partialRetBlocks));
- }
+ int blklen = (int)(Math.ceil((double)params.N/poolSize/NUM_TASK_FACTOR));
+ for( int i=0; i<poolSize*NUM_TASK_FACTOR && i*blklen<params.N; i++ )
+ tasks.add(new ConvTask(i*blklen, Math.min((i+1)*blklen, params.N),
+ type, params, im2ColOutBlocks, doutReshapedBlocks, partialRetBlocks));
- ExecutorService pool = Executors.newFixedThreadPool( poolSize );
- List<Future<Object>> taskret;
try {
- taskret = pool.invokeAll(tasks);
+ ExecutorService pool = Executors.newFixedThreadPool( poolSize );
+ List<Future<Long>> taskret = pool.invokeAll(tasks);
pool.shutdown();
- for( Future<Object> task : taskret ) {
- task.get();
- }
+ for( Future<Long> task : taskret )
+ params.output.nonZeros += task.get();
if(type == TaskType.LoopedIm2ColConv2dBwdFilter) {
for(MatrixBlock partialRetBlock : partialRetBlocks) {
elementWiseInPlaceTransposedAddition(params.output, partialRetBlock);
}
}
- } catch (InterruptedException e) {
- throw new DMLRuntimeException("Error while executing multi-threaded " + type.name(), e);
- } catch (ExecutionException e) {
+ }
+ catch (Exception e) {
throw new DMLRuntimeException("Error while executing multi-threaded " + type.name(), e);
}
}
else {
addMatrixBlocks(1, type, params, im2ColOutBlocks, doutReshapedBlocks, partialRetBlocks);
- ConvTask task = new ConvTask(0, 0, type, params, im2ColOutBlocks, doutReshapedBlocks, partialRetBlocks);
try {
- for(int n = 0; n < params.N; n++) {
- task.n1 = n;
- task.n2 = n+1;
- task.call();
- }
+ //execute single task and maintain nnz if supported
+ params.output.setNonZeros(new ConvTask(0, params.N, type, params, im2ColOutBlocks,
+ doutReshapedBlocks, partialRetBlocks).call());
+
if(type == TaskType.LoopedIm2ColConv2dBwdFilter) {
for(MatrixBlock partialRetBlock : partialRetBlocks) {
elementWiseInPlaceTransposedAddition(params.output, partialRetBlock);
@@ -958,92 +981,94 @@ public class LibMatrixDNN {
* to be executed in multi-thread manner.
*
*/
- private static class ConvTask implements Callable<Object> {
- public int n1; public int n2;
- ConvolutionParameters params;
- TaskType type;
- ConcurrentLinkedQueue<MatrixBlock> im2ColOutBlocks;
- ConcurrentLinkedQueue<MatrixBlock> partialRetBlocks;
- ConcurrentLinkedQueue<MatrixBlock> doutReshapedBlocks;
- public ConvTask(int n1, int n2, TaskType type, ConvolutionParameters params,
+ private static class ConvTask implements Callable<Long>
+ {
+ public int _rl;
+ public int _ru;
+ private final ConvolutionParameters _params;
+ private final TaskType _type;
+ private final ConcurrentLinkedQueue<MatrixBlock> _im2ColOutBlocks;
+ private final ConcurrentLinkedQueue<MatrixBlock> _partialRetBlocks;
+ private final ConcurrentLinkedQueue<MatrixBlock> _doutReshapedBlocks;
+
+ public ConvTask(int rl, int ru, TaskType type, ConvolutionParameters params,
ConcurrentLinkedQueue<MatrixBlock> im2ColOutBlocks,
ConcurrentLinkedQueue<MatrixBlock> doutReshapedBlocks,
ConcurrentLinkedQueue<MatrixBlock> partialRetBlocks) {
- this.n1 = n1;
- this.n2 = n2;
- this.type = type;
- this.params = params;
- this.im2ColOutBlocks = im2ColOutBlocks;
- this.partialRetBlocks = partialRetBlocks;
- this.doutReshapedBlocks = doutReshapedBlocks;
+ _rl = rl;
+ _ru = ru;
+ _type = type;
+ _params = params;
+ _im2ColOutBlocks = im2ColOutBlocks;
+ _partialRetBlocks = partialRetBlocks;
+ _doutReshapedBlocks = doutReshapedBlocks;
}
@Override
- public Object call() throws DMLRuntimeException {
- switch(type) {
+ public Long call() throws DMLRuntimeException {
+ long lnnz = 0; //nnz per partition
+
+ switch(_type) {
case MaxPooling_Forward:
- {
- for(int n = n1; n < n2; n++) {
- doPooling(n, params);
- }
+ for(int n = _rl; n < _ru; n++)
+ doPooling(n, _params);
break;
- }
case MaxPooling_Backward:
- for(int n = n1; n < n2; n++)
- doPoolingBackward(n, params);
+ for(int n = _rl; n < _ru; n++)
+ doPoolingBackward(n, _params);
break;
case BiasAdd:
- doBiasAdd(n1, n2, params);
+ doBiasAdd(_params, _rl, _ru);
break;
case ReluBackward:
- for(int n = n1; n < n2; n++)
- doReluBackward(n, params);
+ lnnz = doReluBackward(_params, _rl, _ru);
break;
case LoopedIm2ColConv2d:
{
- MatrixBlock im2ColOutBlock = im2ColOutBlocks.remove();
- for(int n = n1; n < n2; n++)
- doLoopedIm2ColConv2d(n, im2ColOutBlock, params);
- im2ColOutBlocks.add(im2ColOutBlock);
- if(params.bias != null)
- addBias(n1, n2, params);
+ MatrixBlock im2ColOutBlock = _im2ColOutBlocks.remove();
+ for(int n = _rl; n < _ru; n++)
+ doLoopedIm2ColConv2d(n, im2ColOutBlock, _params);
+ _im2ColOutBlocks.add(im2ColOutBlock);
+ if(_params.bias != null)
+ addBias(_params, _rl, _ru);
break;
}
case LoopedIm2ColConv2dBwdFilter:
{
- MatrixBlock im2ColOutBlock = im2ColOutBlocks.remove();
- MatrixBlock partialRetBlock = partialRetBlocks.remove();
- MatrixBlock doutReshapedBlock = doutReshapedBlocks.remove();
- for(int n = n1; n < n2; n++)
- partialRetBlock = doLoopedIm2ColConv2dBwdFilter(n, im2ColOutBlock, doutReshapedBlock, partialRetBlock, params);
- im2ColOutBlocks.add(im2ColOutBlock);
- partialRetBlocks.add(partialRetBlock);
- doutReshapedBlocks.add(doutReshapedBlock);
+ MatrixBlock im2ColOutBlock = _im2ColOutBlocks.remove();
+ MatrixBlock partialRetBlock = _partialRetBlocks.remove();
+ MatrixBlock doutReshapedBlock = _doutReshapedBlocks.remove();
+ for(int n = _rl; n < _ru; n++)
+ partialRetBlock = doLoopedIm2ColConv2dBwdFilter(n, im2ColOutBlock, doutReshapedBlock, partialRetBlock, _params);
+ _im2ColOutBlocks.add(im2ColOutBlock);
+ _partialRetBlocks.add(partialRetBlock);
+ _doutReshapedBlocks.add(doutReshapedBlock);
break;
}
case LoopedIm2ColConv2dBwdData:
{
- MatrixBlock doutReshapedBlock = doutReshapedBlocks.remove();
- for(int n = n1; n < n2; n++)
- doLoopedIm2ColConv2dBwdData(n, doutReshapedBlock, params);
- doutReshapedBlocks.add(doutReshapedBlock);
+ MatrixBlock doutReshapedBlock = _doutReshapedBlocks.remove();
+ for(int n = _rl; n < _ru; n++)
+ doLoopedIm2ColConv2dBwdData(n, doutReshapedBlock, _params);
+ _doutReshapedBlocks.add(doutReshapedBlock);
break;
}
default:
- throw new DMLRuntimeException("Unsupported ConvTask:" + type.name());
+ throw new DMLRuntimeException("Unsupported ConvTask:" + _type.name());
}
- return null;
+
+ return lnnz;
}
}
- private static void addBias(int n1, int n2, ConvolutionParameters params) {
+ private static void addBias(ConvolutionParameters params, int rl, int ru) {
int PQ = params.P*params.Q;
int K = params.K;
double [] outputArr = params.output.getDenseBlock();
if(!params.bias.isInSparseFormat()) {
double [] biasArr = params.bias.getDenseBlock();
- int index = n1*K*PQ;
- for(int n = n1; n < n2; n++) {
+ int index = rl*K*PQ;
+ for(int n = rl; n < ru; n++) {
for(int k = 0; k < K; k++) {
for(int pq = 0; pq < PQ; pq++, index++) {
outputArr[index] += biasArr[k];
@@ -1057,7 +1082,7 @@ public class LibMatrixDNN {
IJV ijv = iter.next();
int k = ijv.getI();
double val = ijv.getV();
- for(int n = n1; n < n2; n++) {
+ for(int n = rl; n < ru; n++) {
int index = n*K*PQ + k*PQ;
for(int pq = 0; pq < PQ; pq++, index++) {
outputArr[index] += val;