You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by ni...@apache.org on 2016/08/14 20:02:51 UTC
incubator-systemml git commit: [SYSTEMML-540] Removed non-performing
operators and avoided unnecessary sparse conversions
Repository: incubator-systemml
Updated Branches:
refs/heads/master 623779912 -> e9aa58414
[SYSTEMML-540] Removed non-performing operators and avoided unnecessary sparse conversions
- Removed im2col, col2im, rotate180, reshape_col as instructions
- Improved performance of conv2d, conv2d_backward, conv2d_backward_filter
- Converted sparse filters to dense
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/e9aa5841
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/e9aa5841
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/e9aa5841
Branch: refs/heads/master
Commit: e9aa58414fcbcc39b9099e8722ab40e7c60a159f
Parents: 6237799
Author: Niketan Pansare <np...@us.ibm.com>
Authored: Sun Aug 14 12:58:54 2016 -0700
Committer: Niketan Pansare <np...@us.ibm.com>
Committed: Sun Aug 14 12:58:54 2016 -0700
----------------------------------------------------------------------
.../org/apache/sysml/hops/ConvolutionOp.java | 133 +-----------
src/main/java/org/apache/sysml/hops/Hop.java | 5 -
.../java/org/apache/sysml/hops/ReorgOp.java | 7 -
.../apache/sysml/lops/ConvolutionTransform.java | 21 +-
.../sysml/parser/BuiltinFunctionExpression.java | 4 +-
.../org/apache/sysml/parser/DMLTranslator.java | 41 +---
.../instructions/CPInstructionParser.java | 4 -
.../cp/ConvolutionCPInstruction.java | 78 +------
.../sysml/runtime/matrix/data/LibMatrixDNN.java | 202 ++-----------------
.../runtime/matrix/data/LibMatrixMult.java | 31 ++-
.../sysml/runtime/matrix/data/MatrixBlock.java | 2 +-
.../sysml/runtime/util/ConvolutionUtils.java | 201 ------------------
.../functions/tensor/Conv2DBackwardTest.java | 51 +----
.../functions/tensor/Conv2DTest.java | 53 +----
14 files changed, 74 insertions(+), 759 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e9aa5841/src/main/java/org/apache/sysml/hops/ConvolutionOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/ConvolutionOp.java b/src/main/java/org/apache/sysml/hops/ConvolutionOp.java
index fe277d1..8c38a48 100644
--- a/src/main/java/org/apache/sysml/hops/ConvolutionOp.java
+++ b/src/main/java/org/apache/sysml/hops/ConvolutionOp.java
@@ -33,15 +33,12 @@ import org.apache.sysml.parser.Expression.ValueType;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
import org.apache.sysml.runtime.matrix.data.LibMatrixDNN.ConvolutionParameters;
-import org.apache.sysml.runtime.util.ConvolutionUtils;
public class ConvolutionOp extends Hop implements MultiThreadedHop
{
private Hop.ConvOp op;
private int _maxNumThreads = -1; //-1 for unlimited
-
- public static boolean FORCE_NON_IM2COL = false;
private ConvolutionOp() {
//default constructor for clone
@@ -94,41 +91,14 @@ public class ConvolutionOp extends Hop implements MultiThreadedHop
ExecType et = optFindExecType();
- Lop ret = ConvolutionUtils.constructConvolutionLops(this, et);
- if(ret != null) {
- setLops(ret);
- return ret;
- }
- ret = ConvolutionUtils.constructConvolutionBackwardDataLops(this, et);
- if(ret != null) {
- setLops(ret);
- return ret;
- }
-
ArrayList<Hop> inputs = getInput();
switch( op )
{
- case IM2COL:
- case RESHAPE_COL:
- case ROTATE180:
- case COL2IM:
- {
- et = ExecType.CP; // TODO: Since max_backwards and other Convolution Ops only implemented for CP
-
- if( et == ExecType.CP )
- {
- setLops(constructConvolutionLops(et, inputs));
- break;
- }
- else {
- // TODO: Add support for SPARK/MR backends once we are happy with the performance of
- // single node Lenet script.
- throw new HopsException("Unimplemented ConvolutionOp for execution type: " + et.name());
- }
- // break;
- }
case MAX_POOLING:
case MAX_POOLING_BACKWARD:
+ case DIRECT_CONV2D:
+ case DIRECT_CONV2D_BACKWARD_DATA:
+ case DIRECT_CONV2D_BACKWARD_FILTER:
{
//TODO: Fix me. Currently forcing the instruction to GPU if gpu flag is set
if(DMLScript.USE_ACCELERATOR) {
@@ -147,22 +117,6 @@ public class ConvolutionOp extends Hop implements MultiThreadedHop
}
// break;
}
- case DIRECT_CONV2D:
- case DIRECT_CONV2D_BACKWARD_DATA:
- case DIRECT_CONV2D_BACKWARD_FILTER:
- {
- if( et == ExecType.GPU )
- {
- setLops(constructConvolutionLops(et, inputs));
- break;
- }
- else {
- // TODO: Add support for SPARK/MR backends once we are happy with the performance of
- // single node Lenet script.
- throw new HopsException("Unimplemented ConvolutionOp for execution type: " + et.name());
- }
- // break;
- }
default:
throw new HopsException("Unsupported lops construction for operation type '"+op+"'.");
}
@@ -261,24 +215,6 @@ public class ConvolutionOp extends Hop implements MultiThreadedHop
protected double computeOutputMemEstimate( long dim1, long dim2, long nnz )
{
double sparsity = 1.0;
- switch(op)
- {
- case RESHAPE_COL:
- case ROTATE180:
- {
- sparsity = OptimizerUtils.getSparsity(dim1, dim2, nnz);
- break;
- }
- case IM2COL:
- case COL2IM:
- case MAX_POOLING:
- case MAX_POOLING_BACKWARD:
- case DIRECT_CONV2D:
- case DIRECT_CONV2D_BACKWARD_FILTER:
- case DIRECT_CONV2D_BACKWARD_DATA:
- sparsity = 1.0; // worst-case estimate
- break;
- }
return OptimizerUtils.estimateSizeExactSparsity(dim1, dim2, sparsity);
}
@@ -306,38 +242,6 @@ public class ConvolutionOp extends Hop implements MultiThreadedHop
switch(op)
{
- case RESHAPE_COL:
- {
- ret = new long[3];
- ret[0] = params.N;
- ret[1] = getExtractedVal(params.K, params.P, params.Q);
- ret[2] = mc.getNonZeros(); // exact estimates
- break;
- }
- case ROTATE180:
- {
- ret = new long[3];
- ret[0] = getExtractedVal(params.N, params.P, params.Q);
- ret[1] = params.K;
- ret[2] = mc.getNonZeros(); // exact estimates
- break;
- }
- case IM2COL:
- {
- ret = new long[3];
- ret[0] = getExtractedVal(params.C, params.R, params.S);
- ret[1] = getExtractedVal(params.N, params.P, params.Q);
- ret[2] = -1;
- break;
- }
- case COL2IM:
- {
- ret = new long[3];
- ret[0] = params.N;
- ret[1] = getExtractedVal(params.C, params.H, params.W);
- ret[2] = -1;
- break;
- }
case MAX_POOLING:
{
ret = new long[3];
@@ -496,8 +400,6 @@ public class ConvolutionOp extends Hop implements MultiThreadedHop
@Override
public void refreshSizeInformation()
{
- Hop input1 = getInput().get(0);
-
ConvolutionParameters params;
try {
params = parseInput();
@@ -507,35 +409,6 @@ public class ConvolutionOp extends Hop implements MultiThreadedHop
switch(op)
{
- case IM2COL:
- {
- _dim1 = getExtractedVal(params.C, params.R, params.S);
- _dim2 = getExtractedVal(params.N, params.P, params.Q);
- _nnz = -1;
- break;
- }
- case COL2IM:
- {
- // Set _dim1, _dim2 and if possible _nnz (use input1.getNnz())
- _dim1 = params.N;
- _dim2 = getExtractedVal(params.C, params.H, params.W);
- _nnz = -1; // cannot infer stats
- break;
- }
- case RESHAPE_COL:
- {
- _dim1 = params.N;
- _dim2 = getExtractedVal(params.K, params.P, params.Q);
- _nnz = input1.getNnz(); // exact estimates
- break;
- }
- case ROTATE180:
- {
- _dim1 = getExtractedVal(params.N, params.P, params.Q);
- _dim2 = params.K;
- _nnz = input1.getNnz(); // exact estimates
- break;
- }
case MAX_POOLING:
{
_dim1 = params.N;
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e9aa5841/src/main/java/org/apache/sysml/hops/Hop.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/Hop.java b/src/main/java/org/apache/sysml/hops/Hop.java
index 7d69940..6afe60e 100644
--- a/src/main/java/org/apache/sysml/hops/Hop.java
+++ b/src/main/java/org/apache/sysml/hops/Hop.java
@@ -1147,7 +1147,6 @@ public abstract class Hop
};
public enum ConvOp {
- IM2COL, RESHAPE_COL, ROTATE180, COL2IM,
MAX_POOLING, MAX_POOLING_BACKWARD,
DIRECT_CONV2D, DIRECT_CONV2D_BACKWARD_FILTER, DIRECT_CONV2D_BACKWARD_DATA
};
@@ -1220,10 +1219,6 @@ public abstract class Hop
protected static final HashMap<ConvOp, org.apache.sysml.lops.ConvolutionTransform.OperationTypes> HopsConv2Lops;
static {
HopsConv2Lops = new HashMap<ConvOp, org.apache.sysml.lops.ConvolutionTransform.OperationTypes>();
- HopsConv2Lops.put(ConvOp.IM2COL, org.apache.sysml.lops.ConvolutionTransform.OperationTypes.IM2COL);
- HopsConv2Lops.put(ConvOp.RESHAPE_COL, org.apache.sysml.lops.ConvolutionTransform.OperationTypes.RESHAPE_COL);
- HopsConv2Lops.put(ConvOp.ROTATE180, org.apache.sysml.lops.ConvolutionTransform.OperationTypes.ROTATE180);
- HopsConv2Lops.put(ConvOp.COL2IM, org.apache.sysml.lops.ConvolutionTransform.OperationTypes.COL2IM);
HopsConv2Lops.put(ConvOp.MAX_POOLING, org.apache.sysml.lops.ConvolutionTransform.OperationTypes.MAX_POOLING);
HopsConv2Lops.put(ConvOp.MAX_POOLING_BACKWARD, org.apache.sysml.lops.ConvolutionTransform.OperationTypes.MAX_POOLING_BACKWARD);
HopsConv2Lops.put(ConvOp.DIRECT_CONV2D, org.apache.sysml.lops.ConvolutionTransform.OperationTypes.DIRECT_CONV2D);
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e9aa5841/src/main/java/org/apache/sysml/hops/ReorgOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/ReorgOp.java b/src/main/java/org/apache/sysml/hops/ReorgOp.java
index 5f5138b..7c87a76 100644
--- a/src/main/java/org/apache/sysml/hops/ReorgOp.java
+++ b/src/main/java/org/apache/sysml/hops/ReorgOp.java
@@ -35,7 +35,6 @@ import org.apache.sysml.lops.Transform.OperationTypes;
import org.apache.sysml.parser.Expression.DataType;
import org.apache.sysml.parser.Expression.ValueType;
import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
-import org.apache.sysml.runtime.util.ConvolutionUtils;
/**
* Reorg (cell) operation: aij
@@ -120,12 +119,6 @@ public class ReorgOp extends Hop implements MultiThreadedHop
if( getLops() != null )
return getLops();
- Lop ret = ConvolutionUtils.constructConvolutionBackwardFilterLops(this);
- if(ret != null) {
- setLops( ret );
- return ret;
- }
-
ExecType et = optFindExecType();
switch( op )
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e9aa5841/src/main/java/org/apache/sysml/lops/ConvolutionTransform.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/lops/ConvolutionTransform.java b/src/main/java/org/apache/sysml/lops/ConvolutionTransform.java
index fdf280d..9164d36 100644
--- a/src/main/java/org/apache/sysml/lops/ConvolutionTransform.java
+++ b/src/main/java/org/apache/sysml/lops/ConvolutionTransform.java
@@ -30,12 +30,7 @@ public class ConvolutionTransform extends Lop
public enum OperationTypes {
- IM2COL,
- RESHAPE_COL,
- ROTATE180,
- COL2IM,
- MAX_POOLING,
- MAX_POOLING_BACKWARD,
+ MAX_POOLING, MAX_POOLING_BACKWARD,
DIRECT_CONV2D, DIRECT_CONV2D_BACKWARD_FILTER, DIRECT_CONV2D_BACKWARD_DATA
};
@@ -101,19 +96,7 @@ public class ConvolutionTransform extends Lop
private String getOpcode() {
switch(operation) {
-
- case IM2COL:
- return "im2col";
-
- case RESHAPE_COL:
- return "reshape_col";
-
- case ROTATE180:
- return "rotate180";
-
- case COL2IM:
- return "col2im";
-
+
case MAX_POOLING:
return "maxpooling";
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e9aa5841/src/main/java/org/apache/sysml/parser/BuiltinFunctionExpression.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/parser/BuiltinFunctionExpression.java b/src/main/java/org/apache/sysml/parser/BuiltinFunctionExpression.java
index 3bb7b0a..bf31347 100644
--- a/src/main/java/org/apache/sysml/parser/BuiltinFunctionExpression.java
+++ b/src/main/java/org/apache/sysml/parser/BuiltinFunctionExpression.java
@@ -1109,8 +1109,8 @@ public class BuiltinFunctionExpression extends DataIdentifier
case MAX_POOL_BACKWARD:
{
// At DML level:
- // output = conv2d(input, filter, input_shape=[3, 2, 2], filter_shape=[3, 2, 2],
- // strides=[1, 1], border_mode="valid")
+ // output = conv2d(input, filter, input_shape=[1, 3, 2, 2], filter_shape=[1, 3, 2, 2],
+ // strides=[1, 1], padding=[1,1])
//
// Converted to following in constructor (only supported NCHW):
// output = conv2d(input, filter, stride1, stride2, padding1,padding2,
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e9aa5841/src/main/java/org/apache/sysml/parser/DMLTranslator.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/parser/DMLTranslator.java b/src/main/java/org/apache/sysml/parser/DMLTranslator.java
index b5bb7c3..f3cb0b1 100644
--- a/src/main/java/org/apache/sysml/parser/DMLTranslator.java
+++ b/src/main/java/org/apache/sysml/parser/DMLTranslator.java
@@ -2803,18 +2803,9 @@ public class DMLTranslator
case CONV2D:
{
- Hop filter = expr2;
- // Step 1: IM2COL
Hop image = expr;
- ArrayList<Hop> inHops1 = getALHopsForConvOp(image, source, 2, hops);
- Hop loweredMat = new ConvolutionOp(image.getName(), image.getDataType(), image.getValueType(), Hop.ConvOp.IM2COL, inHops1);
-
- // Step 2: Matrix multiplication
- Hop temp = new AggBinaryOp("temp" + target.getName(), target.getDataType(), target.getValueType(), OpOp2.MULT, AggOp.SUM, filter, loweredMat);
-
- // Step 3: Reshape col
- ArrayList<Hop> inHops2 = getALHopsForConvOp(temp, source, 2, hops);
- currBuiltinOp = new ConvolutionOp(target.getName(), target.getDataType(), target.getValueType(), Hop.ConvOp.RESHAPE_COL, inHops2);
+ ArrayList<Hop> inHops1 = getALHopsForConvOp(image, source, 1, hops);
+ currBuiltinOp = new ConvolutionOp(target.getName(), target.getDataType(), target.getValueType(), Hop.ConvOp.DIRECT_CONV2D, inHops1);
setBlockSizeAndRefreshSizeInfo(image, currBuiltinOp);
break;
}
@@ -2841,33 +2832,17 @@ public class DMLTranslator
case CONV2D_BACKWARD_FILTER:
{
Hop image = expr;
- Hop dout = expr2;
-
- ArrayList<Hop> inHops1 = getALHopsForConvOp(image, source, 2, hops);
- Hop x_col = new ConvolutionOp(image.getName(), image.getDataType(), image.getValueType(), Hop.ConvOp.IM2COL, inHops1);
-
- ArrayList<Hop> inHops2 = getALHopsForConvOp(dout, source, 2, hops);
- Hop dout_reshaped = new ConvolutionOp(dout.getName(), dout.getDataType(), dout.getValueType(), Hop.ConvOp.ROTATE180, inHops2);
-
- Hop dfilter1 = new AggBinaryOp(target.getName(), target.getDataType(), target.getValueType(), OpOp2.MULT, AggOp.SUM, x_col, dout_reshaped);
- currBuiltinOp = new ReorgOp("tempTranspose" + image.getName(), image.getDataType(), image.getValueType(), Hop.ReOrgOp.TRANSPOSE, dfilter1);
+ ArrayList<Hop> inHops1 = getALHopsForConvOp(image, source, 1, hops);
+ currBuiltinOp = new ConvolutionOp(target.getName(), target.getDataType(), target.getValueType(), Hop.ConvOp.DIRECT_CONV2D_BACKWARD_FILTER, inHops1);
setBlockSizeAndRefreshSizeInfo(image, currBuiltinOp);
break;
}
case CONV2D_BACKWARD_DATA:
{
- Hop filter = expr;
- Hop dout = expr2;
-
- ArrayList<Hop> inHops1 = getALHopsForConvOp(dout, source, 2, hops);
- Hop dout_reshaped = new ConvolutionOp(dout.getName(), dout.getDataType(), dout.getValueType(), Hop.ConvOp.ROTATE180, inHops1);
-
- Hop temp1 = new AggBinaryOp("temp" + target.getName(), target.getDataType(), target.getValueType(), OpOp2.MULT, AggOp.SUM, dout_reshaped, filter);
- // Hop temp2 = new ReorgOp("tempTranspose" + target.getName(), target.getDataType(), target.getValueType(), Hop.ReOrgOp.TRANSPOSE, temp1);
-
- ArrayList<Hop> inHops2 = getALHopsForConvOp(temp1, source, 2, hops);
- currBuiltinOp = new ConvolutionOp(target.getName(), target.getDataType(), target.getValueType(), Hop.ConvOp.COL2IM, inHops2);
- setBlockSizeAndRefreshSizeInfo(filter, currBuiltinOp);
+ Hop image = expr;
+ ArrayList<Hop> inHops1 = getALHopsForConvOp(image, source, 1, hops);
+ currBuiltinOp = new ConvolutionOp(target.getName(), target.getDataType(), target.getValueType(), Hop.ConvOp.DIRECT_CONV2D_BACKWARD_DATA, inHops1);
+ setBlockSizeAndRefreshSizeInfo(image, currBuiltinOp);
break;
}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e9aa5841/src/main/java/org/apache/sysml/runtime/instructions/CPInstructionParser.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/CPInstructionParser.java b/src/main/java/org/apache/sysml/runtime/instructions/CPInstructionParser.java
index ae13d3d..909525f 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/CPInstructionParser.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/CPInstructionParser.java
@@ -218,10 +218,6 @@ public class CPInstructionParser extends InstructionParser
String2CPInstructionType.put( "rsort" , CPINSTRUCTION_TYPE.Reorg);
// Opcodes related to convolutions
- String2CPInstructionType.put( "im2col" , CPINSTRUCTION_TYPE.Convolution);
- String2CPInstructionType.put( "reshape_col" , CPINSTRUCTION_TYPE.Convolution);
- String2CPInstructionType.put( "rotate180" , CPINSTRUCTION_TYPE.Convolution);
- String2CPInstructionType.put( "col2im" , CPINSTRUCTION_TYPE.Convolution);
String2CPInstructionType.put( "maxpooling" , CPINSTRUCTION_TYPE.Convolution);
String2CPInstructionType.put( "maxpooling_backward" , CPINSTRUCTION_TYPE.Convolution);
String2CPInstructionType.put( "conv2d" , CPINSTRUCTION_TYPE.Convolution);
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e9aa5841/src/main/java/org/apache/sysml/runtime/instructions/cp/ConvolutionCPInstruction.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/cp/ConvolutionCPInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/cp/ConvolutionCPInstruction.java
index 4b04eca..5e83ffa 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/cp/ConvolutionCPInstruction.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/cp/ConvolutionCPInstruction.java
@@ -80,13 +80,7 @@ public class ConvolutionCPInstruction extends UnaryCPInstruction {
String[] parts = InstructionUtils.getInstructionPartsWithValueType(str);
String opcode = parts[0];
- if (opcode.equalsIgnoreCase("reshape_col")
- || opcode.equalsIgnoreCase("rotate180")
- || opcode.equalsIgnoreCase("im2col")
- || opcode.equalsIgnoreCase("col2im")
- || opcode.equalsIgnoreCase("pooling_pre_reshape")
- || opcode.equalsIgnoreCase("pooling_post_reshape")
- || opcode.equalsIgnoreCase("maxpooling")) {
+ if (opcode.equalsIgnoreCase("maxpooling")) {
InstructionUtils.checkNumFields(parts, 15);
// stride1, stride2, padding1, padding2
// input_shape1, input_shape2, input_shape3, input_shape4,
@@ -115,8 +109,7 @@ public class ConvolutionCPInstruction extends UnaryCPInstruction {
return new ConvolutionCPInstruction(in, out, opcode, str, stride,
padding, input_shape, filter_shape, k);
}
- else if (opcode.equalsIgnoreCase("pooling_backward_reshape")
- || opcode.equalsIgnoreCase("maxpooling_backward")
+ else if (opcode.equalsIgnoreCase("maxpooling_backward")
|| opcode.equalsIgnoreCase("conv2d")
|| opcode.equalsIgnoreCase("conv2d_backward_filter")
|| opcode.equalsIgnoreCase("conv2d_backward_data")) {
@@ -186,38 +179,7 @@ public class ConvolutionCPInstruction extends UnaryCPInstruction {
int Q = (int) ConvolutionUtils.getQ(W, S, stride_w, pad_w);
ConvolutionParameters params = new ConvolutionParameters(N, C, H, W, K, R, S, stride_h, stride_w, pad_h, pad_w, _numThreads);
-
- if (instOpcode.equalsIgnoreCase("im2col")) {
- checkHeightWidth(ec, params);
- checkInputDimensionForIm2col(matBlock, params);
- outputBlock = getDenseOutputBlock(ec, C * R * S, N * P * Q, true);
- params.setReuseNonZeroedOutput(_reuseNonZeroedOutput);
- LibMatrixDNN.im2col(matBlock, outputBlock, params);
- }
- else if (instOpcode.equalsIgnoreCase("reshape_col")) {
- checkHeightWidth(ec, params);
- // Is eligible for REUSE_NONZEROED_OUTPUT but cannot guarantee that previous output has been rmvar-ed
- // without somewhat expensive HashMap checks
- outputBlock = getDenseOutputBlock(ec, N, K * P * Q, true);
- params.setReuseNonZeroedOutput(_reuseNonZeroedOutput);
- LibMatrixDNN.reshape_col(matBlock, outputBlock, params);
- }
- else if (instOpcode.equalsIgnoreCase("rotate180")) {
- checkHeightWidth(ec, params);
- // Is eligible for REUSE_NONZEROED_OUTPUT and always an intermediate instruction
- outputBlock = getDenseOutputBlock(ec, N * P * Q, K, true);
- params.setReuseNonZeroedOutput(_reuseNonZeroedOutput);
- LibMatrixDNN.rotate180(matBlock, outputBlock, params);
- }
- else if (instOpcode.equalsIgnoreCase("col2im")) {
- checkHeightWidth(ec, params);
- checkInputDimensionForCol2im(matBlock, params);
- // needs to be zeroed-out
- outputBlock = getDenseOutputBlock(ec, N, C * H * W, false);
- params.setReuseNonZeroedOutput(_reuseNonZeroedOutput);
- LibMatrixDNN.col2im(matBlock, outputBlock, params);
- }
- else if (instOpcode.equalsIgnoreCase("maxpooling")) {
+ if (instOpcode.equalsIgnoreCase("maxpooling")) {
// Is eligible for REUSE_NONZEROED_OUTPUT but cannot guarantee that previous output has been rmvar-ed
// without somewhat expensive HashMap checks
outputBlock = getDenseOutputBlock(ec, N, C*P*Q, true);
@@ -284,38 +246,4 @@ public class ConvolutionCPInstruction extends UnaryCPInstruction {
Statistics.incrementAllocationTime(System.nanoTime()-start, false);
return outputBlock;
}
-
- private void checkHeightWidth(ExecutionContext ec, ConvolutionParameters params) throws DMLRuntimeException {
- int numChannelsInFilter = getScalarInput(ec, _filter_shape, 1);
-
- if (numChannelsInFilter != params.C) {
- throw new DMLRuntimeException("The number of channels of input and filter should match");
- }
- if((params.W + 2 * params.pad_w - params.S) % params.stride_w != 0) {
- throw new DMLRuntimeException("The width does not work (Hint: (W + 2 * pad_w - S) % stride_w should be 0 [ ==> (" + params.W + "+" + " 2*" + params.pad_w + "-" + params.S + ") % " + params.stride_w + "!= 0] ");
- }
- if((params.H + 2 * params.pad_h - params.R) % params.stride_h != 0) {
- throw new DMLRuntimeException("The height does not work (Hint: (H + 2 * pad_h - R) % stride_h should be 0 [ ==> (" + params.H + "+" + " 2*" + params.pad_h + "-" + params.R + ") % " + params.stride_h + "!= 0] ");
- }
- if(params.H <= 0) {
- throw new DMLRuntimeException("Height of output patch should be zero");
- }
- if(params.Q <= 0) {
- throw new DMLRuntimeException("Width of output patch should be zero");
- }
- }
-
-
-
- private void checkInputDimensionForIm2col(MatrixBlock matBlock, ConvolutionParameters params) throws DMLRuntimeException {
- if((params.N != matBlock.getNumRows() || params.C*params.H*params.W != matBlock.getNumColumns())) {
- throw new DMLRuntimeException("Incorrect input shape in im2col");
- }
- }
-
- private void checkInputDimensionForCol2im(MatrixBlock matBlock, ConvolutionParameters params) throws DMLRuntimeException {
- if((params.N*params.P*params.Q != matBlock.getNumRows() || params.C*params.R*params.S != matBlock.getNumColumns())) {
- throw new DMLRuntimeException("Incorrect input shape in col2im");
- }
- }
}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e9aa5841/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
index 59a6a47..c2b3f7d 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
@@ -74,7 +74,7 @@ public class LibMatrixDNN {
}
enum TaskType {
- ReshapeCol, Rotate180, Im2Col, Col2Im, MaxPooling_Forward, MaxPooling_Backward,
+ MaxPooling_Forward, MaxPooling_Backward,
LoopedIm2ColConv2d, LoopedIm2ColConv2dBwdFilter, LoopedIm2ColConv2dBwdData
}
@@ -250,6 +250,11 @@ public class LibMatrixDNN {
throw new DMLRuntimeException("Only positive strides supported");
}
+ // Convert filter (which is relatively small matrix) to dense
+ if(params.input1.isInSparseFormat()) {
+ params.input1.sparseToDense();
+ }
+
if(DMLScript.STATISTICS) {
if(filter.isInSparseFormat() || dout.isInSparseFormat()) {
conv2dBwdDataSparseCount.addAndGet(1);
@@ -375,7 +380,7 @@ public class LibMatrixDNN {
MatrixBlock temp = new MatrixBlock(params.P*params.Q, params.C*params.R*params.S, false);
long t1 = DMLScript.STATISTICS ? System.nanoTime() : 0;
- LibMatrixMult.matrixMult(dout_reshaped, filter, temp);
+ LibMatrixMult.matrixMult(dout_reshaped, filter, temp, false);
long t2 = DMLScript.STATISTICS ? System.nanoTime() : 0 ;
doCol2imOverSingleImage(n, temp, params);
long t3 = DMLScript.STATISTICS ? System.nanoTime() : 0 ;
@@ -400,7 +405,7 @@ public class LibMatrixDNN {
MatrixBlock temp = new MatrixBlock(params.C*params.R*params.S, params.K, false);
long t3 = DMLScript.STATISTICS ? System.nanoTime() : 0 ;
- LibMatrixMult.matrixMult(im2ColOutBlock, dout_reshaped, temp);
+ LibMatrixMult.matrixMult(im2ColOutBlock, dout_reshaped, temp, false);
long t4 = DMLScript.STATISTICS ? System.nanoTime() : 0 ;
if(DMLScript.STATISTICS) {
loopedConvBwdFilterMatMultTime.addAndGet(t4-t3);
@@ -427,6 +432,11 @@ public class LibMatrixDNN {
throw new DMLRuntimeException("Incorrect input to conv2d");
}
+ // Convert filter (which is relatively small matrix) to dense
+ if(params.input2.isInSparseFormat()) {
+ params.input2.sparseToDense();
+ }
+
if(DMLScript.STATISTICS) {
if(input.isInSparseFormat() || filter.isInSparseFormat()) {
conv2dSparseCount.addAndGet(1);
@@ -461,7 +471,7 @@ public class LibMatrixDNN {
im2ColOutBlock.setNonZeros(nnz);
MatrixBlock matMultOutBlock = new MatrixBlock(params.K, params.P*params.Q, false);
- LibMatrixMult.matrixMult(params.input2, im2ColOutBlock, matMultOutBlock);
+ LibMatrixMult.matrixMult(params.input2, im2ColOutBlock, matMultOutBlock, false);
long t3 = DMLScript.STATISTICS ? System.nanoTime() : 0;
if(DMLScript.STATISTICS) {
@@ -751,37 +761,6 @@ public class LibMatrixDNN {
}
params.outputNNZ.addAndGet(tmpNNZ);
}
-
- // Reshape a 4D tensor of dimension (N, K, P, Q) to matrix of dimension (NPQ, K)
- public static void rotate180(MatrixBlock input, MatrixBlock outputBlock, ConvolutionParameters params) throws DMLRuntimeException {
- params.input1 = input;
- params.output = outputBlock;
-
- if(input.getNumColumns() != params.K*params.P*params.Q || input.getNumRows() != params.N) {
- throw new DMLRuntimeException("Incorrect input dimensions in rotate180:" + input.getNumRows() + " " + input.getNumColumns() + " " + params.N + " " + params.K*params.P*params.Q);
- }
-
- int constrainedNumThreads = OptimizerUtils.getConstrainedNumThreads(params.numThreads);
- if(!ALLOW_MULTI_THREADED_OPS || constrainedNumThreads <= 1) {
- warnSingleThreaded();
- for (int n = 0; n < params.N; n++) {
- doRotate180(n, params);
- }
- }
- else {
- runConvTask(constrainedNumThreads, 1, TaskType.Rotate180, params);
- }
- outputBlock.setNonZeros(input.getNonZeros()); // As number of non-zeros doesnot change for rotate180
- }
-
- private static void doRotate180(int n, ConvolutionParameters params) throws DMLRuntimeException {
- double [] outputArray = null;
- if (!params.output.isInSparseFormat())
- outputArray = params.output.getDenseBlock();
- else
- throw new DMLRuntimeException("Sparse output is not supported for rotate180");
- doRotate180(n, n, params.input1, outputArray, params, false);
- }
private static void doRotate180(int inputN, int outputN, MatrixBlock input,
double [] outputArray, ConvolutionParameters params, boolean zeroOutSparseOutput) throws DMLRuntimeException {
@@ -818,29 +797,6 @@ public class LibMatrixDNN {
}
}
-
- // Reshape a matrix of dimension (K, NPQ) to 4D tensor of dimension (N, K, P, params.Q)
- public static void reshape_col(MatrixBlock input, MatrixBlock outputBlock, ConvolutionParameters params) throws DMLRuntimeException {
- params.input1 = input;
- params.output = outputBlock;
-
- if(input.getNumColumns() != params.N*params.P*params.Q || input.getNumRows() != params.K) {
- throw new DMLRuntimeException("Incorrect input dimensions in reshape_col:" + input.getNumRows() + " " + input.getNumColumns());
- }
-
- int constrainedNumThreads = OptimizerUtils.getConstrainedNumThreads(params.numThreads);
- if(!ALLOW_MULTI_THREADED_OPS || constrainedNumThreads <= 1) {
- warnSingleThreaded();
- for (int n = 0; n < params.N; n++) {
- doReshapeCol(n, params);
- }
- }
- else {
- runConvTask(constrainedNumThreads, 1, TaskType.ReshapeCol, params);
- }
- outputBlock.setNonZeros(input.getNonZeros()); // As number of non-zeros doesnot change for reshape_col
- }
-
private static int [] getTaskSize(int constrainedNumThreads, int maxNumTaskSize1, int maxNumTaskSize2) {
int taskSize1 = 1; int taskSize2 = 1;
// Why this heuristics ? To reduce the impact of the thread-creation overhead in case of small tasks
@@ -939,30 +895,6 @@ public class LibMatrixDNN {
@Override
public Object call() throws DMLRuntimeException {
switch(type) {
- case ReshapeCol:
- for (int n = n1; n < n2; n++) {
- doReshapeCol(n, params);
- }
- break;
- case Rotate180:
- for (int n = n1; n < n2; n++) {
- doRotate180(n, params);
- }
- break;
- case Im2Col:
- long nnz = 0;
- for (int n = n1; n < n2; n++) {
- for (int z = z1; z < z2; z++) {
- nnz += doIm2colOverInputPath_NCHW(n, z, params);
- }
- }
- params.outputNNZ.addAndGet(nnz);
- break;
- case Col2Im:
- for (int n = n1; n < n2; n++) {
- doCol2imOverMultipleImages(n, params);
- }
- break;
case MaxPooling_Forward:
for (int n = n1; n < n2; n++) {
for (int z = z1; z < z2; z++) {
@@ -1011,84 +943,6 @@ public class LibMatrixDNN {
}
}
- private static void doReshapeCol(int n, ConvolutionParameters params) {
- double [] inputArray = null;
- if (!params.input1.isInSparseFormat())
- inputArray = params.input1.getDenseBlock();
- double [] outputArray = null;
- if (!params.output.isInSparseFormat())
- outputArray = params.output.getDenseBlock();
-
- if(inputArray != null) {
- for (int k = 0; k < params.K; k++) {
- System.arraycopy(inputArray, k*params.N*params.P*params.Q + n*params.P*params.Q, outputArray, n*params.K*params.P*params.Q + k*params.P*params.Q, params.P*params.Q);
- }
- }
- else {
- for (int k = 0; k < params.K; k++) {
- for (int p = 0; p < params.P; p++) {
- for (int q = 0; q < params.Q; q++) {
- outputArray[n*params.K*params.P*params.Q + k*params.P*params.Q + p*params.Q + q] = params.input1.quickGetValue(k, n*params.P*params.Q + p*params.Q + q);
- }
- }
- }
- }
- }
-
- // Converts a 4D tensor (N, C, R, S) to a matrix of dimension (CRS, NPQ)
- public static void im2col(MatrixBlock input, MatrixBlock outputBlock, ConvolutionParameters params) throws DMLRuntimeException {
- params.input1 = input;
- params.output = outputBlock;
-
- params.outputNNZ.set(0);
-
- if(DMLScript.STATISTICS) {
- if(input.isInSparseFormat()) {
- im2colSparseCount.addAndGet(1);
- }
- else {
- im2colDenseCount.addAndGet(1);
- }
- }
-
- int constrainedNumThreads = OptimizerUtils.getConstrainedNumThreads(params.numThreads);
- if(!ALLOW_MULTI_THREADED_OPS || constrainedNumThreads <= 1) {
- warnSingleThreaded();
- long nnz = 0;
- for (int n = 0; n < params.N; n++) { // Do following for all images
- for (int c = 0; c < params.C; c++) { // Since format is NCHW
- nnz += doIm2colOverInputPath_NCHW(n, c, params);
- }
- }
- outputBlock.setNonZeros(nnz);
- }
- else {
- runConvTask(constrainedNumThreads, params.C, TaskType.Im2Col, params);
- outputBlock.setNonZeros(params.outputNNZ.get());
- }
-
- }
-
- // Converts a matrix of dimension (CRS, NPQ) to a 4D tensor (N, C, H, W)
- public static void col2im(MatrixBlock input, MatrixBlock outputBlock, ConvolutionParameters params) throws DMLRuntimeException {
- params.input1 = input;
- params.output = outputBlock;
-
- int constrainedNumThreads = OptimizerUtils.getConstrainedNumThreads(params.numThreads);
- if(!ALLOW_MULTI_THREADED_OPS || constrainedNumThreads <= 1) {
- warnSingleThreaded();
- // Sequential col2im
- for (int n = 0; n < params.N; n++) { // Do following for all images
- doCol2imOverMultipleImages(n, params);
- }
- }
- else {
- // Parallel col2im
- runConvTask(constrainedNumThreads, 1, TaskType.Col2Im, params);
- }
- }
-
-
// Converts input: PQ X CRS matrix and writes to 1 X CHW
private static void doCol2imOverSingleImage(int outputN, MatrixBlock input, ConvolutionParameters params) throws DMLRuntimeException {
if(input.rlen != params.P*params.Q || input.clen != params.C*params.R*params.S) {
@@ -1169,34 +1023,6 @@ public class LibMatrixDNN {
}
}
- // NPQ X CRS
- private static void doCol2imOverMultipleImages(int n, ConvolutionParameters params) throws DMLRuntimeException {
- MatrixBlock input = params.input1;
-
- if(input.rlen != params.N*params.P*params.Q || input.clen != params.C*params.R*params.S) {
- throw new DMLRuntimeException("Incorrect input dimensions");
- }
-
- double [] outputArray = null;
- if (!params.output.isInSparseFormat())
- outputArray = params.output.getDenseBlock();
- else {
- throw new DMLRuntimeException("Only dense output is implemented");
- }
-
- if(!input.isInSparseFormat()) {
- double [] inputArray = input.getDenseBlock();
- doCol2IMDenseInput(n, n, inputArray, outputArray, params);
- }
- else {
- doCol2IMSparseInput(n, n, input.getSparseBlockIterator(n*params.P*params.Q, (n+1)*params.P*params.Q), outputArray, params);
- }
- }
-
- private static long doIm2colOverInputPath_NCHW(int n, int c, ConvolutionParameters params) throws DMLRuntimeException {
- return doIm2colOverInputPath_NCHW(n, c, null, params);
- }
-
private static long doIm2colOverInputPath_NCHW(int n, int c, MatrixBlock output, ConvolutionParameters params) throws DMLRuntimeException {
double [] inputArray = null;
if (!params.input1.isInSparseFormat())
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e9aa5841/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
index 9d878be..6902d40 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
@@ -100,6 +100,31 @@ public class LibMatrixMult
}
/**
+ * This method allows one to disable the examSparsity check. This feature is useful if matrixMult is used as an intermediate
+ * operation (for example: LibMatrixDNN). It makes sense for LibMatrixDNN because the output is internally
+ * consumed by another dense instruction, which makes repeated conversion to sparse wasteful.
+ * This should be used in rare cases and if you are unsure,
+ * use the method 'matrixMult(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret)' instead.
+ *
+ * @param m1
+ * @param m2
+ * @param ret
+ * @param examSparsity
+ * @throws DMLRuntimeException
+ */
+ public static void matrixMult(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, boolean examSparsity)
+ throws DMLRuntimeException
+ {
+ matrixMult(m1, m2, ret, 0, m1.rlen, examSparsity);
+ }
+
+ public static void matrixMult(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, int rl, int ru)
+ throws DMLRuntimeException
+ {
+ matrixMult(m1, m2, ret, rl, ru, true);
+ }
+
+ /**
*
* @param m1
* @param m2
@@ -108,7 +133,7 @@ public class LibMatrixMult
* @param ru
* @throws DMLRuntimeException
*/
- public static void matrixMult(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, int rl, int ru)
+ public static void matrixMult(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, int rl, int ru, boolean examSparsity)
throws DMLRuntimeException
{
//check inputs / outputs
@@ -146,7 +171,9 @@ public class LibMatrixMult
//post-processing: nnz/representation
if( !ret.sparse )
ret.recomputeNonZeros();
- ret.examSparsity();
+
+ if(examSparsity)
+ ret.examSparsity();
//System.out.println("MM ("+m1.isInSparseFormat()+","+m1.getNumRows()+","+m1.getNumColumns()+","+m1.getNonZeros()+")x" +
// "("+m2.isInSparseFormat()+","+m2.getNumRows()+","+m2.getNumColumns()+","+m2.getNonZeros()+") in "+time.stop());
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e9aa5841/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
index 8f84bd7..1316ad8 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
@@ -1224,7 +1224,7 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab
*
* @throws DMLRuntimeException
*/
- private void sparseToDense()
+ void sparseToDense()
throws DMLRuntimeException
{
//set target representation
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e9aa5841/src/main/java/org/apache/sysml/runtime/util/ConvolutionUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/util/ConvolutionUtils.java b/src/main/java/org/apache/sysml/runtime/util/ConvolutionUtils.java
index ac19816..80b20cd 100644
--- a/src/main/java/org/apache/sysml/runtime/util/ConvolutionUtils.java
+++ b/src/main/java/org/apache/sysml/runtime/util/ConvolutionUtils.java
@@ -19,20 +19,6 @@
package org.apache.sysml.runtime.util;
-import java.util.ArrayList;
-
-import org.apache.sysml.api.DMLScript;
-import org.apache.sysml.hops.AggBinaryOp;
-import org.apache.sysml.hops.ConvolutionOp;
-import org.apache.sysml.hops.Hop;
-import org.apache.sysml.hops.HopsException;
-import org.apache.sysml.hops.ReorgOp;
-import org.apache.sysml.hops.Hop.ConvOp;
-import org.apache.sysml.hops.Hop.ReOrgOp;
-import org.apache.sysml.lops.Lop;
-import org.apache.sysml.lops.LopsException;
-import org.apache.sysml.lops.LopProperties.ExecType;
-
public class ConvolutionUtils {
@@ -54,191 +40,4 @@ public class ConvolutionUtils {
return ret;
}
- private static boolean isMatMult(Hop hop) {
- if(hop != null && hop instanceof AggBinaryOp) {
- return true;
- }
- return false;
- }
- private static boolean isTranspose(Hop hop) {
- if(hop != null && hop instanceof ReorgOp && ((ReorgOp)hop).getOp() == ReOrgOp.TRANSPOSE) {
- return true;
- }
- return false;
- }
- private static boolean isConvolutionOp(Hop hop, Hop.ConvOp op) {
- if(hop != null && hop instanceof ConvolutionOp && ((ConvolutionOp) hop).getOp() == op) {
- return true;
- }
- return false;
- }
-
- // Simple heuristic that prefers im2col for non-test/non-validation cases.
- private static boolean preferIm2Col(ExecType et, long N, long K, long C, long R, long S, long P, long Q) throws HopsException {
- if(et == ExecType.CP && ConvolutionOp.FORCE_NON_IM2COL) {
- return false;
- }
-// else if(et == ExecType.CP && N < 256 ) {
-// return true; // Prefer im2col to non-test/non-validation
-// }
- return false;
- }
-
- public static Lop constructConvolutionBackwardFilterLops(Hop currentHop) throws HopsException, LopsException {
- ExecType et = ExecType.CP; // TODO: Check memory estimates
- if(DMLScript.USE_ACCELERATOR)
- et = ExecType.GPU; // TODO: Add memory estimate checks
- else if(et == ExecType.MR || et == ExecType.SPARK)
- return null;
-
- if(currentHop != null && isTranspose(currentHop)) {
- Hop matMult = currentHop.getInput().get(0);
- if(matMult != null && isMatMult(matMult)) {
- Hop x_col = matMult.getInput().get(0);
- Hop right = matMult.getInput().get(1);
- if(isConvolutionOp(x_col, ConvOp.IM2COL) && isConvolutionOp(right, ConvOp.ROTATE180)) {
- Hop image = x_col.getInput().get(0);
- Hop dout = right.getInput().get(0);
- ArrayList<Hop> inputs = new ArrayList<Hop>();
- inputs.add(image);
- inputs.add(dout);
- for(int i = 1; i < x_col.getInput().size(); i++) {
- inputs.add(x_col.getInput().get(i));
- }
-
- // K, C * R * S
- long N = currentHop.computeSizeInformation(inputs.get(6));
- long C = currentHop.computeSizeInformation(inputs.get(7));
- long H = currentHop.computeSizeInformation(inputs.get(8));
- long W = currentHop.computeSizeInformation(inputs.get(9));
- long K = currentHop.computeSizeInformation(inputs.get(10));
- long R = currentHop.computeSizeInformation(inputs.get(12));
- long S = currentHop.computeSizeInformation(inputs.get(13));
- long stride_h = currentHop.computeSizeInformation(inputs.get(2));
- long stride_w = currentHop.computeSizeInformation(inputs.get(3));
- long pad_h = currentHop.computeSizeInformation(inputs.get(4));
- long pad_w = currentHop.computeSizeInformation(inputs.get(5));
- long P = -1; long Q = -1;
- if(H > 0 && R > 0 && stride_h > 0 && pad_h > 0)
- P = ConvolutionUtils.getP(H, R, stride_h, pad_h);
- if(W > 0 && S > 0 && stride_w > 0 && pad_w > 0)
- Q = ConvolutionUtils.getQ(W, S, stride_w, pad_w);
-
- if(preferIm2Col(et, N, K, C, R, S, P, Q)) {
- return null;
- }
-
- long rlen = K;
- long clen = ConvolutionOp.getExtractedVal(C, R, S);
- return ConvolutionOp.constructFusedConvolutionLops(et, inputs, ConvOp.DIRECT_CONV2D_BACKWARD_FILTER, (ConvolutionOp) x_col, rlen, clen);
- }
- }
- }
- return null;
- }
-
- public static Lop constructConvolutionLops(Hop currentHop, ExecType et) throws HopsException, LopsException {
- if(DMLScript.USE_ACCELERATOR)
- et = ExecType.GPU; // TODO: Add memory estimate checks
- else if(et == ExecType.MR || et == ExecType.SPARK)
- return null;
-
- if(currentHop != null && isConvolutionOp(currentHop, ConvOp.RESHAPE_COL)) {
- Hop matMult = currentHop.getInput().get(0);
- if(matMult != null && isMatMult(matMult)) {
- Hop filter = matMult.getInput().get(0);
- Hop x_col = matMult.getInput().get(1);
- if(isConvolutionOp(x_col, ConvOp.IM2COL)) {
- Hop image = x_col.getInput().get(0);
- ArrayList<Hop> inputs = new ArrayList<Hop>();
- inputs.add(image);
- inputs.add(filter);
- for(int i = 1; i < x_col.getInput().size(); i++) {
- inputs.add(x_col.getInput().get(i));
- }
-
- // N, K * P * Q
- long N = currentHop.computeSizeInformation(inputs.get(6));
- long C = currentHop.computeSizeInformation(inputs.get(7));
- long H = currentHop.computeSizeInformation(inputs.get(8));
- long W = currentHop.computeSizeInformation(inputs.get(9));
- long K = currentHop.computeSizeInformation(inputs.get(10));
- long R = currentHop.computeSizeInformation(inputs.get(12));
- long S = currentHop.computeSizeInformation(inputs.get(13));
- long stride_h = currentHop.computeSizeInformation(inputs.get(2));
- long stride_w = currentHop.computeSizeInformation(inputs.get(3));
- long pad_h = currentHop.computeSizeInformation(inputs.get(4));
- long pad_w = currentHop.computeSizeInformation(inputs.get(5));
- long P = -1; long Q = -1;
- if(H > 0 && R > 0 && stride_h > 0 && pad_h > 0)
- P = ConvolutionUtils.getP(H, R, stride_h, pad_h);
- if(W > 0 && S > 0 && stride_w > 0 && pad_w > 0)
- Q = ConvolutionUtils.getQ(W, S, stride_w, pad_w);
-
- if(preferIm2Col(et, N, K, C, R, S, P, Q)) {
- return null;
- }
-
- long rlen = N;
- long clen = ConvolutionOp.getExtractedVal(K, P, Q);
- return ConvolutionOp.constructFusedConvolutionLops(et, inputs, ConvOp.DIRECT_CONV2D, (ConvolutionOp) x_col, rlen, clen);
- }
- }
- }
-
- return null;
- }
-
- public static Lop constructConvolutionBackwardDataLops(Hop currentHop, ExecType et) throws HopsException, LopsException {
- if(DMLScript.USE_ACCELERATOR)
- et = ExecType.GPU; // TODO: Add memory estimate checks
- else if(et == ExecType.MR || et == ExecType.SPARK)
- return null;
-
- if(currentHop != null && isConvolutionOp(currentHop, ConvOp.COL2IM)) {
- Hop matMult = currentHop.getInput().get(0);
- if(matMult != null && isMatMult(matMult)) {
- Hop rotate180 = matMult.getInput().get(0);
- Hop filter = matMult.getInput().get(1);
- if(isConvolutionOp(rotate180, ConvOp.ROTATE180)) {
- ArrayList<Hop> inputs = new ArrayList<Hop>();
- inputs.add(filter);
- inputs.add(rotate180.getInput().get(0));
- for(int i = 1; i < rotate180.getInput().size(); i++) {
- inputs.add(rotate180.getInput().get(i));
- }
-
- // N, C * H * W
- long N = currentHop.computeSizeInformation(inputs.get(6));
- long C = currentHop.computeSizeInformation(inputs.get(7));
- long H = currentHop.computeSizeInformation(inputs.get(8));
- long W = currentHop.computeSizeInformation(inputs.get(9));
- long K = currentHop.computeSizeInformation(inputs.get(10));
- long R = currentHop.computeSizeInformation(inputs.get(12));
- long S = currentHop.computeSizeInformation(inputs.get(13));
- long stride_h = currentHop.computeSizeInformation(inputs.get(2));
- long stride_w = currentHop.computeSizeInformation(inputs.get(3));
- long pad_h = currentHop.computeSizeInformation(inputs.get(4));
- long pad_w = currentHop.computeSizeInformation(inputs.get(5));
- long P = -1; long Q = -1;
- if(H > 0 && R > 0 && stride_h > 0 && pad_h > 0)
- P = ConvolutionUtils.getP(H, R, stride_h, pad_h);
- if(W > 0 && S > 0 && stride_w > 0 && pad_w > 0)
- Q = ConvolutionUtils.getQ(W, S, stride_w, pad_w);
-
- if(preferIm2Col(et, N, K, C, R, S, P, Q)) {
- return null;
- }
- long rlen = N;
- long clen = ConvolutionOp.getExtractedVal(C, H, W);
- return ConvolutionOp.constructFusedConvolutionLops(et, inputs, ConvOp.DIRECT_CONV2D_BACKWARD_DATA, (ConvolutionOp) rotate180, rlen, clen);
- }
- }
-
- }
-
- return null;
- }
-
-
}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e9aa5841/src/test/java/org/apache/sysml/test/integration/functions/tensor/Conv2DBackwardTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/tensor/Conv2DBackwardTest.java b/src/test/java/org/apache/sysml/test/integration/functions/tensor/Conv2DBackwardTest.java
index c213b55..74d3d14 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/tensor/Conv2DBackwardTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/tensor/Conv2DBackwardTest.java
@@ -22,7 +22,6 @@ import java.util.HashMap;
import org.apache.sysml.api.DMLScript;
import org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM;
-import org.apache.sysml.hops.ConvolutionOp;
import org.apache.sysml.lops.LopProperties.ExecType;
import org.apache.sysml.runtime.matrix.data.MatrixValue.CellIndex;
import org.apache.sysml.runtime.util.ConvolutionUtils;
@@ -50,70 +49,35 @@ public class Conv2DBackwardTest extends AutomatedTestBase
public void testConv2DBackwardFilterDense1()
{
int numImg = 3; int imgSize = 3; int numChannels = 3; int numFilters = 1; int filterSize = 2; int stride = 1; int pad = 0;
- runConv2DBackwardFilterTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false);
+ runConv2DBackwardFilterTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad);
}
@Test
public void testConv2DBackwardFilterDense2()
{
int numImg = 3; int imgSize = 3; int numChannels = 3; int numFilters = 4; int filterSize = 2; int stride = 1; int pad = 0;
- runConv2DBackwardFilterTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false);
+ runConv2DBackwardFilterTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad);
}
@Test
public void testConv2DBackwardFilterDense3()
{
int numImg = 3; int imgSize = 10; int numChannels = 4; int numFilters = 3; int filterSize = 2; int stride = 2; int pad = 1;
- runConv2DBackwardFilterTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false);
+ runConv2DBackwardFilterTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad);
}
@Test
public void testConv2DBackwardFilterDense4()
{
int numImg = 3; int imgSize = 10; int numChannels = 4; int numFilters = 3; int filterSize = 5; int stride = 1; int pad = 1;
- runConv2DBackwardFilterTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false);
+ runConv2DBackwardFilterTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad);
}
@Test
public void testConv2DBackwardFilterDense5()
{
int numImg = 3; int imgSize = 10; int numChannels = 2; int numFilters = 3; int filterSize = 5; int stride = 3; int pad = 2;
- runConv2DBackwardFilterTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false);
- }
-
- @Test
- public void testConv2DBackwardFilterDense6()
- {
- int numImg = 3; int imgSize = 3; int numChannels = 3; int numFilters = 1; int filterSize = 2; int stride = 1; int pad = 0;
- runConv2DBackwardFilterTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, true);
- }
-
- @Test
- public void testConv2DBackwardFilterDense7()
- {
- int numImg = 3; int imgSize = 3; int numChannels = 3; int numFilters = 4; int filterSize = 2; int stride = 1; int pad = 0;
- runConv2DBackwardFilterTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, true);
- }
-
- @Test
- public void testConv2DBackwardFilterDense8()
- {
- int numImg = 3; int imgSize = 10; int numChannels = 4; int numFilters = 3; int filterSize = 2; int stride = 2; int pad = 1;
- runConv2DBackwardFilterTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, true);
- }
-
- @Test
- public void testConv2DBackwardFilterDense9()
- {
- int numImg = 3; int imgSize = 10; int numChannels = 4; int numFilters = 3; int filterSize = 5; int stride = 1; int pad = 1;
- runConv2DBackwardFilterTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, true);
- }
-
- @Test
- public void testConv2DBackwardFilterDense10()
- {
- int numImg = 3; int imgSize = 10; int numChannels = 2; int numFilters = 3; int filterSize = 5; int stride = 3; int pad = 2;
- runConv2DBackwardFilterTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, true);
+ runConv2DBackwardFilterTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad);
}
/**
@@ -122,13 +86,11 @@ public class Conv2DBackwardTest extends AutomatedTestBase
* @param sparse
*/
public void runConv2DBackwardFilterTest( ExecType et, int imgSize, int numImg, int numChannels, int numFilters,
- int filterSize, int stride, int pad, boolean forceNonIm2Col)
+ int filterSize, int stride, int pad)
{
RUNTIME_PLATFORM oldRTP = rtplatform;
boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG;
- boolean oldForceNonIm2col = ConvolutionOp.FORCE_NON_IM2COL;
- ConvolutionOp.FORCE_NON_IM2COL = forceNonIm2Col;
try
{
TestConfiguration config = getTestConfiguration(TEST_NAME);
@@ -176,7 +138,6 @@ public class Conv2DBackwardTest extends AutomatedTestBase
{
rtplatform = oldRTP;
DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld;
- ConvolutionOp.FORCE_NON_IM2COL = oldForceNonIm2col;
}
}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e9aa5841/src/test/java/org/apache/sysml/test/integration/functions/tensor/Conv2DTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/tensor/Conv2DTest.java b/src/test/java/org/apache/sysml/test/integration/functions/tensor/Conv2DTest.java
index 8b87372..e247d08 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/tensor/Conv2DTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/tensor/Conv2DTest.java
@@ -22,7 +22,6 @@ import java.util.HashMap;
import org.apache.sysml.api.DMLScript;
import org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM;
-import org.apache.sysml.hops.ConvolutionOp;
import org.apache.sysml.lops.LopProperties.ExecType;
import org.apache.sysml.runtime.matrix.data.MatrixValue.CellIndex;
import org.apache.sysml.test.integration.AutomatedTestBase;
@@ -48,88 +47,49 @@ public class Conv2DTest extends AutomatedTestBase
public void testConv2DDense1()
{
int numImg = 5; int imgSize = 3; int numChannels = 3; int numFilters = 6; int filterSize = 2; int stride = 1; int pad = 0;
- runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false);
+ runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad);
}
@Test
public void testConv2DDense2()
{
int numImg = 1; int imgSize = 10; int numChannels = 4; int numFilters = 3; int filterSize = 4; int stride = 2; int pad = 0;
- runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false);
+ runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad);
}
@Test
public void testConv2DDense3()
{
int numImg = 1; int imgSize = 10; int numChannels = 4; int numFilters = 3; int filterSize = 4; int stride = 2; int pad = 1;
- runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false);
+ runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad);
}
@Test
public void testConv2DDense4()
{
int numImg = 3; int imgSize = 10; int numChannels = 1; int numFilters = 3; int filterSize = 2; int stride = 2; int pad = 1;
- runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false);
+ runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad);
}
@Test
public void testConv2DDense5()
{
int numImg = 3; int imgSize = 8; int numChannels = 2; int numFilters = 3; int filterSize = 3; int stride = 1; int pad = 2;
- runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false);
+ runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad);
}
- @Test
- public void testConv2DDense6()
- {
- int numImg = 5; int imgSize = 3; int numChannels = 3; int numFilters = 6; int filterSize = 2; int stride = 1; int pad = 0;
- runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, true);
- }
-
- @Test
- public void testConv2DDense7()
- {
- int numImg = 1; int imgSize = 10; int numChannels = 4; int numFilters = 3; int filterSize = 4; int stride = 2; int pad = 0;
- runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, true);
- }
-
- @Test
- public void testConv2DDense8()
- {
- int numImg = 1; int imgSize = 10; int numChannels = 4; int numFilters = 3; int filterSize = 4; int stride = 2; int pad = 1;
- runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, true);
- }
-
- @Test
- public void testConv2DDense9()
- {
- int numImg = 3; int imgSize = 10; int numChannels = 1; int numFilters = 3; int filterSize = 2; int stride = 2; int pad = 1;
- runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, true);
- }
-
- @Test
- public void testConv2DDense10()
- {
- int numImg = 3; int imgSize = 8; int numChannels = 2; int numFilters = 3; int filterSize = 3; int stride = 1; int pad = 2;
- runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, true);
- }
-
-
/**
*
* @param et
* @param sparse
*/
public void runConv2DTest( ExecType et, int imgSize, int numImg, int numChannels, int numFilters,
- int filterSize, int stride, int pad, boolean FORCE_NON_IM2COL)
+ int filterSize, int stride, int pad)
{
RUNTIME_PLATFORM oldRTP = rtplatform;
boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG;
- boolean oldForceNonIm2col = ConvolutionOp.FORCE_NON_IM2COL;
- ConvolutionOp.FORCE_NON_IM2COL = FORCE_NON_IM2COL;
-
try
{
TestConfiguration config = getTestConfiguration(TEST_NAME);
@@ -175,7 +135,6 @@ public class Conv2DTest extends AutomatedTestBase
{
rtplatform = oldRTP;
DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld;
- ConvolutionOp.FORCE_NON_IM2COL = oldForceNonIm2col;
}
}
}