You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by ni...@apache.org on 2017/08/22 21:57:01 UTC
systemml git commit: [SYSTEMML-445] Integrate GPU exectype selection into our existing infrastructure
Repository: systemml
Updated Branches:
refs/heads/master 4d5a82ecf -> 3ca053535
[SYSTEMML-445] Integrate GPU exectype selection into our existing infrastructure
Closes #627.
Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/3ca05353
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/3ca05353
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/3ca05353
Branch: refs/heads/master
Commit: 3ca05353593e7847dc6d6a7e862e323ffa96bfcc
Parents: 4d5a82e
Author: Niketan Pansare <np...@us.ibm.com>
Authored: Tue Aug 22 14:55:37 2017 -0700
Committer: Niketan Pansare <np...@us.ibm.com>
Committed: Tue Aug 22 14:55:37 2017 -0700
----------------------------------------------------------------------
.../java/org/apache/sysml/hops/AggBinaryOp.java | 53 +++++++++-----
.../java/org/apache/sysml/hops/AggUnaryOp.java | 53 ++++++++------
.../java/org/apache/sysml/hops/BinaryOp.java | 77 +++++++++++++-------
.../org/apache/sysml/hops/ConvolutionOp.java | 12 ++-
.../java/org/apache/sysml/hops/DataGenOp.java | 6 ++
src/main/java/org/apache/sysml/hops/DataOp.java | 5 ++
.../java/org/apache/sysml/hops/FunctionOp.java | 5 ++
src/main/java/org/apache/sysml/hops/Hop.java | 32 +++++---
.../java/org/apache/sysml/hops/IndexingOp.java | 5 ++
.../org/apache/sysml/hops/LeftIndexingOp.java | 5 ++
.../java/org/apache/sysml/hops/LiteralOp.java | 5 ++
.../java/org/apache/sysml/hops/MultipleOp.java | 5 ++
.../sysml/hops/ParameterizedBuiltinOp.java | 5 ++
.../org/apache/sysml/hops/QuaternaryOp.java | 5 ++
.../java/org/apache/sysml/hops/ReorgOp.java | 34 +++++++--
.../java/org/apache/sysml/hops/TernaryOp.java | 30 +++++---
.../java/org/apache/sysml/hops/UnaryOp.java | 38 +++++++---
.../apache/sysml/hops/codegen/SpoofFusedOp.java | 5 ++
18 files changed, 278 insertions(+), 102 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/systemml/blob/3ca05353/src/main/java/org/apache/sysml/hops/AggBinaryOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/AggBinaryOp.java b/src/main/java/org/apache/sysml/hops/AggBinaryOp.java
index 4f709b4..11a2399 100644
--- a/src/main/java/org/apache/sysml/hops/AggBinaryOp.java
+++ b/src/main/java/org/apache/sysml/hops/AggBinaryOp.java
@@ -48,7 +48,6 @@ import org.apache.sysml.parser.Expression.DataType;
import org.apache.sysml.parser.Expression.ValueType;
import org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat;
import org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext;
-import org.apache.sysml.runtime.instructions.gpu.context.GPUContextPool;
import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;
import org.apache.sysml.runtime.matrix.mapred.DistributedCacheInput;
@@ -143,6 +142,33 @@ public class AggBinaryOp extends Hop implements MultiThreadedHop
return _method;
}
+ @Override
+ public boolean isGPUEnabled() {
+ if(!DMLScript.USE_ACCELERATOR)
+ return false;
+
+ Hop input1 = getInput().get(0);
+ Hop input2 = getInput().get(1);
+ //matrix mult operation selection part 2 (specific pattern)
+ MMTSJType mmtsj = checkTransposeSelf(); //determine tsmm pattern
+ ChainType chain = checkMapMultChain(); //determine mmchain pattern
+
+ _method = optFindMMultMethodCP ( input1.getDim1(), input1.getDim2(),
+ input2.getDim1(), input2.getDim2(), mmtsj, chain, _hasLeftPMInput );
+ switch( _method ){
+ case TSMM:
+ return true;
+ case MAPMM_CHAIN:
+ return false;
+ case PMM:
+ return false;
+ case MM:
+ return true;
+ default:
+ throw new RuntimeException("Unsupported method:" + _method);
+ }
+ }
+
/**
* NOTE: overestimated mem in case of transpose-identity matmult, but 3/2 at worst
* and existing mem estimate advantageous in terms of consistency hops/lops,
@@ -169,7 +195,7 @@ public class AggBinaryOp extends Hop implements MultiThreadedHop
MMTSJType mmtsj = checkTransposeSelf(); //determine tsmm pattern
ChainType chain = checkMapMultChain(); //determine mmchain pattern
- if( et == ExecType.CP )
+ if( et == ExecType.CP || et == ExecType.GPU )
{
//matrix mult operation selection part 3 (CP type)
_method = optFindMMultMethodCP ( input1.getDim1(), input1.getDim2(),
@@ -178,7 +204,7 @@ public class AggBinaryOp extends Hop implements MultiThreadedHop
//dispatch CP lops construction
switch( _method ){
case TSMM:
- constructCPLopsTSMM( mmtsj );
+ constructCPLopsTSMM( mmtsj, et );
break;
case MAPMM_CHAIN:
constructCPLopsMMChain( chain );
@@ -187,7 +213,7 @@ public class AggBinaryOp extends Hop implements MultiThreadedHop
constructCPLopsPMM();
break;
case MM:
- constructCPLopsMM();
+ constructCPLopsMM(et);
break;
default:
throw new HopsException(this.printErrorLocation() + "Invalid Matrix Mult Method (" + _method + ") while constructing CP lops.");
@@ -344,7 +370,7 @@ public class AggBinaryOp extends Hop implements MultiThreadedHop
{
double ret = 0;
- if (DMLScript.USE_ACCELERATOR) {
+ if (isGPUEnabled()) {
// In GPU Mode, intermediate memory is only needed in case of one of the matrix blocks is sparse
// When sparse block is converted to dense and a dense MM takes place, we need (dim1 * dim2)
// When dense block is converted to sparse and a sparse MM takes place, we need (dim1 * dim2 * 2)
@@ -581,17 +607,11 @@ public class AggBinaryOp extends Hop implements MultiThreadedHop
// CP Lops generation
/////////////////////////
- private void constructCPLopsTSMM( MMTSJType mmtsj )
+ private void constructCPLopsTSMM( MMTSJType mmtsj, ExecType et )
throws HopsException, LopsException
{
int k = OptimizerUtils.getConstrainedNumThreads(_maxNumThreads);
- ExecType et = ExecType.CP;
- if (DMLScript.USE_ACCELERATOR && (DMLScript.FORCE_ACCELERATOR
- || getMemEstimate() < Math.min(GPUContextPool.initialGPUMemBudget(), OptimizerUtils.getLocalMemBudget()))) {
- et = ExecType.GPU;
- }
-
Lop matmultCP = new MMTSJ(getInput().get(mmtsj.isLeft()?1:0).constructLops(),
getDataType(), getValueType(), et, mmtsj, false, k);
@@ -662,13 +682,12 @@ public class AggBinaryOp extends Hop implements MultiThreadedHop
HopRewriteUtils.removeChildReference(pmInput, nrow);
}
- private void constructCPLopsMM()
+ private void constructCPLopsMM(ExecType et)
throws HopsException, LopsException
{
Lop matmultCP = null;
- if (DMLScript.USE_ACCELERATOR && (DMLScript.FORCE_ACCELERATOR
- || getMemEstimate() < Math.min(GPUContextPool.initialGPUMemBudget(), OptimizerUtils.getLocalMemBudget()))) {
+ if (et == ExecType.GPU) {
Hop h1 = getInput().get(0);
Hop h2 = getInput().get(1);
Lop left; Lop right;
@@ -691,7 +710,7 @@ public class AggBinaryOp extends Hop implements MultiThreadedHop
}
matmultCP = new Binary(left, right,
- Binary.OperationTypes.MATMULT, getDataType(), getValueType(), ExecType.GPU, isLeftTransposed, isRightTransposed);
+ Binary.OperationTypes.MATMULT, getDataType(), getValueType(), et, isLeftTransposed, isRightTransposed);
setOutputDimensions(matmultCP);
setNnz(-1);
}
@@ -702,7 +721,7 @@ public class AggBinaryOp extends Hop implements MultiThreadedHop
else {
int k = OptimizerUtils.getConstrainedNumThreads(_maxNumThreads);
matmultCP = new Binary(getInput().get(0).constructLops(),getInput().get(1).constructLops(),
- Binary.OperationTypes.MATMULT, getDataType(), getValueType(), ExecType.CP, k);
+ Binary.OperationTypes.MATMULT, getDataType(), getValueType(), et, k);
}
setOutputDimensions(matmultCP);
}
http://git-wip-us.apache.org/repos/asf/systemml/blob/3ca05353/src/main/java/org/apache/sysml/hops/AggUnaryOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/AggUnaryOp.java b/src/main/java/org/apache/sysml/hops/AggUnaryOp.java
index 7a6d463..4f5e2bc 100644
--- a/src/main/java/org/apache/sysml/hops/AggUnaryOp.java
+++ b/src/main/java/org/apache/sysml/hops/AggUnaryOp.java
@@ -38,7 +38,6 @@ import org.apache.sysml.lops.LopProperties.ExecType;
import org.apache.sysml.parser.Expression.DataType;
import org.apache.sysml.parser.Expression.ValueType;
import org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext;
-import org.apache.sysml.runtime.instructions.gpu.context.GPUContextPool;
import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
@@ -109,6 +108,30 @@ public class AggUnaryOp extends Hop implements MultiThreadedHop
}
@Override
+ public boolean isGPUEnabled() {
+ if(!DMLScript.USE_ACCELERATOR)
+ return false;
+
+ try {
+ if( isTernaryAggregateRewriteApplicable() || isUnaryAggregateOuterCPRewriteApplicable() ) {
+ return false;
+ }
+ else if ((_op == AggOp.SUM && (_direction == Direction.RowCol || _direction == Direction.Row || _direction == Direction.Col))
+ || (_op == AggOp.SUM_SQ && (_direction == Direction.RowCol || _direction == Direction.Row || _direction == Direction.Col))
+ || (_op == AggOp.MAX && (_direction == Direction.RowCol || _direction == Direction.Row || _direction == Direction.Col))
+ || (_op == AggOp.MIN && (_direction == Direction.RowCol || _direction == Direction.Row || _direction == Direction.Col))
+ || (_op == AggOp.MEAN && (_direction == Direction.RowCol || _direction == Direction.Row || _direction == Direction.Col))
+ || (_op == AggOp.VAR && (_direction == Direction.RowCol || _direction == Direction.Row || _direction == Direction.Col))
+ || (_op == AggOp.PROD && (_direction == Direction.RowCol))){
+ return true;
+ }
+ } catch (HopsException e) {
+ throw new RuntimeException(e);
+ }
+ return false;
+ }
+
+ @Override
public Lop constructLops()
throws HopsException, LopsException
{
@@ -121,10 +144,10 @@ public class AggUnaryOp extends Hop implements MultiThreadedHop
ExecType et = optFindExecType();
Hop input = getInput().get(0);
- if ( et == ExecType.CP )
+ if ( et == ExecType.CP || et == ExecType.GPU )
{
Lop agg1 = null;
- if( isTernaryAggregateRewriteApplicable(et) ) {
+ if( isTernaryAggregateRewriteApplicable() ) {
agg1 = constructLopsTernaryAggregateRewrite(et);
}
else if( isUnaryAggregateOuterCPRewriteApplicable() )
@@ -149,20 +172,6 @@ public class AggUnaryOp extends Hop implements MultiThreadedHop
}
else { //general case
int k = OptimizerUtils.getConstrainedNumThreads(_maxNumThreads);
- if (DMLScript.USE_ACCELERATOR && (DMLScript.FORCE_ACCELERATOR
- || getMemEstimate() < Math.min(GPUContextPool.initialGPUMemBudget(), OptimizerUtils.getLocalMemBudget()))) {
- // Only implemented methods for GPU
- if ((_op == AggOp.SUM && (_direction == Direction.RowCol || _direction == Direction.Row || _direction == Direction.Col))
- || (_op == AggOp.SUM_SQ && (_direction == Direction.RowCol || _direction == Direction.Row || _direction == Direction.Col))
- || (_op == AggOp.MAX && (_direction == Direction.RowCol || _direction == Direction.Row || _direction == Direction.Col))
- || (_op == AggOp.MIN && (_direction == Direction.RowCol || _direction == Direction.Row || _direction == Direction.Col))
- || (_op == AggOp.MEAN && (_direction == Direction.RowCol || _direction == Direction.Row || _direction == Direction.Col))
- || (_op == AggOp.VAR && (_direction == Direction.RowCol || _direction == Direction.Row || _direction == Direction.Col))
- || (_op == AggOp.PROD && (_direction == Direction.RowCol))){
- et = ExecType.GPU;
- k = 1;
- }
- }
agg1 = new PartialAggregate(input.constructLops(),
HopsAgg2Lops.get(_op), HopsDirection2Lops.get(_direction), getDataType(),getValueType(), et, k);
}
@@ -251,7 +260,7 @@ public class AggUnaryOp extends Hop implements MultiThreadedHop
DirectionTypes dir = HopsDirection2Lops.get(_direction);
//unary aggregate
- if( isTernaryAggregateRewriteApplicable(et) )
+ if( isTernaryAggregateRewriteApplicable() )
{
Lop aggregate = constructLopsTernaryAggregateRewrite(et);
setOutputDimensions(aggregate); //0x0 (scalar)
@@ -330,7 +339,7 @@ public class AggUnaryOp extends Hop implements MultiThreadedHop
protected double computeOutputMemEstimate( long dim1, long dim2, long nnz )
{
double sparsity = -1;
- if (DMLScript.USE_ACCELERATOR) {
+ if (isGPUEnabled()) {
// The GPU version (for the time being) only does dense outputs
sparsity = 1.0;
} else {
@@ -373,7 +382,7 @@ public class AggUnaryOp extends Hop implements MultiThreadedHop
break;
case VAR:
//worst-case correction LASTFOURROWS / LASTFOURCOLUMNS
- if (DMLScript.USE_ACCELERATOR) {
+ if (isGPUEnabled()) {
// The GPU implementation only operates on dense data
// It allocates 2 dense blocks to help with these ops:
// Assume Y = var(X) Or colVars(X), Or rowVars(X)
@@ -506,7 +515,7 @@ public class AggUnaryOp extends Hop implements MultiThreadedHop
return SparkAggType.MULTI_BLOCK;
}
- private boolean isTernaryAggregateRewriteApplicable(ExecType et)
+ private boolean isTernaryAggregateRewriteApplicable()
throws HopsException
{
boolean ret = false;
@@ -726,6 +735,8 @@ public class AggUnaryOp extends Hop implements MultiThreadedHop
// The execution type of a unary aggregate instruction should depend on the execution type of inputs to avoid OOM
// Since we only support matrix-vector and not vector-matrix, checking the execution type of input1 should suffice.
ExecType et_input = input1.optFindExecType();
+ // Because ternary aggregates are not supported on GPU
+ et_input = et_input == ExecType.GPU ? ExecType.CP : et_input;
DirectionTypes dir = HopsDirection2Lops.get(_direction);
return new TernaryAggregate(in1, in2, in3, Aggregate.OperationTypes.KahanSum,
http://git-wip-us.apache.org/repos/asf/systemml/blob/3ca05353/src/main/java/org/apache/sysml/hops/BinaryOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/BinaryOp.java b/src/main/java/org/apache/sysml/hops/BinaryOp.java
index 54c06f7..ad9f0ad 100644
--- a/src/main/java/org/apache/sysml/hops/BinaryOp.java
+++ b/src/main/java/org/apache/sysml/hops/BinaryOp.java
@@ -53,7 +53,6 @@ import org.apache.sysml.lops.UnaryCP;
import org.apache.sysml.parser.Expression.DataType;
import org.apache.sysml.parser.Expression.ValueType;
import org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat;
-import org.apache.sysml.runtime.instructions.gpu.context.GPUContextPool;
import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
import org.apache.sysml.runtime.matrix.mapred.DistributedCacheInput;
@@ -134,6 +133,56 @@ public class BinaryOp extends Hop
}
@Override
+ public boolean isGPUEnabled() {
+ if(!DMLScript.USE_ACCELERATOR)
+ return false;
+
+ switch(op)
+ {
+ case IQM:
+ case CENTRALMOMENT:
+ case COVARIANCE:
+ case QUANTILE:
+ case INTERQUANTILE:
+ case MEDIAN:
+ return false;
+ case CBIND:
+ case RBIND: {
+ DataType dt1 = getInput().get(0).getDataType();
+ return dt1 == DataType.MATRIX; // only matrix cbind, rbind supported on GPU
+ }
+ default: {
+ DataType dt1 = getInput().get(0).getDataType();
+ DataType dt2 = getInput().get(1).getDataType();
+
+ boolean isMatrixScalar = (dt1 == DataType.MATRIX && dt2 == DataType.SCALAR) || (dt1 == DataType.SCALAR && dt2 == DataType.MATRIX);
+ boolean isMatrixMatrix = (dt1 == DataType.MATRIX && dt2 == DataType.MATRIX);
+
+ OpOp2 [] supportedOps = { OpOp2.MULT, OpOp2.PLUS, OpOp2.MINUS, OpOp2.DIV, OpOp2.POW, OpOp2.MINUS1_MULT,
+ OpOp2.MODULUS, OpOp2.INTDIV, OpOp2.LESS, OpOp2.LESSEQUAL, OpOp2.EQUAL, OpOp2.NOTEQUAL, OpOp2.GREATER, OpOp2.GREATEREQUAL};
+
+ if(isMatrixScalar && op == OpOp2.MINUS_NZ) {
+ // Only supported for matrix scalar:
+ return true;
+ }
+ else if(isMatrixMatrix && op == OpOp2.SOLVE) {
+ // Only supported for matrix matrix:
+ return true;
+ }
+ else if(isMatrixScalar || isMatrixMatrix) {
+ for(OpOp2 supportedOp : supportedOps) {
+ if(op == supportedOp)
+ return true;
+ }
+ return false;
+ }
+ else
+ return false;
+ }
+ }
+ }
+
+ @Override
public Lop constructLops()
throws HopsException, LopsException
{
@@ -527,11 +576,6 @@ public class BinaryOp extends Hop
}
else //CP
{
- if (DMLScript.USE_ACCELERATOR && dt1 == DataType.MATRIX && (DMLScript.FORCE_ACCELERATOR
- || getMemEstimate() < GPUContextPool.initialGPUMemBudget())) {
- et = ExecType.GPU;
- }
-
Lop offset = createOffsetLop( getInput().get(0), cbind ); //offset 1st input
append = new Append(getInput().get(0).constructLops(), getInput().get(1).constructLops(), offset, getDataType(), getValueType(), cbind, et);
append.getOutputParameters().setDimensions(rlen, clen, getRowsInBlock(), getColsInBlock(), getNnz());
@@ -582,14 +626,6 @@ public class BinaryOp extends Hop
else //general case
ot = HopsOpOp2LopsU.get(op);
- if (DMLScript.USE_ACCELERATOR && (DMLScript.FORCE_ACCELERATOR
- || getMemEstimate() < Math.min(GPUContextPool.initialGPUMemBudget(), OptimizerUtils.getLocalMemBudget()))
- && (op == OpOp2.MULT || op == OpOp2.PLUS || op == OpOp2.MINUS || op == OpOp2.DIV || op == OpOp2.POW
- || op == OpOp2.MINUS_NZ || op == OpOp2.MINUS1_MULT || op == OpOp2.MODULUS || op == OpOp2.INTDIV
- || op == OpOp2.LESS || op == OpOp2.LESSEQUAL || op == OpOp2.EQUAL || op == OpOp2.NOTEQUAL
- || op == OpOp2.GREATER || op == OpOp2.GREATEREQUAL)) {
- et = ExecType.GPU;
- }
Unary unary1 = new Unary(getInput().get(0).constructLops(),
getInput().get(1).constructLops(), ot, getDataType(), getValueType(), et);
@@ -602,17 +638,8 @@ public class BinaryOp extends Hop
{
// Both operands are Matrixes
ExecType et = optFindExecType();
- if ( et == ExecType.CP )
+ if ( et == ExecType.CP || et == ExecType.GPU )
{
- if(DMLScript.USE_ACCELERATOR && (DMLScript.FORCE_ACCELERATOR
- || getMemEstimate() < Math.min(GPUContextPool.initialGPUMemBudget(), OptimizerUtils.getLocalMemBudget()))
- && (op == OpOp2.MULT || op == OpOp2.PLUS || op == OpOp2.MINUS || op == OpOp2.DIV || op == OpOp2.POW
- || op == OpOp2.SOLVE || op == OpOp2.MINUS1_MULT || op == OpOp2.MODULUS || op == OpOp2.INTDIV
- || op == OpOp2.LESS || op == OpOp2.LESSEQUAL || op == OpOp2.EQUAL || op == OpOp2.NOTEQUAL
- || op == OpOp2.GREATER || op == OpOp2.GREATEREQUAL)) {
- et = ExecType.GPU;
- }
-
Lop binary = null;
boolean isLeftXGt = (getInput().get(0) instanceof BinaryOp) && ((BinaryOp) getInput().get(0)).getOp() == OpOp2.GREATER;
@@ -827,7 +854,7 @@ public class BinaryOp extends Hop
ret = getInput().get(0).getMemEstimate() * 3;
}
else if ( op == OpOp2.SOLVE ) {
- if (DMLScript.USE_ACCELERATOR) {
+ if (isGPUEnabled()) {
// Solve on the GPU takes an awful lot of intermediate space
// First the inputs are converted from row-major to column major
// Then a workspace and a temporary output (workSize, tauSize) are needed
http://git-wip-us.apache.org/repos/asf/systemml/blob/3ca05353/src/main/java/org/apache/sysml/hops/ConvolutionOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/ConvolutionOp.java b/src/main/java/org/apache/sysml/hops/ConvolutionOp.java
index a3d8a81..2b9335c 100644
--- a/src/main/java/org/apache/sysml/hops/ConvolutionOp.java
+++ b/src/main/java/org/apache/sysml/hops/ConvolutionOp.java
@@ -19,6 +19,7 @@
package org.apache.sysml.hops;
+import org.apache.sysml.api.DMLScript;
import org.apache.sysml.hops.Hop.MultiThreadedHop;
import org.apache.sysml.lops.ConvolutionTransform;
import org.apache.sysml.lops.ConvolutionTransform.OperationTypes;
@@ -79,6 +80,13 @@ public class ConvolutionOp extends Hop implements MultiThreadedHop
}
@Override
+ public boolean isGPUEnabled() {
+ if(!DMLScript.USE_ACCELERATOR)
+ return false;
+ return true;
+ }
+
+ @Override
public Lop constructLops()
throws HopsException, LopsException
{
@@ -315,12 +323,12 @@ public class ConvolutionOp extends Hop implements MultiThreadedHop
if( _etypeForced != null )
{
- _etype = findGPUExecTypeByMemEstimate(_etypeForced);
+ _etype = _etypeForced;
}
else
{
if ( OptimizerUtils.isMemoryBasedOptLevel() ) {
- _etype = findGPUExecTypeByMemEstimate(findExecTypeByMemEstimate());
+ _etype = findExecTypeByMemEstimate();
}
else {
_etype = REMOTE;
http://git-wip-us.apache.org/repos/asf/systemml/blob/3ca05353/src/main/java/org/apache/sysml/hops/DataGenOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/DataGenOp.java b/src/main/java/org/apache/sysml/hops/DataGenOp.java
index ce08dbc..89a5814 100644
--- a/src/main/java/org/apache/sysml/hops/DataGenOp.java
+++ b/src/main/java/org/apache/sysml/hops/DataGenOp.java
@@ -146,6 +146,11 @@ public class DataGenOp extends Hop implements MultiThreadedHop
}
@Override
+ public boolean isGPUEnabled() {
+ return false;
+ }
+
+ @Override
public Lop constructLops()
throws HopsException, LopsException
{
@@ -502,4 +507,5 @@ public class DataGenOp extends Hop implements MultiThreadedHop
return ret;
}
+
}
http://git-wip-us.apache.org/repos/asf/systemml/blob/3ca05353/src/main/java/org/apache/sysml/hops/DataOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/DataOp.java b/src/main/java/org/apache/sysml/hops/DataOp.java
index bcded04..f410210 100644
--- a/src/main/java/org/apache/sysml/hops/DataOp.java
+++ b/src/main/java/org/apache/sysml/hops/DataOp.java
@@ -241,6 +241,11 @@ public class DataOp extends Hop
}
@Override
+ public boolean isGPUEnabled() {
+ return false;
+ }
+
+ @Override
public Lop constructLops()
throws HopsException, LopsException
{
http://git-wip-us.apache.org/repos/asf/systemml/blob/3ca05353/src/main/java/org/apache/sysml/hops/FunctionOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/FunctionOp.java b/src/main/java/org/apache/sysml/hops/FunctionOp.java
index c677bb8..3ad2d15 100644
--- a/src/main/java/org/apache/sysml/hops/FunctionOp.java
+++ b/src/main/java/org/apache/sysml/hops/FunctionOp.java
@@ -209,6 +209,11 @@ public class FunctionOp extends Hop
}
@Override
+ public boolean isGPUEnabled() {
+ return false;
+ }
+
+ @Override
public Lop constructLops()
throws HopsException, LopsException
{
http://git-wip-us.apache.org/repos/asf/systemml/blob/3ca05353/src/main/java/org/apache/sysml/hops/Hop.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/Hop.java b/src/main/java/org/apache/sysml/hops/Hop.java
index bfbdbaf..1cf875f 100644
--- a/src/main/java/org/apache/sysml/hops/Hop.java
+++ b/src/main/java/org/apache/sysml/hops/Hop.java
@@ -192,7 +192,9 @@ public abstract class Hop
public void checkAndSetForcedPlatform()
{
- if ( DMLScript.rtplatform == RUNTIME_PLATFORM.SINGLE_NODE )
+ if(DMLScript.USE_ACCELERATOR && DMLScript.FORCE_ACCELERATOR && isGPUEnabled())
+ _etypeForced = ExecType.GPU;
+ else if ( DMLScript.rtplatform == RUNTIME_PLATFORM.SINGLE_NODE )
_etypeForced = ExecType.CP;
else if ( DMLScript.rtplatform == RUNTIME_PLATFORM.HADOOP )
_etypeForced = ExecType.MR;
@@ -768,8 +770,12 @@ public abstract class Hop
protected ExecType findExecTypeByMemEstimate() {
ExecType et = null;
char c = ' ';
- if ( getMemEstimate() < OptimizerUtils.getLocalMemBudget() ) {
- et = ExecType.CP;
+ double memEst = getMemEstimate();
+ if ( memEst < OptimizerUtils.getLocalMemBudget() ) {
+ if (DMLScript.USE_ACCELERATOR && isGPUEnabled() && memEst < GPUContextPool.initialGPUMemBudget())
+ et = ExecType.GPU;
+ else
+ et = ExecType.CP;
}
else {
if( DMLScript.rtplatform == DMLScript.RUNTIME_PLATFORM.HYBRID )
@@ -788,14 +794,6 @@ public abstract class Hop
return et;
}
-
- protected ExecType findGPUExecTypeByMemEstimate(ExecType et) {
- if (DMLScript.USE_ACCELERATOR && (DMLScript.FORCE_ACCELERATOR
- || getMemEstimate() < Math.min(GPUContextPool.initialGPUMemBudget(), OptimizerUtils.getLocalMemBudget()))) {
- return ExecType.GPU;
- }
- return et;
- }
public ArrayList<Hop> getParent() {
return _parent;
@@ -850,6 +848,18 @@ public abstract class Hop
public abstract String getOpString();
+ /**
+ * In memory-based optimizer mode (see OptimizerUtils.isMemoryBasedOptLevel()),
+ * the exectype is determined by checking this method as well as the memory budget of this Hop.
+ * Please see findExecTypeByMemEstimate for more detail.
+ *
+ * This method is necessary because not all operators are supported efficiently
+ * on GPU (for example: operations on frames and scalars, as well as operations such as table).
+ *
+ * @return true if the Hop is eligible for GPU Exectype.
+ */
+ public abstract boolean isGPUEnabled();
+
protected boolean isVector() {
return (dimsKnown() && (_dim1 == 1 || _dim2 == 1) );
}
http://git-wip-us.apache.org/repos/asf/systemml/blob/3ca05353/src/main/java/org/apache/sysml/hops/IndexingOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/IndexingOp.java b/src/main/java/org/apache/sysml/hops/IndexingOp.java
index 5a27ed6..5f2ce34 100644
--- a/src/main/java/org/apache/sysml/hops/IndexingOp.java
+++ b/src/main/java/org/apache/sysml/hops/IndexingOp.java
@@ -94,6 +94,11 @@ public class IndexingOp extends Hop
public void setColLowerEqualsUpper(boolean passed) {
_colLowerEqualsUpper = passed;
}
+
+ @Override
+ public boolean isGPUEnabled() {
+ return false;
+ }
@Override
public Lop constructLops()
http://git-wip-us.apache.org/repos/asf/systemml/blob/3ca05353/src/main/java/org/apache/sysml/hops/LeftIndexingOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/LeftIndexingOp.java b/src/main/java/org/apache/sysml/hops/LeftIndexingOp.java
index a641622..02e7753 100644
--- a/src/main/java/org/apache/sysml/hops/LeftIndexingOp.java
+++ b/src/main/java/org/apache/sysml/hops/LeftIndexingOp.java
@@ -99,6 +99,11 @@ public class LeftIndexingOp extends Hop
}
@Override
+ public boolean isGPUEnabled() {
+ return false;
+ }
+
+ @Override
public Lop constructLops()
throws HopsException, LopsException
{
http://git-wip-us.apache.org/repos/asf/systemml/blob/3ca05353/src/main/java/org/apache/sysml/hops/LiteralOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/LiteralOp.java b/src/main/java/org/apache/sysml/hops/LiteralOp.java
index b96d032..16ebf1b 100644
--- a/src/main/java/org/apache/sysml/hops/LiteralOp.java
+++ b/src/main/java/org/apache/sysml/hops/LiteralOp.java
@@ -73,6 +73,11 @@ public class LiteralOp extends Hop
public void checkArity() throws HopsException {
HopsException.check(_input.isEmpty(), this, "should have 0 inputs but has %d inputs", _input.size());
}
+
+ @Override
+ public boolean isGPUEnabled() {
+ return false;
+ }
@Override
public Lop constructLops()
http://git-wip-us.apache.org/repos/asf/systemml/blob/3ca05353/src/main/java/org/apache/sysml/hops/MultipleOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/MultipleOp.java b/src/main/java/org/apache/sysml/hops/MultipleOp.java
index 5fb6b29..5c178c0 100644
--- a/src/main/java/org/apache/sysml/hops/MultipleOp.java
+++ b/src/main/java/org/apache/sysml/hops/MultipleOp.java
@@ -80,6 +80,11 @@ public class MultipleOp extends Hop {
public String getOpString() {
return "m(" + _op.name().toLowerCase() + ")";
}
+
+ @Override
+ public boolean isGPUEnabled() {
+ return false;
+ }
/**
* Construct the corresponding Lops for this Hop
http://git-wip-us.apache.org/repos/asf/systemml/blob/3ca05353/src/main/java/org/apache/sysml/hops/ParameterizedBuiltinOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/ParameterizedBuiltinOp.java b/src/main/java/org/apache/sysml/hops/ParameterizedBuiltinOp.java
index ab276d7..a611893 100644
--- a/src/main/java/org/apache/sysml/hops/ParameterizedBuiltinOp.java
+++ b/src/main/java/org/apache/sysml/hops/ParameterizedBuiltinOp.java
@@ -175,6 +175,11 @@ public class ParameterizedBuiltinOp extends Hop implements MultiThreadedHop
}
@Override
+ public boolean isGPUEnabled() {
+ return false;
+ }
+
+ @Override
public Lop constructLops()
throws HopsException, LopsException
{
http://git-wip-us.apache.org/repos/asf/systemml/blob/3ca05353/src/main/java/org/apache/sysml/hops/QuaternaryOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/QuaternaryOp.java b/src/main/java/org/apache/sysml/hops/QuaternaryOp.java
index 6517de6..17188be 100644
--- a/src/main/java/org/apache/sysml/hops/QuaternaryOp.java
+++ b/src/main/java/org/apache/sysml/hops/QuaternaryOp.java
@@ -189,6 +189,11 @@ public class QuaternaryOp extends Hop implements MultiThreadedHop
}
@Override
+ public boolean isGPUEnabled() {
+ return false;
+ }
+
+ @Override
public Lop constructLops()
throws HopsException, LopsException
{
http://git-wip-us.apache.org/repos/asf/systemml/blob/3ca05353/src/main/java/org/apache/sysml/hops/ReorgOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/ReorgOp.java b/src/main/java/org/apache/sysml/hops/ReorgOp.java
index 3e27eb3..f0560d3 100644
--- a/src/main/java/org/apache/sysml/hops/ReorgOp.java
+++ b/src/main/java/org/apache/sysml/hops/ReorgOp.java
@@ -34,7 +34,6 @@ import org.apache.sysml.lops.LopProperties.ExecType;
import org.apache.sysml.lops.Transform.OperationTypes;
import org.apache.sysml.parser.Expression.DataType;
import org.apache.sysml.parser.Expression.ValueType;
-import org.apache.sysml.runtime.instructions.gpu.context.GPUContextPool;
import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
/**
@@ -129,6 +128,35 @@ public class ReorgOp extends Hop implements MultiThreadedHop
s += "r(" + HopsTransf2String.get(op) + ")";
return s;
}
+
+ @Override
+ public boolean isGPUEnabled() {
+ if(!DMLScript.USE_ACCELERATOR)
+ return false;
+ switch( op ) {
+ case TRANSPOSE: {
+ Lop lin;
+ try {
+ lin = getInput().get(0).constructLops();
+ } catch (HopsException | LopsException e) {
+ throw new RuntimeException("Unable to create child lop", e);
+ }
+ if( lin instanceof Transform && ((Transform)lin).getOperationType()==OperationTypes.Transpose )
+ return false; //if input is already a transpose, avoid redundant transpose ops
+ else if( getDim1()==1 && getDim2()==1 )
+ return false; //if input of size 1x1, avoid unnecessary transpose
+ else
+ return true;
+ }
+ case DIAG:
+ case REV:
+ case RESHAPE:
+ case SORT:
+ return false;
+ default:
+ throw new RuntimeException("Unsupported operator:" + op.name());
+ }
+ }
@Override
public Lop constructLops()
@@ -151,10 +179,6 @@ public class ReorgOp extends Hop implements MultiThreadedHop
setLops(lin); //if input of size 1x1, avoid unnecessary transpose
else { //general case
int k = OptimizerUtils.getConstrainedNumThreads(_maxNumThreads);
- if (DMLScript.USE_ACCELERATOR && (DMLScript.FORCE_ACCELERATOR
- || getMemEstimate() < Math.min(GPUContextPool.initialGPUMemBudget(), OptimizerUtils.getLocalMemBudget()))) {
- et = ExecType.GPU;
- }
Transform transform1 = new Transform( lin,
HopsTransf2Lops.get(op), getDataType(), getValueType(), et, k);
setOutputDimensions(transform1);
http://git-wip-us.apache.org/repos/asf/systemml/blob/3ca05353/src/main/java/org/apache/sysml/hops/TernaryOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/TernaryOp.java b/src/main/java/org/apache/sysml/hops/TernaryOp.java
index 98c8ad3..47b012e 100644
--- a/src/main/java/org/apache/sysml/hops/TernaryOp.java
+++ b/src/main/java/org/apache/sysml/hops/TernaryOp.java
@@ -42,7 +42,6 @@ import org.apache.sysml.lops.PartialAggregate.CorrectionLocationType;
import org.apache.sysml.parser.Statement;
import org.apache.sysml.parser.Expression.DataType;
import org.apache.sysml.parser.Expression.ValueType;
-import org.apache.sysml.runtime.instructions.gpu.context.GPUContextPool;
import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
/** Primary use cases for now, are
@@ -128,6 +127,25 @@ public class TernaryOp extends Hop
}
@Override
+ public boolean isGPUEnabled() {
+ if(!DMLScript.USE_ACCELERATOR)
+ return false;
+ switch( _op ) {
+ case CENTRALMOMENT:
+ case COVARIANCE:
+ case CTABLE:
+ case INTERQUANTILE:
+ case QUANTILE:
+ return false;
+ case MINUS_MULT:
+ case PLUS_MULT:
+ return true;
+ default:
+ throw new RuntimeException("Unsupported operator:" + _op.name());
+ }
+ }
+
+ @Override
public Lop constructLops()
throws HopsException, LopsException
{
@@ -631,13 +649,7 @@ public class TernaryOp extends Hop
if ( _op != OpOp3.PLUS_MULT && _op != OpOp3.MINUS_MULT )
throw new HopsException("Unexpected operation: " + _op + ", expecting " + OpOp3.PLUS_MULT + " or" + OpOp3.MINUS_MULT);
- ExecType et = null;
- if (DMLScript.USE_ACCELERATOR && (DMLScript.FORCE_ACCELERATOR
- || getMemEstimate() < Math.min(GPUContextPool.initialGPUMemBudget(), OptimizerUtils.getLocalMemBudget()))) {
- et = ExecType.GPU;
- } else {
- et = optFindExecType();
- }
+ ExecType et = optFindExecType();
PlusMult plusmult = null;
if( et == ExecType.CP || et == ExecType.SPARK || et == ExecType.GPU ) {
@@ -711,7 +723,7 @@ public class TernaryOp extends Hop
return OptimizerUtils.estimateSizeExactSparsity(dim1, dim2, 1.0);
case PLUS_MULT:
case MINUS_MULT: {
- if (DMLScript.USE_ACCELERATOR) {
+ if (isGPUEnabled()) {
// For the GPU, the input is converted to dense
sparsity = 1.0;
} else {
http://git-wip-us.apache.org/repos/asf/systemml/blob/3ca05353/src/main/java/org/apache/sysml/hops/UnaryOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/UnaryOp.java b/src/main/java/org/apache/sysml/hops/UnaryOp.java
index 2b31247..0a5bc65 100644
--- a/src/main/java/org/apache/sysml/hops/UnaryOp.java
+++ b/src/main/java/org/apache/sysml/hops/UnaryOp.java
@@ -99,6 +99,29 @@ public class UnaryOp extends Hop implements MultiThreadedHop
}
@Override
+ public boolean isGPUEnabled() {
+ if(!DMLScript.USE_ACCELERATOR)
+ return false;
+ boolean isScalar = ( getDataType() == DataType.SCALAR //value type casts or matrix to scalar
+ || (_op == OpOp1.CAST_AS_MATRIX && getInput().get(0).getDataType()==DataType.SCALAR)
+ || (_op == OpOp1.CAST_AS_FRAME && getInput().get(0).getDataType()==DataType.SCALAR));
+ if(!isScalar) {
+ switch(_op) {
+ case SELP:case EXP:case SQRT:case LOG:case ABS:
+ case ROUND:case FLOOR:case CEIL:
+ case SIN:case COS: case TAN:case ASIN:case ACOS:case ATAN:
+ case SIGN:
+ return true;
+ default:
+ return false;
+ }
+ }
+ else {
+ return false;
+ }
+ }
+
+ @Override
public Lop constructLops()
throws HopsException, LopsException
{
@@ -149,7 +172,7 @@ public class UnaryOp extends Hop implements MultiThreadedHop
ExecType et = optFindExecType();
//special handling cumsum/cumprod/cummin/cumsum
- if( isCumulativeUnaryOperation() && et != ExecType.CP )
+ if( isCumulativeUnaryOperation() && !(et == ExecType.CP || et == ExecType.GPU) )
{
//TODO additional physical operation if offsets fit in memory
Lop cumsumLop = null;
@@ -162,15 +185,6 @@ public class UnaryOp extends Hop implements MultiThreadedHop
else //default unary
{
int k = isCumulativeUnaryOperation() ? OptimizerUtils.getConstrainedNumThreads( _maxNumThreads ) : 1;
- switch(_op) {
- case SELP:case EXP:case SQRT:case LOG:case ABS:
- case ROUND:case FLOOR:case CEIL:
- case SIN:case COS: case TAN:case ASIN:case ACOS:case ATAN:
- case SIGN:
- et = findGPUExecTypeByMemEstimate(et);
- break;
- default:
- }
Unary unary1 = new Unary(input.constructLops(), HopsOpOp1LopsU.get(_op),
getDataType(), getValueType(), et, k);
setOutputDimensions(unary1);
@@ -550,7 +564,7 @@ public class UnaryOp extends Hop implements MultiThreadedHop
protected double computeOutputMemEstimate( long dim1, long dim2, long nnz )
{
double sparsity = -1;
- if (DMLScript.USE_ACCELERATOR) {
+ if (isGPUEnabled()) {
sparsity = 1.0; // Output is always dense (for now) on the GPU
} else {
sparsity = OptimizerUtils.getSparsity(dim1, dim2, nnz);
@@ -569,7 +583,7 @@ public class UnaryOp extends Hop implements MultiThreadedHop
ret = getInput().get(0).getMemEstimate() * 3;
}
- if (DMLScript.USE_ACCELERATOR) {
+ if (isGPUEnabled()) {
OptimizerUtils.estimateSize(dim1, dim2); // Intermediate memory required to convert sparse to dense
}
http://git-wip-us.apache.org/repos/asf/systemml/blob/3ca05353/src/main/java/org/apache/sysml/hops/codegen/SpoofFusedOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/SpoofFusedOp.java b/src/main/java/org/apache/sysml/hops/codegen/SpoofFusedOp.java
index 0d4b8db..247a142 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/SpoofFusedOp.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/SpoofFusedOp.java
@@ -283,4 +283,9 @@ public class SpoofFusedOp extends Hop implements MultiThreadedHop
return ret;
}
+
+ @Override
+ public boolean isGPUEnabled() {
+ return false;
+ }
}