You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2016/08/11 19:00:21 UTC
[2/2] incubator-systemml git commit: [SYSTEMML-824] Performance
scalar/agg/binary ops (sparse, par, overlap)
[SYSTEMML-824] Performance scalar/agg/binary ops (sparse, par, overlap)
This patch makes various performance improvements some of which change
the asymptotic behavior with huge improvements on large data.
(1) Scalar operations: Right scalar divide with non-zero scalar (e.g.,
X/7) are now marked as sparse-safe.
(2) Unary aggregate operations: The min size threshold for parallel
operations is now checked on the number of non-zeros instead of number
of cells in order to avoid unnecessary thread pool creation overhead on
small data.
(3) Ternary aggregate operations: The rewrites of sum(v1*v2*v3) to
tak+(v1,v2,v3) is now only applied if sum is the only consumer of the
intermediate in order to avoid unnecessary redundancy.
(4) Binary operations: There is now an additional special case for "skip
empty" operations like * and / (with dense rhs) that were handled so far
with the generic case (e.g., * sparse-dense with sparse output) leading
to an asymptotic improvement from O(n log n) to O(n).
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/2137a7e4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/2137a7e4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/2137a7e4
Branch: refs/heads/master
Commit: 2137a7e4a0af7a48326294fa6bb56084ade9d2b5
Parents: b1dc0d5
Author: Matthias Boehm <mb...@us.ibm.com>
Authored: Wed Aug 10 23:04:28 2016 -0700
Committer: Matthias Boehm <mb...@us.ibm.com>
Committed: Thu Aug 11 11:52:13 2016 -0700
----------------------------------------------------------------------
.../java/org/apache/sysml/hops/AggUnaryOp.java | 16 ++-
.../sysml/runtime/matrix/data/LibMatrixAgg.java | 2 +-
.../runtime/matrix/data/LibMatrixBincell.java | 101 +++++++++++--------
.../matrix/operators/RightScalarOperator.java | 9 +-
4 files changed, 70 insertions(+), 58 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2137a7e4/src/main/java/org/apache/sysml/hops/AggUnaryOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/AggUnaryOp.java b/src/main/java/org/apache/sysml/hops/AggUnaryOp.java
index 0152d54..8b44e4b 100644
--- a/src/main/java/org/apache/sysml/hops/AggUnaryOp.java
+++ b/src/main/java/org/apache/sysml/hops/AggUnaryOp.java
@@ -495,31 +495,29 @@ public class AggUnaryOp extends Hop implements MultiThreadedHop
//currently we support only sum over binary multiply but potentially
//it can be generalized to any RC aggregate over two common binary operations
- if( _direction == Direction.RowCol
- && _op == AggOp.SUM )
+ if( OptimizerUtils.ALLOW_SUM_PRODUCT_REWRITES &&
+ _direction == Direction.RowCol && _op == AggOp.SUM )
{
Hop input1 = getInput().get(0);
- if( input1 instanceof BinaryOp && ((BinaryOp)input1).getOp()==OpOp2.MULT )
+ if( input1.getParent().size() == 1 && //sum single consumer
+ input1 instanceof BinaryOp && ((BinaryOp)input1).getOp()==OpOp2.MULT )
{
Hop input11 = input1.getInput().get(0);
Hop input12 = input1.getInput().get(1);
- if( input11 instanceof BinaryOp && ((BinaryOp)input11).getOp()==OpOp2.MULT )
- {
+ if( input11 instanceof BinaryOp && ((BinaryOp)input11).getOp()==OpOp2.MULT ) {
//ternary, arbitrary matrices but no mv/outer operations.
ret = HopRewriteUtils.isEqualSize(input11.getInput().get(0), input1)
&& HopRewriteUtils.isEqualSize(input11.getInput().get(1), input1)
&& HopRewriteUtils.isEqualSize(input12, input1);
}
- else if( input12 instanceof BinaryOp && ((BinaryOp)input12).getOp()==OpOp2.MULT )
- {
+ else if( input12 instanceof BinaryOp && ((BinaryOp)input12).getOp()==OpOp2.MULT ) {
//ternary, arbitrary matrices but no mv/outer operations.
ret = HopRewriteUtils.isEqualSize(input12.getInput().get(0), input1)
&& HopRewriteUtils.isEqualSize(input12.getInput().get(1), input1)
&& HopRewriteUtils.isEqualSize(input11, input1);
}
- else
- {
+ else {
//binary, arbitrary matrices but no mv/outer operations.
ret = HopRewriteUtils.isEqualSize(input11, input12);
}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2137a7e4/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java
index cf689b8..3357dee 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java
@@ -229,7 +229,7 @@ public class LibMatrixAgg
throws DMLRuntimeException
{
//fall back to sequential version if necessary
- if( k <= 1 || (long)in.rlen*in.clen < PAR_NUMCELL_THRESHOLD || in.rlen <= k
+ if( k <= 1 || (long)in.nonZeros < PAR_NUMCELL_THRESHOLD || in.rlen <= k/2
|| (!(uaop.indexFn instanceof ReduceCol) && out.clen*8*k > PAR_INTERMEDIATE_SIZE_THRESHOLD ) ||
!out.isThreadSafe()) {
aggregateUnaryMatrix(in, out, uaop);
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2137a7e4/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixBincell.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixBincell.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixBincell.java
index f9269e8..8e7d330 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixBincell.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixBincell.java
@@ -228,8 +228,8 @@ public class LibMatrixBincell
private static void safeBinary(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op)
throws DMLRuntimeException
{
- boolean isMultiply = (op.fn instanceof Multiply);
- boolean skipEmpty = (isMultiply);
+ boolean skipEmpty = (op.fn instanceof Multiply
+ || isSparseSafeDivide(op, m2) );
//skip empty blocks (since sparse-safe)
if( m1.isEmptyBlock(false) && m2.isEmptyBlock(false)
@@ -425,18 +425,35 @@ public class LibMatrixBincell
}
ret.nonZeros = nnz;
}
+ else if( skipEmpty && (m1.sparse || m2.sparse) )
+ {
+ SparseBlock a = m1.sparse ? m1.sparseBlock : m2.sparseBlock;
+ if( a != null ) {
+ MatrixBlock b = m1.sparse ? m2 : m1;
+ for( int i=0; i<a.numRows(); i++ ) {
+ if( a.isEmpty(i) ) continue;
+ int apos = a.pos(i);
+ int alen = a.size(i);
+ int[] aix = a.indexes(i);
+ double[] avals = a.values(i);
+ for(int k = apos; k < apos+alen; k++) {
+ double in2 = b.quickGetValue(i, aix[k]);
+ if( in2==0 ) continue;
+ double val = op.fn.execute(avals[k], in2);
+ ret.appendValue(i, aix[k], val);
+ }
+ }
+ }
+ }
else //generic case
{
- double thisvalue, thatvalue, resultvalue;
for(int r=0; r<rlen; r++)
- for(int c=0; c<clen; c++)
- {
- thisvalue=m1.quickGetValue(r, c);
- thatvalue=m2.quickGetValue(r, c);
- if(thisvalue==0 && thatvalue==0)
- continue;
- resultvalue=op.fn.execute(thisvalue, thatvalue);
- ret.appendValue(r, c, resultvalue);
+ for(int c=0; c<clen; c++) {
+ double in1 = m1.quickGetValue(r, c);
+ double in2 = m2.quickGetValue(r, c);
+ if( in1==0 && in2==0) continue;
+ double val = op.fn.execute(in1, in2);
+ ret.appendValue(r, c, val);
}
}
}
@@ -995,23 +1012,9 @@ public class LibMatrixBincell
}
ret.nonZeros = nnz;
}
- else //DENSE <- DENSE
- {
- //allocate dense block
- ret.allocateDenseBlock(true);
-
- double[] a = m1.denseBlock;
- double[] c = ret.denseBlock;
-
- int limit = m1.rlen*m1.clen;
- int nnz = 0;
- for( int i=0; i<limit; i++ ) {
- c[i] = op.executeScalar( a[i] );
- nnz += (c[i] != 0) ? 1 : 0;
- }
- ret.nonZeros = nnz;
+ else { //DENSE <- DENSE
+ denseBinaryScalar(m1, ret, op);
}
-
}
/**
@@ -1068,27 +1071,39 @@ public class LibMatrixBincell
}
ret.nonZeros = nnz;
}
- else //DENSE MATRIX
- {
- //allocate dense block (if necessary), incl clear nnz
- ret.allocateDenseBlock(true);
-
- double[] a = m1.denseBlock;
- double[] c = ret.denseBlock;
-
- //compute scalar operation, incl nnz maintenance
- int limit = m1.rlen*m1.clen;
- int nnz = 0;
- for( int i=0; i<limit; i++ ) {
- c[i] = op.executeScalar( a[i] );
- nnz += (c[i] != 0) ? 1 : 0;
- }
- ret.nonZeros = nnz;
+ else { //DENSE MATRIX
+ denseBinaryScalar(m1, ret, op);
}
}
/**
*
+ * @param m1
+ * @param ret
+ * @param op
+ * @throws DMLRuntimeException
+ */
+ private static void denseBinaryScalar(MatrixBlock m1, MatrixBlock ret, ScalarOperator op)
+ throws DMLRuntimeException
+ {
+ //allocate dense block (if necessary), incl clear nnz
+ ret.allocateDenseBlock(true);
+
+ double[] a = m1.denseBlock;
+ double[] c = ret.denseBlock;
+
+ //compute scalar operation, incl nnz maintenance
+ int limit = m1.rlen*m1.clen;
+ int nnz = 0;
+ for( int i=0; i<limit; i++ ) {
+ c[i] = op.executeScalar( a[i] );
+ nnz += (c[i] != 0) ? 1 : 0;
+ }
+ ret.nonZeros = nnz;
+ }
+
+ /**
+ *
* @param m1ret
* @param m2
* @param op
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2137a7e4/src/main/java/org/apache/sysml/runtime/matrix/operators/RightScalarOperator.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/operators/RightScalarOperator.java b/src/main/java/org/apache/sysml/runtime/matrix/operators/RightScalarOperator.java
index 3a4c481..cea41f1 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/operators/RightScalarOperator.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/operators/RightScalarOperator.java
@@ -21,6 +21,7 @@
package org.apache.sysml.runtime.matrix.operators;
import org.apache.sysml.runtime.DMLRuntimeException;
+import org.apache.sysml.runtime.functionobjects.Divide;
import org.apache.sysml.runtime.functionobjects.GreaterThan;
import org.apache.sysml.runtime.functionobjects.GreaterThanEquals;
import org.apache.sysml.runtime.functionobjects.LessThan;
@@ -49,12 +50,10 @@ public class RightScalarOperator extends ScalarOperator
super.setConstant(cst);
//enable conditionally sparse safe operations
- if( (fn instanceof GreaterThan && _constant>=0)
+ sparseSafe = (fn instanceof GreaterThan && _constant>=0)
|| (fn instanceof GreaterThanEquals && _constant>0)
|| (fn instanceof LessThan && _constant<=0)
- || (fn instanceof LessThanEquals && _constant<0))
- {
- sparseSafe = true;
- }
+ || (fn instanceof LessThanEquals && _constant<0)
+ || (fn instanceof Divide && _constant!=0);
}
}