You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2016/08/11 19:00:21 UTC

[2/2] incubator-systemml git commit: [SYSTEMML-824] Performance scalar/agg/binary ops (sparse, par, overlap)

[SYSTEMML-824] Performance scalar/agg/binary ops (sparse, par, overlap)

This patch makes various performance improvements some of which change
the asymptotic behavior with huge improvements on large data.

(1) Scalar operations: Right scalar divide with non-zero scalar (e.g.,
X/7) are now marked as sparse-safe.

(2) Unary aggregate operations: The min size threshold for parallel
operations is now checked on the number of non-zeros instead of number
of cells in order to avoid unnecessary thread pool creation overhead on
small data. 

(3) Ternary aggregate operations: The rewrites of sum(v1*v2*v3) to
tak+(v1,v2,v3) is now only applied if sum is the only consumer of the
intermediate in order to avoid unnecessary redundancy.

(4) Binary operations: There is now an additional special case for "skip
empty" operations like * and / (with dense rhs) that were handled so far
with the generic case (e.g., * sparse-dense with sparse output) leading
to an asymptotic improvement from O(n log n) to O(n).

Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/2137a7e4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/2137a7e4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/2137a7e4

Branch: refs/heads/master
Commit: 2137a7e4a0af7a48326294fa6bb56084ade9d2b5
Parents: b1dc0d5
Author: Matthias Boehm <mb...@us.ibm.com>
Authored: Wed Aug 10 23:04:28 2016 -0700
Committer: Matthias Boehm <mb...@us.ibm.com>
Committed: Thu Aug 11 11:52:13 2016 -0700

----------------------------------------------------------------------
 .../java/org/apache/sysml/hops/AggUnaryOp.java  |  16 ++-
 .../sysml/runtime/matrix/data/LibMatrixAgg.java |   2 +-
 .../runtime/matrix/data/LibMatrixBincell.java   | 101 +++++++++++--------
 .../matrix/operators/RightScalarOperator.java   |   9 +-
 4 files changed, 70 insertions(+), 58 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2137a7e4/src/main/java/org/apache/sysml/hops/AggUnaryOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/AggUnaryOp.java b/src/main/java/org/apache/sysml/hops/AggUnaryOp.java
index 0152d54..8b44e4b 100644
--- a/src/main/java/org/apache/sysml/hops/AggUnaryOp.java
+++ b/src/main/java/org/apache/sysml/hops/AggUnaryOp.java
@@ -495,31 +495,29 @@ public class AggUnaryOp extends Hop implements MultiThreadedHop
 		
 		//currently we support only sum over binary multiply but potentially 
 		//it can be generalized to any RC aggregate over two common binary operations
-		if( _direction == Direction.RowCol 
-			&& _op == AggOp.SUM ) 
+		if( OptimizerUtils.ALLOW_SUM_PRODUCT_REWRITES &&
+			_direction == Direction.RowCol && _op == AggOp.SUM ) 
 		{
 			Hop input1 = getInput().get(0);
-			if( input1 instanceof BinaryOp && ((BinaryOp)input1).getOp()==OpOp2.MULT )
+			if( input1.getParent().size() == 1 && //sum single consumer
+				input1 instanceof BinaryOp && ((BinaryOp)input1).getOp()==OpOp2.MULT )
 			{
 				Hop input11 = input1.getInput().get(0);
 				Hop input12 = input1.getInput().get(1);
 				
-				if( input11 instanceof BinaryOp && ((BinaryOp)input11).getOp()==OpOp2.MULT )
-				{
+				if( input11 instanceof BinaryOp && ((BinaryOp)input11).getOp()==OpOp2.MULT ) {
 					//ternary, arbitrary matrices but no mv/outer operations.
 					ret = HopRewriteUtils.isEqualSize(input11.getInput().get(0), input1)
 						&& HopRewriteUtils.isEqualSize(input11.getInput().get(1), input1)	
 						&& HopRewriteUtils.isEqualSize(input12, input1);
 				}
-				else if( input12 instanceof BinaryOp && ((BinaryOp)input12).getOp()==OpOp2.MULT )
-				{
+				else if( input12 instanceof BinaryOp && ((BinaryOp)input12).getOp()==OpOp2.MULT ) {
 					//ternary, arbitrary matrices but no mv/outer operations.
 					ret = HopRewriteUtils.isEqualSize(input12.getInput().get(0), input1)
 							&& HopRewriteUtils.isEqualSize(input12.getInput().get(1), input1)	
 							&& HopRewriteUtils.isEqualSize(input11, input1);
 				}
-				else
-				{
+				else {
 					//binary, arbitrary matrices but no mv/outer operations.
 					ret = HopRewriteUtils.isEqualSize(input11, input12);
 				}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2137a7e4/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java
index cf689b8..3357dee 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java
@@ -229,7 +229,7 @@ public class LibMatrixAgg
 		throws DMLRuntimeException
 	{
 		//fall back to sequential version if necessary
-		if(    k <= 1 || (long)in.rlen*in.clen < PAR_NUMCELL_THRESHOLD || in.rlen <= k
+		if(    k <= 1 || (long)in.nonZeros < PAR_NUMCELL_THRESHOLD || in.rlen <= k/2
 			|| (!(uaop.indexFn instanceof ReduceCol) &&  out.clen*8*k > PAR_INTERMEDIATE_SIZE_THRESHOLD ) || 
 			!out.isThreadSafe()) {
 			aggregateUnaryMatrix(in, out, uaop);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2137a7e4/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixBincell.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixBincell.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixBincell.java
index f9269e8..8e7d330 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixBincell.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixBincell.java
@@ -228,8 +228,8 @@ public class LibMatrixBincell
 	private static void safeBinary(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, BinaryOperator op) 
 		throws DMLRuntimeException 
 	{
-		boolean isMultiply = (op.fn instanceof Multiply);
-		boolean skipEmpty = (isMultiply);
+		boolean skipEmpty = (op.fn instanceof Multiply 
+				|| isSparseSafeDivide(op, m2) );
 		
 		//skip empty blocks (since sparse-safe)
 		if(    m1.isEmptyBlock(false) && m2.isEmptyBlock(false) 
@@ -425,18 +425,35 @@ public class LibMatrixBincell
 				}
 				ret.nonZeros = nnz;
 			}
+			else if( skipEmpty && (m1.sparse || m2.sparse) ) 
+			{
+				SparseBlock a = m1.sparse ? m1.sparseBlock : m2.sparseBlock;
+				if( a != null ) {
+					MatrixBlock b = m1.sparse ? m2 : m1;
+					for( int i=0; i<a.numRows(); i++ ) {
+						if( a.isEmpty(i) ) continue;
+						int apos = a.pos(i);
+						int alen = a.size(i);
+						int[] aix = a.indexes(i);
+						double[] avals = a.values(i);
+						for(int k = apos; k < apos+alen; k++) {
+							double in2 = b.quickGetValue(i, aix[k]);
+							if( in2==0 ) continue;
+							double val = op.fn.execute(avals[k], in2);
+							ret.appendValue(i, aix[k], val);
+						}
+					}
+				}
+			}
 			else //generic case
 			{
-				double thisvalue, thatvalue, resultvalue;
 				for(int r=0; r<rlen; r++)
-					for(int c=0; c<clen; c++)
-					{
-						thisvalue=m1.quickGetValue(r, c);
-						thatvalue=m2.quickGetValue(r, c);
-						if(thisvalue==0 && thatvalue==0)
-							continue;
-						resultvalue=op.fn.execute(thisvalue, thatvalue);
-						ret.appendValue(r, c, resultvalue);
+					for(int c=0; c<clen; c++) {
+						double in1 = m1.quickGetValue(r, c);
+						double in2 = m2.quickGetValue(r, c);
+						if( in1==0 && in2==0) continue;
+						double val = op.fn.execute(in1, in2);
+						ret.appendValue(r, c, val);
 					}
 			}
 		}
@@ -995,23 +1012,9 @@ public class LibMatrixBincell
 			}
 			ret.nonZeros = nnz;
 		}
-		else //DENSE <- DENSE
-		{
-			//allocate dense block
-			ret.allocateDenseBlock(true);
-		
-			double[] a = m1.denseBlock;
-			double[] c = ret.denseBlock;
-			
-			int limit = m1.rlen*m1.clen;
-			int nnz = 0;
-			for( int i=0; i<limit; i++ ) {
-				c[i] = op.executeScalar( a[i] );
-				nnz += (c[i] != 0) ? 1 : 0;
-			}
-			ret.nonZeros = nnz;
+		else { //DENSE <- DENSE
+			denseBinaryScalar(m1, ret, op);
 		}
-		
 	}
 	
 	/**
@@ -1068,27 +1071,39 @@ public class LibMatrixBincell
 			}
 			ret.nonZeros = nnz;
 		}
-		else //DENSE MATRIX
-		{
-			//allocate dense block (if necessary), incl clear nnz
-			ret.allocateDenseBlock(true);
-			
-			double[] a = m1.denseBlock;
-			double[] c = ret.denseBlock;
-			
-			//compute scalar operation, incl nnz maintenance
-			int limit = m1.rlen*m1.clen;
-			int nnz = 0;
-			for( int i=0; i<limit; i++ ) {
-				c[i] = op.executeScalar( a[i] );
-				nnz += (c[i] != 0) ? 1 : 0;
-			}
-			ret.nonZeros = nnz;
+		else { //DENSE MATRIX
+			denseBinaryScalar(m1, ret, op);
 		}
 	}
 
 	/**
 	 * 
+	 * @param m1
+	 * @param ret
+	 * @param op
+	 * @throws DMLRuntimeException 
+	 */
+	private static void denseBinaryScalar(MatrixBlock m1, MatrixBlock ret, ScalarOperator op) 
+		throws DMLRuntimeException 
+	{
+		//allocate dense block (if necessary), incl clear nnz
+		ret.allocateDenseBlock(true);
+		
+		double[] a = m1.denseBlock;
+		double[] c = ret.denseBlock;
+		
+		//compute scalar operation, incl nnz maintenance
+		int limit = m1.rlen*m1.clen;
+		int nnz = 0;
+		for( int i=0; i<limit; i++ ) {
+			c[i] = op.executeScalar( a[i] );
+			nnz += (c[i] != 0) ? 1 : 0;
+		}
+		ret.nonZeros = nnz;
+	}
+	
+	/**
+	 * 
 	 * @param m1ret
 	 * @param m2
 	 * @param op

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2137a7e4/src/main/java/org/apache/sysml/runtime/matrix/operators/RightScalarOperator.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/operators/RightScalarOperator.java b/src/main/java/org/apache/sysml/runtime/matrix/operators/RightScalarOperator.java
index 3a4c481..cea41f1 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/operators/RightScalarOperator.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/operators/RightScalarOperator.java
@@ -21,6 +21,7 @@
 package org.apache.sysml.runtime.matrix.operators;
 
 import org.apache.sysml.runtime.DMLRuntimeException;
+import org.apache.sysml.runtime.functionobjects.Divide;
 import org.apache.sysml.runtime.functionobjects.GreaterThan;
 import org.apache.sysml.runtime.functionobjects.GreaterThanEquals;
 import org.apache.sysml.runtime.functionobjects.LessThan;
@@ -49,12 +50,10 @@ public class RightScalarOperator extends ScalarOperator
 		super.setConstant(cst);
 		
 		//enable conditionally sparse safe operations
-		if(    (fn instanceof GreaterThan && _constant>=0)
+		sparseSafe = (fn instanceof GreaterThan && _constant>=0)
 			|| (fn instanceof GreaterThanEquals && _constant>0)
 			|| (fn instanceof LessThan && _constant<=0)
-			|| (fn instanceof LessThanEquals && _constant<0))
-		{
-			sparseSafe = true;
-		}
+			|| (fn instanceof LessThanEquals && _constant<0)
+			|| (fn instanceof Divide && _constant!=0);
 	}
 }