You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2016/07/27 17:37:27 UTC

[1/2] incubator-systemml git commit: [SYSTEMML-400] Multi-threaded sparse-sparse transpose operations

Repository: incubator-systemml
Updated Branches:
  refs/heads/master fceb6620e -> 97b136601


[SYSTEMML-400] Multi-threaded sparse-sparse transpose operations

So far we only supported multi-threaded dense-dense and sparse-dense
transpose operations. This patch adds multi-threaded support for
sparse-sparse transpose operations as well. The performance improvements
are substantial due to latency hiding and parallel allocation in the case
of MCSR. For example, on ImageNet (1262102x900): 19s -> 3.2s, and on a
random 1Mx1K matrix with sp=0.1: 5s -> 0.6s.

Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/ca4fb975
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/ca4fb975
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/ca4fb975

Branch: refs/heads/master
Commit: ca4fb97557b329b4c525bc20709b2216f1de421a
Parents: fceb662
Author: Matthias Boehm <mb...@us.ibm.com>
Authored: Tue Jul 26 16:08:03 2016 -0700
Committer: Matthias Boehm <mb...@us.ibm.com>
Committed: Tue Jul 26 16:08:03 2016 -0700

----------------------------------------------------------------------
 .../runtime/matrix/data/LibMatrixReorg.java     | 68 +++++++++++---------
 1 file changed, 39 insertions(+), 29 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ca4fb975/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java
index e472413..59663b5 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java
@@ -162,7 +162,7 @@ public class LibMatrixReorg
 		if( !in.sparse && !out.sparse )
 			transposeDenseToDense( in, out, 0, in.rlen, 0, in.clen );
 		else if( in.sparse && out.sparse )
-			transposeSparseToSparse( in, out );
+			transposeSparseToSparse( in, out, 0, in.rlen, 0, in.clen );
 		else if( in.sparse )
 			transposeSparseToDense( in, out, 0, in.rlen, 0, in.clen );
 		else
@@ -187,7 +187,8 @@ public class LibMatrixReorg
 		//redirect small or special cases to sequential execution
 		if( in.isEmptyBlock(false) || (in.rlen * in.clen < PAR_NUMCELL_THRESHOLD)
 			|| (SHALLOW_DENSE_VECTOR_TRANSPOSE && !in.sparse && !out.sparse && (in.rlen==1 || in.clen==1) )
-			|| (in.sparse && !out.sparse && in.rlen==1) || out.sparse || !out.isThreadSafe())
+			|| (in.sparse && !out.sparse && in.rlen==1) || (!in.sparse && out.sparse && in.rlen==1) 
+			|| (!in.sparse && out.sparse) || !out.isThreadSafe())
 		{
 			return transpose(in, out);
 		}
@@ -205,11 +206,11 @@ public class LibMatrixReorg
 		try {
 			ExecutorService pool = Executors.newFixedThreadPool( k );
 			ArrayList<TransposeTask> tasks = new ArrayList<TransposeTask>();
-			boolean row = in.sparse || in.rlen >= in.clen;
+			boolean row = (in.sparse || in.rlen >= in.clen) && !out.sparse;
 			int len = row ? in.rlen : in.clen;
 			int blklen = (int)(Math.ceil((double)len/k));
 			blklen += (blklen%8 != 0)?8-blklen%8:0;
-			for( int i=0; i<k & i*blklen<in.rlen; i++ )
+			for( int i=0; i<k & i*blklen<len; i++ )
 				tasks.add(new TransposeTask(in, out, row, i*blklen, Math.min((i+1)*blklen, len)));
 			//execute tasks and check for errors
 			List<Future<Object>> taskret = pool.invokeAll(tasks);	
@@ -892,9 +893,11 @@ public class LibMatrixReorg
 	 * @param in
 	 * @param out
 	 */
-	private static void transposeSparseToSparse(MatrixBlock in, MatrixBlock out)
+	private static void transposeSparseToSparse(MatrixBlock in, MatrixBlock out, int rl, int ru, int cl, int cu)
 	{
-		//NOTE: called only in sequential execution
+		//NOTE: called only in sequential or column-wise parallel execution
+		if( rl > 0 || ru < in.rlen )
+			throw new RuntimeException("Unsupported row-parallel transposeSparseToSparse: "+rl+", "+ru);
 		
 		final int m = in.rlen;
 		final int n = in.clen;
@@ -918,42 +921,49 @@ public class LibMatrixReorg
 		
 		//allocate output sparse rows
 		if( cnt != null ) {
-			for( int i=0; i<m2; i++ )
+			for( int i=cl; i<cu; i++ )
 				if( cnt[i] > 0 )
 					c.allocate(i, cnt[i]);
 		}
 		
 		//blocking according to typical L2 cache sizes 
 		final int blocksizeI = 128;
-		final int blocksizeJ = 128; 
+		final int blocksizeJ = 128;
 	
 		//temporary array for block boundaries (for preventing binary search) 
 		int[] ix = new int[blocksizeI];
 		
 		//blocked execution
-		for( int bi = 0; bi<m; bi+=blocksizeI )
+		for( int bi=rl; bi<ru; bi+=blocksizeI )
 		{
-			Arrays.fill(ix, 0);
-			for( int bj = 0; bj<n; bj+=blocksizeJ )
-			{
-				int bimin = Math.min(bi+blocksizeI, m);
-				int bjmin = Math.min(bj+blocksizeJ, n);
-
-				//core transpose operation
-				for( int i=bi, iix=0; i<bimin; i++, iix++ )
-				{
+			Arrays.fill(ix, 0);			
+			//find column starting positions
+			int bimin = Math.min(bi+blocksizeI, ru);
+			if( cl > 0 ) {
+				for( int i=bi; i<bimin; i++ )
 					if( !a.isEmpty(i) ) {
-						int apos = a.pos(i);
-						int alen = a.size(i);
-						int[] aix = a.indexes(i);
-						double[] avals = a.values(i);
-						int j = ix[iix]; //last block boundary
-						for( ; j<alen && aix[j]<bjmin; j++ ) {
-							c.allocate(aix[apos+j], ennz2,n2);
-							c.append(aix[apos+j], i, avals[apos+j]);
-						}
-						ix[iix] = j; //keep block boundary
+						int pos = a.posFIndexGTE(i, cl);
+						ix[i-bi] = (pos>=0) ? pos : a.size(i);
+					}
+			}
+			
+			for( int bj=cl; bj<cu; bj+=blocksizeJ ) {
+				int bjmin = Math.min(bj+blocksizeJ, cu);
+
+				//core block transpose operation
+				for( int i=bi, iix=0; i<bimin; i++, iix++ ) {
+					if( a.isEmpty(i) ) continue;
+					
+					int apos = a.pos(i);
+					int alen = a.size(i);
+					int[] aix = a.indexes(i);
+					double[] avals = a.values(i);
+					int j = ix[iix]; //last block boundary
+					for( ; j<alen && aix[j]<bjmin; j++ ) {
+						c.allocate(aix[apos+j], ennz2,n2);
+						c.append(aix[apos+j], i, avals[apos+j]);
 					}
+					ix[iix] = j; //keep block boundary
 				}
 			}
 		}
@@ -2362,7 +2372,7 @@ public class LibMatrixReorg
 			if( !_in.sparse && !_out.sparse )
 				transposeDenseToDense( _in, _out, rl, ru, cl, cu );
 			else if( _in.sparse && _out.sparse )
-				throw new DMLRuntimeException("Unsupported multi-threaded sparse-sparse transpose.");
+				transposeSparseToSparse( _in, _out, rl, ru, cl, cu );
 			else if( _in.sparse )
 				transposeSparseToDense( _in, _out, rl, ru, cl, cu );
 			else


[2/2] incubator-systemml git commit: [SYSTEMML-766] Fix axpy runtime operators (missing double-int support)

Posted by mb...@apache.org.
[SYSTEMML-766] Fix axpy runtime operators (missing double-int support)

Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/97b13660
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/97b13660
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/97b13660

Branch: refs/heads/master
Commit: 97b136601d1fdd4d19cfccdf53b66131b2b03513
Parents: ca4fb97
Author: Matthias Boehm <mb...@us.ibm.com>
Authored: Tue Jul 26 20:41:26 2016 -0700
Committer: Matthias Boehm <mb...@us.ibm.com>
Committed: Tue Jul 26 20:41:26 2016 -0700

----------------------------------------------------------------------
 .../runtime/functionobjects/MinusMultiply.java     | 17 ++++++++++++++++-
 .../runtime/functionobjects/PlusMultiply.java      | 17 ++++++++++++++++-
 2 files changed, 32 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/97b13660/src/main/java/org/apache/sysml/runtime/functionobjects/MinusMultiply.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/functionobjects/MinusMultiply.java b/src/main/java/org/apache/sysml/runtime/functionobjects/MinusMultiply.java
index 2036cf6..1d90b44 100644
--- a/src/main/java/org/apache/sysml/runtime/functionobjects/MinusMultiply.java
+++ b/src/main/java/org/apache/sysml/runtime/functionobjects/MinusMultiply.java
@@ -42,6 +42,21 @@ public class MinusMultiply extends ValueFunctionWithConstant implements Serializ
 	
 	@Override
 	public double execute(double in1, double in2) {
-		return in1 - _constant*in2;	
+		return in1 - _constant * in2;	
+	}
+	
+	@Override
+	public double execute(double in1, long in2) {
+		return in1 - _constant * in2;	
+	}
+	
+	@Override
+	public double execute(long in1, double in2) {
+		return in1 - _constant * in2;	
+	}
+	
+	@Override
+	public double execute(long in1, long in2) {
+		return in1 - _constant * in2;	
 	}
 }

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/97b13660/src/main/java/org/apache/sysml/runtime/functionobjects/PlusMultiply.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/functionobjects/PlusMultiply.java b/src/main/java/org/apache/sysml/runtime/functionobjects/PlusMultiply.java
index 2a1eea0..9d1a746 100644
--- a/src/main/java/org/apache/sysml/runtime/functionobjects/PlusMultiply.java
+++ b/src/main/java/org/apache/sysml/runtime/functionobjects/PlusMultiply.java
@@ -42,6 +42,21 @@ public class PlusMultiply extends ValueFunctionWithConstant implements Serializa
 	
 	@Override
 	public double execute(double in1, double in2) {
-		return in1 + _constant*in2;	
+		return in1 + _constant * in2;	
+	}
+	
+	@Override
+	public double execute(double in1, long in2) {
+		return in1 + _constant * in2;	
+	}
+	
+	@Override
+	public double execute(long in1, double in2) {
+		return in1 + _constant * in2;	
+	}
+	
+	@Override
+	public double execute(long in1, long in2) {
+		return in1 + _constant * in2;	
 	}
 }