You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2016/07/27 17:37:27 UTC
[1/2] incubator-systemml git commit: [SYSTEMML-400] Multi-threaded
sparse-sparse transpose operations
Repository: incubator-systemml
Updated Branches:
refs/heads/master fceb6620e -> 97b136601
[SYSTEMML-400] Multi-threaded sparse-sparse transpose operations
So far we only supported multi-threaded dense-dense and sparse-dense
transpose operations. This patch adds multi-threaded support for
sparse-sparse transpose operations too. The performance improvements are
substantial due to latency hiding and parallel allocation in case of
MCSR (modified CSR). For example, on ImageNet (1262102x900): 19s -> 3.2s,
and on a random 1Mx1K matrix with sparsity 0.1: 5s -> 0.6s.
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/ca4fb975
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/ca4fb975
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/ca4fb975
Branch: refs/heads/master
Commit: ca4fb97557b329b4c525bc20709b2216f1de421a
Parents: fceb662
Author: Matthias Boehm <mb...@us.ibm.com>
Authored: Tue Jul 26 16:08:03 2016 -0700
Committer: Matthias Boehm <mb...@us.ibm.com>
Committed: Tue Jul 26 16:08:03 2016 -0700
----------------------------------------------------------------------
.../runtime/matrix/data/LibMatrixReorg.java | 68 +++++++++++---------
1 file changed, 39 insertions(+), 29 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ca4fb975/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java
index e472413..59663b5 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java
@@ -162,7 +162,7 @@ public class LibMatrixReorg
if( !in.sparse && !out.sparse )
transposeDenseToDense( in, out, 0, in.rlen, 0, in.clen );
else if( in.sparse && out.sparse )
- transposeSparseToSparse( in, out );
+ transposeSparseToSparse( in, out, 0, in.rlen, 0, in.clen );
else if( in.sparse )
transposeSparseToDense( in, out, 0, in.rlen, 0, in.clen );
else
@@ -187,7 +187,8 @@ public class LibMatrixReorg
//redirect small or special cases to sequential execution
if( in.isEmptyBlock(false) || (in.rlen * in.clen < PAR_NUMCELL_THRESHOLD)
|| (SHALLOW_DENSE_VECTOR_TRANSPOSE && !in.sparse && !out.sparse && (in.rlen==1 || in.clen==1) )
- || (in.sparse && !out.sparse && in.rlen==1) || out.sparse || !out.isThreadSafe())
+ || (in.sparse && !out.sparse && in.rlen==1) || (!in.sparse && out.sparse && in.rlen==1)
+ || (!in.sparse && out.sparse) || !out.isThreadSafe())
{
return transpose(in, out);
}
@@ -205,11 +206,11 @@ public class LibMatrixReorg
try {
ExecutorService pool = Executors.newFixedThreadPool( k );
ArrayList<TransposeTask> tasks = new ArrayList<TransposeTask>();
- boolean row = in.sparse || in.rlen >= in.clen;
+ boolean row = (in.sparse || in.rlen >= in.clen) && !out.sparse;
int len = row ? in.rlen : in.clen;
int blklen = (int)(Math.ceil((double)len/k));
blklen += (blklen%8 != 0)?8-blklen%8:0;
- for( int i=0; i<k & i*blklen<in.rlen; i++ )
+ for( int i=0; i<k & i*blklen<len; i++ )
tasks.add(new TransposeTask(in, out, row, i*blklen, Math.min((i+1)*blklen, len)));
//execute tasks and check for errors
List<Future<Object>> taskret = pool.invokeAll(tasks);
@@ -892,9 +893,11 @@ public class LibMatrixReorg
* @param in
* @param out
*/
- private static void transposeSparseToSparse(MatrixBlock in, MatrixBlock out)
+ private static void transposeSparseToSparse(MatrixBlock in, MatrixBlock out, int rl, int ru, int cl, int cu)
{
- //NOTE: called only in sequential execution
+ //NOTE: called only in sequential or column-wise parallel execution
+ if( rl > 0 || ru < in.rlen )
+ throw new RuntimeException("Unsupported row-parallel transposeSparseToSparse: "+rl+", "+ru);
final int m = in.rlen;
final int n = in.clen;
@@ -918,42 +921,49 @@ public class LibMatrixReorg
//allocate output sparse rows
if( cnt != null ) {
- for( int i=0; i<m2; i++ )
+ for( int i=cl; i<cu; i++ )
if( cnt[i] > 0 )
c.allocate(i, cnt[i]);
}
//blocking according to typical L2 cache sizes
final int blocksizeI = 128;
- final int blocksizeJ = 128;
+ final int blocksizeJ = 128;
//temporary array for block boundaries (for preventing binary search)
int[] ix = new int[blocksizeI];
//blocked execution
- for( int bi = 0; bi<m; bi+=blocksizeI )
+ for( int bi=rl; bi<ru; bi+=blocksizeI )
{
- Arrays.fill(ix, 0);
- for( int bj = 0; bj<n; bj+=blocksizeJ )
- {
- int bimin = Math.min(bi+blocksizeI, m);
- int bjmin = Math.min(bj+blocksizeJ, n);
-
- //core transpose operation
- for( int i=bi, iix=0; i<bimin; i++, iix++ )
- {
+ Arrays.fill(ix, 0);
+ //find column starting positions
+ int bimin = Math.min(bi+blocksizeI, ru);
+ if( cl > 0 ) {
+ for( int i=bi; i<bimin; i++ )
if( !a.isEmpty(i) ) {
- int apos = a.pos(i);
- int alen = a.size(i);
- int[] aix = a.indexes(i);
- double[] avals = a.values(i);
- int j = ix[iix]; //last block boundary
- for( ; j<alen && aix[j]<bjmin; j++ ) {
- c.allocate(aix[apos+j], ennz2,n2);
- c.append(aix[apos+j], i, avals[apos+j]);
- }
- ix[iix] = j; //keep block boundary
+ int pos = a.posFIndexGTE(i, cl);
+ ix[i-bi] = (pos>=0) ? pos : a.size(i);
+ }
+ }
+
+ for( int bj=cl; bj<cu; bj+=blocksizeJ ) {
+ int bjmin = Math.min(bj+blocksizeJ, cu);
+
+ //core block transpose operation
+ for( int i=bi, iix=0; i<bimin; i++, iix++ ) {
+ if( a.isEmpty(i) ) continue;
+
+ int apos = a.pos(i);
+ int alen = a.size(i);
+ int[] aix = a.indexes(i);
+ double[] avals = a.values(i);
+ int j = ix[iix]; //last block boundary
+ for( ; j<alen && aix[j]<bjmin; j++ ) {
+ c.allocate(aix[apos+j], ennz2,n2);
+ c.append(aix[apos+j], i, avals[apos+j]);
}
+ ix[iix] = j; //keep block boundary
}
}
}
@@ -2362,7 +2372,7 @@ public class LibMatrixReorg
if( !_in.sparse && !_out.sparse )
transposeDenseToDense( _in, _out, rl, ru, cl, cu );
else if( _in.sparse && _out.sparse )
- throw new DMLRuntimeException("Unsupported multi-threaded sparse-sparse transpose.");
+ transposeSparseToSparse( _in, _out, rl, ru, cl, cu );
else if( _in.sparse )
transposeSparseToDense( _in, _out, rl, ru, cl, cu );
else
[2/2] incubator-systemml git commit: [SYSTEMML-766] Fix axpy runtime
operators (missing double-int support)
Posted by mb...@apache.org.
[SYSTEMML-766] Fix axpy runtime operators (missing double-int support)
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/97b13660
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/97b13660
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/97b13660
Branch: refs/heads/master
Commit: 97b136601d1fdd4d19cfccdf53b66131b2b03513
Parents: ca4fb97
Author: Matthias Boehm <mb...@us.ibm.com>
Authored: Tue Jul 26 20:41:26 2016 -0700
Committer: Matthias Boehm <mb...@us.ibm.com>
Committed: Tue Jul 26 20:41:26 2016 -0700
----------------------------------------------------------------------
.../runtime/functionobjects/MinusMultiply.java | 17 ++++++++++++++++-
.../runtime/functionobjects/PlusMultiply.java | 17 ++++++++++++++++-
2 files changed, 32 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/97b13660/src/main/java/org/apache/sysml/runtime/functionobjects/MinusMultiply.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/functionobjects/MinusMultiply.java b/src/main/java/org/apache/sysml/runtime/functionobjects/MinusMultiply.java
index 2036cf6..1d90b44 100644
--- a/src/main/java/org/apache/sysml/runtime/functionobjects/MinusMultiply.java
+++ b/src/main/java/org/apache/sysml/runtime/functionobjects/MinusMultiply.java
@@ -42,6 +42,21 @@ public class MinusMultiply extends ValueFunctionWithConstant implements Serializ
@Override
public double execute(double in1, double in2) {
- return in1 - _constant*in2;
+ return in1 - _constant * in2;
+ }
+
+ @Override
+ public double execute(double in1, long in2) {
+ return in1 - _constant * in2;
+ }
+
+ @Override
+ public double execute(long in1, double in2) {
+ return in1 - _constant * in2;
+ }
+
+ @Override
+ public double execute(long in1, long in2) {
+ return in1 - _constant * in2;
}
}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/97b13660/src/main/java/org/apache/sysml/runtime/functionobjects/PlusMultiply.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/functionobjects/PlusMultiply.java b/src/main/java/org/apache/sysml/runtime/functionobjects/PlusMultiply.java
index 2a1eea0..9d1a746 100644
--- a/src/main/java/org/apache/sysml/runtime/functionobjects/PlusMultiply.java
+++ b/src/main/java/org/apache/sysml/runtime/functionobjects/PlusMultiply.java
@@ -42,6 +42,21 @@ public class PlusMultiply extends ValueFunctionWithConstant implements Serializa
@Override
public double execute(double in1, double in2) {
- return in1 + _constant*in2;
+ return in1 + _constant * in2;
+ }
+
+ @Override
+ public double execute(double in1, long in2) {
+ return in1 + _constant * in2;
+ }
+
+ @Override
+ public double execute(long in1, double in2) {
+ return in1 + _constant * in2;
+ }
+
+ @Override
+ public double execute(long in1, long in2) {
+ return in1 + _constant * in2;
}
}