You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2017/10/29 23:06:10 UTC
systemml git commit: [MINOR] Fix consistency task partitioning in mm, mmchain, codegen row
Repository: systemml
Updated Branches:
refs/heads/master 06d5bb073 -> d75a669a4
[MINOR] Fix consistency task partitioning in mm, mmchain, codegen row
Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/d75a669a
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/d75a669a
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/d75a669a
Branch: refs/heads/master
Commit: d75a669a46381a0a5b54109e7b207613e17ab54e
Parents: 06d5bb0
Author: Matthias Boehm <mb...@gmail.com>
Authored: Sun Oct 29 16:06:55 2017 -0700
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Sun Oct 29 16:07:05 2017 -0700
----------------------------------------------------------------------
.../sysml/runtime/codegen/SpoofRowwise.java | 19 +++----
.../runtime/matrix/data/LibMatrixMult.java | 56 ++++++++++++--------
2 files changed, 42 insertions(+), 33 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/systemml/blob/d75a669a/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java b/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java
index 9d5675b..b0afd88 100644
--- a/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java
+++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java
@@ -39,7 +39,6 @@ import org.apache.sysml.runtime.matrix.data.MatrixBlock;
import org.apache.sysml.runtime.matrix.data.SparseBlock;
import org.apache.sysml.runtime.matrix.data.SparseRow;
import org.apache.sysml.runtime.matrix.data.SparseRowVector;
-import org.apache.sysml.runtime.util.UtilFunctions;
public abstract class SpoofRowwise extends SpoofOperator
@@ -198,11 +197,9 @@ public abstract class SpoofRowwise extends SpoofOperator
//core parallel execute
ExecutorService pool = Executors.newFixedThreadPool( k );
- int nk = (a instanceof CompressedMatrixBlock) ? k :
- UtilFunctions.roundToNext(Math.min(8*k,m/32), k);
- int blklen = (int)(Math.ceil((double)m/nk));
- if( a instanceof CompressedMatrixBlock )
- blklen = BitmapEncoder.getAlignedBlocksize(blklen);
+ ArrayList<Integer> blklens = (a instanceof CompressedMatrixBlock) ?
+ LibMatrixMult.getAlignedBlockSizes(m, k, BitmapEncoder.BITMAP_BLOCK_SZ) :
+ LibMatrixMult.getBalancedBlockSizesDefault(m, k, false);
try
{
@@ -210,9 +207,9 @@ public abstract class SpoofRowwise extends SpoofOperator
//execute tasks
ArrayList<ParColAggTask> tasks = new ArrayList<>();
int outLen = out.getNumRows() * out.getNumColumns();
- for( int i=0; i<nk & i*blklen<m; i++ )
- tasks.add(new ParColAggTask(a, b, scalars, n, n2, outLen, i*blklen, Math.min((i+1)*blklen, m)));
- List<Future<double[]>> taskret = pool.invokeAll(tasks);
+ for( int i=0, lb=0; i<blklens.size(); lb+=blklens.get(i), i++ )
+ tasks.add(new ParColAggTask(a, b, scalars, n, n2, outLen, lb, lb+blklens.get(i)));
+ List<Future<double[]>> taskret = pool.invokeAll(tasks);
//aggregate partial results
int len = _type.isColumnAgg() ? out.getNumRows()*out.getNumColumns() : 1;
for( Future<double[]> task : taskret )
@@ -222,8 +219,8 @@ public abstract class SpoofRowwise extends SpoofOperator
else {
//execute tasks
ArrayList<ParExecTask> tasks = new ArrayList<>();
- for( int i=0; i<nk & i*blklen<m; i++ )
- tasks.add(new ParExecTask(a, b, out, scalars, n, n2, i*blklen, Math.min((i+1)*blklen, m)));
+ for( int i=0, lb=0; i<blklens.size(); lb+=blklens.get(i), i++ )
+ tasks.add(new ParExecTask(a, b, out, scalars, n, n2, lb, lb+blklens.get(i)));
List<Future<Long>> taskret = pool.invokeAll(tasks);
//aggregate nnz, no need to aggregate results
long nnz = 0;
http://git-wip-us.apache.org/repos/asf/systemml/blob/d75a669a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
index 684f327..a1f648e 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
@@ -209,8 +209,7 @@ public class LibMatrixMult
try {
ExecutorService pool = Executors.newFixedThreadPool( k );
ArrayList<MatrixMultTask> tasks = new ArrayList<>();
- int nk = (pm2r||pm2c) ? k : UtilFunctions.roundToNext(Math.min(8*k,num/32), k);
- ArrayList<Integer> blklens = getBalancedBlockSizes(num, nk);
+ ArrayList<Integer> blklens = getBalancedBlockSizesDefault(num, k, (pm2r||pm2c));
for( int i=0, lb=0; i<blklens.size(); lb+=blklens.get(i), i++ )
tasks.add(new MatrixMultTask(m1, m2, ret, tm2, pm2r, pm2c, lb, lb+blklens.get(i)));
//execute tasks
@@ -261,7 +260,7 @@ public class LibMatrixMult
}
//Timing time = new Timing(true);
-
+
//pre-processing: output allocation
ret.sparse = false;
ret.allocateDenseBlock();
@@ -312,7 +311,7 @@ public class LibMatrixMult
}
//Timing time = new Timing(true);
-
+
//pre-processing (no need to check isThreadSafe)
ret.sparse = false;
ret.allocateDenseBlock();
@@ -321,11 +320,10 @@ public class LibMatrixMult
//(currently: always parallelization over number of rows)
try {
ExecutorService pool = Executors.newFixedThreadPool( k );
+ ArrayList<Integer> blklens = getBalancedBlockSizesDefault(mX.rlen, k, true);
ArrayList<MatrixMultChainTask> tasks = new ArrayList<>();
- int blklen = (int)(Math.ceil((double)mX.rlen/k));
- blklen += (blklen%24 != 0)?24-blklen%24:0;
- for( int i=0; i<k & i*blklen<mX.rlen; i++ )
- tasks.add(new MatrixMultChainTask(mX, mV, mW, ct, i*blklen, Math.min((i+1)*blklen, mX.rlen)));
+ for( int i=0, lb=0; i<blklens.size(); lb+=blklens.get(i), i++ )
+ tasks.add(new MatrixMultChainTask(mX, mV, mW, ct, lb, lb+blklens.get(i)));
//execute tasks
List<Future<double[]>> taskret = pool.invokeAll(tasks);
pool.shutdown();
@@ -1606,10 +1604,18 @@ public class LibMatrixMult
final int blocksizeI = 24; // constraint: factor of 4
final int blocksizeJ = 1024;
double[] tmp = new double[blocksizeI];
+ final int bn = (ru-rl) % blocksizeI;
+
+ //compute rest (not aligned to blocksize)
+ for( int i=rl, aix=rl*cd; i < rl+bn; i++, aix+=cd ) {
+ double val = dotProduct(a, b, aix, 0, cd);
+ val *= (weights) ? w[i] : 1;
+ val -= (weights2) ? w[i] : 0;
+ vectMultiplyAdd(val, a, c, aix, 0, cd);
+ }
//blockwise mmchain computation
- final int bn = ru - ru % blocksizeI; //rl blocksize aligned
- for( int bi=rl; bi < bn; bi+=blocksizeI )
+ for( int bi=rl+bn; bi < ru; bi+=blocksizeI )
{
//compute 1st matrix-vector for row block
Arrays.fill(tmp, 0);
@@ -1621,10 +1627,10 @@ public class LibMatrixMult
//multiply/subtract weights (in-place), if required
if( weights )
- vectMultiply(w, tmp, bi, 0, blocksizeI);
+ vectMultiply(w, tmp, bi, 0, blocksizeI);
else if( weights2 )
vectSubtract(w, tmp, bi, 0, blocksizeI);
-
+
//compute 2nd matrix vector for row block and aggregate
for( int bj = 0; bj<cd; bj+=blocksizeJ ) {
int bjmin = Math.min(cd-bj, blocksizeJ);
@@ -1633,14 +1639,6 @@ public class LibMatrixMult
a, c, aix, aix+cd, aix+2*cd, aix+3*cd, bj, bjmin);
}
}
-
- //compute rest (not aligned to blocksize)
- for( int i=bn, aix=bn*cd; i < ru; i++, aix+=cd ) {
- double val = dotProduct(a, b, aix, 0, cd);
- val *= (weights) ? w[i] : 1;
- val -= (weights2) ? w[i] : 0;
- vectMultiplyAdd(val, a, c, aix, 0, cd);
- }
}
private static void matrixMultChainSparse(MatrixBlock mX, MatrixBlock mV, MatrixBlock mW, MatrixBlock ret, ChainType ct, int rl, int ru)
@@ -3578,9 +3576,9 @@ public class LibMatrixMult
public static boolean checkParColumnAgg(MatrixBlock m1, int k, boolean inclFLOPs) {
return (8L * m1.clen * k <= MEM_OVERHEAD_THRESHOLD
- && (!inclFLOPs || 4L * m1.rlen * m1.clen >= PAR_MINFLOP_THRESHOLD));
+ && (!inclFLOPs || 4L * m1.rlen * m1.clen / (m1.sparse?2:1) >= PAR_MINFLOP_THRESHOLD));
}
-
+
private static boolean checkParMatrixMultRightInputRows( MatrixBlock m1, MatrixBlock m2, int k ) {
//parallelize over rows in rhs matrix if number of rows in lhs/output is very small
return (m1.rlen==1 && LOW_LEVEL_OPTIMIZATION && m2.clen>1 && !(m1.isUltraSparse()||m2.isUltraSparse()))
@@ -3676,6 +3674,20 @@ public class LibMatrixMult
}
+ public static ArrayList<Integer> getBalancedBlockSizesDefault(int len, int k, boolean constK) {
+ int nk = constK ? k : UtilFunctions.roundToNext(Math.min(8*k,len/32), k);
+ return getBalancedBlockSizes(len, nk);
+ }
+
+ public static ArrayList<Integer> getAlignedBlockSizes(int len, int k, int align) {
+ int blklen = (int)(Math.ceil((double)len/k));
+ blklen += ((blklen%align != 0) ? align-blklen%align : 0);
+ ArrayList<Integer> ret = new ArrayList<>();
+ for(int i=0; i<len; i+=blklen)
+ ret.add(Math.min(blklen, len-i));
+ return ret;
+ }
+
private static ArrayList<Integer> getBalancedBlockSizes(int len, int k) {
ArrayList<Integer> ret = new ArrayList<>();
int base = len / k;