You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2017/07/13 02:25:05 UTC
systemml git commit: [SYSTEMML-1767] Performance codegen rowwise
template w/ column agg
Repository: systemml
Updated Branches:
refs/heads/master 4e3ebcaeb -> 62a1b75ba
[SYSTEMML-1767] Performance codegen rowwise template w/ column agg
This patch makes the codegen row-wise template consistent with the
mmchain operation in terms of its condition to fall back to
single-threaded operations if the temporary memory for partial
aggregations exceeds the internal threshold. In a scenario with 2M sparse
features, this patch improved performance by 20x because it avoids
unnecessary L3 cache thrashing.
Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/62a1b75b
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/62a1b75b
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/62a1b75b
Branch: refs/heads/master
Commit: 62a1b75baf5d3ae3225ca4126e5a3ea93aa86a0f
Parents: 4e3ebca
Author: Matthias Boehm <mb...@gmail.com>
Authored: Wed Jul 12 19:25:52 2017 -0700
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Wed Jul 12 19:26:12 2017 -0700
----------------------------------------------------------------------
.../hops/codegen/template/TemplateRow.java | 1 +
.../sysml/runtime/codegen/SpoofRowwise.java | 20 +++++++++++++-------
.../runtime/matrix/data/LibMatrixMult.java | 9 ++++++---
3 files changed, 20 insertions(+), 10 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/systemml/blob/62a1b75b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java
index 0bc0380..5cb016c 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java
@@ -320,6 +320,7 @@ public class TemplateRow extends TemplateBase
//special case for cbind with zeros
CNode cdata1 = tmp.get(hop.getInput().get(0).getHopID());
out = new CNodeUnary(cdata1, UnaryType.CBIND0);
+ inHops.remove(hop.getInput().get(1)); //rm 0-matrix
}
else if(hop instanceof BinaryOp)
{
http://git-wip-us.apache.org/repos/asf/systemml/blob/62a1b75b/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java b/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java
index 13536d3..dc6baff 100644
--- a/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java
+++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java
@@ -131,7 +131,7 @@ public abstract class SpoofRowwise extends SpoofOperator
double[] scalars = prepInputScalars(scalarObjects);
//setup thread-local memory if necessary
- if( allocTmp )
+ if( allocTmp &&_reqVectMem > 0 )
LibSpoofPrimitives.setupThreadLocalMemory(_reqVectMem, n, n2);
//core sequential execute
@@ -144,7 +144,7 @@ public abstract class SpoofRowwise extends SpoofOperator
executeSparse(a.getSparseBlock(), b, scalars, c, n, 0, m);
//post-processing
- if( allocTmp )
+ if( allocTmp &&_reqVectMem > 0 )
LibSpoofPrimitives.cleanupThreadLocalMemory();
out.recomputeNonZeros();
out.examSparsity();
@@ -155,7 +155,8 @@ public abstract class SpoofRowwise extends SpoofOperator
throws DMLRuntimeException
{
//redirect to serial execution
- if( k <= 1 || (long)inputs.get(0).getNumRows()*inputs.get(0).getNumColumns()<PAR_NUMCELL_THRESHOLD ) {
+ if( k <= 1 || (_type.isColumnAgg() && !LibMatrixMult.checkParColumnAgg(inputs.get(0), k, false))
+ || (long)inputs.get(0).getNumRows()*inputs.get(0).getNumColumns()<PAR_NUMCELL_THRESHOLD ) {
execute(inputs, scalarObjects, out);
return;
}
@@ -320,7 +321,8 @@ public abstract class SpoofRowwise extends SpoofOperator
public double[] call() throws DMLRuntimeException {
//allocate vector intermediates and partial output
- LibSpoofPrimitives.setupThreadLocalMemory(_reqVectMem, _clen, _clen2);
+ if( _reqVectMem > 0 )
+ LibSpoofPrimitives.setupThreadLocalMemory(_reqVectMem, _clen, _clen2);
double[] c = new double[(_clen2>0)?_clen*_clen2 : _clen];
if( _a instanceof CompressedMatrixBlock )
@@ -330,7 +332,8 @@ public abstract class SpoofRowwise extends SpoofOperator
else
executeSparse(_a.getSparseBlock(), _b, _scalars, c, _clen, _rl, _ru);
- LibSpoofPrimitives.cleanupThreadLocalMemory();
+ if( _reqVectMem > 0 )
+ LibSpoofPrimitives.cleanupThreadLocalMemory();
return c;
}
}
@@ -363,7 +366,8 @@ public abstract class SpoofRowwise extends SpoofOperator
@Override
public Long call() throws DMLRuntimeException {
//allocate vector intermediates
- LibSpoofPrimitives.setupThreadLocalMemory(_reqVectMem, _clen, _clen2);
+ if( _reqVectMem > 0 )
+ LibSpoofPrimitives.setupThreadLocalMemory(_reqVectMem, _clen, _clen2);
if( _a instanceof CompressedMatrixBlock )
executeCompressed((CompressedMatrixBlock)_a, _b, _scalars, _c.getDenseBlock(), _clen, _rl, _ru);
@@ -371,7 +375,9 @@ public abstract class SpoofRowwise extends SpoofOperator
executeDense(_a.getDenseBlock(), _b, _scalars, _c.getDenseBlock(), _clen, _rl, _ru);
else
executeSparse(_a.getSparseBlock(), _b, _scalars, _c.getDenseBlock(), _clen, _rl, _ru);
- LibSpoofPrimitives.cleanupThreadLocalMemory();
+
+ if( _reqVectMem > 0 )
+ LibSpoofPrimitives.cleanupThreadLocalMemory();
//maintain nnz for row partition
return _c.recomputeNonZeros(_rl, _ru-1, 0, _c.getNumColumns()-1);
http://git-wip-us.apache.org/repos/asf/systemml/blob/62a1b75b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
index da3b12b..30e7d3d 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
@@ -309,9 +309,7 @@ public class LibMatrixMult
//check too high additional memory requirements (fallback to sequential)
//check too small workload in terms of flops (fallback to sequential too)
- if( 8L * mV.rlen * k > MEM_OVERHEAD_THRESHOLD
- || 4L * mX.rlen * mX.clen < PAR_MINFLOP_THRESHOLD)
- {
+ if( !checkParColumnAgg(mX, k, true) ) {
matrixMultChain(mX, mV, mW, ret, ct);
return;
}
@@ -3531,6 +3529,11 @@ public class LibMatrixMult
return m1rlen > m2clen && m2rlen > m2clen && m2clen > 1
&& m2clen < 64 && 8*m2rlen*m2clen < L2_CACHESIZE;
}
+
+ public static boolean checkParColumnAgg(MatrixBlock m1, int k, boolean inclFLOPs) {
+ return (8L * m1.clen * k <= MEM_OVERHEAD_THRESHOLD
+ && (!inclFLOPs || 4L * m1.rlen * m1.clen >= PAR_MINFLOP_THRESHOLD));
+ }
private static boolean checkParMatrixMultRightInputRows( MatrixBlock m1, MatrixBlock m2, int k ) {
//parallelize over rows in rhs matrix if number of rows in lhs/output is very small