You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2017/11/18 01:39:41 UTC
[2/2] systemml git commit: [SYSTEMML-2020] Avoid sparse-dense
conversion in codegen outer tpls
[SYSTEMML-2020] Avoid sparse-dense conversion in codegen outer tpls
This patch generalizes the codegen outer product template to consume -
similar to the cell and row templates - generic SideInputs instead of
dense matrices as side input, which avoids unnecessary conversions from
sparse to dense matrices and thus improves performance.
Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/db3d54c1
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/db3d54c1
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/db3d54c1
Branch: refs/heads/master
Commit: db3d54c1ce6651f2abf43d3ba919527b943aa8ba
Parents: 9391728
Author: Matthias Boehm <mb...@gmail.com>
Authored: Fri Nov 17 16:49:45 2017 -0800
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Fri Nov 17 16:49:45 2017 -0800
----------------------------------------------------------------------
.../hops/codegen/cplan/CNodeOuterProduct.java | 5 +-
.../runtime/codegen/SpoofOuterProduct.java | 66 +++++++++++---------
2 files changed, 39 insertions(+), 32 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/systemml/blob/db3d54c1/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeOuterProduct.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeOuterProduct.java b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeOuterProduct.java
index e518246..e2967a5 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeOuterProduct.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeOuterProduct.java
@@ -31,6 +31,7 @@ public class CNodeOuterProduct extends CNodeTpl
private static final String TEMPLATE =
"package codegen;\n"
+ "import org.apache.sysml.runtime.codegen.LibSpoofPrimitives;\n"
+ + "import org.apache.sysml.runtime.codegen.SpoofOperator.SideInput;\n"
+ "import org.apache.sysml.runtime.codegen.SpoofOuterProduct;\n"
+ "import org.apache.sysml.runtime.codegen.SpoofOuterProduct.OutProdType;\n"
+ "import org.apache.commons.math3.util.FastMath;\n"
@@ -39,10 +40,10 @@ public class CNodeOuterProduct extends CNodeTpl
+ " public %TMP%() {\n"
+ " super(OutProdType.%TYPE%);\n"
+ " }\n"
- + " protected void genexecDense(double a, double[] a1, int a1i, double[] a2, int a2i, double[][] b, double[] scalars, double[] c, int ci, int m, int n, int len, int rix, int cix) { \n"
+ + " protected void genexecDense(double a, double[] a1, int a1i, double[] a2, int a2i, SideInput[] b, double[] scalars, double[] c, int ci, int m, int n, int len, int rix, int cix) { \n"
+ "%BODY_dense%"
+ " }\n"
- + " protected double genexecCellwise(double a, double[] a1, int a1i, double[] a2, int a2i, double[][] b, double[] scalars, int m, int n, int len, int rix, int cix) { \n"
+ + " protected double genexecCellwise(double a, double[] a1, int a1i, double[] a2, int a2i, SideInput[] b, double[] scalars, int m, int n, int len, int rix, int cix) { \n"
+ "%BODY_cellwise%"
+ " return %OUT_cellwise%;\n"
+ " }\n"
http://git-wip-us.apache.org/repos/asf/systemml/blob/db3d54c1/src/main/java/org/apache/sysml/runtime/codegen/SpoofOuterProduct.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/codegen/SpoofOuterProduct.java b/src/main/java/org/apache/sysml/runtime/codegen/SpoofOuterProduct.java
index 26a661a..4d4da51 100644
--- a/src/main/java/org/apache/sysml/runtime/codegen/SpoofOuterProduct.java
+++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofOuterProduct.java
@@ -82,7 +82,7 @@ public abstract class SpoofOuterProduct extends SpoofOperator
//input preparation
double[][] ab = getDenseMatrices(prepInputMatrices(inputs, 1, 2, true, false));
- double[][] b = getDenseMatrices(prepInputMatrices(inputs, 3, true));
+ SideInput[] b = prepInputMatrices(inputs, 3, false);
double[] scalars = prepInputScalars(scalarObjects);
//core sequential execute
@@ -118,7 +118,7 @@ public abstract class SpoofOuterProduct extends SpoofOperator
//input preparation
double[][] ab = getDenseMatrices(prepInputMatrices(inputs, 1, 2, true, false));
- double[][] b = getDenseMatrices(prepInputMatrices(inputs, 3, true));
+ SideInput[] b = prepInputMatrices(inputs, 3, false);
double[] scalars = prepInputScalars(scalarObjects);
//core sequential execute
@@ -186,9 +186,9 @@ public abstract class SpoofOuterProduct extends SpoofOperator
//input preparation
double[][] ab = getDenseMatrices(prepInputMatrices(inputs, 1, 2, true, false));
- double[][] b = getDenseMatrices(prepInputMatrices(inputs, 3, true));
+ SideInput[] b = prepInputMatrices(inputs, 3, false);
double[] scalars = prepInputScalars(scalarObjects);
-
+
//core sequential execute
final int m = inputs.get(0).getNumRows();
final int n = inputs.get(0).getNumColumns();
@@ -268,7 +268,7 @@ public abstract class SpoofOuterProduct extends SpoofOperator
//input preparation
double[][] ab = getDenseMatrices(prepInputMatrices(inputs, 1, 2, true, false));
- double[][] b = getDenseMatrices(prepInputMatrices(inputs, 3, true));
+ SideInput[] b = prepInputMatrices(inputs, 3, false);
double[] scalars = prepInputScalars(scalarObjects);
//core sequential execute
@@ -338,13 +338,14 @@ public abstract class SpoofOuterProduct extends SpoofOperator
return UtilFunctions.roundToNext(base, k);
}
- private void executeDense(double[] a, double[] u, double[] v, double[][] b, double[] scalars,
+ private void executeDense(double[] a, double[] u, double[] v, SideInput[] b, double[] scalars,
double[] c, int m, int n, int k, OutProdType type, int rl, int ru, int cl, int cu )
{
//approach: iterate over non-zeros of w, selective mm computation
//cache-conscious blocking: due to blocksize constraint (default 1000),
//a blocksize of 16 allows to fit blocks of UV into L2 cache (256KB)
+ SideInput[] lb = createSparseSideInputs(b);
final int blocksizeIJ = 16; //u/v block (max at typical L2 size)
int cix = 0;
//blocked execution
@@ -358,18 +359,19 @@ public abstract class SpoofOuterProduct extends SpoofOperator
for( int j=bj, vix=bj*k; j<bjmin; j++, vix+=k)
if( a[ix+j] != 0 ) {
cix = (type == OutProdType.LEFT_OUTER_PRODUCT) ? vix : uix;
- genexecDense( a[ix+j], u, uix, v, vix, b, scalars, c, cix, m, n, k, i, j);
+ genexecDense( a[ix+j], u, uix, v, vix, lb, scalars, c, cix, m, n, k, i, j);
}
}
}
- private void executeCellwiseDense(double[] a, double[] u, double[] v, double[][] b, double[] scalars,
+ private void executeCellwiseDense(double[] a, double[] u, double[] v, SideInput[] b, double[] scalars,
double[] c, int m, int n, int k, OutProdType type, int rl, int ru, int cl, int cu )
{
//approach: iterate over non-zeros of w, selective mm computation
//cache-conscious blocking: due to blocksize constraint (default 1000),
//a blocksize of 16 allows to fit blocks of UV into L2 cache (256KB)
+ SideInput[] lb = createSparseSideInputs(b);
final int blocksizeIJ = 16; //u/v block (max at typical L2 size)
//blocked execution
double sum = 0;
@@ -383,18 +385,19 @@ public abstract class SpoofOuterProduct extends SpoofOperator
for( int j=bj, vix=bj*k; j<bjmin; j++, vix+=k)
if( a[ix+j] != 0 ) {
if(type == OutProdType.CELLWISE_OUTER_PRODUCT)
- c[ix+j] = genexecCellwise( a[ix+j], u, uix, v, vix, b, scalars, m, n, k, i, j );
+ c[ix+j] = genexecCellwise( a[ix+j], u, uix, v, vix, lb, scalars, m, n, k, i, j );
else
- sum += genexecCellwise( a[ix+j], u, uix, v, vix, b, scalars, m, n, k, i, j);
+ sum += genexecCellwise( a[ix+j], u, uix, v, vix, lb, scalars, m, n, k, i, j);
}
}
if( type != OutProdType.CELLWISE_OUTER_PRODUCT )
c[0] = sum;
}
- private void executeSparse(SparseBlock sblock, double[] u, double[] v, double[][] b, double[] scalars,
+ private void executeSparse(SparseBlock sblock, double[] u, double[] v, SideInput[] b, double[] scalars,
double[] c, int m, int n, int k, long nnz, OutProdType type, int rl, int ru, int cl, int cu)
{
+ SideInput[] lb = createSparseSideInputs(b);
boolean left = (_outerProductType== OutProdType.LEFT_OUTER_PRODUCT);
//approach: iterate over non-zeros of w, selective mm computation
@@ -420,7 +423,7 @@ public abstract class SpoofOuterProduct extends SpoofOperator
int index = (cl==0||sblock.isEmpty(i)) ? 0 : sblock.posFIndexGTE(i,cl);
index = wpos + ((index>=0) ? index : n);
for( ; index<wpos+wlen && wix[index]<cu; index++ ) {
- genexecDense(wval[index], u, uix, v, wix[index]*k, b, scalars, c,
+ genexecDense(wval[index], u, uix, v, wix[index]*k, lb, scalars, c,
(left ? wix[index]*k : uix), m, n, k, i, wix[index]);
}
}
@@ -454,7 +457,7 @@ public abstract class SpoofOuterProduct extends SpoofOperator
int index = wpos + curk[i-bi];
for( ; index<wpos+wlen && wix[index]<bjmin; index++ ) {
- genexecDense(wval[index], u, uix, v, wix[index]*k, b, scalars, c,
+ genexecDense(wval[index], u, uix, v, wix[index]*k, lb, scalars, c,
(left ? wix[index]*k : uix), m, n, k, i, wix[index]);
}
curk[i-bi] = index - wpos;
@@ -464,9 +467,10 @@ public abstract class SpoofOuterProduct extends SpoofOperator
}
}
- private void executeCellwiseSparse(SparseBlock sblock, double[] u, double[] v, double[][] b, double[] scalars,
+ private void executeCellwiseSparse(SparseBlock sblock, double[] u, double[] v, SideInput[] b, double[] scalars,
MatrixBlock out, int m, int n, int k, long nnz, OutProdType type, int rl, int ru, int cl, int cu )
{
+ SideInput[] lb = createSparseSideInputs(b);
final int blocksizeIJ = (int) (8L*m*n/nnz);
int[] curk = new int[Math.min(blocksizeIJ, ru-rl)];
@@ -491,11 +495,11 @@ public abstract class SpoofOuterProduct extends SpoofOperator
if( type == OutProdType.CELLWISE_OUTER_PRODUCT )
for( ; index<wpos+wlen && wix[index]<bjmin; index++ )
c[wix[index]] = genexecCellwise( wval[index],
- u, uix, v, wix[index]*k, b, scalars, m, n, k, i, wix[index] );
+ u, uix, v, wix[index]*k, lb, scalars, m, n, k, i, wix[index] );
else
for( ; index<wpos+wlen && wix[index]<bjmin; index++ )
tmp += genexecCellwise( wval[index],
- u, uix, v, wix[index]*k, b, scalars, m, n, k, i, wix[index]);
+ u, uix, v, wix[index]*k, lb, scalars, m, n, k, i, wix[index]);
curk[i-bi] = index - wpos;
}
}
@@ -522,7 +526,7 @@ public abstract class SpoofOuterProduct extends SpoofOperator
int index = wpos + curk[i-bi];
for( ; index<wpos+wlen && wix[index]<bjmin; index++ ) {
c.append(i, wix[index], genexecCellwise( wval[index], u, uix, v,
- wix[index]*k, b, scalars, m, n, k, i, wix[index] ));
+ wix[index]*k, lb, scalars, m, n, k, i, wix[index] ));
}
curk[i-bi] = index - wpos;
}
@@ -531,9 +535,10 @@ public abstract class SpoofOuterProduct extends SpoofOperator
}
}
- private void executeCompressed(CompressedMatrixBlock a, double[] u, double[] v, double[][] b, double[] scalars,
+ private void executeCompressed(CompressedMatrixBlock a, double[] u, double[] v, SideInput[] b, double[] scalars,
double[] c, int m, int n, int k, OutProdType type, int rl, int ru, int cl, int cu)
{
+ SideInput[] lb = createSparseSideInputs(b);
boolean left = (_outerProductType==OutProdType.LEFT_OUTER_PRODUCT);
Iterator<IJV> iter = !left ? a.getIterator(rl, ru, false) :
@@ -542,14 +547,15 @@ public abstract class SpoofOuterProduct extends SpoofOperator
IJV cell = iter.next();
int uix = cell.getI() * k;
int vix = cell.getJ() * k;
- genexecDense(cell.getV(), u, uix, v, vix, b, scalars, c,
+ genexecDense(cell.getV(), u, uix, v, vix, lb, scalars, c,
left ? vix : uix, m, n, k, cell.getI(), cell.getJ());
}
}
- private void executeCellwiseCompressed(CompressedMatrixBlock a, double[] u, double[] v, double[][] b, double[] scalars,
+ private void executeCellwiseCompressed(CompressedMatrixBlock a, double[] u, double[] v, SideInput[] b, double[] scalars,
MatrixBlock out, int m, int n, int k, OutProdType type, int rl, int ru, int cl, int cu )
- {
+ {
+ SideInput[] lb = createSparseSideInputs(b);
double[] c = out.getDenseBlock();
SparseBlock csblock = out.getSparseBlock();
@@ -562,23 +568,23 @@ public abstract class SpoofOuterProduct extends SpoofOperator
if( out.isInSparseFormat() ) {
csblock.allocate(cell.getI());
csblock.append(cell.getI(), cell.getJ(),
- genexecCellwise(cell.getV(), u, uix, v, vix, b, scalars, m, n, k, cell.getI(), cell.getJ()));
+ genexecCellwise(cell.getV(), u, uix, v, vix, lb, scalars, m, n, k, cell.getI(), cell.getJ()));
}
else {
c[cell.getI()*n+cell.getJ()] =
- genexecCellwise(cell.getV(), u, uix, v, vix, b, scalars, m, n, k, cell.getI(), cell.getJ());
+ genexecCellwise(cell.getV(), u, uix, v, vix, lb, scalars, m, n, k, cell.getI(), cell.getJ());
}
}
else {
- c[0] += genexecCellwise(cell.getV(), u, uix, v, vix, b, scalars, m, n, k, cell.getI(), cell.getJ());
+ c[0] += genexecCellwise(cell.getV(), u, uix, v, vix, lb, scalars, m, n, k, cell.getI(), cell.getJ());
}
}
}
- protected abstract void genexecDense( double a, double[] u, int ui, double[] v, int vi, double[][] b,
+ protected abstract void genexecDense( double a, double[] u, int ui, double[] v, int vi, SideInput[] b,
double[] scalars, double[] c, int ci, int m, int n, int k, int rowIndex, int colIndex);
- protected abstract double genexecCellwise( double a, double[] u, int ui, double[] v, int vi, double[][] b,
+ protected abstract double genexecCellwise( double a, double[] u, int ui, double[] v, int vi, SideInput[] b,
double[] scalars, int m, int n, int k, int rowIndex, int colIndex);
private class ParExecTask implements Callable<Long>
@@ -586,7 +592,7 @@ public abstract class SpoofOuterProduct extends SpoofOperator
private final MatrixBlock _a;
private final double[] _u;
private final double[] _v;
- private final double[][] _b;
+ private final SideInput[] _b;
private final double[] _scalars;
private final MatrixBlock _c;
private final int _clen;
@@ -598,7 +604,7 @@ public abstract class SpoofOuterProduct extends SpoofOperator
private final int _cl;
private final int _cu;
- protected ParExecTask( MatrixBlock a, double[] u, double[] v, double[][] b, double[] scalars , MatrixBlock c, int m, int n, int k, OutProdType type, int rl, int ru, int cl, int cu ) {
+ protected ParExecTask( MatrixBlock a, double[] u, double[] v, SideInput[] b, double[] scalars , MatrixBlock c, int m, int n, int k, OutProdType type, int rl, int ru, int cl, int cu ) {
_a = a;
_u = u;
_v = v;
@@ -653,7 +659,7 @@ public abstract class SpoofOuterProduct extends SpoofOperator
private final MatrixBlock _a;
private final double[] _u;
private final double[] _v;
- private final double[][] _b;
+ private final SideInput[] _b;
private final double[] _scalars;
private final int _rlen;
private final int _clen;
@@ -664,7 +670,7 @@ public abstract class SpoofOuterProduct extends SpoofOperator
private final int _cl;
private final int _cu;
- protected ParOuterProdAggTask( MatrixBlock a, double[] u, double[] v, double[][] b, double[] scalars, int m, int n, int k, OutProdType type, int rl, int ru, int cl, int cu ) {
+ protected ParOuterProdAggTask( MatrixBlock a, double[] u, double[] v, SideInput[] b, double[] scalars, int m, int n, int k, OutProdType type, int rl, int ru, int cl, int cu ) {
_a = a;
_u = u;
_v = v;