You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2017/05/26 04:02:28 UTC
incubator-systemml git commit: [SYSTEMML-1592] Improved codegen
cellwise operations w/ sparse outputs
Repository: incubator-systemml
Updated Branches:
refs/heads/master c697c30eb -> ab1c70508
[SYSTEMML-1592] Improved codegen cellwise operations w/ sparse outputs
This patch improves codegen cellwise operations with the ability to
directly create sparse outputs instead of dense intermediates and a
subsequent conversion to sparse. We apply this for sparse inputs with
sparse-safe cellwise operations. Furthermore, this also includes a
refactoring of the cellwise operator skeleton for better instruction
locality.
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/ab1c7050
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/ab1c7050
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/ab1c7050
Branch: refs/heads/master
Commit: ab1c7050890bcfc7032c33df3c3d8244a906c8a3
Parents: c697c30
Author: Matthias Boehm <mb...@gmail.com>
Authored: Thu May 25 20:40:52 2017 -0700
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Thu May 25 20:40:52 2017 -0700
----------------------------------------------------------------------
.../sysml/runtime/codegen/SpoofCellwise.java | 607 ++++++++++---------
1 file changed, 335 insertions(+), 272 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ab1c7050/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java b/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java
index b01914a..fe23a04 100644
--- a/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java
+++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java
@@ -188,28 +188,28 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
k = 1; //serial execution
}
- //result allocation and preparations
- out.reset(inputs.get(0).getNumRows(), _type == CellType.NO_AGG ?
- inputs.get(0).getNumColumns() : 1, false);
- out.allocateDenseBlock();
- double[] c = out.getDenseBlock();
-
//input preparation
double[][] b = prepInputMatrices(inputs);
double[] scalars = prepInputScalars(scalarObjects);
final int m = inputs.get(0).getNumRows();
- final int n = inputs.get(0).getNumColumns();
+ final int n = inputs.get(0).getNumColumns();
//sparse safe check
- boolean sparseSafe = isSparseSafe() || (b.length == 0
+ boolean sparseSafe = isSparseSafe() || (b.length == 0
&& genexec( 0, b, scalars, m, n, 0, 0 ) == 0);
+ //result allocation and preparations
+ boolean sparseOut = sparseSafe && inputs.get(0).isInSparseFormat() && _type == CellType.NO_AGG;
+ out.reset(inputs.get(0).getNumRows(), _type == CellType.NO_AGG ?
+ inputs.get(0).getNumColumns() : 1, sparseOut);
+ out.allocateDenseOrSparseBlock();
+
long lnnz = 0;
if( k <= 1 ) //SINGLE-THREADED
{
lnnz = (!inputs.get(0).isInSparseFormat()) ?
- executeDense(inputs.get(0).getDenseBlock(), b, scalars, c, m, n, sparseSafe, 0, m) :
- executeSparse(inputs.get(0).getSparseBlock(), b, scalars, c, m, n, sparseSafe, 0, m);
+ executeDense(inputs.get(0).getDenseBlock(), b, scalars, out, m, n, sparseSafe, 0, m) :
+ executeSparse(inputs.get(0).getSparseBlock(), b, scalars, out, m, n, sparseSafe, 0, m);
}
else //MULTI-THREADED
{
@@ -219,7 +219,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
int nk = UtilFunctions.roundToNext(Math.min(8*k,m/32), k);
int blklen = (int)(Math.ceil((double)m/nk));
for( int i=0; i<nk & i*blklen<m; i++ )
- tasks.add(new ParExecTask(inputs.get(0), b, scalars, c,
+ tasks.add(new ParExecTask(inputs.get(0), b, scalars, out,
m, n, sparseSafe, i*blklen, Math.min((i+1)*blklen, m)));
//execute tasks
List<Future<Long>> taskret = pool.invokeAll(tasks);
@@ -235,129 +235,56 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
}
//post-processing
- out.setNonZeros(lnnz);
- out.examSparsity();
+ out.setNonZeros(lnnz);
+ out.examSparsity();
}
- private double executeDenseAndAgg(double[] a, double[][] b, double[] scalars, int m, int n, boolean sparseSafe, int rl, int ru) throws DMLRuntimeException
+ private long executeDense(double[] a, double[][] b, double[] scalars, MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru)
+ throws DMLRuntimeException
{
- ValueFunction vfun = getAggFunction();
- double ret = 0;
+ double[] c = out.getDenseBlock();
- //numerically stable aggregation for sum/sum_sq
- if( vfun instanceof KahanFunction ) {
- KahanObject kbuff = new KahanObject(0, 0);
- KahanFunction kplus = (KahanFunction) vfun;
-
- if( a == null && !sparseSafe ) { //empty
- for( int i=rl; i<ru; i++ )
- for( int j=0; j<n; j++ )
- kplus.execute2(kbuff, genexec( 0, b, scalars, m, n, i, j ));
- }
- else if( a != null ) { //general case
- for( int i=rl, ix=rl*n; i<ru; i++ )
- for( int j=0; j<n; j++, ix++ )
- if( a[ix] != 0 || !sparseSafe)
- kplus.execute2(kbuff, genexec( a[ix], b, scalars, m, n, i, j ));
- }
- ret = kbuff._sum;
+ if( _type == CellType.NO_AGG ) {
+ return executeDenseNoAgg(a, b, scalars, c, m, n, sparseSafe, rl, ru);
}
- //safe aggregation for min/max w/ handling of zero entries
- //note: sparse safe with zero value as min/max handled outside
- else {
- ret = (_aggOp==AggOp.MIN) ? Double.MAX_VALUE : -Double.MAX_VALUE;
- if( a == null && !sparseSafe ) { //empty
- for( int i=rl; i<ru; i++ )
- for( int j=0; j<n; j++ )
- ret = vfun.execute(ret, genexec( 0, b, scalars, m, n, i, j ));
- }
- else if( a != null ) { //general case
- for( int i=rl, ix=rl*n; i<ru; i++ )
- for( int j=0; j<n; j++, ix++ )
- if( a[ix] != 0 || !sparseSafe)
- ret = vfun.execute(ret, genexec( a[ix], b, scalars, m, n, i, j ));
- }
+ else if( _type == CellType.ROW_AGG ) {
+ if( _aggOp == AggOp.SUM || _aggOp == AggOp.SUM_SQ )
+ return executeDenseRowAggSum(a, b, scalars, c, m, n, sparseSafe, rl, ru);
+ else
+ return executeDenseRowAggMxx(a, b, scalars, c, m, n, sparseSafe, rl, ru);
}
-
- return ret;
+ return -1;
}
- private long executeDense(double[] a, double[][] b, double[] scalars, double[] c, int m, int n, boolean sparseSafe, int rl, int ru)
+ private double executeDenseAndAgg(double[] a, double[][] b, double[] scalars, int m, int n, boolean sparseSafe, int rl, int ru) throws DMLRuntimeException
+ {
+ //numerically stable aggregation for sum/sum_sq
+ if( _aggOp == AggOp.SUM || _aggOp == AggOp.SUM_SQ )
+ return executeDenseAggSum(a, b, scalars, m, n, sparseSafe, rl, ru);
+ else
+ return executeDenseAggMxx(a, b, scalars, m, n, sparseSafe, rl, ru);
+ }
+
+ private long executeSparse(SparseBlock sblock, double[][] b, double[] scalars, MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru)
throws DMLRuntimeException
{
- long lnnz = 0;
+ if( sparseSafe && sblock == null )
+ return 0;
- if( _type == CellType.NO_AGG )
- {
- if( a == null && !sparseSafe ) { //empty
- //note: we can't determine sparse-safeness by executing the operator once
- //as the output might change with different row indices
- for( int i=rl, ix=rl*n; i<ru; i++ )
- for( int j=0; j<n; j++, ix++ ) {
- c[ix] = genexec( 0, b, scalars, m, n, i, j );
- lnnz += (c[ix]!=0) ? 1 : 0;
- }
- }
- else if( a != null ) { //general case
- for( int i=rl, ix=rl*n; i<ru; i++ )
- for( int j=0; j<n; j++, ix++ )
- if( a[ix] != 0 || !sparseSafe) {
- c[ix] = genexec( a[ix], b, scalars, m, n, i, j);
- lnnz += (c[ix]!=0) ? 1 : 0;
- }
- }
+ if( _type == CellType.NO_AGG ) {
+ if( out.isInSparseFormat() )
+ return executeSparseNoAggSparse(sblock, b, scalars, out, m, n, sparseSafe, rl, ru);
+ else
+ return executeSparseNoAggDense(sblock, b, scalars, out, m, n, sparseSafe, rl, ru);
}
- else if( _type == CellType.ROW_AGG )
- {
- ValueFunction vfun = getAggFunction();
-
- if( vfun instanceof KahanFunction ) {
- KahanObject kbuff = new KahanObject(0, 0);
- KahanFunction kplus = (KahanFunction) vfun;
-
- if( a == null && !sparseSafe ) { //empty
- for( int i=rl; i<ru; i++ ) {
- kbuff.set(0, 0);
- for( int j=0; j<n; j++ )
- kplus.execute2(kbuff, genexec( 0, b, scalars, m, n, i, j ));
- lnnz += ((c[i] = kbuff._sum)!=0) ? 1 : 0;
- }
- }
- else if( a != null ) { //general case
- for( int i=rl, ix=rl*n; i<ru; i++ ) {
- kbuff.set(0, 0);
- for( int j=0; j<n; j++, ix++ )
- if( a[ix] != 0 || !sparseSafe)
- kplus.execute2(kbuff, genexec( a[ix], b, scalars, m, n, i, j ));
- lnnz += ((c[i] = kbuff._sum)!=0) ? 1 : 0;
- }
- }
- }
- else {
- double initialVal = (_aggOp==AggOp.MIN) ? Double.MAX_VALUE : -Double.MAX_VALUE;
- if( a == null && !sparseSafe ) { //empty
- for( int i=rl; i<ru; i++ ) {
- double tmp = initialVal;
- for( int j=0; j<n; j++ )
- tmp = vfun.execute(tmp, genexec( 0, b, scalars, m, n, i, j ));
- lnnz += ((c[i] = tmp)!=0) ? 1 : 0;
- }
- }
- else if( a != null ) { //general case
- for( int i=rl, ix=rl*n; i<ru; i++ ) {
- double tmp = initialVal;
- for( int j=0; j<n; j++, ix++ )
- if( a[ix] != 0 || !sparseSafe)
- tmp = vfun.execute(tmp, genexec( a[ix], b, scalars, m, n, i, j ));
- if( sparseSafe && UtilFunctions.containsZero(a, ix-n, n) )
- tmp = vfun.execute(tmp, 0);
- lnnz += ((c[i] = tmp)!=0) ? 1 : 0;
- }
- }
- }
+ else if( _type == CellType.ROW_AGG ) {
+ if( _aggOp == AggOp.SUM || _aggOp == AggOp.SUM_SQ )
+ return executeSparseRowAggSum(sblock, b, scalars, out, m, n, sparseSafe, rl, ru);
+ else
+ return executeSparseRowAggMxx(sblock, b, scalars, out, m, n, sparseSafe, rl, ru);
}
- return lnnz;
+ return -1;
}
private double executeSparseAndAgg(SparseBlock sblock, double[][] b, double[] scalars, int m, int n, boolean sparseSafe, int rl, int ru)
@@ -366,184 +293,320 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
if( sparseSafe && sblock == null )
return 0;
- ValueFunction vfun = getAggFunction();
- double ret = 0;
+ if( _aggOp == AggOp.SUM || _aggOp == AggOp.SUM_SQ )
+ return executeSparseAggSum(sblock, b, scalars, m, n, sparseSafe, rl, ru);
+ else
+ return executeSparseAggMxx(sblock, b, scalars, m, n, sparseSafe, rl, ru);
+ }
+
+ private long executeSparseNoAggSparse(SparseBlock sblock, double[][] b, double[] scalars, MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru)
+ throws DMLRuntimeException
+ {
+ //note: sequential scan algorithm for both sparse-safe and -unsafe
+ //in order to avoid binary search for sparse-unsafe
+ SparseBlock c = out.getSparseBlock();
+ long lnnz = 0;
+ for(int i=rl; i<ru; i++) {
+ int lastj = -1;
+ //handle non-empty rows
+ if( sblock != null && !sblock.isEmpty(i) ) {
+ int apos = sblock.pos(i);
+ int alen = sblock.size(i);
+ int[] aix = sblock.indexes(i);
+ double[] avals = sblock.values(i);
+ c.allocate(i, sparseSafe ? alen : n);
+ for(int k=apos; k<apos+alen; k++) {
+ //process zeros before current non-zero
+ if( !sparseSafe )
+ for(int j=lastj+1; j<aix[k]; j++)
+ c.append(i, j, genexec(0, b, scalars, m, n, i, j));
+ //process current non-zero
+ lastj = aix[k];
+ c.append(i, lastj, genexec(avals[k], b, scalars, m, n, i, lastj));
+ }
+ }
+ //process empty rows or remaining zeros
+ if( !sparseSafe )
+ for(int j=lastj+1; j<n; j++)
+ c.append(i, j, genexec(0, b, scalars, m, n, i, j));
+ lnnz += c.size(i);
+ }
- //numerically stable aggregation for sum/sum_sq
- if( vfun instanceof KahanFunction ) {
- KahanObject kbuff = new KahanObject(0, 0);
- KahanFunction kplus = (KahanFunction) vfun;
+ return lnnz;
+ }
- //note: sequential scan algorithm for both sparse-safe and -unsafe
- //in order to avoid binary search for sparse-unsafe
- for(int i=rl; i<ru; i++) {
- int lastj = -1;
- //handle non-empty rows
- if( sblock != null && !sblock.isEmpty(i) ) {
- int apos = sblock.pos(i);
- int alen = sblock.size(i);
- int[] aix = sblock.indexes(i);
- double[] avals = sblock.values(i);
- for(int k=apos; k<apos+alen; k++) {
- //process zeros before current non-zero
- if( !sparseSafe )
- for(int j=lastj+1; j<aix[k]; j++)
- kplus.execute2(kbuff, genexec(0, b, scalars, m, n, i, j));
- //process current non-zero
- lastj = aix[k];
- kplus.execute2(kbuff, genexec(avals[k], b, scalars, m, n, i, lastj));
- }
+ private long executeSparseNoAggDense(SparseBlock sblock, double[][] b, double[] scalars, MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru)
+ throws DMLRuntimeException
+ {
+ //note: sequential scan algorithm for both sparse-safe and -unsafe
+ //in order to avoid binary search for sparse-unsafe
+ double[] c = out.getDenseBlock();
+ long lnnz = 0;
+ for(int i=rl, cix=rl*n; i<ru; i++, cix+=n) {
+ int lastj = -1;
+ //handle non-empty rows
+ if( sblock != null && !sblock.isEmpty(i) ) {
+ int apos = sblock.pos(i);
+ int alen = sblock.size(i);
+ int[] aix = sblock.indexes(i);
+ double[] avals = sblock.values(i);
+ for(int k=apos; k<apos+alen; k++) {
+ //process zeros before current non-zero
+ if( !sparseSafe )
+ for(int j=lastj+1; j<aix[k]; j++)
+ lnnz += ((c[cix+j]=genexec(0, b, scalars, m, n, i, j))!=0)?1:0;
+ //process current non-zero
+ lastj = aix[k];
+ lnnz += ((c[cix+lastj]=genexec(avals[k], b, scalars, m, n, i, lastj))!=0)?1:0;
}
- //process empty rows or remaining zeros
- if( !sparseSafe )
- for(int j=lastj+1; j<n; j++)
- kplus.execute2(kbuff, genexec(0, b, scalars, m, n, i, j));
}
- ret = kbuff._sum;
+ //process empty rows or remaining zeros
+ if( !sparseSafe )
+ for(int j=lastj+1; j<n; j++)
+ lnnz += ((c[cix+j]=genexec(0, b, scalars, m, n, i, j))!=0)?1:0;
}
- //safe aggregation for min/max w/ handling of zero entries
- //note: sparse safe with zero value as min/max handled outside
- else {
- ret = (_aggOp==AggOp.MIN) ? Double.MAX_VALUE : -Double.MAX_VALUE;
- ret = (sparseSafe && sblock.size() < (long)m*n) ? 0 : ret;
-
- //note: sequential scan algorithm for both sparse-safe and -unsafe
- //in order to avoid binary search for sparse-unsafe
- for(int i=rl; i<ru; i++) {
- int lastj = -1;
- //handle non-empty rows
- if( sblock != null && !sblock.isEmpty(i) ) {
- int apos = sblock.pos(i);
- int alen = sblock.size(i);
- int[] aix = sblock.indexes(i);
- double[] avals = sblock.values(i);
- for(int k=apos; k<apos+alen; k++) {
- //process zeros before current non-zero
- if( !sparseSafe )
- for(int j=lastj+1; j<aix[k]; j++)
- ret = vfun.execute(ret, genexec(0, b, scalars, m, n, i, j));
- //process current non-zero
- lastj = aix[k];
- ret = vfun.execute(ret, genexec(avals[k], b, scalars, m, n, i, lastj));
- }
+ return lnnz;
+ }
+
+ private long executeSparseRowAggSum(SparseBlock sblock, double[][] b, double[] scalars, MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru)
+ throws DMLRuntimeException
+ {
+ KahanFunction kplus = (KahanFunction) getAggFunction();
+ KahanObject kbuff = new KahanObject(0, 0);
+
+ //note: sequential scan algorithm for both sparse-safe and -unsafe
+ //in order to avoid binary search for sparse-unsafe
+ double[] c = out.getDenseBlock();
+ long lnnz = 0;
+ for(int i=rl; i<ru; i++) {
+ kbuff.set(0, 0);
+ int lastj = -1;
+ //handle non-empty rows
+ if( sblock != null && !sblock.isEmpty(i) ) {
+ int apos = sblock.pos(i);
+ int alen = sblock.size(i);
+ int[] aix = sblock.indexes(i);
+ double[] avals = sblock.values(i);
+ for(int k=apos; k<apos+alen; k++) {
+ //process zeros before current non-zero
+ if( !sparseSafe )
+ for(int j=lastj+1; j<aix[k]; j++)
+ kplus.execute2(kbuff, genexec(0, b, scalars, m, n, i, j));
+ //process current non-zero
+ lastj = aix[k];
+ kplus.execute2(kbuff, genexec(avals[k], b, scalars, m, n, i, lastj));
}
- //process empty rows or remaining zeros
- if( !sparseSafe )
- for(int j=lastj+1; j<n; j++)
- ret = vfun.execute(ret, genexec(0, b, scalars, m, n, i, j));
}
+ //process empty rows or remaining zeros
+ if( !sparseSafe )
+ for(int j=lastj+1; j<n; j++)
+ kplus.execute2(kbuff, genexec(0, b, scalars, m, n, i, j));
+ lnnz += ((c[i] = kbuff._sum)!=0) ? 1 : 0;
}
-
- return ret;
+ return lnnz;
}
- private long executeSparse(SparseBlock sblock, double[][] b, double[] scalars, double[] c, int m, int n, boolean sparseSafe, int rl, int ru)
+ private long executeSparseRowAggMxx(SparseBlock sblock, double[][] b, double[] scalars, MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru)
throws DMLRuntimeException
{
- if( sparseSafe && sblock == null )
- return 0;
+ double initialVal = (_aggOp==AggOp.MIN) ? Double.MAX_VALUE : -Double.MAX_VALUE;
+ ValueFunction vfun = getAggFunction();
+ //note: sequential scan algorithm for both sparse-safe and -unsafe
+ //in order to avoid binary search for sparse-unsafe
+ double[] c = out.getDenseBlock();
long lnnz = 0;
- if( _type == CellType.NO_AGG )
- {
- //note: sequential scan algorithm for both sparse-safe and -unsafe
- //in order to avoid binary search for sparse-unsafe
- for(int i=rl, cix=rl*n; i<ru; i++, cix+=n) {
- int lastj = -1;
- //handle non-empty rows
- if( sblock != null && !sblock.isEmpty(i) ) {
- int apos = sblock.pos(i);
- int alen = sblock.size(i);
- int[] aix = sblock.indexes(i);
- double[] avals = sblock.values(i);
- for(int k=apos; k<apos+alen; k++) {
- //process zeros before current non-zero
- if( !sparseSafe )
- for(int j=lastj+1; j<aix[k]; j++)
- lnnz += ((c[cix+j]=genexec(0, b, scalars, m, n, i, j))!=0)?1:0;
- //process current non-zero
- lastj = aix[k];
- lnnz += ((c[cix+lastj]=genexec(avals[k], b, scalars, m, n, i, lastj))!=0)?1:0;
- }
+ for(int i=rl; i<ru; i++) {
+ double tmp = (sparseSafe && sblock.size(i) < n) ? 0 : initialVal;
+ int lastj = -1;
+ //handle non-empty rows
+ if( sblock != null && !sblock.isEmpty(i) ) {
+ int apos = sblock.pos(i);
+ int alen = sblock.size(i);
+ int[] aix = sblock.indexes(i);
+ double[] avals = sblock.values(i);
+ for(int k=apos; k<apos+alen; k++) {
+ //process zeros before current non-zero
+ if( !sparseSafe )
+ for(int j=lastj+1; j<aix[k]; j++)
+ tmp = vfun.execute(tmp, genexec(0, b, scalars, m, n, i, j));
+ //process current non-zero
+ lastj = aix[k];
+ tmp = vfun.execute( tmp, genexec(avals[k], b, scalars, m, n, i, lastj));
}
- //process empty rows or remaining zeros
- if( !sparseSafe )
- for(int j=lastj+1; j<n; j++)
- lnnz += ((c[cix+j]=genexec(0, b, scalars, m, n, i, j))!=0)?1:0;
}
+ //process empty rows or remaining zeros
+ if( !sparseSafe )
+ for(int j=lastj+1; j<n; j++)
+ tmp = vfun.execute(tmp, genexec(0, b, scalars, m, n, i, j));
+ lnnz += ((c[i] = tmp)!=0) ? 1 : 0;
}
- else if( _type == CellType.ROW_AGG )
- {
- ValueFunction vfun = getAggFunction();
-
- if( vfun instanceof KahanFunction ) {
- KahanObject kbuff = new KahanObject(0, 0);
- KahanFunction kplus = (KahanFunction) vfun;
+ return lnnz;
+ }
+
+ private double executeSparseAggSum(SparseBlock sblock, double[][] b, double[] scalars, int m, int n, boolean sparseSafe, int rl, int ru)
+ throws DMLRuntimeException
+ {
+ KahanFunction kplus = (KahanFunction) getAggFunction();
+ KahanObject kbuff = new KahanObject(0, 0);
- //note: sequential scan algorithm for both sparse-safe and -unsafe
- //in order to avoid binary search for sparse-unsafe
- for(int i=rl; i<ru; i++) {
- kbuff.set(0, 0);
- int lastj = -1;
- //handle non-empty rows
- if( sblock != null && !sblock.isEmpty(i) ) {
- int apos = sblock.pos(i);
- int alen = sblock.size(i);
- int[] aix = sblock.indexes(i);
- double[] avals = sblock.values(i);
- for(int k=apos; k<apos+alen; k++) {
- //process zeros before current non-zero
- if( !sparseSafe )
- for(int j=lastj+1; j<aix[k]; j++)
- kplus.execute2(kbuff, genexec(0, b, scalars, m, n, i, j));
- //process current non-zero
- lastj = aix[k];
- kplus.execute2(kbuff, genexec(avals[k], b, scalars, m, n, i, lastj));
- }
- }
- //process empty rows or remaining zeros
+ //note: sequential scan algorithm for both sparse-safe and -unsafe
+ //in order to avoid binary search for sparse-unsafe
+ for(int i=rl; i<ru; i++) {
+ int lastj = -1;
+ //handle non-empty rows
+ if( sblock != null && !sblock.isEmpty(i) ) {
+ int apos = sblock.pos(i);
+ int alen = sblock.size(i);
+ int[] aix = sblock.indexes(i);
+ double[] avals = sblock.values(i);
+ for(int k=apos; k<apos+alen; k++) {
+ //process zeros before current non-zero
if( !sparseSafe )
- for(int j=lastj+1; j<n; j++)
+ for(int j=lastj+1; j<aix[k]; j++)
kplus.execute2(kbuff, genexec(0, b, scalars, m, n, i, j));
- lnnz += ((c[i] = kbuff._sum)!=0) ? 1 : 0;
+ //process current non-zero
+ lastj = aix[k];
+ kplus.execute2(kbuff, genexec(avals[k], b, scalars, m, n, i, lastj));
}
}
- else {
- double initialVal = (_aggOp==AggOp.MIN) ? Double.MAX_VALUE : -Double.MAX_VALUE;
-
- //note: sequential scan algorithm for both sparse-safe and -unsafe
- //in order to avoid binary search for sparse-unsafe
- for(int i=rl; i<ru; i++) {
- double tmp = (sparseSafe && sblock.size(i) < n) ? 0 : initialVal;
- int lastj = -1;
- //handle non-empty rows
- if( sblock != null && !sblock.isEmpty(i) ) {
- int apos = sblock.pos(i);
- int alen = sblock.size(i);
- int[] aix = sblock.indexes(i);
- double[] avals = sblock.values(i);
- for(int k=apos; k<apos+alen; k++) {
- //process zeros before current non-zero
- if( !sparseSafe )
- for(int j=lastj+1; j<aix[k]; j++)
- tmp = vfun.execute(tmp, genexec(0, b, scalars, m, n, i, j));
- //process current non-zero
- lastj = aix[k];
- tmp = vfun.execute( tmp, genexec(avals[k], b, scalars, m, n, i, lastj));
- }
- }
- //process empty rows or remaining zeros
+ //process empty rows or remaining zeros
+ if( !sparseSafe )
+ for(int j=lastj+1; j<n; j++)
+ kplus.execute2(kbuff, genexec(0, b, scalars, m, n, i, j));
+ }
+ return kbuff._sum;
+ }
+
+ private double executeSparseAggMxx(SparseBlock sblock, double[][] b, double[] scalars, int m, int n, boolean sparseSafe, int rl, int ru)
+ throws DMLRuntimeException
+ {
+ double ret = (_aggOp==AggOp.MIN) ? Double.MAX_VALUE : -Double.MAX_VALUE;
+ ret = (sparseSafe && sblock.size() < (long)m*n) ? 0 : ret;
+ ValueFunction vfun = getAggFunction();
+
+ //note: sequential scan algorithm for both sparse-safe and -unsafe
+ //in order to avoid binary search for sparse-unsafe
+ for(int i=rl; i<ru; i++) {
+ int lastj = -1;
+ //handle non-empty rows
+ if( sblock != null && !sblock.isEmpty(i) ) {
+ int apos = sblock.pos(i);
+ int alen = sblock.size(i);
+ int[] aix = sblock.indexes(i);
+ double[] avals = sblock.values(i);
+ for(int k=apos; k<apos+alen; k++) {
+ //process zeros before current non-zero
if( !sparseSafe )
- for(int j=lastj+1; j<n; j++)
- tmp = vfun.execute(tmp, genexec(0, b, scalars, m, n, i, j));
- lnnz += ((c[i] = tmp)!=0) ? 1 : 0;
+ for(int j=lastj+1; j<aix[k]; j++)
+ ret = vfun.execute(ret, genexec(0, b, scalars, m, n, i, j));
+ //process current non-zero
+ lastj = aix[k];
+ ret = vfun.execute(ret, genexec(avals[k], b, scalars, m, n, i, lastj));
}
}
+ //process empty rows or remaining zeros
+ if( !sparseSafe )
+ for(int j=lastj+1; j<n; j++)
+ ret = vfun.execute(ret, genexec(0, b, scalars, m, n, i, j));
}
-
+ return ret;
+ }
+
+ private long executeDenseNoAgg(double[] a, double[][] b, double[] scalars, double[] c, int m, int n, boolean sparseSafe, int rl, int ru)
+ throws DMLRuntimeException
+ {
+ long lnnz = 0;
+ for( int i=rl, ix=rl*n; i<ru; i++ )
+ for( int j=0; j<n; j++, ix++ ) {
+ double aval = (a != null) ? a[ix] : 0;
+ if( aval != 0 || !sparseSafe) {
+ c[ix] = genexec( aval, b, scalars, m, n, i, j);
+ lnnz += (c[ix]!=0) ? 1 : 0;
+ }
+ }
return lnnz;
}
-
+
+ private long executeDenseRowAggSum(double[] a, double[][] b, double[] scalars, double[] c, int m, int n, boolean sparseSafe, int rl, int ru)
+ throws DMLRuntimeException
+ {
+ KahanFunction kplus = (KahanFunction) getAggFunction();
+ KahanObject kbuff = new KahanObject(0, 0);
+ long lnnz = 0;
+ for( int i=rl, ix=rl*n; i<ru; i++ ) {
+ kbuff.set(0, 0);
+ for( int j=0; j<n; j++, ix++ ) {
+ double aval = (a != null) ? a[ix] : 0;
+ if( aval != 0 || !sparseSafe)
+ kplus.execute2(kbuff, genexec(aval, b, scalars, m, n, i, j));
+ }
+ lnnz += ((c[i] = kbuff._sum)!=0) ? 1 : 0;
+ }
+ return lnnz;
+ }
+
+ private long executeDenseRowAggMxx(double[] a, double[][] b, double[] scalars, double[] c, int m, int n, boolean sparseSafe, int rl, int ru)
+ throws DMLRuntimeException
+ {
+ double initialVal = (_aggOp==AggOp.MIN) ? Double.MAX_VALUE : -Double.MAX_VALUE;
+ ValueFunction vfun = getAggFunction();
+ long lnnz = 0;
+ if( a == null && !sparseSafe ) { //empty
+ for( int i=rl; i<ru; i++ ) {
+ double tmp = initialVal;
+ for( int j=0; j<n; j++ )
+ tmp = vfun.execute(tmp, genexec( 0, b, scalars, m, n, i, j ));
+ lnnz += ((c[i] = tmp)!=0) ? 1 : 0;
+ }
+ }
+ else if( a != null ) { //general case
+ for( int i=rl, ix=rl*n; i<ru; i++ ) {
+ double tmp = initialVal;
+ for( int j=0; j<n; j++, ix++ )
+ if( a[ix] != 0 || !sparseSafe)
+ tmp = vfun.execute(tmp, genexec( a[ix], b, scalars, m, n, i, j ));
+ if( sparseSafe && UtilFunctions.containsZero(a, ix-n, n) )
+ tmp = vfun.execute(tmp, 0);
+ lnnz += ((c[i] = tmp)!=0) ? 1 : 0;
+ }
+ }
+ return lnnz;
+ }
+
+ private double executeDenseAggSum(double[] a, double[][] b, double[] scalars, int m, int n, boolean sparseSafe, int rl, int ru)
+ throws DMLRuntimeException
+ {
+ KahanFunction kplus = (KahanFunction) getAggFunction();
+ KahanObject kbuff = new KahanObject(0, 0);
+
+ for( int i=rl, ix=rl*n; i<ru; i++ )
+ for( int j=0; j<n; j++, ix++ ) {
+ double aval = (a != null) ? a[ix] : 0;
+ if( aval != 0 || !sparseSafe)
+ kplus.execute2(kbuff, genexec(aval, b, scalars, m, n, i, j));
+ }
+ return kbuff._sum;
+ }
+
+ private double executeDenseAggMxx(double[] a, double[][] b, double[] scalars, int m, int n, boolean sparseSafe, int rl, int ru)
+ throws DMLRuntimeException
+ {
+ //safe aggregation for min/max w/ handling of zero entries
+ //note: sparse safe with zero value as min/max handled outside
+ double ret = (_aggOp==AggOp.MIN) ? Double.MAX_VALUE : -Double.MAX_VALUE;
+ ValueFunction vfun = getAggFunction();
+
+ for( int i=rl, ix=rl*n; i<ru; i++ )
+ for( int j=0; j<n; j++, ix++ ) {
+ double aval = (a != null) ? a[ix] : 0;
+ if( aval != 0 || !sparseSafe)
+ ret = vfun.execute(ret, genexec(aval, b, scalars, m, n, i, j));
+ }
+ return ret;
+ }
+
+
protected abstract double genexec( double a, double[][] b, double[] scalars, int m, int n, int rowIndex, int colIndex);
private class ParAggTask implements Callable<Double>
@@ -582,14 +645,14 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
private final MatrixBlock _a;
private final double[][] _b;
private final double[] _scalars;
- private final double[] _c;
+ private final MatrixBlock _c;
private final int _rlen;
private final int _clen;
private final boolean _safe;
private final int _rl;
private final int _ru;
- protected ParExecTask( MatrixBlock a, double[][] b, double[] scalars, double[] c,
+ protected ParExecTask( MatrixBlock a, double[][] b, double[] scalars, MatrixBlock c,
int rlen, int clen, boolean sparseSafe, int rl, int ru ) {
_a = a;
_b = b;