You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2017/05/08 05:29:34 UTC
incubator-systemml git commit: [SYSTEMML-1591] Performance
sparse-unsafe cellwise codegen operators
Repository: incubator-systemml
Updated Branches:
refs/heads/master 6c215e700 -> b0a2022e9
[SYSTEMML-1591] Performance sparse-unsafe cellwise codegen operators
So far, sparse-unsafe cellwise codegen operations iterated over all
cells and used binary search to access the individual values of sparse
matrices, which was unnecessarily inefficient. This patch modified all
sparse-unsafe cellwise operations (with and without aggregations) to use
a sequential scan with gap handling and consolidated the different code
paths for sparse-safe and -unsafe operations over sparse matrices.
Furthermore, this patch also fixes issues where sparse-safe operations
passed the wrong column indexes to generated operators.
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/b0a2022e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/b0a2022e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/b0a2022e
Branch: refs/heads/master
Commit: b0a2022e9fa7653fe56816edf4103d7f366f4d69
Parents: 6c215e7
Author: Matthias Boehm <mb...@gmail.com>
Authored: Sun May 7 22:27:20 2017 -0700
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Sun May 7 22:31:29 2017 -0700
----------------------------------------------------------------------
.../sysml/runtime/codegen/SpoofCellwise.java | 209 +++++++++++--------
1 file changed, 124 insertions(+), 85 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/b0a2022e/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java b/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java
index 12d14ad..b01914a 100644
--- a/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java
+++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java
@@ -363,6 +363,9 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
private double executeSparseAndAgg(SparseBlock sblock, double[][] b, double[] scalars, int m, int n, boolean sparseSafe, int rl, int ru)
throws DMLRuntimeException
{
+ if( sparseSafe && sblock == null )
+ return 0;
+
ValueFunction vfun = getAggFunction();
double ret = 0;
@@ -371,22 +374,30 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
KahanObject kbuff = new KahanObject(0, 0);
KahanFunction kplus = (KahanFunction) vfun;
- if( !sparseSafe ) {
- for(int i=rl; i<ru; i++)
- for(int j=0; j<n; j++) {
- double valij = (sblock != null) ? sblock.get(i, j) : 0;
- kplus.execute2( kbuff, genexec(valij, b, scalars, m, n, i, j));
+ //note: sequential scan algorithm for both sparse-safe and -unsafe
+ //in order to avoid binary search for sparse-unsafe
+ for(int i=rl; i<ru; i++) {
+ int lastj = -1;
+ //handle non-empty rows
+ if( sblock != null && !sblock.isEmpty(i) ) {
+ int apos = sblock.pos(i);
+ int alen = sblock.size(i);
+ int[] aix = sblock.indexes(i);
+ double[] avals = sblock.values(i);
+ for(int k=apos; k<apos+alen; k++) {
+ //process zeros before current non-zero
+ if( !sparseSafe )
+ for(int j=lastj+1; j<aix[k]; j++)
+ kplus.execute2(kbuff, genexec(0, b, scalars, m, n, i, j));
+ //process current non-zero
+ lastj = aix[k];
+ kplus.execute2(kbuff, genexec(avals[k], b, scalars, m, n, i, lastj));
}
- }
- else if( sblock != null ) {
- for( int i=rl; i<ru; i++ )
- if( !sblock.isEmpty(i) ) {
- int apos = sblock.pos(i);
- int alen = sblock.size(i);
- double[] avals = sblock.values(i);
- for( int j=apos; j<apos+alen; j++ )
- kplus.execute2( kbuff, genexec(avals[j], b, scalars, m, n, i, j));
- }
+ }
+ //process empty rows or remaining zeros
+ if( !sparseSafe )
+ for(int j=lastj+1; j<n; j++)
+ kplus.execute2(kbuff, genexec(0, b, scalars, m, n, i, j));
}
ret = kbuff._sum;
}
@@ -394,24 +405,34 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
//note: sparse safe with zero value as min/max handled outside
else {
ret = (_aggOp==AggOp.MIN) ? Double.MAX_VALUE : -Double.MAX_VALUE;
- if( !sparseSafe ) {
- for(int i=rl; i<ru; i++)
- for(int j=0; j<n; j++) {
- double valij = (sblock != null) ? sblock.get(i, j) : 0;
- ret = vfun.execute( ret, genexec(valij, b, scalars, m, n, i, j));
+ ret = (sparseSafe && sblock.size() < (long)m*n) ? 0 : ret;
+
+ //note: sequential scan algorithm for both sparse-safe and -unsafe
+ //in order to avoid binary search for sparse-unsafe
+ for(int i=rl; i<ru; i++) {
+ int lastj = -1;
+ //handle non-empty rows
+ if( sblock != null && !sblock.isEmpty(i) ) {
+ int apos = sblock.pos(i);
+ int alen = sblock.size(i);
+ int[] aix = sblock.indexes(i);
+ double[] avals = sblock.values(i);
+ for(int k=apos; k<apos+alen; k++) {
+ //process zeros before current non-zero
+ if( !sparseSafe )
+ for(int j=lastj+1; j<aix[k]; j++)
+ ret = vfun.execute(ret, genexec(0, b, scalars, m, n, i, j));
+ //process current non-zero
+ lastj = aix[k];
+ ret = vfun.execute(ret, genexec(avals[k], b, scalars, m, n, i, lastj));
}
+ }
+ //process empty rows or remaining zeros
+ if( !sparseSafe )
+ for(int j=lastj+1; j<n; j++)
+ ret = vfun.execute(ret, genexec(0, b, scalars, m, n, i, j));
}
- else if( sblock != null ) {
- for( int i=rl; i<ru; i++ )
- if( !sblock.isEmpty(i) ) {
- int apos = sblock.pos(i);
- int alen = sblock.size(i);
- double[] avals = sblock.values(i);
- for( int j=apos; j<apos+alen; j++ )
- ret = vfun.execute( ret, genexec(avals[j], b, scalars, m, n, i, j));
- }
- }
- }
+ }
return ret;
}
@@ -419,31 +440,36 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
private long executeSparse(SparseBlock sblock, double[][] b, double[] scalars, double[] c, int m, int n, boolean sparseSafe, int rl, int ru)
throws DMLRuntimeException
{
+ if( sparseSafe && sblock == null )
+ return 0;
+
long lnnz = 0;
if( _type == CellType.NO_AGG )
{
- if( sparseSafe ) {
- if( sblock != null ) {
- for( int i=rl; i<ru; i++ )
- if( !sblock.isEmpty(i) ) {
- int apos = sblock.pos(i);
- int alen = sblock.size(i);
- double[] avals = sblock.values(i);
- for( int j=apos; j<apos+alen; j++ ) {
- double val = genexec(avals[j], b, scalars, m, n, i, j);
- c[i*n+sblock.indexes(i)[j]] = val;
- lnnz += (val!=0) ? 1 : 0;
- }
- }
- }
- }
- else { //sparse-unsafe
- for(int i=rl, cix=rl*n; i<ru; i++, cix+=n)
- for(int j=0; j<n; j++) {
- double valij = (sblock != null) ? sblock.get(i, j) : 0;
- c[cix+j] = genexec(valij, b, scalars, m, n, i, j);
- lnnz += (c[cix+j]!=0) ? 1 : 0;
+ //note: sequential scan algorithm for both sparse-safe and -unsafe
+ //in order to avoid binary search for sparse-unsafe
+ for(int i=rl, cix=rl*n; i<ru; i++, cix+=n) {
+ int lastj = -1;
+ //handle non-empty rows
+ if( sblock != null && !sblock.isEmpty(i) ) {
+ int apos = sblock.pos(i);
+ int alen = sblock.size(i);
+ int[] aix = sblock.indexes(i);
+ double[] avals = sblock.values(i);
+ for(int k=apos; k<apos+alen; k++) {
+ //process zeros before current non-zero
+ if( !sparseSafe )
+ for(int j=lastj+1; j<aix[k]; j++)
+ lnnz += ((c[cix+j]=genexec(0, b, scalars, m, n, i, j))!=0)?1:0;
+ //process current non-zero
+ lastj = aix[k];
+ lnnz += ((c[cix+lastj]=genexec(avals[k], b, scalars, m, n, i, lastj))!=0)?1:0;
}
+ }
+ //process empty rows or remaining zeros
+ if( !sparseSafe )
+ for(int j=lastj+1; j<n; j++)
+ lnnz += ((c[cix+j]=genexec(0, b, scalars, m, n, i, j))!=0)?1:0;
}
}
else if( _type == CellType.ROW_AGG )
@@ -454,51 +480,64 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
KahanObject kbuff = new KahanObject(0, 0);
KahanFunction kplus = (KahanFunction) vfun;
- if( !sparseSafe ) {
- for(int i=rl; i<ru; i++) {
- kbuff.set(0, 0);
- for(int j=0; j<n; j++)
- kplus.execute2( kbuff, genexec( (sblock != null) ?
- sblock.get(i, j) : 0, b, scalars, m, n, i, j));
- lnnz += ((c[i] = kbuff._sum)!=0) ? 1 : 0;
- }
- }
- else if( sblock != null ) { //general case
- for( int i=rl; i<ru; i++ ) {
- if( sblock.isEmpty(i) ) continue;
- kbuff.set(0, 0);
+ //note: sequential scan algorithm for both sparse-safe and -unsafe
+ //in order to avoid binary search for sparse-unsafe
+ for(int i=rl; i<ru; i++) {
+ kbuff.set(0, 0);
+ int lastj = -1;
+ //handle non-empty rows
+ if( sblock != null && !sblock.isEmpty(i) ) {
int apos = sblock.pos(i);
int alen = sblock.size(i);
+ int[] aix = sblock.indexes(i);
double[] avals = sblock.values(i);
- for( int j=apos; j<apos+alen; j++ )
- kplus.execute2(kbuff, genexec(avals[j], b, scalars, m, n, i, j));
- lnnz += ((c[i] = kbuff._sum)!=0) ? 1 : 0;
+ for(int k=apos; k<apos+alen; k++) {
+ //process zeros before current non-zero
+ if( !sparseSafe )
+ for(int j=lastj+1; j<aix[k]; j++)
+ kplus.execute2(kbuff, genexec(0, b, scalars, m, n, i, j));
+ //process current non-zero
+ lastj = aix[k];
+ kplus.execute2(kbuff, genexec(avals[k], b, scalars, m, n, i, lastj));
+ }
}
+ //process empty rows or remaining zeros
+ if( !sparseSafe )
+ for(int j=lastj+1; j<n; j++)
+ kplus.execute2(kbuff, genexec(0, b, scalars, m, n, i, j));
+ lnnz += ((c[i] = kbuff._sum)!=0) ? 1 : 0;
}
}
else {
double initialVal = (_aggOp==AggOp.MIN) ? Double.MAX_VALUE : -Double.MAX_VALUE;
- if( !sparseSafe ) {
- for(int i=rl; i<ru; i++) {
- double tmp = initialVal;
- for(int j=0; j<n; j++)
- tmp = vfun.execute( tmp, genexec( (sblock != null) ?
- sblock.get(i, j) : 0, b, scalars, m, n, i, j));
- lnnz += ((c[i] = tmp)!=0) ? 1 : 0;
- }
- }
- else if( sblock != null ) { //general case
- for( int i=rl; i<ru; i++ ) {
- if( sblock.isEmpty(i) ) continue;
+
+ //note: sequential scan algorithm for both sparse-safe and -unsafe
+ //in order to avoid binary search for sparse-unsafe
+ for(int i=rl; i<ru; i++) {
+ double tmp = (sparseSafe && sblock.size(i) < n) ? 0 : initialVal;
+ int lastj = -1;
+ //handle non-empty rows
+ if( sblock != null && !sblock.isEmpty(i) ) {
int apos = sblock.pos(i);
int alen = sblock.size(i);
+ int[] aix = sblock.indexes(i);
double[] avals = sblock.values(i);
- double tmp = (alen < n) ? 0 : initialVal;
- for( int j=apos; j<apos+alen; j++ )
- tmp = vfun.execute(tmp, genexec(avals[j], b, scalars, m, n, i, j));
- lnnz += ((c[i] = tmp)!=0) ? 1 : 0;
+ for(int k=apos; k<apos+alen; k++) {
+ //process zeros before current non-zero
+ if( !sparseSafe )
+ for(int j=lastj+1; j<aix[k]; j++)
+ tmp = vfun.execute(tmp, genexec(0, b, scalars, m, n, i, j));
+ //process current non-zero
+ lastj = aix[k];
+ tmp = vfun.execute( tmp, genexec(avals[k], b, scalars, m, n, i, lastj));
+ }
}
- }
+ //process empty rows or remaining zeros
+ if( !sparseSafe )
+ for(int j=lastj+1; j<n; j++)
+ tmp = vfun.execute(tmp, genexec(0, b, scalars, m, n, i, j));
+ lnnz += ((c[i] = tmp)!=0) ? 1 : 0;
+ }
}
}