You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2017/05/08 05:29:34 UTC

incubator-systemml git commit: [SYSTEMML-1591] Performance sparse-unsafe cellwise codegen operators

Repository: incubator-systemml
Updated Branches:
  refs/heads/master 6c215e700 -> b0a2022e9


[SYSTEMML-1591] Performance sparse-unsafe cellwise codegen operators

So far, sparse-unsafe cellwise codegen operations iterated over all
cells and used binary search to access the individual values of sparse
matrices, which was unnecessarily inefficient. This patch modified all
sparse-unsafe cellwise operations (with and without aggregations) to use
a sequential scan with gap handling and consolidated the different code
paths for sparse-safe and -unsafe operations over sparse matrices.

Furthermore, this patch also fixes issues where sparse-safe operations
passed the wrong column indexes to generated operators.


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/b0a2022e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/b0a2022e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/b0a2022e

Branch: refs/heads/master
Commit: b0a2022e9fa7653fe56816edf4103d7f366f4d69
Parents: 6c215e7
Author: Matthias Boehm <mb...@gmail.com>
Authored: Sun May 7 22:27:20 2017 -0700
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Sun May 7 22:31:29 2017 -0700

----------------------------------------------------------------------
 .../sysml/runtime/codegen/SpoofCellwise.java    | 209 +++++++++++--------
 1 file changed, 124 insertions(+), 85 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/b0a2022e/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java b/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java
index 12d14ad..b01914a 100644
--- a/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java
+++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java
@@ -363,6 +363,9 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 	private double executeSparseAndAgg(SparseBlock sblock, double[][] b, double[] scalars, int m, int n, boolean sparseSafe, int rl, int ru) 
 		throws DMLRuntimeException 
 	{
+		if( sparseSafe && sblock == null )
+			return 0;
+		
 		ValueFunction vfun = getAggFunction();
 		double ret = 0;
 		
@@ -371,22 +374,30 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 			KahanObject kbuff = new KahanObject(0, 0);
 			KahanFunction kplus = (KahanFunction) vfun;
 	
-			if( !sparseSafe ) {
-				for(int i=rl; i<ru; i++)
-					for(int j=0; j<n; j++) {
-						double valij = (sblock != null) ? sblock.get(i, j) : 0;
-						kplus.execute2( kbuff, genexec(valij, b, scalars, m, n, i, j)); 
+			//note: sequential scan algorithm for both sparse-safe and -unsafe 
+			//in order to avoid binary search for sparse-unsafe 
+			for(int i=rl; i<ru; i++) {
+				int lastj = -1;
+				//handle non-empty rows
+				if( sblock != null && !sblock.isEmpty(i) ) {
+					int apos = sblock.pos(i);
+					int alen = sblock.size(i);
+					int[] aix = sblock.indexes(i);
+					double[] avals = sblock.values(i);
+					for(int k=apos; k<apos+alen; k++) {
+						//process zeros before current non-zero
+						if( !sparseSafe )
+							for(int j=lastj+1; j<aix[k]; j++)
+								kplus.execute2(kbuff, genexec(0, b, scalars, m, n, i, j));
+						//process current non-zero
+						lastj = aix[k];
+						kplus.execute2(kbuff, genexec(avals[k], b, scalars, m, n, i, lastj));
 					}
-			}
-			else if( sblock != null ) {
-				for( int i=rl; i<ru; i++ )
-					if( !sblock.isEmpty(i) ) {
-						int apos = sblock.pos(i);
-						int alen = sblock.size(i);
-						double[] avals = sblock.values(i);
-						for( int j=apos; j<apos+alen; j++ )
-							kplus.execute2( kbuff, genexec(avals[j], b, scalars, m, n, i, j)); 
-					}	
+				}
+				//process empty rows or remaining zeros
+				if( !sparseSafe )
+					for(int j=lastj+1; j<n; j++)
+						kplus.execute2(kbuff, genexec(0, b, scalars, m, n, i, j));
 			}
 			ret = kbuff._sum;
 		}
@@ -394,24 +405,34 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 		//note: sparse safe with zero value as min/max handled outside
 		else {
 			ret = (_aggOp==AggOp.MIN) ? Double.MAX_VALUE : -Double.MAX_VALUE; 
-			if( !sparseSafe ) {
-				for(int i=rl; i<ru; i++)
-					for(int j=0; j<n; j++) {
-						double valij = (sblock != null) ? sblock.get(i, j) : 0;
-						ret = vfun.execute( ret, genexec(valij, b, scalars, m, n, i, j)); 
+			ret = (sparseSafe && sblock.size() < (long)m*n) ? 0 : ret;
+			
+			//note: sequential scan algorithm for both sparse-safe and -unsafe 
+			//in order to avoid binary search for sparse-unsafe 
+			for(int i=rl; i<ru; i++) {
+				int lastj = -1;
+				//handle non-empty rows
+				if( sblock != null && !sblock.isEmpty(i) ) {
+					int apos = sblock.pos(i);
+					int alen = sblock.size(i);
+					int[] aix = sblock.indexes(i);
+					double[] avals = sblock.values(i);
+					for(int k=apos; k<apos+alen; k++) {
+						//process zeros before current non-zero
+						if( !sparseSafe )
+							for(int j=lastj+1; j<aix[k]; j++)
+								ret = vfun.execute(ret, genexec(0, b, scalars, m, n, i, j));
+						//process current non-zero
+						lastj = aix[k];
+						ret = vfun.execute(ret, genexec(avals[k], b, scalars, m, n, i, lastj));
 					}
+				}
+				//process empty rows or remaining zeros
+				if( !sparseSafe )
+					for(int j=lastj+1; j<n; j++)
+						ret = vfun.execute(ret, genexec(0, b, scalars, m, n, i, j));
 			}
-			else if( sblock != null ) {
-				for( int i=rl; i<ru; i++ )
-					if( !sblock.isEmpty(i) ) {
-						int apos = sblock.pos(i);
-						int alen = sblock.size(i);
-						double[] avals = sblock.values(i);
-						for( int j=apos; j<apos+alen; j++ )
-							ret = vfun.execute( ret, genexec(avals[j], b, scalars, m, n, i, j)); 
-					}	
-			}
-		}		
+		}
 		
 		return ret;
 	}
@@ -419,31 +440,36 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 	private long executeSparse(SparseBlock sblock, double[][] b, double[] scalars, double[] c, int m, int n, boolean sparseSafe, int rl, int ru) 
 		throws DMLRuntimeException 
 	{
+		if( sparseSafe && sblock == null )
+			return 0;
+		
 		long lnnz = 0;
 		if( _type == CellType.NO_AGG )
 		{
-			if( sparseSafe ) {
-				if( sblock != null ) {
-					for( int i=rl; i<ru; i++ )
-						if( !sblock.isEmpty(i) ) {
-							int apos = sblock.pos(i);
-							int alen = sblock.size(i);
-							double[] avals = sblock.values(i);
-							for( int j=apos; j<apos+alen; j++ ) {
-								double val = genexec(avals[j], b, scalars, m, n, i, j);
-								c[i*n+sblock.indexes(i)[j]] = val;
-								lnnz += (val!=0) ? 1 : 0;
-							}
-						}
-				}
-			}
-			else { //sparse-unsafe
-				for(int i=rl, cix=rl*n; i<ru; i++, cix+=n)
-					for(int j=0; j<n; j++) {
-						double valij = (sblock != null) ? sblock.get(i, j) : 0;
-						c[cix+j] = genexec(valij, b, scalars, m, n, i, j); 
-						lnnz += (c[cix+j]!=0) ? 1 : 0;
+			//note: sequential scan algorithm for both sparse-safe and -unsafe 
+			//in order to avoid binary search for sparse-unsafe 
+			for(int i=rl, cix=rl*n; i<ru; i++, cix+=n) {
+				int lastj = -1;
+				//handle non-empty rows
+				if( sblock != null && !sblock.isEmpty(i) ) {
+					int apos = sblock.pos(i);
+					int alen = sblock.size(i);
+					int[] aix = sblock.indexes(i);
+					double[] avals = sblock.values(i);
+					for(int k=apos; k<apos+alen; k++) {
+						//process zeros before current non-zero
+						if( !sparseSafe )
+							for(int j=lastj+1; j<aix[k]; j++)
+								lnnz += ((c[cix+j]=genexec(0, b, scalars, m, n, i, j))!=0)?1:0;
+						//process current non-zero
+						lastj = aix[k];
+						lnnz += ((c[cix+lastj]=genexec(avals[k], b, scalars, m, n, i, lastj))!=0)?1:0;
 					}
+				}
+				//process empty rows or remaining zeros
+				if( !sparseSafe )
+					for(int j=lastj+1; j<n; j++)
+						lnnz += ((c[cix+j]=genexec(0, b, scalars, m, n, i, j))!=0)?1:0;
 			}
 		}
 		else if( _type == CellType.ROW_AGG ) 
@@ -454,51 +480,64 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl
 				KahanObject kbuff = new KahanObject(0, 0);
 				KahanFunction kplus = (KahanFunction) vfun;
 
-				if( !sparseSafe ) { 
-					for(int i=rl; i<ru; i++) {
-						kbuff.set(0, 0);
-						for(int j=0; j<n; j++)
-							kplus.execute2( kbuff, genexec( (sblock != null) ? 
-								sblock.get(i, j) : 0, b, scalars, m, n, i, j)); 
-						lnnz += ((c[i] = kbuff._sum)!=0) ? 1 : 0;
-					}
-				}
-				else if( sblock != null ) { //general case
-					for( int i=rl; i<ru; i++ ) {
-						if( sblock.isEmpty(i) ) continue;
-						kbuff.set(0, 0);
+				//note: sequential scan algorithm for both sparse-safe and -unsafe 
+				//in order to avoid binary search for sparse-unsafe 
+				for(int i=rl; i<ru; i++) {
+					kbuff.set(0, 0);
+					int lastj = -1;
+					//handle non-empty rows
+					if( sblock != null && !sblock.isEmpty(i) ) {
 						int apos = sblock.pos(i);
 						int alen = sblock.size(i);
+						int[] aix = sblock.indexes(i);
 						double[] avals = sblock.values(i);
-						for( int j=apos; j<apos+alen; j++ )
-							kplus.execute2(kbuff, genexec(avals[j], b, scalars, m, n, i, j));
-						lnnz += ((c[i] = kbuff._sum)!=0) ? 1 : 0;	
+						for(int k=apos; k<apos+alen; k++) {
+							//process zeros before current non-zero
+							if( !sparseSafe )
+								for(int j=lastj+1; j<aix[k]; j++)
+									kplus.execute2(kbuff, genexec(0, b, scalars, m, n, i, j));
+							//process current non-zero
+							lastj = aix[k];
+							kplus.execute2(kbuff, genexec(avals[k], b, scalars, m, n, i, lastj));
+						}
 					}
+					//process empty rows or remaining zeros
+					if( !sparseSafe )
+						for(int j=lastj+1; j<n; j++)
+							kplus.execute2(kbuff, genexec(0, b, scalars, m, n, i, j));
+					lnnz += ((c[i] = kbuff._sum)!=0) ? 1 : 0;
 				}
 			}
 			else {
 				double initialVal = (_aggOp==AggOp.MIN) ? Double.MAX_VALUE : -Double.MAX_VALUE;
-				if( !sparseSafe ) { 
-					for(int i=rl; i<ru; i++) {
-						double tmp = initialVal;
-						for(int j=0; j<n; j++)
-							tmp = vfun.execute( tmp, genexec( (sblock != null) ? 
-								sblock.get(i, j) : 0, b, scalars, m, n, i, j)); 
-						lnnz += ((c[i] = tmp)!=0) ? 1 : 0;
-					}
-				}
-				else if( sblock != null ) { //general case
-					for( int i=rl; i<ru; i++ ) {
-						if( sblock.isEmpty(i) ) continue;
+				
+				//note: sequential scan algorithm for both sparse-safe and -unsafe 
+				//in order to avoid binary search for sparse-unsafe 
+				for(int i=rl; i<ru; i++) {
+					double tmp = (sparseSafe && sblock.size(i) < n) ? 0 : initialVal;
+					int lastj = -1;
+					//handle non-empty rows
+					if( sblock != null && !sblock.isEmpty(i) ) {
 						int apos = sblock.pos(i);
 						int alen = sblock.size(i);
+						int[] aix = sblock.indexes(i);
 						double[] avals = sblock.values(i);
-						double tmp = (alen < n) ? 0 : initialVal;
-						for( int j=apos; j<apos+alen; j++ )
-							tmp = vfun.execute(tmp, genexec(avals[j], b, scalars, m, n, i, j));
-						lnnz += ((c[i] = tmp)!=0) ? 1 : 0;	
+						for(int k=apos; k<apos+alen; k++) {
+							//process zeros before current non-zero
+							if( !sparseSafe )
+								for(int j=lastj+1; j<aix[k]; j++)
+									tmp = vfun.execute(tmp, genexec(0, b, scalars, m, n, i, j));
+							//process current non-zero
+							lastj = aix[k];
+							tmp = vfun.execute( tmp, genexec(avals[k], b, scalars, m, n, i, lastj));
+						}
 					}
-				}				
+					//process empty rows or remaining zeros
+					if( !sparseSafe )
+						for(int j=lastj+1; j<n; j++)
+							tmp = vfun.execute(tmp, genexec(0, b, scalars, m, n, i, j));
+					lnnz += ((c[i] = tmp)!=0) ? 1 : 0;
+				}
 			}
 		}