You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2016/03/27 22:41:24 UTC

[3/4] incubator-systemml git commit: Cache-conscious sparse-sparse core block matrix multiplication

Cache-conscious sparse-sparse core block matrix multiplication

In a similar spirit as the new sparse-dense cache-conscious matrix
multiply, this patch also makes sparse-sparse matrix multiply
cache-conscious. However, due to the sparse inner loop, we use a best
effort cache blocking with dynamic block sizes computed from the
sparsity of A to reflect average row reuse. Overall, we've seen
performance improvements of up to 2x without degradation over a variety
of usecases with different sparsity and shapes.

Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/5e9782ff
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/5e9782ff
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/5e9782ff

Branch: refs/heads/master
Commit: 5e9782ffcb46f7718c57cdce5b924fa6c85efeff
Parents: e035649
Author: Matthias Boehm <mb...@us.ibm.com>
Authored: Sat Mar 26 21:32:39 2016 -0700
Committer: Matthias Boehm <mb...@us.ibm.com>
Committed: Sat Mar 26 21:57:37 2016 -0700

----------------------------------------------------------------------
 .../runtime/matrix/data/LibMatrixMult.java      | 52 +++++++++++---------
 .../sysml/runtime/util/UtilFunctions.java       |  9 +++-
 2 files changed, 37 insertions(+), 24 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/5e9782ff/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
index 5f8f649..698149c 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
@@ -1403,10 +1403,9 @@ public class LibMatrixMult
 		SparseBlock b = m2.sparseBlock;
 		double[] c = ret.denseBlock;
 		int m = m1.rlen;
+		int cd = m1.clen;
 		int n = m2.clen;
 		
-		//TODO perf sparse block
-		
 		// MATRIX-MATRIX (VV, MV not applicable here because V always dense)
 		if(LOW_LEVEL_OPTIMIZATION)
 		{
@@ -1433,28 +1432,37 @@ public class LibMatrixMult
 			}	
 			else                       //MATRIX-MATRIX
 			{
-				for( int i=rl, cix=rl*n; i<ru; i++, cix+=n )
-				{
-					if( !a.isEmpty(i) ) 
-					{
-						int apos = a.pos(i);
-						int alen = a.size(i);
-						int[] aix = a.indexes(i);
-						double[] avals = a.values(i);					
-						
-						for(int k = apos; k < apos+alen; k++) 
-						{
-							double val = avals[k];
-							if( !b.isEmpty(aix[k]) ) 
-							{
-								int bpos = b.pos(aix[k]);
-								int blen = b.size(aix[k]);
-								int[] bix = b.indexes(aix[k]);
-								double[] bvals = b.values(aix[k]);	
+				//block sizes for best-effort blocking w/ sufficient row reuse in B yet small overhead
+				final int blocksizeI = 32;
+				final int blocksizeK = (int)Math.max(32, UtilFunctions.nextIntPow2(
+						(int)Math.pow((double)m*cd/m1.nonZeros,2)));
+				
+				//temporary array of current sparse positions
+				int[] curk = new int[blocksizeI];
+				
+				//blocked execution over IK 
+				for( int bi = rl; bi < ru; bi+=blocksizeI ) {
+					Arrays.fill(curk, 0); //reset positions
+					for( int bk = 0, bimin = Math.min(ru, bi+blocksizeI); bk < cd; bk+=blocksizeK ) {
+						final int bkmin = Math.min(cd, bk+blocksizeK); 
+				
+						//core sub block matrix multiplication
+						for( int i=bi, cix=bi*n; i<bimin; i++, cix+=n ) {
+							if( !a.isEmpty(i) ) {
+								final int apos = a.pos(i);
+								final int alen = a.size(i);
+								int[] aix = a.indexes(i);
+								double[] avals = a.values(i);	
 								
-								vectMultiplyAdd(val, bvals, c, bix, bpos, cix, blen);
+								int k = curk[i-bi] + apos;									
+				    			for(; k < apos+alen && aix[k]<bkmin; k++) {
+									if( !b.isEmpty(aix[k]) )
+										vectMultiplyAdd(avals[k], b.values(aix[k]), c, 
+											b.indexes(aix[k]), b.pos(aix[k]), cix, b.size(aix[k]));
+								}
+								curk[i-bi] = k - apos;
 							}
-						}						
+						}
 					}
 				}
 			}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/5e9782ff/src/main/java/org/apache/sysml/runtime/util/UtilFunctions.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/util/UtilFunctions.java b/src/main/java/org/apache/sysml/runtime/util/UtilFunctions.java
index fe66e55..c1d83e7 100644
--- a/src/main/java/org/apache/sysml/runtime/util/UtilFunctions.java
+++ b/src/main/java/org/apache/sysml/runtime/util/UtilFunctions.java
@@ -32,11 +32,16 @@ public class UtilFunctions
 	public static double DOUBLE_EPS = Math.pow(2, -53);
 	
 	
-	public static int longHashFunc(long v)
-	{
+	public static int longHashFunc(long v) {
 		return (int)(v^(v>>>32));
 	}
 	
+	public static int nextIntPow2( int in ) {
+		int expon = (in==0) ? 0 : 32-Integer.numberOfLeadingZeros(in-1);
+		long pow2 = (long) Math.pow(2, expon);
+		return (int)((pow2>Integer.MAX_VALUE)?Integer.MAX_VALUE : pow2);	
+	}
+	
 	//return one block index given the index in cell format and block size
 	//TODO to be deleted
 	public static long blockIndexCalculation(long cellIndex, int blockSize)