You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2018/05/31 05:05:34 UTC
[2/2] systemml git commit: [SYSTEMML-2351] Performance sparse matrix-vector mult w/ large vector

[SYSTEMML-2351] Performance sparse matrix-vector mult w/ large vector

This patch improves the performance of sparse matrix-vector
multiplication with large vectors (> L3 cache size) as used for
algorithms such as PageRank and local singlenode operations. Although we
already had a cache-conscious implementation, this case required
additional tuning. In detail, this includes better blocking for L1 (pos
and output vector), and L2 (input vector) which allows a better
exploitation of cache locality of the vector across rows of the large,
sparse input matrix.

For executing 100 iterations of PageRank over a graph with 10M nodes and
5G edges (~60GB, 80MB vector) on a 2-socket box w/ Xeon Gold 6138 (80
vcores), this patch improved the end-to-end runtime from 288s to 164s.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/af9cc8a9
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/af9cc8a9
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/af9cc8a9

Branch: refs/heads/master
Commit: af9cc8a90b73520b04ecd579d7359a62d577009f
Parents: 7eebb42
Author: Matthias Boehm <mb...@gmail.com>
Authored: Wed May 30 22:05:18 2018 -0700
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Wed May 30 22:05:18 2018 -0700

----------------------------------------------------------------------
 .../runtime/matrix/data/LibMatrixMult.java      | 23 +++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/af9cc8a9/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
index 92a6b7a..31f827b 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
@@ -1237,14 +1237,14 @@ public class LibMatrixMult
 		double[] bvals = b.valuesAt(0);
 		double[] cvals = c.valuesAt(0);
 		
-		final int blocksizeI = 32;
-		final int blocksizeK = (int)Math.max(2*1024,2*1024*xsp/32); //~ 16KB L1
+		final int blocksizeI = 512; //8KB curk+cvals in L1
+		final int blocksizeK = (int)Math.max(2048,2048*xsp/32); //~256KB bvals in L2
 		int[] curk = new int[blocksizeI];
 		
 		for( int bi = rl; bi < ru; bi+=blocksizeI ) {
 			Arrays.fill(curk, 0); //reset positions
 			for( int bk=0, bimin = Math.min(ru, bi+blocksizeI); bk<cd; bk+=blocksizeK ) {
-				for( int i=bi, bkmin = Math.min(bk+blocksizeK, cd); i<bimin; i++) {
+				for( int i=bi, bkmin = bk+blocksizeK; i<bimin; i++) {
 					if( a.isEmpty(i) ) continue;
 					int apos = a.pos(i);
 					int alen = a.size(i);
@@ -3814,6 +3814,23 @@ public class LibMatrixMult
 		
 		return knnz;
 	}
+	
+	@SuppressWarnings("unused")
+	private static void resetPosVect(int[] curk, SparseBlock sblock, int rl, int ru) {
+		if( sblock instanceof SparseBlockMCSR ) {
+			//all rows start at position 0 (individual arrays)
+			Arrays.fill(curk, 0, ru-rl, 0);
+		}
+		else if( sblock instanceof SparseBlockCSR ) {
+			//row start positions given in row ptr array
+			SparseBlockCSR csr = (SparseBlockCSR) sblock;
+			System.arraycopy(csr.rowPointers(), rl, curk, 0, ru-rl);
+		}
+		else { //general case
+			for(int i=rl; i<ru; i++)
+				curk[i-rl] = sblock.pos(i);
+		}
+	}
 
 	private static void sumScalarResults(List<Future<Double>> tasks, MatrixBlock ret) 
 		throws InterruptedException, ExecutionException