You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2018/05/31 05:05:34 UTC
[2/2] systemml git commit: [SYSTEMML-2351] Performance sparse
matrix-vector mult w/ large vector
[SYSTEMML-2351] Performance sparse matrix-vector mult w/ large vector
This patch improves the performance of sparse matrix-vector
multiplication with large vectors (> L3 cache size) as used for
algorithms such as PageRank and local singlenode operations. Although we
already had a cache-conscious implementation, this case required
additional tuning. In detail, this includes better blocking for L1 (pos
and output vector), and L2 (input vector) which allows a better
exploitation of cache locality of the vector across rows of the large,
sparse input matrix.
For executing 100 iterations of PageRank over a graph with 10M nodes and
5G edges (~60GB, 80MB vector) on a 2-socket box w/ Xeon Gold 6138 (80
vcores), this patch improved the end-to-end runtime from 288s to 164s.
Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/af9cc8a9
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/af9cc8a9
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/af9cc8a9
Branch: refs/heads/master
Commit: af9cc8a90b73520b04ecd579d7359a62d577009f
Parents: 7eebb42
Author: Matthias Boehm <mb...@gmail.com>
Authored: Wed May 30 22:05:18 2018 -0700
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Wed May 30 22:05:18 2018 -0700
----------------------------------------------------------------------
.../runtime/matrix/data/LibMatrixMult.java | 23 +++++++++++++++++---
1 file changed, 20 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/systemml/blob/af9cc8a9/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
index 92a6b7a..31f827b 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
@@ -1237,14 +1237,14 @@ public class LibMatrixMult
double[] bvals = b.valuesAt(0);
double[] cvals = c.valuesAt(0);
- final int blocksizeI = 32;
- final int blocksizeK = (int)Math.max(2*1024,2*1024*xsp/32); //~ 16KB L1
+ final int blocksizeI = 512; //8KB curk+cvals in L1
+ final int blocksizeK = (int)Math.max(2048,2048*xsp/32); //~256KB bvals in L2
int[] curk = new int[blocksizeI];
for( int bi = rl; bi < ru; bi+=blocksizeI ) {
Arrays.fill(curk, 0); //reset positions
for( int bk=0, bimin = Math.min(ru, bi+blocksizeI); bk<cd; bk+=blocksizeK ) {
- for( int i=bi, bkmin = Math.min(bk+blocksizeK, cd); i<bimin; i++) {
+ for( int i=bi, bkmin = bk+blocksizeK; i<bimin; i++) {
if( a.isEmpty(i) ) continue;
int apos = a.pos(i);
int alen = a.size(i);
@@ -3814,6 +3814,23 @@ public class LibMatrixMult
return knnz;
}
+
+ @SuppressWarnings("unused")
+ private static void resetPosVect(int[] curk, SparseBlock sblock, int rl, int ru) {
+ if( sblock instanceof SparseBlockMCSR ) {
+ //all rows start at position 0 (individual arrays)
+ Arrays.fill(curk, 0, ru-rl, 0);
+ }
+ else if( sblock instanceof SparseBlockCSR ) {
+ //row start positions given in row ptr array
+ SparseBlockCSR csr = (SparseBlockCSR) sblock;
+ System.arraycopy(csr.rowPointers(), rl, curk, 0, ru-rl);
+ }
+ else { //general case
+ for(int i=rl; i<ru; i++)
+ curk[i-rl] = sblock.pos(i);
+ }
+ }
private static void sumScalarResults(List<Future<Double>> tasks, MatrixBlock ret)
throws InterruptedException, ExecutionException