You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2016/03/27 22:41:24 UTC
[3/4] incubator-systemml git commit: Cache-conscious sparse-sparse
core block matrix multiplication
Cache-conscious sparse-sparse core block matrix multiplication
In a similar spirit as the new sparse-dense cache-conscious matrix
multiply, this patch also makes sparse-sparse matrix multiply
cache-conscious. However, due to the sparse inner loop, we use a best
effort cache blocking with dynamic block sizes computed from the
sparsity of A to reflect average row reuse. Overall, we've seen
performance improvements of up to 2x without degradation over a variety
of usecases with different sparsity and shapes.
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/5e9782ff
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/5e9782ff
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/5e9782ff
Branch: refs/heads/master
Commit: 5e9782ffcb46f7718c57cdce5b924fa6c85efeff
Parents: e035649
Author: Matthias Boehm <mb...@us.ibm.com>
Authored: Sat Mar 26 21:32:39 2016 -0700
Committer: Matthias Boehm <mb...@us.ibm.com>
Committed: Sat Mar 26 21:57:37 2016 -0700
----------------------------------------------------------------------
.../runtime/matrix/data/LibMatrixMult.java | 52 +++++++++++---------
.../sysml/runtime/util/UtilFunctions.java | 9 +++-
2 files changed, 37 insertions(+), 24 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/5e9782ff/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
index 5f8f649..698149c 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
@@ -1403,10 +1403,9 @@ public class LibMatrixMult
SparseBlock b = m2.sparseBlock;
double[] c = ret.denseBlock;
int m = m1.rlen;
+ int cd = m1.clen;
int n = m2.clen;
- //TODO perf sparse block
-
// MATRIX-MATRIX (VV, MV not applicable here because V always dense)
if(LOW_LEVEL_OPTIMIZATION)
{
@@ -1433,28 +1432,37 @@ public class LibMatrixMult
}
else //MATRIX-MATRIX
{
- for( int i=rl, cix=rl*n; i<ru; i++, cix+=n )
- {
- if( !a.isEmpty(i) )
- {
- int apos = a.pos(i);
- int alen = a.size(i);
- int[] aix = a.indexes(i);
- double[] avals = a.values(i);
-
- for(int k = apos; k < apos+alen; k++)
- {
- double val = avals[k];
- if( !b.isEmpty(aix[k]) )
- {
- int bpos = b.pos(aix[k]);
- int blen = b.size(aix[k]);
- int[] bix = b.indexes(aix[k]);
- double[] bvals = b.values(aix[k]);
+ //block sizes for best-effort blocking w/ sufficient row reuse in B yet small overhead
+ final int blocksizeI = 32;
+ final int blocksizeK = (int)Math.max(32, UtilFunctions.nextIntPow2(
+ (int)Math.pow((double)m*cd/m1.nonZeros,2)));
+
+ //temporary array of current sparse positions
+ int[] curk = new int[blocksizeI];
+
+ //blocked execution over IK
+ for( int bi = rl; bi < ru; bi+=blocksizeI ) {
+ Arrays.fill(curk, 0); //reset positions
+ for( int bk = 0, bimin = Math.min(ru, bi+blocksizeI); bk < cd; bk+=blocksizeK ) {
+ final int bkmin = Math.min(cd, bk+blocksizeK);
+
+ //core sub block matrix multiplication
+ for( int i=bi, cix=bi*n; i<bimin; i++, cix+=n ) {
+ if( !a.isEmpty(i) ) {
+ final int apos = a.pos(i);
+ final int alen = a.size(i);
+ int[] aix = a.indexes(i);
+ double[] avals = a.values(i);
- vectMultiplyAdd(val, bvals, c, bix, bpos, cix, blen);
+ int k = curk[i-bi] + apos;
+ for(; k < apos+alen && aix[k]<bkmin; k++) {
+ if( !b.isEmpty(aix[k]) )
+ vectMultiplyAdd(avals[k], b.values(aix[k]), c,
+ b.indexes(aix[k]), b.pos(aix[k]), cix, b.size(aix[k]));
+ }
+ curk[i-bi] = k - apos;
}
- }
+ }
}
}
}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/5e9782ff/src/main/java/org/apache/sysml/runtime/util/UtilFunctions.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/util/UtilFunctions.java b/src/main/java/org/apache/sysml/runtime/util/UtilFunctions.java
index fe66e55..c1d83e7 100644
--- a/src/main/java/org/apache/sysml/runtime/util/UtilFunctions.java
+++ b/src/main/java/org/apache/sysml/runtime/util/UtilFunctions.java
@@ -32,11 +32,16 @@ public class UtilFunctions
public static double DOUBLE_EPS = Math.pow(2, -53);
- public static int longHashFunc(long v)
- {
+ public static int longHashFunc(long v) {
return (int)(v^(v>>>32));
}
+ public static int nextIntPow2( int in ) {
+ int expon = (in==0) ? 0 : 32-Integer.numberOfLeadingZeros(in-1);
+ long pow2 = (long) Math.pow(2, expon);
+ return (int)((pow2>Integer.MAX_VALUE)?Integer.MAX_VALUE : pow2);
+ }
+
//return one block index given the index in cell format and block size
//TODO to be deleted
public static long blockIndexCalculation(long cellIndex, int blockSize)