You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemds.apache.org by mb...@apache.org on 2020/09/15 19:14:03 UTC
[systemds] 01/02: [MINOR] Performance sparse-sparse permutation
matrix multiply
This is an automated email from the ASF dual-hosted git repository.
mboehm7 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git
commit 366521f9298a465a4b260287c29a453b4eb47572
Author: Matthias Boehm <mb...@gmail.com>
AuthorDate: Tue Sep 15 21:09:24 2020 +0200
[MINOR] Performance sparse-sparse permutation matrix multiply
This patch leverages the existing kernels for ultra-sparse permutation
matrix multiply (where the ultra-sparse left-hand-side copies rows of
the sparse or dense right-hand-side) for scenarios where the lhs is a
sparse permutation matrix but does not yet qualify as ultra-sparse.
As this approach avoids dense output allocation it can change the
asymptotic behavior. For example, in slicefinder over the KDD98 dataset,
the expression (P1 %*% S + P2 %*% S) != 0 took ~420s because the
3,061,575 x 7,909 was allocated in dense (190GB), and after the
operation converted to sparse. With the new fine-tuning, we now use the
existing permutation matrix multiply kernel and allocate the output
directly in sparse, which improved performance to 431ms (1000x).
---
.../sysds/runtime/matrix/data/LibMatrixMult.java | 25 ++++++++++++----------
.../sysds/runtime/matrix/data/MatrixBlock.java | 6 +++---
2 files changed, 17 insertions(+), 14 deletions(-)
diff --git a/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixMult.java b/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixMult.java
index 671eea7..db61927 100644
--- a/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixMult.java
+++ b/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixMult.java
@@ -122,8 +122,9 @@ public class LibMatrixMult
//Timing time = new Timing(true);
//pre-processing: output allocation
+ boolean m1Perm = m1.isSparsePermutationMatrix();
boolean ultraSparse = (fixedRet && ret.sparse)
- || (!fixedRet && isUltraSparseMatrixMult(m1, m2));
+ || (!fixedRet && isUltraSparseMatrixMult(m1, m2, m1Perm));
boolean tm2 = checkPrepMatrixMultRightInput(m1,m2);
m2 = prepMatrixMultRightInput(m1, m2);
ret.sparse = ultraSparse;
@@ -137,7 +138,7 @@ public class LibMatrixMult
//core matrix mult computation
if( ultraSparse )
- matrixMultUltraSparse(m1, m2, ret, 0, ru2);
+ matrixMultUltraSparse(m1, m2, ret, m1Perm, 0, ru2);
else if(!m1.sparse && !m2.sparse)
matrixMultDenseDense(m1, m2, ret, tm2, pm2, 0, ru2, 0, cu);
else if(m1.sparse && m2.sparse)
@@ -184,7 +185,8 @@ public class LibMatrixMult
//pre-processing: output allocation (in contrast to single-threaded,
//we need to allocate sparse as well in order to prevent synchronization)
- boolean ultraSparse = isUltraSparseMatrixMult(m1, m2);
+ boolean m1Perm = m1.isSparsePermutationMatrix();
+ boolean ultraSparse = isUltraSparseMatrixMult(m1, m2, m1Perm);
boolean tm2 = checkPrepMatrixMultRightInput(m1,m2);
m2 = prepMatrixMultRightInput(m1, m2);
ret.sparse = ultraSparse;
@@ -207,7 +209,7 @@ public class LibMatrixMult
ArrayList<MatrixMultTask> tasks = new ArrayList<>();
ArrayList<Integer> blklens = UtilFunctions.getBalancedBlockSizesDefault(num, k, (pm2r||pm2c));
for( int i=0, lb=0; i<blklens.size(); lb+=blklens.get(i), i++ )
- tasks.add(new MatrixMultTask(m1, m2, ret, tm2, pm2r, pm2c, lb, lb+blklens.get(i)));
+ tasks.add(new MatrixMultTask(m1, m2, ret, tm2, pm2r, pm2c, m1Perm, lb, lb+blklens.get(i)));
//execute tasks
List<Future<Object>> taskret = pool.invokeAll(tasks);
pool.shutdown();
@@ -1509,14 +1511,14 @@ public class LibMatrixMult
* @param rl row lower bound
* @param ru row upper bound
*/
- private static void matrixMultUltraSparse(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, int rl, int ru) {
+ private static void matrixMultUltraSparse(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, boolean m1Perm, int rl, int ru) {
final boolean leftUS = m1.isUltraSparse()
|| (m1.isUltraSparse(false) && !m2.isUltraSparse())
|| (m1.sparse && !m2.sparse);
if( m1 == m2 ) //self-product
matrixMultUltraSparseSelf(m1, ret, rl, ru);
- else if( leftUS )
+ else if( leftUS || m1Perm )
matrixMultUltraSparseLeft(m1, m2, ret, rl, ru);
else
matrixMultUltraSparseRight(m1, m2, ret, rl, ru);
@@ -3833,7 +3835,7 @@ public class LibMatrixMult
||(!leftTranspose && FPfactor * m1.clen * m1.rlen * m1.rlen > threshold));
}
- public static boolean isUltraSparseMatrixMult(MatrixBlock m1, MatrixBlock m2) {
+ public static boolean isUltraSparseMatrixMult(MatrixBlock m1, MatrixBlock m2, boolean m1Perm) {
if( m2.clen == 1 ) //mv always dense
return false;
//note: ultra-sparse matrix mult implies also sparse outputs, hence we need
@@ -3842,8 +3844,7 @@ public class LibMatrixMult
m1.getSparsity(), m2.getSparsity(), m1.rlen, m1.clen, m2.clen, true);
return (m1.isUltraSparse() || m2.isUltraSparse()) //base case
|| (m1.isUltraSparse(false) && m1 == m2) //ultra-sparse self product
- || (m1.isUltraSparsePermutationMatrix()
- && OptimizerUtils.getSparsity(m2.rlen, m2.clen, m2.nonZeros)<1.0)
+ || (m1Perm && OptimizerUtils.getSparsity(m2.rlen, m2.clen, m2.nonZeros)<1.0)
|| ((m1.isUltraSparse(false) || m2.isUltraSparse(false))
&& outSp < MatrixBlock.ULTRA_SPARSITY_TURN_POINT2)
|| (m1.getSparsity() < MatrixBlock.ULTRA_SPARSITY_TURN_POINT2
@@ -3949,17 +3950,19 @@ public class LibMatrixMult
private final boolean _tm2; //transposed m2
private final boolean _pm2r; //par over m2 rows
private final boolean _pm2c; //par over m2 rows
+ private final boolean _m1Perm; //sparse permutation
private final int _rl;
private final int _ru;
protected MatrixMultTask( MatrixBlock m1, MatrixBlock m2, MatrixBlock ret,
- boolean tm2, boolean pm2r, boolean pm2c, int rl, int ru )
+ boolean tm2, boolean pm2r, boolean pm2c, boolean m1Perm, int rl, int ru )
{
_m1 = m1;
_m2 = m2;
_tm2 = tm2;
_pm2r = pm2r;
_pm2c = pm2c;
+ _m1Perm = m1Perm;
_rl = rl;
_ru = ru;
@@ -3986,7 +3989,7 @@ public class LibMatrixMult
//compute block matrix multiplication
if( _ret.sparse ) //ultra-sparse
- matrixMultUltraSparse(_m1, _m2, _ret, rl, ru);
+ matrixMultUltraSparse(_m1, _m2, _ret, _m1Perm, rl, ru);
else if(!_m1.sparse && !_m2.sparse)
matrixMultDenseDense(_m1, _m2, _ret, _tm2, _pm2r, rl, ru, cl, cu);
else if(_m1.sparse && _m2.sparse)
diff --git a/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java b/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java
index 0eb484a..032b79e 100644
--- a/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java
+++ b/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java
@@ -958,11 +958,11 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab
&& (!checkNnz || nonZeros<ULTRA_SPARSE_BLOCK_NNZ);
}
- public boolean isUltraSparsePermutationMatrix() {
- if( !isUltraSparse(false) )
+ public boolean isSparsePermutationMatrix() {
+ if( !isInSparseFormat() )
return false;
- boolean isPM = true;
SparseBlock sblock = getSparseBlock();
+ boolean isPM = (sblock != null);
for( int i=0; i<rlen & isPM; i++ )
isPM &= sblock.isEmpty(i) || sblock.size(i) == 1;
return isPM;