You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@systemds.apache.org by mb...@apache.org on 2021/07/25 16:28:46 UTC

[systemds] branch master updated (07c69e6 -> 85c5492)

This is an automated email from the ASF dual-hosted git repository.

mboehm7 pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git.


    from 07c69e6  [SYSTEMDS-3069] Extended rewrites for splitting DAGs after compression
     new 698388e  [SYSTEMDS-3072] Fix integer overflow in sparse random number generation
     new 85c5492  [SYSTEMDS-3073] Performance ultra-sparse transpose operations

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../runtime/matrix/data/LibMatrixDatagen.java      | 12 ++++----
 .../sysds/runtime/matrix/data/LibMatrixReorg.java  | 32 ++++++++++++++++------
 2 files changed, 31 insertions(+), 13 deletions(-)

[systemds] 02/02: [SYSTEMDS-3073] Performance ultra-sparse transpose operations

Posted by mb...@apache.org.

This is an automated email from the ASF dual-hosted git repository.

mboehm7 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git

commit 85c54928d78679137222dc0de1057eb75e39fa3f
Author: Matthias Boehm <mb...@gmail.com>
AuthorDate: Sun Jul 25 18:28:05 2021 +0200

    [SYSTEMDS-3073] Performance ultra-sparse transpose operations
    
    This patch adds an additional specialized code path for ultra-sparse
    transpose operations for nnz < max(rlen, clen) in order to avoid
    unnecessary overhead for cache blocking and preallocation. On the
    following scenario it improve the cummulative transpose time from 5.061s
    to 0.546s.
    
    X = rand(rows=1e6, cols=1e6, sparsity=1e-11);
    for(i in 1:100)
      X = t(X)
    print(sum(X))
---
 .../sysds/runtime/matrix/data/LibMatrixReorg.java  | 32 ++++++++++++++++------
 1 file changed, 24 insertions(+), 8 deletions(-)

diff --git a/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixReorg.java b/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixReorg.java
index ecad23c..72d4ace 100644
--- a/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixReorg.java
+++ b/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixReorg.java
@@ -169,15 +169,18 @@ public class LibMatrixReorg {
 			out.allocateDenseBlock(false);
 	
 		//execute transpose operation
+		boolean ultraSparse = (in.sparse && out.sparse && in.nonZeros < Math.max(in.rlen, in.clen));
 		if( !in.sparse && !out.sparse )
-			transposeDenseToDense( in, out, 0, in.rlen, 0, in.clen );
+			transposeDenseToDense(in, out, 0, in.rlen, 0, in.clen);
+		else if( ultraSparse )
+			transposeUltraSparse(in, out);
 		else if( in.sparse && out.sparse )
-			transposeSparseToSparse( in, out, 0, in.rlen, 0, in.clen, 
+			transposeSparseToSparse(in, out, 0, in.rlen, 0, in.clen, 
 				countNnzPerColumn(in, 0, in.rlen));
 		else if( in.sparse )
-			transposeSparseToDense( in, out, 0, in.rlen, 0, in.clen );
+			transposeSparseToDense(in, out, 0, in.rlen, 0, in.clen);
 		else
-			transposeDenseToSparse( in, out );
+			transposeDenseToSparse(in, out);
 		
 		// System.out.println("r' ("+in.rlen+", "+in.clen+", "+in.sparse+", "+out.sparse+") in "+time.stop()+" ms.");
 		
@@ -190,10 +193,11 @@ public class LibMatrixReorg {
 	
 	public static MatrixBlock transpose(MatrixBlock in, MatrixBlock out, int k, boolean allowCSR) {
 		// redirect small or special cases to sequential execution
-		if(in.isEmptyBlock(false) || ((long) in.rlen * (long) in.clen < PAR_NUMCELL_THRESHOLD) || k == 1 ||
-			(SHALLOW_COPY_REORG && !in.sparse && !out.sparse && (in.rlen == 1 || in.clen == 1)) ||
-			(in.sparse && !out.sparse && in.rlen == 1) || (!in.sparse && out.sparse && in.rlen == 1) ||
-			(!in.sparse && out.sparse)) {
+		if(in.isEmptyBlock(false) || ((long) in.rlen * (long) in.clen < PAR_NUMCELL_THRESHOLD) || k == 1
+			|| (SHALLOW_COPY_REORG && !in.sparse && !out.sparse && (in.rlen == 1 || in.clen == 1))
+			|| (in.sparse && !out.sparse && in.rlen == 1) || (!in.sparse && out.sparse && in.rlen == 1)
+			|| (in.sparse && out.sparse && in.nonZeros < Math.max(in.rlen, in.clen)) //ultra-sparse
+			|| (!in.sparse && out.sparse) ) {
 			return transpose(in, out);
 		}
 		// set meta data and allocate output arrays (if required)
@@ -901,6 +905,18 @@ public class LibMatrixReorg {
 		}
 	}
 
+	private static void transposeUltraSparse(MatrixBlock in, MatrixBlock out) {
+		//note: applied if nnz < max(rlen, clen) - so no cache blocking
+		// but basic, naive transposition in a single-threaded context
+		Iterator<IJV> iter = in.getSparseBlockIterator();
+		SparseBlock b = out.getSparseBlock();
+		while( iter.hasNext() ) {
+			IJV cell = iter.next();
+			b.append(cell.getJ(), cell.getI(), cell.getV());
+		}
+		out.setNonZeros(in.getNonZeros());
+	}
+	
 	private static void transposeSparseToSparse(MatrixBlock in, MatrixBlock out, int rl, int ru, int cl, int cu, int[] cnt)
 	{
 		//NOTE: called only in sequential or column-wise parallel execution

[systemds] 01/02: [SYSTEMDS-3072] Fix integer overflow in sparse random number generation

Posted by mb...@apache.org.

This is an automated email from the ASF dual-hosted git repository.

mboehm7 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git

commit 698388ed12ae99ba57e4952331a398544df72e0f
Author: Matthias Boehm <mb...@gmail.com>
AuthorDate: Sun Jul 25 17:40:53 2021 +0200

    [SYSTEMDS-3072] Fix integer overflow in sparse random number generation
    
    This patch fixes an integer overflow in rand(), which has specialized
    code paths for dense and spare random matrix generation. For sparse, we
    compute skips until the next value, and on a scenario with ultra-sparse
    matrices (with sparsity 1e-11) this skip ran into an integer overflow.
---
 .../apache/sysds/runtime/matrix/data/LibMatrixDatagen.java   | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixDatagen.java b/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixDatagen.java
index 586dc5a..b547b4f 100644
--- a/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixDatagen.java
+++ b/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixDatagen.java
@@ -533,15 +533,17 @@ public class LibMatrixDatagen
 	{
 		// Prob [k-1 zeros before a nonzero] = Prob [k-1 < log(uniform)/log(1-p) < k] = p*(1-p)^(k-1), where p=sparsity
 		double log1mp = Math.log(1-sparsity);
-		int idx = 0;  // takes values in range [1, blen*blen] (both ends including)
+		long idx = 0;  // takes values in range [1, blen*blen] (both ends including)
 		long blocksize = blockrows*blockcols;
 		while(idx < blocksize) {
 			//compute skip to next index
-			idx = idx + (int) Math.ceil(Math.log(nnzPRNG.nextDouble())/log1mp);
-			if ( idx > blocksize) break;
+			idx = idx + (long) Math.ceil(Math.log(nnzPRNG.nextDouble())/log1mp);
+			//check blocksize and save int casts
+			if ( idx > blocksize)
+				break;
 			// translate idx into (r,c) within the block
-			int rix = (idx-1)/blockcols;
-			int cix = (idx-1)%blockcols;
+			int rix = (int)(idx-1)/blockcols;
+			int cix = (int)(idx-1)%blockcols;
 			double val = min + (range * valuePRNG.nextDouble());
 			c.allocate(rowoffset+rix, estnnzRow, clen);
 			c.append(rowoffset+rix, coloffset+cix, val);