You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2016/08/15 19:04:15 UTC

incubator-systemml git commit: [SYSTEMML-824] Performance dense-sparse block conversion (row pre-alloc)

Repository: incubator-systemml
Updated Branches:
  refs/heads/master e9aa58414 -> 5ac32d6be


[SYSTEMML-824] Performance dense-sparse block conversion (row pre-alloc)

This patch improves the performance of the core matrix block primitive
denseToSparse as triggered in examSparsity(). Similar to sparse
transpose, we now pre-allocate sparse rows with exact nnz information,
in order to avoid repeated reallocations and internal copies. In a
scenario with a 10k x 10k matrix and sparsity 0.25, the runtime improved
from 1.7s to 490ms.

Furthermore, this patch also includes some minor cleanups (e.g., unused
imports and unused variables).

Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/5ac32d6b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/5ac32d6b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/5ac32d6b

Branch: refs/heads/master
Commit: 5ac32d6be15ce5ab212cc2d040546825195e41e6
Parents: e9aa584
Author: Matthias Boehm <mb...@us.ibm.com>
Authored: Sun Aug 14 19:15:11 2016 -0700
Committer: Matthias Boehm <mb...@us.ibm.com>
Committed: Sun Aug 14 19:15:11 2016 -0700

----------------------------------------------------------------------
 .../org/apache/sysml/hops/ConvolutionOp.java    |  3 --
 .../runtime/matrix/data/LibMatrixCUDA.java      |  1 -
 .../sysml/runtime/matrix/data/MatrixBlock.java  | 33 ++++++++++++++------
 3 files changed, 23 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/5ac32d6b/src/main/java/org/apache/sysml/hops/ConvolutionOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/ConvolutionOp.java b/src/main/java/org/apache/sysml/hops/ConvolutionOp.java
index 8c38a48..c010de6 100644
--- a/src/main/java/org/apache/sysml/hops/ConvolutionOp.java
+++ b/src/main/java/org/apache/sysml/hops/ConvolutionOp.java
@@ -31,7 +31,6 @@ import org.apache.sysml.lops.LopProperties.ExecType;
 import org.apache.sysml.parser.Expression.DataType;
 import org.apache.sysml.parser.Expression.ValueType;
 import org.apache.sysml.runtime.DMLRuntimeException;
-import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
 import org.apache.sysml.runtime.matrix.data.LibMatrixDNN.ConvolutionParameters;
 
 public class ConvolutionOp extends Hop  implements MultiThreadedHop
@@ -231,9 +230,7 @@ public class ConvolutionOp extends Hop  implements MultiThreadedHop
 		// [numRows, numCols, NNZ] 
 		long[] ret = null;
 	
-		Hop input1 = getInput().get(0);
 		ConvolutionParameters params;
-		MatrixCharacteristics mc = memo.getAllInputStats(input1);
 		try {
 			params = parseInput();
 		} catch (DMLRuntimeException e) {

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/5ac32d6b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
index 52272a0..6a25b49 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
@@ -49,7 +49,6 @@ import static jcuda.runtime.JCuda.cudaFree;
 import jcuda.Pointer;
 import jcuda.Sizeof;
 import jcuda.jcublas.JCublas;
-import jcuda.jcublas.JCublas2;
 import jcuda.jcublas.cublasHandle;
 import jcuda.jcudnn.cudnnConvolutionDescriptor;
 import jcuda.jcudnn.cudnnFilterDescriptor;

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/5ac32d6b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
index 1316ad8..8bd7b79 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
@@ -1203,20 +1203,33 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab
 		allocateSparseRowsBlock();
 		reset();
 		
-		//copy dense to sparse
+		//copy dense to sparse with (1) row pre-allocation to avoid repeated 
+		//allocation on append, and (2) nnz re-computation 
 		double[] a = denseBlock;
 		SparseBlock c = sparseBlock;
+		final int m = rlen;
+		final int n = clen;
 		
-		for( int i=0, aix=0; i<rlen; i++ )
-			for(int j=0; j<clen; j++, aix++)
-				if( a[aix] != 0 ) {
-					//create sparse row only if required
-					c.allocate(i, estimatedNNzsPerRow, clen);
-					c.append(i, j, a[aix]);
-					nonZeros++;
-				}
+		long nnz = 0;
+		for( int i=0, aix=0; i<m; i++, aix+=n ) {
+			//recompute nnz per row (not via recomputeNonZeros as sparse allocated)
+			int lnnz = 0;
+			for(int j=0; j<n; j++)
+				lnnz += (a[aix+j]!=0) ? 1 : 0;
+			if( lnnz <= 0 ) continue;
+			
+			//allocate sparse row and append non-zero values
+			c.allocate(i, lnnz); 
+			for(int j=0; j<n; j++) {
+				double val = a[aix+j];
+				if( val != 0 )
+					c.append(i, j, val);
+			}
+			nnz += lnnz;
+		}
 				
-		//cleanup dense block
+		//update nnz and cleanup dense block
+		nonZeros = nnz;
 		denseBlock = null;
 	}