You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2017/10/12 08:12:47 UTC

[5/5] systemml git commit: [SYSTEMML-1954] Memory-efficient rexpand output (CSR instead of MCSR)

[SYSTEMML-1954] Memory-efficient rexpand output (CSR instead of MCSR)

The rexpand operator is compiled for operations such as table(seq(1,N),
v) and (for the case of seq on the lhs), simply horizontally expands the
given value into the respective column index. For large N, this creates
a huge but ultra-sparse matrix. In past we introduced scalar rows for
the MCSR sparse block to reduce the memory overhead of sparse rows with
a single entry.

However, this approach was still memory-inefficient and causes
unnecessary GC overhead. This patch now creates the output for rexpand
columns in CSR instead of MCSR representation. In order to avoid
unnecessary CSR row pointer updates on append, we also provide the CSR
primitives to construct the block directly from a given column index
array. On a scenario of rexpand 60M rows, this patch improved the
rexpand performance from 39.4s to 3.8s with additional end-to-end
improvements due to reduce GC overhead which also affects subsequent
operations.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/a97bc53f
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/a97bc53f
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/a97bc53f

Branch: refs/heads/master
Commit: a97bc53f7c2ab74374733376e0137174f821bd95
Parents: a8f6a3c
Author: Matthias Boehm <mb...@gmail.com>
Authored: Thu Oct 12 01:05:03 2017 -0700
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Thu Oct 12 01:13:09 2017 -0700

----------------------------------------------------------------------
 .../data/LibMatrixCuDNNInputRowFetcher.java     |  2 +-
 .../runtime/matrix/data/LibMatrixReorg.java     | 28 +++++++++++++++++---
 .../sysml/runtime/matrix/data/MatrixBlock.java  |  6 ++---
 .../runtime/matrix/data/SparseBlockCSR.java     | 27 +++++++++++++++++++
 4 files changed, 55 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/a97bc53f/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNInputRowFetcher.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNInputRowFetcher.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNInputRowFetcher.java
index b9619c8..a21dae0 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNInputRowFetcher.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNInputRowFetcher.java
@@ -56,7 +56,7 @@ public class LibMatrixCuDNNInputRowFetcher implements java.lang.AutoCloseable {
 	 * Copy the nth row and return the dense pointer
 	 * @param n zero-based row index
 	 * @return dense pointer containing the nth row. This row is reused in the next iteration
-	 * @throws DMLRuntimeException
+	 * @throws DMLRuntimeException ?
 	 */
 	public Pointer getNthRow(int n) throws DMLRuntimeException {
 		if(isInputInSparseFormat) {

http://git-wip-us.apache.org/repos/asf/systemml/blob/a97bc53f/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java
index 0bd4402..d2d3d86 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java
@@ -71,6 +71,9 @@ public class LibMatrixReorg
 	//allow reuse of temporary blocks for certain operations
 	public static final boolean ALLOW_BLOCK_REUSE = false;
 	
+	//use csr instead of mcsr sparse block for rexpand columns
+	public static final boolean REXPAND_COLS_IN_CSR = true;
+	
 	private enum ReorgType {
 		TRANSPOSE,
 		REV,
@@ -1907,7 +1910,7 @@ public class LibMatrixReorg
 		
 		return ret;
 	}
-
+	
 	private static MatrixBlock rexpandColumns(MatrixBlock in, MatrixBlock ret, int max, boolean cast, boolean ignore, int k) 
 		throws DMLRuntimeException
 	{
@@ -1921,7 +1924,8 @@ public class LibMatrixReorg
 		
 		//execute rexpand columns
 		long rnnz = 0; //real nnz (due to cutoff max)
-		if( k <= 1 || in.getNumRows() <= PAR_NUMCELL_THRESHOLD ) {
+		if( k <= 1 || in.getNumRows() <= PAR_NUMCELL_THRESHOLD
+			|| (sp && REXPAND_COLS_IN_CSR) ) {
 			rnnz = rexpandColumns(in, ret, max, cast, ignore, 0, rlen);
 		}
 		else {
@@ -1951,6 +1955,14 @@ public class LibMatrixReorg
 	private static long rexpandColumns(MatrixBlock in, MatrixBlock ret, int max, boolean cast, boolean ignore, int rl, int ru) 
 		throws DMLRuntimeException
 	{
+		//initialize auxiliary data structures 
+		int lnnz = 0;
+		int[] cix = null;
+		if( ret.sparse && REXPAND_COLS_IN_CSR ) {
+			cix = new int[in.rlen];
+			Arrays.fill(cix, -1);
+		}
+		
 		//expand input horizontally (input vector likely dense 
 		//but generic implementation for general case)
 		for( int i=rl; i<ru; i++ )
@@ -1967,17 +1979,25 @@ public class LibMatrixReorg
 			//set expanded value if matching
 			if( val == Math.floor(val) && val >= 1 && val <= max ) {
 				//update target without global nnz maintenance
-				if( ret.isInSparseFormat() ) {
+				if( cix != null ) {
+					cix[i] = (int)(val-1);
+				}
+				else if( ret.isInSparseFormat() ) {
 					ret.sparseBlock.allocate(i, 1);
 					ret.sparseBlock.append(i, (int)(val-1), 1);
 				}
 				else
 					ret.setValueDenseUnsafe(i, (int)(val-1), 1);
+				lnnz ++;
 			}
 		}
 		
+		//init CSR block once to avoid repeated updates of row pointers on append
+		if( cix != null )
+			ret.sparseBlock = new SparseBlockCSR(in.rlen, lnnz, cix);
+		
 		//recompute nnz of partition
-		return ret.recomputeNonZeros(rl, ru-1, 0, ret.getNumColumns()-1);
+		return ret.setNonZeros(lnnz);
 	}
 	
 	private static void copyColVector( MatrixBlock in, int ixin, double[] tmp, int[] tmpi, int len)

http://git-wip-us.apache.org/repos/asf/systemml/blob/a97bc53f/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
index 8f9ae9e..822c6f9 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
@@ -463,8 +463,8 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab
 		return nonZeros;
 	}
 	
-	public void setNonZeros(long nnz) {
-		nonZeros = nnz;
+	public long setNonZeros(long nnz) {
+		return (nonZeros = nnz);
 	}
 	
 	public boolean isVector() {
@@ -4810,7 +4810,7 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab
 	 * This function computes the total weight
 	 * 
 	 * @return sum weight for quantile
-	 * @throws DMLRuntimeException
+	 * @throws DMLRuntimeException on error
 	 */
 	public double sumWeightForQuantile() 
 		throws DMLRuntimeException 

http://git-wip-us.apache.org/repos/asf/systemml/blob/a97bc53f/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java
index bc817c7..35ffedf 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java
@@ -166,6 +166,33 @@ public class SparseBlockCSR extends SparseBlock
 	}
 	
 	/**
+	 * Copy constructor for given array of column indexes, which
+	 * identifies rows by position and implies values of 1.
+	 * 
+	 * @param rows number of rows
+	 * @param nnz number of non-zeros
+	 * @param colInd column indexes
+	 */
+	public SparseBlockCSR(int rows, int nnz, int[] colInd) {
+		_ptr = new int[rows+1];
+		_indexes = (rows==nnz) ? colInd : new int[nnz];
+		_values = new double[nnz];
+		Arrays.fill(_values, 1);
+		_size = nnz;
+		
+		//single-pass construction of row pointers
+		//and copy of column indexes if necessary
+		for(int i=0, pos=0; i<rows; i++) {
+			if( colInd[i] >= 0 ) {
+				if( rows > nnz )
+					_indexes[pos] = colInd[i];
+				pos++;
+			}
+			_ptr[i+1] = pos;
+		}
+	}
+	
+	/**
 	 * Initializes the CSR sparse block from an ordered input
 	 * stream of ultra-sparse ijv triples. 
 	 *