You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2017/04/21 05:11:53 UTC

[1/2] incubator-systemml git commit: [SYSTEMML-1548] Performance ultra-sparse read and fix CSR sparse blocks

Repository: incubator-systemml
Updated Branches:
  refs/heads/master 39bf1e6d5 -> 2a1eb4c9b


[SYSTEMML-1548] Performance ultra-sparse read and fix CSR sparse blocks

This patch fixes performance issues of reading ultra-sparse matrices
from CP, which can be dominated by garbage collection overhead under
certain data sizes and heap configurations. In detail, we now
deserialize ultra-sparse blocks into CSR format and reuse these sparse
blocks. This change improved performance by >3x for scenarios with large
heaps. However, additional changes for small heaps need to be addressed
in a separate patch.

Furthermore, this also includes a fix of the underlying CSR sparse block
for the case of updates (inserts/appends) with entire sparse rows as
well as a cleanup of CSR construction from COO inputs (single pass). 


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/46bd37c1
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/46bd37c1
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/46bd37c1

Branch: refs/heads/master
Commit: 46bd37c14b9b4e71e006a2fe1c6a39ae9ec92f86
Parents: 39bf1e6
Author: Matthias Boehm <mb...@gmail.com>
Authored: Thu Apr 20 20:44:50 2017 -0700
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Thu Apr 20 22:13:48 2017 -0700

----------------------------------------------------------------------
 .../sysml/runtime/matrix/data/MatrixBlock.java  | 50 +++++++----
 .../runtime/matrix/data/SparseBlockCSR.java     | 89 +++++++++++++-------
 .../runtime/matrix/data/SparseBlockFactory.java | 12 +--
 3 files changed, 99 insertions(+), 52 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/46bd37c1/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
index 2d6e2b2..0ed64e3 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
@@ -364,9 +364,9 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab
 		allocateSparseRowsBlock(true);
 	}
 
-	public void allocateSparseRowsBlock(boolean clearNNZ)
-	{	
+	public void allocateSparseRowsBlock(boolean clearNNZ) {
 		//allocate block if non-existing or too small (guaranteed to be 0-initialized)
+		//but do not replace existing block even if not in default type
 		if( sparseBlock == null || sparseBlock.numRows()<rlen ) {
 			sparseBlock = SparseBlockFactory.createSparseBlock(DEFAULT_SPARSEBLOCK, rlen);
 		}
@@ -377,6 +377,23 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab
 		}
 	}
 	
+	public void allocateAndResetSparseRowsBlock(boolean clearNNZ, SparseBlock.Type stype)
+	{
+		//allocate block if non-existing or too small (guaranteed to be 0-initialized)
+		if( sparseBlock == null || sparseBlock.numRows()<rlen
+			|| !SparseBlockFactory.isSparseBlockType(sparseBlock, stype))  {
+			sparseBlock = SparseBlockFactory.createSparseBlock(stype, rlen);
+		}
+		else {
+			sparseBlock.reset(estimatedNNzsPerRow, clen);
+		}
+		
+		//clear nnz if necessary
+		if( clearNNZ ) {
+			nonZeros = 0;
+		}
+	}
+	
 	
 	/**
 	 * This should be called only in the read and write functions for CP
@@ -1767,7 +1784,7 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab
 		//check type information
 		if( bformat<0 || bformat>=BlockType.values().length )
 			throw new IOException("invalid format: '"+bformat+"' (need to be 0-"+BlockType.values().length+").");
-			
+		
 		BlockType format=BlockType.values()[bformat];
 		try 
 		{
@@ -1776,7 +1793,7 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab
 				case ULTRA_SPARSE_BLOCK:
 					nonZeros = readNnzInfo( in, true );
 					sparse = evalSparseFormatInMemory(rlen, clen, nonZeros);
-					cleanupBlock(true, true); //clean all
+					cleanupBlock(true, !(sparse && sparseBlock instanceof SparseBlockCSR));
 					if( sparse )
 						readUltraSparseBlock(in);
 					else
@@ -1798,7 +1815,9 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab
 					break;
 				case EMPTY_BLOCK:
 					sparse = true;
-					cleanupBlock(true, true); //clean all
+					cleanupBlock(true, !(sparseBlock instanceof SparseBlockCSR));
+					if( sparseBlock != null )
+						sparseBlock.reset();
 					nonZeros = 0;
 					break;
 			}
@@ -1902,20 +1921,19 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab
 
 	private void readUltraSparseBlock(DataInput in) 
 		throws IOException 
-	{	
-		allocateSparseRowsBlock(false); //adjust to size
-		resetSparse(); //reset all sparse rows
+	{
+		//allocate ultra-sparse block in CSR to avoid unnecessary size overhead 
+		//and to allow efficient reset without repeated sparse row allocation
+		
+		//adjust size and ensure reuse block is in CSR format
+		allocateAndResetSparseRowsBlock(false, SparseBlock.Type.CSR);
 		
 		if( clen > 1 ) //ULTRA-SPARSE BLOCK
 		{ 
-			//block: read ijv-triples
-			for(long i=0; i<nonZeros; i++) {
-				int r = in.readInt();
-				int c = in.readInt();
-				double val = in.readDouble();
-				sparseBlock.allocate(r, 1, clen);
-				sparseBlock.append(r, c, val);
-			}
+			//block: read ijv-triples (ordered by row and column) via custom 
+			//init to avoid repeated updates of row pointers per append
+			SparseBlockCSR sblockCSR = (SparseBlockCSR) sparseBlock;
+			sblockCSR.initUltraSparse((int)nonZeros, in);
 		}
 		else //ULTRA-SPARSE COL
 		{

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/46bd37c1/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java
index 3ddecfe..02f009a 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java
@@ -20,6 +20,8 @@
 
 package org.apache.sysml.runtime.matrix.data;
 
+import java.io.DataInput;
+import java.io.IOException;
 import java.util.Arrays;
 
 import org.apache.sysml.runtime.util.SortUtils;
@@ -79,7 +81,7 @@ public class SparseBlockCSR extends SparseBlock
 			throw new RuntimeException("SparseBlockCSR supports nnz<=Integer.MAX_VALUE but got "+size);
 		
 		//special case SparseBlockCSR
-		if( sblock instanceof SparseBlockCSR ) { 
+		if( sblock instanceof SparseBlockCSR ) {
 			SparseBlockCSR ocsr = (SparseBlockCSR)sblock;
 			_ptr = Arrays.copyOf(ocsr._ptr, ocsr.numRows()+1);
 			_indexes = Arrays.copyOf(ocsr._indexes, ocsr._size);
@@ -145,36 +147,57 @@ public class SparseBlockCSR extends SparseBlock
 	 * @param colInd	column indices
 	 * @param values	non zero values
 	 */
-	public SparseBlockCSR(int rows, int[] rowInd, int[] colInd, double[] values){
+	public SparseBlockCSR(int rows, int[] rowInd, int[] colInd, double[] values) {
 		int nnz = values.length;
 		_ptr = new int[rows+1];
 		_indexes = Arrays.copyOf(colInd, colInd.length);
 		_values = Arrays.copyOf(values, values.length);
 		_size = nnz;
 		
-		for (int i=0; i<rows; i++){
-			_ptr[i] = -1;
+		//single-pass construction of row pointers
+		int rlast = 0;
+		for(int i=0; i<nnz; i++) {
+			int r = rowInd[i];
+			if( rlast < r )
+				Arrays.fill(_ptr, rlast+1, r+1, i);
+			rlast = r;
 		}
-		_ptr[rows] = nnz;
-		_ptr[0]    = 0;
+		Arrays.fill(_ptr, rlast+1, numRows()+1, nnz);
+	}
+	
+	/**
+	 * Initializes the CSR sparse block from an ordered input
+	 * stream of ultra-sparse ijv triples. 
+	 * 
+	 * @param nnz number of non-zeros to read
+	 * @param in data input stream of ijv triples, ordered by ij
+	 * @throws IOException
+	 */
+	public void initUltraSparse(int nnz, DataInput in) 
+		throws IOException 
+	{
+		//ensure empty block before init
+		if( _size > 0 )
+			reset();
+			
+		//allocate space if necessary
+		if( _values.length < nnz )
+			resize(nnz);
 		
-		// Input Example -> rowInd = [0,0,1,1,2,2,2,4,4,5]
-		//							 [0,1,2,3,4,5,6,7,8,9]
-		for (int i=nnz-1; i>=1; i--){
-			_ptr[rowInd[i]] = i;
+		//read ijv triples, append and update pointers
+		int rlast = 0;
+		for(int i=0; i<nnz; i++) {
+			int r = in.readInt();
+			if( rlast < r )
+				Arrays.fill(_ptr, rlast+1, r+1, i);
+			rlast = r;
+			_indexes[i] = in.readInt();
+			_values[i] = in.readDouble();
 		}
-		// Output Example -> _ptr = [0|2|_|4|7|9|nnz]
-		// _ = -1
-		
-		// Pad out the missing values
-		// Input example -> _ptr = [0|2|_|4|7|9|nnz]
-		for (int i=1; i<rows; i++){
-			if (_ptr[i] == -1){
-				_ptr[i] = _ptr[i-1];
-			}
-		}
-		// Output example -> _ptr = [0|2|2|4|7|9|nnz]
-				
+		Arrays.fill(_ptr, rlast+1, numRows()+1, nnz);
+		
+		//update meta data
+		_size = nnz;
 	}
 	
 	/**
@@ -234,14 +257,18 @@ public class SparseBlockCSR extends SparseBlock
 	
 	@Override 
 	public void reset() {
-		_size = 0;
-		Arrays.fill(_ptr, 0);
+		if( _size > 0 ) {
+			Arrays.fill(_ptr, 0);
+			_size = 0;
+		}
 	}
 
 	@Override 
 	public void reset(int ennz, int maxnnz) {
-		_size = 0;
-		Arrays.fill(_ptr, 0);
+		if( _size > 0 ) {
+			Arrays.fill(_ptr, 0);
+			_size = 0;
+		}
 	}
 	
 	@Override 
@@ -348,19 +375,19 @@ public class SparseBlockCSR extends SparseBlock
 		double[] avals = row.values();
 		
 		//delete existing values if necessary
-		if( len > 0 )
+		if( len > 0 ) //incl size update
 			deleteIndexRange(r, aix[0], aix[alen-1]+1);
 		
 		//prepare free space (allocate and shift)
 		int lsize = _size+alen;
 		if( _values.length < lsize )
-			resize(lsize);				
-		shiftRightByN(pos, alen);
+			resize(lsize);
+		shiftRightByN(pos, alen); //incl size update
+		incrPtr(r+1, alen);
 		
 		//copy input row into internal representation
 		System.arraycopy(aix, 0, _indexes, pos, alen);
 		System.arraycopy(avals, 0, _values, pos, alen);
-		_size+=alen;
 	}
 	
 	@Override
@@ -576,7 +603,7 @@ public class SparseBlockCSR extends SparseBlock
 		//overlapping array copy (shift rhs values left)
 		System.arraycopy(_indexes, end, _indexes, start, _size-end);
 		System.arraycopy(_values, end, _values, start, _size-end);
-		_size -= (end-start);		
+		_size -= (end-start);
 		
 		decrPtr(r+1, end-start);
 	}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/46bd37c1/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockFactory.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockFactory.java b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockFactory.java
index b62cff3..395ba18 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockFactory.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockFactory.java
@@ -44,11 +44,7 @@ public abstract class SparseBlockFactory
 			return null;
 		
 		//check for existing target type
-		if( !forceCopy && 
-			( (sblock instanceof SparseBlockMCSR && type == SparseBlock.Type.MCSR)
-			||(sblock instanceof SparseBlockCSR && type == SparseBlock.Type.CSR)
-			||(sblock instanceof SparseBlockCOO && type == SparseBlock.Type.COO))  )
-		{
+		if( !forceCopy && isSparseBlockType(sblock, type) ){
 			return sblock;
 		}
 		
@@ -61,6 +57,12 @@ public abstract class SparseBlockFactory
 				throw new RuntimeException("Unexpected sparse block type: "+type.toString());
 		}
 	}
+	
+	public static boolean isSparseBlockType(SparseBlock sblock, SparseBlock.Type type) {
+		return (sblock instanceof SparseBlockMCSR && type == SparseBlock.Type.MCSR)
+			||(sblock instanceof SparseBlockCSR && type == SparseBlock.Type.CSR)
+			||(sblock instanceof SparseBlockCOO && type == SparseBlock.Type.COO);
+	}
 
 	public static long estimateSizeSparseInMemory(SparseBlock.Type type, long nrows, long ncols, double sparsity) {
 		switch( type ) {


[2/2] incubator-systemml git commit: [SYSTEMML-1550] Improved cp dim/sparsity check (exploit worst-case est)

Posted by mb...@apache.org.
[SYSTEMML-1550] Improved cp dim/sparsity check (exploit worst-case est)

This patch improves the operator selection of all hops by taking the
worst-case estimates in account during checks for valid cp dimensions
and sparsity. With this improvement, we are able to avoid unnecessary
distributed operations for hops with large dimensions and unknown exact
but known worst-case sparsity estimates such as rexpand.


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/2a1eb4c9
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/2a1eb4c9
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/2a1eb4c9

Branch: refs/heads/master
Commit: 2a1eb4c9b290c3af62f3ed2b0744cc507d1994d6
Parents: 46bd37c
Author: Matthias Boehm <mb...@gmail.com>
Authored: Thu Apr 20 21:47:06 2017 -0700
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Thu Apr 20 22:13:49 2017 -0700

----------------------------------------------------------------------
 src/main/java/org/apache/sysml/hops/Hop.java | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2a1eb4c9/src/main/java/org/apache/sysml/hops/Hop.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/Hop.java b/src/main/java/org/apache/sysml/hops/Hop.java
index 15a0cc3..eb9aa5e 100644
--- a/src/main/java/org/apache/sysml/hops/Hop.java
+++ b/src/main/java/org/apache/sysml/hops/Hop.java
@@ -50,7 +50,6 @@ import org.apache.sysml.runtime.util.UtilFunctions;
 
 public abstract class Hop 
 {
-	
 	protected static final Log LOG =  LogFactory.getLog(Hop.class.getName());
 	
 	public static final long CPThreshold = 2000;
@@ -92,6 +91,7 @@ public abstract class Hop
 	protected double _memEstimate = OptimizerUtils.INVALID_SIZE;
 	protected double _processingMemEstimate = 0;
 	protected double _spBroadcastMemEstimate = 0;
+	protected boolean _validCPSizeEstimate = false;
 	
 	// indicates if there are unknowns during compilation 
 	// (in that case re-complication ensures robustness and efficiency)
@@ -198,10 +198,10 @@ public abstract class Hop
 			//Step 2: check valid output and input sizes for cp (<16GB for DENSE)
 			//(if the memory estimate is smaller than max_numcells we are guaranteed to have it in sparse representation)
 			invalid |= !(  OptimizerUtils.isValidCPMatrixSize(_dim1, _dim2, OptimizerUtils.getSparsity(_dim1, _dim2, _nnz))
-					    || getOutputMemEstimate() < OptimizerUtils.MAX_NUMCELLS_CP_DENSE );
+					    || getOutputMemEstimate() < 8*OptimizerUtils.MAX_NUMCELLS_CP_DENSE || _validCPSizeEstimate );
 			for( Hop in : getInput() )
 				invalid |= !(   OptimizerUtils.isValidCPMatrixSize(in._dim1, in._dim2, OptimizerUtils.getSparsity(in._dim1, in._dim2, in._nnz))
-						     || in.getOutputMemEstimate() < OptimizerUtils.MAX_NUMCELLS_CP_DENSE);
+						     || in.getOutputMemEstimate() < 8*OptimizerUtils.MAX_NUMCELLS_CP_DENSE || in._validCPSizeEstimate);
 			
 			//force exec type mr if necessary
 			if( invalid ) { 
@@ -612,7 +612,7 @@ public abstract class Hop
 					//nnz always exactly known (see dimsKnown(true))
 					_outputMemEstimate = computeOutputMemEstimate( _dim1, _dim2, _nnz );
 				}
-				//1b) infer output statistics and mem estimate based on these statistics
+				//1b) infer output statistics and mem estimate based on worst-case statistics
 				else if( memo.hasInputStatistics(this) )
 				{
 					//infer the output stats
@@ -682,9 +682,13 @@ public abstract class Hop
 		
 		////////
 		//Step 3) Compute final hop memory estimate  
-			
+		
 		//final estimate (sum of inputs/intermediates/output)
 		_memEstimate = getInputOutputSize();
+		
+		//update optional valid cp size estimate (based on worst-case dimensions)
+		_validCPSizeEstimate = (wstats!=null) ? OptimizerUtils.isValidCPMatrixSize(
+			wstats[0], wstats[1], OptimizerUtils.getSparsity(wstats[0], wstats[1], wstats[2])) : false;
 	}