You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2017/10/14 09:30:12 UTC

[2/3] systemml git commit: [SYSTEMML-1959] Extended shallow-serialize for sparse matrices

[SYSTEMML-1959] Extended shallow-serialize for sparse matrices 

Originally, all dense and sparse matrices were serialized into byte
arrays on write into the buffer pool. Later, we introduced the concept
of shallow-serialize where we simply hold a strong reference to all
matrices whose in-memory size is equal to or close to their size in
serialized form, which greatly reduced the buffer pool overhead.

For tall and skinny sparse matrices in MCSR format (our default), this
is not the case, which causes serialization overhead and potentially
unnecessary evictions if the serialized representation is larger than
2GB (max integer size of byte arrays). This patch overcomes these
issues by deciding upon a potential conversion from MCSR to CSR, which
allows for shallow serialize. Since this only happens in cases where we
would have serialized or evicted the MCSR block, this conversion is safe
with regard to potentially unnecessary overhead.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/db725dec
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/db725dec
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/db725dec

Branch: refs/heads/master
Commit: db725dec447be39b4e2f16ec66d669ad48217874
Parents: d641c22
Author: Matthias Boehm <mb...@gmail.com>
Authored: Fri Oct 13 22:44:09 2017 -0700
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Sat Oct 14 02:30:37 2017 -0700

----------------------------------------------------------------------
 .../controlprogram/caching/ByteBuffer.java      | 17 +++---
 .../controlprogram/caching/CacheBlock.java      | 20 +++++++
 .../sysml/runtime/matrix/data/FrameBlock.java   | 11 +++-
 .../sysml/runtime/matrix/data/MatrixBlock.java  | 56 ++++++++++++++------
 4 files changed, 81 insertions(+), 23 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/db725dec/src/main/java/org/apache/sysml/runtime/controlprogram/caching/ByteBuffer.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/caching/ByteBuffer.java b/src/main/java/org/apache/sysml/runtime/controlprogram/caching/ByteBuffer.java
index 807fdc4..a87e4b4 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/caching/ByteBuffer.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/caching/ByteBuffer.java
@@ -52,7 +52,7 @@ public class ByteBuffer
 	public void serializeBlock( CacheBlock cb ) 
 		throws IOException
 	{	
-		_shallow = cb.isShallowSerialize();
+		_shallow = cb.isShallowSerialize(true);
 		_matrix = (cb instanceof MatrixBlock);
 		
 		try
@@ -69,6 +69,10 @@ public class ByteBuffer
 			}
 			else //SPARSE/DENSE -> DENSE
 			{
+				//convert to shallow serialize block if necessary
+				if( !cb.isShallowSerialize() )
+					cb.toShallowSerializeBlock();
+				
 				//shallow serialize
 				_cdata = cb;
 			}
@@ -160,14 +164,15 @@ public class ByteBuffer
 	 */
 	public static boolean isValidCapacity( long size, CacheBlock cb )
 	{
-		if( !cb.isShallowSerialize() ) { //SPARSE matrix blocks
+		if( !cb.isShallowSerialize(true) ) { //SPARSE matrix blocks
 			// since cache blocks are serialized into a byte representation
 			// the buffer buffer can hold at most 2GB in size 
-			return ( size <= Integer.MAX_VALUE );	
+			return ( size <= Integer.MAX_VALUE );
 		}
-		else {//DENSE matrix / frame blocks
-			// since for dense matrix blocks we use a shallow serialize (strong reference), 
-			// the byte buffer can hold any size (currently upper bounded by 16GB) 
+		else {//DENSE/SPARSE matrix / frame blocks
+			// for dense and under special conditions also sparse matrix blocks 
+			// we use a shallow serialize (strong reference), there is no additional
+			// capacity constraint for serializing these blocks into byte arrays
 			return true;
 		}
 	}

http://git-wip-us.apache.org/repos/asf/systemml/blob/db725dec/src/main/java/org/apache/sysml/runtime/controlprogram/caching/CacheBlock.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/caching/CacheBlock.java b/src/main/java/org/apache/sysml/runtime/controlprogram/caching/CacheBlock.java
index eb34b09..bd03ead 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/caching/CacheBlock.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/caching/CacheBlock.java
@@ -59,6 +59,26 @@ public interface CacheBlock extends Writable
 	public boolean isShallowSerialize();
 	
 	/**
+	 * Indicates if the cache block is subject to shallow serialized,
+	 * which is generally true if in-memory size and serialized size
+	 * are almost identical allowing to avoid unnecessary deep serialize.
+	 * 
+	 * @param inclConvert if true report blocks as shallow serialize that are
+	 * currently not amenable but can be brought into an amenable form
+	 * via {@link #toShallowSerializeBlock() toShallowSerializeBlock}.
+	 * 
+	 * @return true if shallow serialized
+	 */
+	public boolean isShallowSerialize(boolean inclConvert);
+	
+	/**
+	 * Converts a cache block that is not shallow serializable into
+	 * a form that is shallow serializable. This methods has no affect
+	 * if the given cache block is not amenable.
+	 */
+	public void toShallowSerializeBlock();
+	
+	/**
 	 * Free unnecessarily allocated empty block.
 	 */
 	public void compactEmptyBlock();

http://git-wip-us.apache.org/repos/asf/systemml/blob/db725dec/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java b/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java
index a79f6c2..a56fd6a 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java
@@ -753,15 +753,24 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable
 	
 	@Override
 	public boolean isShallowSerialize() {
+		return isShallowSerialize(false);
+	}
+	
+	@Override
+	public boolean isShallowSerialize(boolean inclConvert) {
 		//shallow serialize if non-string schema because a frame block
 		//is always dense but strings have large array overhead per cell
 		boolean ret = true;
 		for( int j=0; j<_schema.length && ret; j++ )
 			ret &= (_schema[j] != ValueType.STRING);
-		
 		return ret;
 	}
 	
+	@Override 
+	public void toShallowSerializeBlock() {
+		//do nothing (not applicable).
+	}
+	
 	@Override
 	public void compactEmptyBlock() {
 		//do nothing

http://git-wip-us.apache.org/repos/asf/systemml/blob/db725dec/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
index 8117c04..e563c60 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
@@ -105,6 +105,8 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab
 	public static final SparseBlock.Type DEFAULT_INPLACE_SPARSEBLOCK = SparseBlock.Type.CSR;
 	//allowed overhead for shallow serialize in terms of in-memory-size/x <= serialized-size 
 	public static final double MAX_SHALLOW_SERIALIZE_OVERHEAD = 1.3;
+	//flag if MCSR blocks that do not qualify for shallow serialize should be converted to CSR
+	public static final boolean CONVERT_MCSR_TO_CSR_ON_DEEP_SERIALIZE = true;
 	//basic header (int rlen, int clen, byte type)
 	public static final int HEADER_SIZE = 9;
 	
@@ -947,6 +949,10 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab
 			isPM &= sblock.isEmpty(i) || sblock.size(i) == 1;
 		return isPM;
 	}
+	
+	private boolean isUltraSparseSerialize(boolean sparseDst) {
+		return nonZeros<rlen && sparseDst;
+	}
 
 	/**
 	 * Evaluates if this matrix block should be in sparse format in
@@ -1004,7 +1010,7 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab
 		//ensure exact size estimates for write
 		if( nonZeros <= 0 ) {
 			recomputeNonZeros();
-		}	
+		}
 		
 		//decide on in-memory representation
 		return evalSparseFormatOnDisk(lrlen, lclen, nonZeros);
@@ -1674,8 +1680,8 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab
 				if( !a.isEmpty(i) ) {
 					boolean update = a.set(i, cl, 0);
 					if( updateNNZ )
-						nonZeros -= update ? 1 : 0;							
-				}			
+						nonZeros -= update ? 1 : 0;
+				}
 		}
 		else
 		{
@@ -1685,7 +1691,7 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab
 					a.deleteIndexRange(i, cl, cu+1);
 					if( updateNNZ )
 						nonZeros += (a.size(i)-lnnz);
-				}	
+				}
 		}
 	}
 	
@@ -2028,7 +2034,7 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab
 			for(long i=0; i<nonZeros; i++) {
 				int r = in.readInt();
 				int c = in.readInt();
-				double val = in.readDouble();			
+				double val = in.readDouble();
 				denseBlock[r*clen+c] = val;
 			}
 		}
@@ -2037,7 +2043,7 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab
 			//col: read iv-pairs
 			for(long i=0; i<nonZeros; i++) {
 				int r = in.readInt();
-				double val = in.readDouble();			
+				double val = in.readDouble();
 				denseBlock[r] = val;
 			}
 		}
@@ -2060,7 +2066,7 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab
 			//write sparse to *
 			if( sparseBlock==null || nonZeros==0 ) 
 				writeEmptyBlock(out);
-			else if( nonZeros<rlen && sparseDst ) 
+			else if( isUltraSparseSerialize(sparseDst) ) 
 				writeSparseToUltraSparse(out); 
 			else if( sparseDst ) 
 				writeSparseBlock(out);
@@ -2072,7 +2078,7 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab
 			//write dense to *
 			if( denseBlock==null || nonZeros==0 ) 
 				writeEmptyBlock(out);
-			else if( nonZeros<rlen && sparseDst )
+			else if( isUltraSparseSerialize(sparseDst) )
 				writeDenseToUltraSparse(out);
 			else if( sparseDst )
 				writeDenseToSparse(out);
@@ -2127,8 +2133,8 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab
 					for(int j=pos; j<pos+nr; j++) {
 						out.writeInt(cols[j]);
 						out.writeDouble(values[j]);
-					}					
-				}	
+					}
+				}
 			}
 			for(;r<rlen; r++)
 				out.writeInt(0);
@@ -2206,14 +2212,14 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab
 						for( ; j<aix[apos+j2]; j++ )
 							out.writeDouble( 0 );
 						out.writeDouble( avals[apos+j2] );
-					}					
+					}
 					//remaining 0 values in row
 					for( int j=aix[apos+alen-1]+1; j<clen; j++)
 						out.writeDouble( 0 );
 				}
 				else //empty row
 					for( int j=0; j<clen; j++ )
-						out.writeDouble( 0 );	
+						out.writeDouble( 0 );
 			}
 		}
 	}
@@ -2361,7 +2367,7 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab
 		}
 		else {
 			//default serialize (general case)
-			write(os);	
+			write(os);
 		}
 	}
 	
@@ -2621,12 +2627,30 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab
 	
 	@Override
 	public boolean isShallowSerialize() {
+		return isShallowSerialize(false);
+	}
+	
+	@Override
+	public boolean isShallowSerialize(boolean inclConvert) {
 		//shallow serialize if dense, dense in serialized form or already in CSR
-		return !sparse || !evalSparseFormatOnDisk()
+		boolean sparseDst = evalSparseFormatOnDisk();
+		return !sparse || !sparseDst
 			|| (sparse && sparseBlock instanceof SparseBlockCSR)
 			|| (sparse && sparseBlock instanceof SparseBlockMCSR
-				&& getInMemorySize()/MAX_SHALLOW_SERIALIZE_OVERHEAD 
-				<= getExactSerializedSize());
+				&& getInMemorySize() / MAX_SHALLOW_SERIALIZE_OVERHEAD 
+				<= getExactSerializedSize())
+			|| (sparse && sparseBlock instanceof SparseBlockMCSR
+				&& nonZeros < Integer.MAX_VALUE //CSR constraint
+				&& inclConvert && CONVERT_MCSR_TO_CSR_ON_DEEP_SERIALIZE
+				&& !isUltraSparseSerialize(sparseDst));
+	}
+	
+	@Override 
+	public void toShallowSerializeBlock() {
+		if( isShallowSerialize() || !isShallowSerialize(true) )
+			return;
+		sparseBlock = SparseBlockFactory.copySparseBlock(
+			SparseBlock.Type.CSR, sparseBlock, false);
 	}
 	
 	@Override