You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2017/11/16 08:10:42 UTC

[2/3] systemml git commit: [SYSTEMML-2016] Performance frame cbind and vector frame casts (shallow)

[SYSTEMML-2016] Performance frame cbind and vector frame casts (shallow)

This patch makes major performance improvements to two frame primitives:
cbind and matrix-to-frame casts (for the special case of a single-column
vector). Both can now be done via shallow copies, which is safe due to the
copy-on-write semantics of operations.

In a scenario with 20 iterations of cbind(as.frame(X), cbind(as.frame(Y),
as.frame(Z))) over 10M x 1 dense inputs, this patch improved end-to-end
runtime from 26.1s (15s GC) to 3.9s (0.2s GC). Note that 90% of the
remaining execution time is spent in buffer pool evictions, which are
unnecessary and will be addressed in a subsequent patch.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/1e0a4151
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/1e0a4151
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/1e0a4151

Branch: refs/heads/master
Commit: 1e0a4151d21d0821668dd7d0de34ac6b75c33358
Parents: 9e599cd
Author: Matthias Boehm <mb...@gmail.com>
Authored: Wed Nov 15 22:38:24 2017 -0800
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Thu Nov 16 00:11:52 2017 -0800

----------------------------------------------------------------------
 .../RewriteAlgebraicSimplificationStatic.java        |  2 +-
 .../apache/sysml/runtime/matrix/data/FrameBlock.java |  8 +++-----
 .../org/apache/sysml/runtime/util/DataConverter.java | 15 +++++++++++----
 3 files changed, 15 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/1e0a4151/src/main/java/org/apache/sysml/hops/rewrite/RewriteAlgebraicSimplificationStatic.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/rewrite/RewriteAlgebraicSimplificationStatic.java b/src/main/java/org/apache/sysml/hops/rewrite/RewriteAlgebraicSimplificationStatic.java
index d71c4e0..cc2fe88 100644
--- a/src/main/java/org/apache/sysml/hops/rewrite/RewriteAlgebraicSimplificationStatic.java
+++ b/src/main/java/org/apache/sysml/hops/rewrite/RewriteAlgebraicSimplificationStatic.java
@@ -522,7 +522,7 @@ public class RewriteAlgebraicSimplificationStatic extends HopRewriteRule
 	private static Hop foldMultipleAppendOperations(Hop hi) 
 		throws HopsException
 	{
-		if( hi.getDataType().isMatrix() //no string appends
+		if( hi.getDataType().isMatrix() //no string appends or frames
 			&& (HopRewriteUtils.isBinary(hi, OpOp2.CBIND, OpOp2.RBIND) 
 			|| HopRewriteUtils.isNary(hi, OpOpN.CBIND, OpOpN.RBIND))
 			&& !OptimizerUtils.isHadoopExecutionMode() )

http://git-wip-us.apache.org/repos/asf/systemml/blob/1e0a4151/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java b/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java
index a56fd6a..e83b362 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java
@@ -471,9 +471,9 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable
 		for( int j=0; j<ncol; j++ )
 			tmpData[j] = new DoubleArray(cols[j]);
 		_colnames = empty ? null : (String[]) ArrayUtils.addAll(getColumnNames(), 
-				createColNames(getNumColumns(), ncol)); //before schema modification
+			createColNames(getNumColumns(), ncol)); //before schema modification
 		_schema = empty ? tmpSchema : (ValueType[]) ArrayUtils.addAll(_schema, tmpSchema); 
-		_coldata = empty ? tmpData : (Array[]) ArrayUtils.addAll(_coldata, tmpData);		
+		_coldata = empty ? tmpData : (Array[]) ArrayUtils.addAll(_coldata, tmpData);
 		_numRows = cols[0].length;
 	}
 
@@ -988,10 +988,8 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable
 			ret._colnames = (String[]) ArrayUtils.addAll(getColumnNames(), that.getColumnNames());
 			ret._colmeta = (ColumnMetadata[]) ArrayUtils.addAll(_colmeta, that._colmeta);
 			
-			//concatenate column data (w/ deep copy to prevent side effects)
+			//concatenate column data (w/ shallow copy which is safe due to copy on write semantics)
 			ret._coldata = (Array[]) ArrayUtils.addAll(_coldata, that._coldata);
-			for( int i=0; i<ret.getNumColumns(); i++ )
-				ret._coldata[i] = ret._coldata[i].clone();
 		}
 		else //ROW APPEND
 		{

http://git-wip-us.apache.org/repos/asf/systemml/blob/1e0a4151/src/main/java/org/apache/sysml/runtime/util/DataConverter.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/util/DataConverter.java b/src/main/java/org/apache/sysml/runtime/util/DataConverter.java
index af69e81..bfc07ba 100644
--- a/src/main/java/org/apache/sysml/runtime/util/DataConverter.java
+++ b/src/main/java/org/apache/sysml/runtime/util/DataConverter.java
@@ -654,7 +654,7 @@ public class DataConverter
 		
 		if( mb.isInSparseFormat() ) //SPARSE
 		{
-			SparseBlock sblock = mb.getSparseBlock();			
+			SparseBlock sblock = mb.getSparseBlock();
 			for( int i=0; i<mb.getNumRows(); i++ ) {
 				Arrays.fill(row, null); //reset
 				if( sblock != null && !sblock.isEmpty(i) ) {
@@ -664,7 +664,7 @@ public class DataConverter
 					double[] aval = sblock.values(i);
 					for( int j=apos; j<apos+alen; j++ ) {
 						row[aix[j]] = UtilFunctions.doubleToObject(
-								schema[aix[j]], aval[j]);					
+								schema[aix[j]], aval[j]);
 					}
 				}
 				frame.appendRow(row);
@@ -673,8 +673,15 @@ public class DataConverter
 		else //DENSE
 		{
 			int dFreq = UtilFunctions.frequency(schema, ValueType.DOUBLE);
-		
-			if( dFreq == schema.length ) {
+			
+			if( schema.length==1 && dFreq==1 && mb.isAllocated() ) {
+				// special case double schema and single columns which
+				// allows for a shallow copy since the physical representation
+				// of row-major matrix and column-major frame match exactly
+				frame.reset();
+				frame.appendColumns(new double[][]{mb.getDenseBlock()});
+			}
+			else if( dFreq == schema.length ) {
 				// special case double schema (without cell-object creation, 
 				// col pre-allocation, and cache-friendly row-column copy)
 				int m = mb.getNumRows();