You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2017/07/09 05:33:20 UTC

[4/4] systemml git commit: [SYSTEMML-1754] Performance removeEmpty (shallow copy if unmodified)

[SYSTEMML-1754] Performance removeEmpty (shallow copy if unmodified)

This patch improves the performance and memory efficiency of removeEmpty
rows/columns for both dense and sparse inputs by using a shallow output
copy if the determined output dimensions match the input dimensions. 

Basic experiments (cumulative runtime for 100 iterations):
1) removeEmpty rows 10Kx10K, sp=1.0 (dense): 33.9 -> 0.027s
2) removeEmpty rows 100Kx1K, sp=1.0 (dense): 33.2 -> 0.137s
3) removeEmpty cols 10Kx10K, sp=1.0 (dense): 57.8 -> 14.8s
4) removeEmpty cols 1Kx100K, sp=1.0 (dense): 61.5 -> 14.3s
5) removeEmpty rows 10Kx10K, sp=0.1 (sparse): 0.055 -> 0.036s
6) removeEmpty rows 100Kx1K, sp=0.1 (sparse): 0.470 -> 0.127s
7) removeEmpty cols 10Kx10K, sp=0.1 (sparse): 55.3 -> 1.3s
8) removeEmpty cols 1Kx100K, sp=0.1 (sparse): 39.8 -> 1.1s

Note that this patch improves performance by orders of magnitude, except
for  sparse removeEmpty rows (which already used shallow row copy).


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/b84a4933
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/b84a4933
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/b84a4933

Branch: refs/heads/master
Commit: b84a4933c11a25d064216bc0606938bcf0d792e6
Parents: 73f7c6a
Author: Matthias Boehm <mb...@gmail.com>
Authored: Sat Jul 8 21:44:51 2017 -0700
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Sat Jul 8 22:32:05 2017 -0700

----------------------------------------------------------------------
 .../runtime/matrix/data/LibMatrixReorg.java     | 103 +++++++++++--------
 1 file changed, 61 insertions(+), 42 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/b84a4933/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java
index 73d50eb..205f787 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java
@@ -1710,7 +1710,14 @@ public class LibMatrixReorg
 		if( in.isEmptyBlock(false) )
 			return ret;
 		
-		if( in.sparse ) //* <- SPARSE
+		if( SHALLOW_COPY_REORG && m == rlen2 ) {
+			ret.sparse = in.sparse;
+			if( ret.sparse )
+				ret.sparseBlock = in.sparseBlock;
+			else
+				ret.denseBlock = in.denseBlock;
+		}
+		else if( in.sparse ) //* <- SPARSE
 		{
 			//note: output dense or sparse
 			for( int i=0, cix=0; i<m; i++ )
@@ -1798,14 +1805,7 @@ public class LibMatrixReorg
 			clen2 += flags[j] ? 1 : 0;
 		}
 		
-		//Step 3: create mapping of flags to target indexes
-		int[] cix = new int[n];
-		for( int j=0, pos=0; j<n; j++ ) {
-			if( flags[j] )
-				cix[j] = pos++;	
-		}
-		
-		//Step 3: reset result and copy cols
+		//Step 3: reset result and copy columns
 		//dense stays dense if correct input representation (but robust for any input), 
 		// sparse might be dense/sparse
 		clen2 = Math.max(clen2, 1); //ensure valid output
@@ -1814,42 +1814,61 @@ public class LibMatrixReorg
 		if( in.isEmptyBlock(false) )
 			return ret;
 		
-		if( in.sparse ) //* <- SPARSE 
-		{
-			//note: output dense or sparse
-			SparseBlock a = in.sparseBlock;
-			
-			for( int i=0; i<m; i++ ) 
-				if ( !a.isEmpty(i) ) {
-					int apos = a.pos(i);
-					int alen = a.size(i);
-					int[] aix = a.indexes(i);
-					double[] avals = a.values(i);
-					for( int j=apos; j<apos+alen; j++ )
-						if( flags[aix[j]] )
-							ret.appendValue(i, cix[aix[j]], avals[j]);
-				}
-		}
-		else if( !in.sparse && !ret.sparse )  //DENSE <- DENSE
-		{
-			ret.allocateDenseBlock();
-			double[] a = in.denseBlock;
-			double[] c = ret.denseBlock;
-			
-			for(int i=0, aix=0, lcix=0; i<m; i++, lcix+=clen2)
-				for(int j=0; j<n; j++, aix++)
-					if( flags[j] )
-						 c[ lcix+cix[j] ] = a[aix];	
+		if( SHALLOW_COPY_REORG && n == clen2 ) {
+			//quick path: shallow copy if unmodified
+			ret.sparse = in.sparse;
+			if( ret.sparse )
+				ret.sparseBlock = in.sparseBlock;
+			else
+				ret.denseBlock = in.denseBlock;
 		}
-		else //SPARSE <- DENSE
+		else
 		{
-			ret.allocateSparseRowsBlock();
-			double[] a = in.denseBlock;
+			//create mapping of flags to target indexes
+			int[] cix = new int[n];
+			for( int j=0, pos=0; j<n; j++ ) {
+				if( flags[j] )
+					cix[j] = pos++;
+			}
 			
-			for(int i=0, aix=0; i<m; i++)
-				for(int j=0; j<n; j++, aix++)
-					if( flags[j] && a[aix]!=0 )
-						 ret.appendValue(i, cix[j], a[aix]);	
+			//deep copy of modified outputs
+			if( in.sparse ) //* <- SPARSE
+			{
+				//note: output dense or sparse
+				SparseBlock a = in.sparseBlock;
+				
+				for( int i=0; i<m; i++ )
+					if ( !a.isEmpty(i) ) {
+						int apos = a.pos(i);
+						int alen = a.size(i);
+						int[] aix = a.indexes(i);
+						double[] avals = a.values(i);
+						for( int j=apos; j<apos+alen; j++ )
+							if( flags[aix[j]] )
+								ret.appendValue(i, cix[aix[j]], avals[j]);
+					}
+			}
+			else if( !in.sparse && !ret.sparse )  //DENSE <- DENSE
+			{
+				ret.allocateDenseBlock();
+				double[] a = in.denseBlock;
+				double[] c = ret.denseBlock;
+				
+				for(int i=0, aix=0, lcix=0; i<m; i++, lcix+=clen2)
+					for(int j=0; j<n; j++, aix++)
+						if( flags[j] )
+							 c[ lcix+cix[j] ] = a[aix];	
+			}
+			else //SPARSE <- DENSE
+			{
+				ret.allocateSparseRowsBlock();
+				double[] a = in.denseBlock;
+				
+				for(int i=0, aix=0; i<m; i++)
+					for(int j=0; j<n; j++, aix++)
+						if( flags[j] && a[aix]!=0 )
+							 ret.appendValue(i, cix[j], a[aix]);
+			}
 		}
 		
 		//check sparsity