You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2018/04/07 21:45:06 UTC

[2/2] systemml git commit: [SYSTEMML-2237] Improved spark reshape of ultra-sparse matrices

[SYSTEMML-2237] Improved spark reshape of ultra-sparse matrices

For ultra-sparse matrices, the separate output block index creation per
row (begin and end) can dominate performance, if the number of input
non-zeros is very small and empty blocks do not need to be output.
Hence, this patch adds the handling of these cases by creating the
output block indexes only for existing non-zeros.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/a0b0e80e
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/a0b0e80e
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/a0b0e80e

Branch: refs/heads/master
Commit: a0b0e80e9d1beb28e385052141caed6e19220116
Parents: 4152680
Author: Matthias Boehm <mb...@gmail.com>
Authored: Fri Apr 6 22:38:15 2018 -0700
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Fri Apr 6 22:38:15 2018 -0700

----------------------------------------------------------------------
 .../runtime/matrix/data/LibMatrixReorg.java     | 32 +++++++++++++++-----
 1 file changed, 25 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/a0b0e80e/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java
index 42c56c4..8de6f13 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java
@@ -26,6 +26,7 @@ import java.util.Collection;
 import java.util.Comparator;
 import java.util.HashMap;
 import java.util.HashSet;
+import java.util.Iterator;
 import java.util.List;
 import java.util.Map.Entry;
 import java.util.concurrent.Callable;
@@ -471,7 +472,7 @@ public class LibMatrixReorg
 		MatrixBlock mbIn = (MatrixBlock) in.getValue();
 		
 		//prepare result blocks (no reuse in order to guarantee mem constraints)
-		Collection<MatrixIndexes> rix = computeAllResultBlockIndexes(ixIn, mcIn, mcOut, rowwise);
+		Collection<MatrixIndexes> rix = computeAllResultBlockIndexes(ixIn, mcIn, mcOut, mbIn, rowwise, outputEmptyBlocks);
 		HashMap<MatrixIndexes, MatrixBlock> rblk = createAllResultBlocks(rix, mbIn.nonZeros, mcIn, mcOut, rowwise, out);
 		
 		//basic algorithm
@@ -1450,7 +1451,7 @@ public class LibMatrixReorg
 	///////////////////////////////
 
 	private static Collection<MatrixIndexes> computeAllResultBlockIndexes( MatrixIndexes ixin,
-		MatrixCharacteristics mcIn, MatrixCharacteristics mcOut, boolean rowwise )
+		MatrixCharacteristics mcIn, MatrixCharacteristics mcOut, MatrixBlock in, boolean rowwise, boolean outputEmpty )
 	{
 		HashSet<MatrixIndexes> ret = new HashSet<>();
 		
@@ -1464,12 +1465,16 @@ public class LibMatrixReorg
 				MatrixIndexes first = computeResultBlockIndex(new MatrixIndexes(), row_offset, 0, mcIn, mcOut, rowwise);
 				MatrixIndexes last = computeResultBlockIndex(new MatrixIndexes(), max_row_offset, 0, mcIn, mcOut, rowwise);
 				createRowwiseIndexes(first, last, mcOut.getNumColBlocks(), ret);
-			
 			}
-			for( long i=row_offset; i<max_row_offset+1; i++ ) {
-				MatrixIndexes first = computeResultBlockIndex(new MatrixIndexes(), i, col_offset, mcIn, mcOut, rowwise);
-				MatrixIndexes last = computeResultBlockIndex(new MatrixIndexes(), i, max_col_offset, mcIn, mcOut, rowwise);
-				createRowwiseIndexes(first, last, mcOut.getNumColBlocks(), ret);
+			else if( in.getNonZeros()<in.getNumRows() && !outputEmpty ) {
+				createNonZeroIndexes(mcIn, mcOut, in, row_offset, col_offset, rowwise, ret);
+			}
+			else {
+				for( long i=row_offset; i<max_row_offset+1; i++ ) {
+					MatrixIndexes first = computeResultBlockIndex(new MatrixIndexes(), i, col_offset, mcIn, mcOut, rowwise);
+					MatrixIndexes last = computeResultBlockIndex(new MatrixIndexes(), i, max_col_offset, mcIn, mcOut, rowwise);
+					createRowwiseIndexes(first, last, mcOut.getNumColBlocks(), ret);
+				}
 			}
 		}
 		else{ //colwise
@@ -1478,6 +1483,9 @@ public class LibMatrixReorg
 				MatrixIndexes last = computeResultBlockIndex(new MatrixIndexes(), 0, max_col_offset, mcIn, mcOut, rowwise);
 				createColwiseIndexes(first, last, mcOut.getNumRowBlocks(), ret);
 			}
+			else if( in.getNonZeros()<in.getNumColumns() && !outputEmpty ) {
+				createNonZeroIndexes(mcIn, mcOut, in, row_offset, col_offset, rowwise, ret);
+			}
 			else {
 				for( long j=col_offset; j<max_col_offset+1; j++ ) {
 					MatrixIndexes first = computeResultBlockIndex(new MatrixIndexes(), row_offset, j, mcIn, mcOut, rowwise);
@@ -1534,6 +1542,16 @@ public class LibMatrixReorg
 			ret.add(last);
 		}
 	}
+	
+	private static void createNonZeroIndexes(MatrixCharacteristics mcIn, MatrixCharacteristics mcOut,
+			MatrixBlock in, long row_offset, long col_offset, boolean rowwise, HashSet<MatrixIndexes> ret) {
+		Iterator<IJV> iter = in.getSparseBlockIterator();
+		while( iter.hasNext() ) {
+			IJV cell = iter.next();
+			ret.add(computeResultBlockIndex(new MatrixIndexes(),
+				row_offset+cell.getI(), col_offset+cell.getJ(), mcIn, mcOut, rowwise));
+		}
+	}
 
 	@SuppressWarnings("unused")
 	private static HashMap<MatrixIndexes, MatrixBlock> createAllResultBlocks( Collection<MatrixIndexes> rix,