You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2017/10/18 06:09:04 UTC

systemml git commit: [SYSTEMML-1967] Fix spark rand instruction (#partitions for sparse)

Repository: systemml
Updated Branches:
  refs/heads/master 5b8d62659 -> 4f29b3485


[SYSTEMML-1967] Fix spark rand instruction (#partitions for sparse)

This patch fixes the spark rand instruction to create the correct number
of partitions by taking sparsity into account. So far, this instruction
called the size-estimation primitive with the number of non-zeros instead
of the sparsity, which led to dense estimates.

Furthermore, this patch also fixes minor configuration issues of
hand-coded fused operators: the MR_BINARY_UAGG_CHAIN method is now only
selected if OptimizerUtils.ALLOW_OPERATOR_FUSION is enabled.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/4f29b348
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/4f29b348
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/4f29b348

Branch: refs/heads/master
Commit: 4f29b3485f4eb8a58aebd41eef22c5d0f92d632f
Parents: 5b8d626
Author: Matthias Boehm <mb...@gmail.com>
Authored: Tue Oct 17 23:09:40 2017 -0700
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Tue Oct 17 23:09:40 2017 -0700

----------------------------------------------------------------------
 src/main/java/org/apache/sysml/hops/BinaryOp.java     | 14 ++++++++------
 .../java/org/apache/sysml/hops/OptimizerUtils.java    |  2 +-
 .../runtime/instructions/spark/RandSPInstruction.java |  2 +-
 3 files changed, 10 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/4f29b348/src/main/java/org/apache/sysml/hops/BinaryOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/BinaryOp.java b/src/main/java/org/apache/sysml/hops/BinaryOp.java
index 58bbc8f..76c1a64 100644
--- a/src/main/java/org/apache/sysml/hops/BinaryOp.java
+++ b/src/main/java/org/apache/sysml/hops/BinaryOp.java
@@ -1373,8 +1373,8 @@ public class BinaryOp extends Hop
 
 	private static boolean requiresPartitioning( Hop rightInput )
 	{
-		return (   rightInput.dimsKnown() //known input size 
-                && rightInput.getDim1()*rightInput.getDim2() > DistributedCacheInput.PARTITION_SIZE);
+		return ( rightInput.dimsKnown() //known input size 
+			&& rightInput.getDim1()*rightInput.getDim2() > DistributedCacheInput.PARTITION_SIZE);
 	}
 	
 	public static boolean requiresReplication( Hop left, Hop right )
@@ -1393,9 +1393,10 @@ public class BinaryOp extends Hop
 		long m1_cpb = left.getColsInBlock();
 		
 		//MR_BINARY_UAGG_CHAIN only applied if result is column/row vector of MV binary operation.
-		if( right instanceof AggUnaryOp && right.getInput().get(0) == left  //e.g., P / rowSums(P)
+		if( OptimizerUtils.ALLOW_OPERATOR_FUSION
+			&& right instanceof AggUnaryOp && right.getInput().get(0) == left  //e.g., P / rowSums(P)
 			&& ((((AggUnaryOp) right).getDirection() == Direction.Row && m1_dim2 > 1 && m1_dim2 <= m1_cpb ) //single column block
-		    ||  (((AggUnaryOp) right).getDirection() == Direction.Col && m1_dim1 > 1 && m1_dim1 <= m1_rpb ))) //single row block
+			|| (((AggUnaryOp) right).getDirection() == Direction.Col && m1_dim1 > 1 && m1_dim1 <= m1_rpb ))) //single row block
 		{
 			return MMBinaryMethod.MR_BINARY_UAGG_CHAIN;
 		}
@@ -1430,9 +1431,10 @@ public class BinaryOp extends Hop
 		}
 		
 		//MR_BINARY_UAGG_CHAIN only applied if result is column/row vector of MV binary operation.
-		if( right instanceof AggUnaryOp && right.getInput().get(0) == left  //e.g., P / rowSums(P)
+		if( OptimizerUtils.ALLOW_OPERATOR_FUSION
+			&& right instanceof AggUnaryOp && right.getInput().get(0) == left  //e.g., P / rowSums(P)
 			&& ((((AggUnaryOp) right).getDirection() == Direction.Row && m1_dim2 > 1 && m1_dim2 <= m1_cpb ) //single column block
-		    ||  (((AggUnaryOp) right).getDirection() == Direction.Col && m1_dim1 > 1 && m1_dim1 <= m1_rpb ))) //single row block
+			|| (((AggUnaryOp) right).getDirection() == Direction.Col && m1_dim1 > 1 && m1_dim1 <= m1_rpb ))) //single row block
 		{
 			return MMBinaryMethod.MR_BINARY_UAGG_CHAIN;
 		}

http://git-wip-us.apache.org/repos/asf/systemml/blob/4f29b348/src/main/java/org/apache/sysml/hops/OptimizerUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/OptimizerUtils.java b/src/main/java/org/apache/sysml/hops/OptimizerUtils.java
index 5d831e5..d67e086 100644
--- a/src/main/java/org/apache/sysml/hops/OptimizerUtils.java
+++ b/src/main/java/org/apache/sysml/hops/OptimizerUtils.java
@@ -709,7 +709,7 @@ public class OptimizerUtils
 		//check for guaranteed existence of empty blocks (less nnz than total number of blocks)
 		long tnrblks = (long)Math.ceil((double)rlen/brlen);
 		long tncblks = (long)Math.ceil((double)clen/bclen);
-		long nnz = (long) Math.ceil(sp * rlen * clen);		
+		long nnz = (long) Math.ceil(sp * rlen * clen);
 		if( nnz < tnrblks * tncblks ) {
 			long lrlen = Math.min(rlen, brlen);
 			long lclen = Math.min(clen, bclen);

http://git-wip-us.apache.org/repos/asf/systemml/blob/4f29b348/src/main/java/org/apache/sysml/runtime/instructions/spark/RandSPInstruction.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/spark/RandSPInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/spark/RandSPInstruction.java
index b50bf73..2266eeb 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/spark/RandSPInstruction.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/spark/RandSPInstruction.java
@@ -350,7 +350,7 @@ public class RandSPInstruction extends UnarySPInstruction {
 		LongStream nnz = LibMatrixDatagen.computeNNZperBlock(rows, cols, rowsInBlock, colsInBlock, sparsity);
 		PrimitiveIterator.OfLong nnzIter = nnz.iterator();
 		double totalSize = OptimizerUtils.estimatePartitionedSizeExactSparsity( rows, cols, rowsInBlock, 
-			colsInBlock, rows*cols*sparsity); //overestimate for on disk, ensures hdfs block per partition
+			colsInBlock, sparsity); //overestimate for on disk, ensures hdfs block per partition
 		double hdfsBlkSize = InfrastructureAnalyzer.getHDFSBlockSize();
 		long numBlocks = new MatrixCharacteristics(rows, cols, rowsInBlock, colsInBlock).getNumBlocks();
 		long numColBlocks = (long)Math.ceil((double)cols/(double)colsInBlock);