You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2016/01/24 01:09:21 UTC

[4/5] incubator-systemml git commit: Performance spark wsloss/wcemm ultra-sparse (prefilter empty blocks)

Performance spark wsloss/wcemm ultra-sparse (prefilter empty blocks)

Ultra-sparse matrices are a common case for factorization algorithms.
Accordingly, this change introduces a prefilter for empty blocks on
wsloss and wcemm, which is safe because the full aggregate ensures
result correctness.
In a scenario of wsloss over KDD2010 (15M x 30M, sparsity 9.4e-7), this  
achieved a total runtime reduction from 70s to 39s despite inputs from
HDFS.

Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/a19a14c0
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/a19a14c0
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/a19a14c0

Branch: refs/heads/master
Commit: a19a14c05e8034d5abf7f5c9ffbaea96f05b8017
Parents: 10d1afc
Author: Matthias Boehm <mb...@us.ibm.com>
Authored: Fri Jan 22 22:40:29 2016 -0800
Committer: Matthias Boehm <mb...@us.ibm.com>
Committed: Sat Jan 23 16:08:14 2016 -0800

----------------------------------------------------------------------
 .../runtime/instructions/spark/QuaternarySPInstruction.java   | 7 +++++++
 1 file changed, 7 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/a19a14c0/src/main/java/org/apache/sysml/runtime/instructions/spark/QuaternarySPInstruction.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/spark/QuaternarySPInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/spark/QuaternarySPInstruction.java
index af65a9e..500cc01 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/spark/QuaternarySPInstruction.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/spark/QuaternarySPInstruction.java
@@ -53,6 +53,7 @@ import org.apache.sysml.runtime.instructions.cp.CPOperand;
 import org.apache.sysml.runtime.instructions.cp.DoubleObject;
 import org.apache.sysml.runtime.instructions.spark.data.LazyIterableIterator;
 import org.apache.sysml.runtime.instructions.spark.data.PartitionedBroadcastMatrix;
+import org.apache.sysml.runtime.instructions.spark.functions.FilterNonEmptyBlocksFunction;
 import org.apache.sysml.runtime.instructions.spark.utils.RDDAggregateUtils;
 import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
 import org.apache.sysml.runtime.matrix.data.MatrixBlock;
@@ -196,6 +197,12 @@ public class QuaternarySPInstruction extends ComputationSPInstruction
 		int brlen = inMc.getRowsPerBlock();
 		int bclen = inMc.getColsPerBlock();
 		
+		//pre-filter empty blocks (ultra-sparse matrices) for full aggregates
+		//(map/redwsloss, map/redwcemm); safe because these ops produce a scalar
+		if( qop.wtype1 != null || qop.wtype4 != null ) {
+			in = in.filter(new FilterNonEmptyBlocksFunction());
+		}
+		
 		//map-side only operation (one rdd input, two broadcasts)
 		if(    WeightedSquaredLoss.OPCODE.equalsIgnoreCase(getOpcode())  
 			|| WeightedSigmoid.OPCODE.equalsIgnoreCase(getOpcode())