You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2016/01/24 01:09:21 UTC
[4/5] incubator-systemml git commit: Performance spark wsloss/wcemm
ultra-sparse (prefilter empty blocks)
Performance spark wsloss/wcemm ultra-sparse (prefilter empty blocks)
Ultra-sparse matrices are a common case for factorization algorithms.
Accordingly, this change introduces a prefilter for empty blocks on
wsloss and wcemm. This is safe because both are full aggregates that
produce a scalar, so result correctness is preserved.
In a scenario of wsloss over KDD2010 (15M x 30M, sparsity 9.4e-7), this
achieved a total runtime reduction from 70s to 39s despite inputs from
HDFS.
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/a19a14c0
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/a19a14c0
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/a19a14c0
Branch: refs/heads/master
Commit: a19a14c05e8034d5abf7f5c9ffbaea96f05b8017
Parents: 10d1afc
Author: Matthias Boehm <mb...@us.ibm.com>
Authored: Fri Jan 22 22:40:29 2016 -0800
Committer: Matthias Boehm <mb...@us.ibm.com>
Committed: Sat Jan 23 16:08:14 2016 -0800
----------------------------------------------------------------------
.../runtime/instructions/spark/QuaternarySPInstruction.java | 7 +++++++
1 file changed, 7 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/a19a14c0/src/main/java/org/apache/sysml/runtime/instructions/spark/QuaternarySPInstruction.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/spark/QuaternarySPInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/spark/QuaternarySPInstruction.java
index af65a9e..500cc01 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/spark/QuaternarySPInstruction.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/spark/QuaternarySPInstruction.java
@@ -53,6 +53,7 @@ import org.apache.sysml.runtime.instructions.cp.CPOperand;
import org.apache.sysml.runtime.instructions.cp.DoubleObject;
import org.apache.sysml.runtime.instructions.spark.data.LazyIterableIterator;
import org.apache.sysml.runtime.instructions.spark.data.PartitionedBroadcastMatrix;
+import org.apache.sysml.runtime.instructions.spark.functions.FilterNonEmptyBlocksFunction;
import org.apache.sysml.runtime.instructions.spark.utils.RDDAggregateUtils;
import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;
@@ -196,6 +197,12 @@ public class QuaternarySPInstruction extends ComputationSPInstruction
int brlen = inMc.getRowsPerBlock();
int bclen = inMc.getColsPerBlock();
+ //pre-filter empty blocks (ultra-sparse matrices) for full aggregates
+ //(map/redwsloss, map/redwcemm); safe because these ops produce a scalar
+ if( qop.wtype1 != null || qop.wtype4 != null ) {
+ in = in.filter(new FilterNonEmptyBlocksFunction());
+ }
+
//map-side only operation (one rdd input, two broadcasts)
if( WeightedSquaredLoss.OPCODE.equalsIgnoreCase(getOpcode())
|| WeightedSigmoid.OPCODE.equalsIgnoreCase(getOpcode())