You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2018/07/22 03:24:52 UTC
systemml git commit: [SYSTEMML-2462] Fix correctness/performance in-place sparse binary ops

Repository: systemml
Updated Branches:
  refs/heads/master dac22aae7 -> 54dbe9bb2


[SYSTEMML-2462] Fix correctness/performance in-place sparse binary ops

This patch fixes result correctness issues (or alternatively
index-out-of-bounds exceptions) on in-place sparse binary operations
with left-hand-side inputs in CSR (as produced by special operations or
dense to sparse conversion). This implementation assumed the lhs in MCSR
and hence fails on row merging with CSR and COO. We now ensure that the
in-place target is in MCSR.

Furthermore, this also fixes a couple of performance issues due to
unnecessary copies, unnecessary truncation, and potential gaps after
zero introducing operations.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/54dbe9bb
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/54dbe9bb
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/54dbe9bb

Branch: refs/heads/master
Commit: 54dbe9bb26df16164a1c2ca4d0d5877e4fb95deb
Parents: dac22aa
Author: Matthias Boehm <mb...@gmail.com>
Authored: Sat Jul 21 20:10:30 2018 -0700
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Sat Jul 21 20:10:30 2018 -0700

----------------------------------------------------------------------
 .../runtime/matrix/data/LibMatrixBincell.java   | 108 +++++++++----------
 1 file changed, 50 insertions(+), 58 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/54dbe9bb/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixBincell.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixBincell.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixBincell.java
index ba431de..387182f 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixBincell.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixBincell.java
@@ -34,7 +34,6 @@ import org.apache.sysml.runtime.functionobjects.MinusMultiply;
 import org.apache.sysml.runtime.functionobjects.Multiply;
 import org.apache.sysml.runtime.functionobjects.Multiply2;
 import org.apache.sysml.runtime.functionobjects.NotEquals;
-import org.apache.sysml.runtime.functionobjects.Or;
 import org.apache.sysml.runtime.functionobjects.Plus;
 import org.apache.sysml.runtime.functionobjects.PlusMultiply;
 import org.apache.sysml.runtime.functionobjects.Power2;
@@ -47,16 +46,11 @@ import org.apache.sysml.runtime.util.SortUtils;
 import org.apache.sysml.runtime.util.UtilFunctions;
 
 /**
- * MB:
  * Library for binary cellwise operations (incl arithmetic, relational, etc). Currently,
  * we don't have dedicated support for the individual operations but for categories of
  * operations and combinations of dense/sparse and MM/MV. Safe/unsafe refer to sparse-safe
  * and sparse-unsafe operations.
- *  
  * 
- * 
- * TODO: custom operator implementations in order to turn unnecessarily sparse-unsafe
- * operations into sparse safe (e.g., relational operations)
  */
 public class LibMatrixBincell 
 {
@@ -1114,74 +1108,54 @@ public class LibMatrixBincell
 	}
 	
 	private static void safeBinaryInPlaceSparse(MatrixBlock m1ret, MatrixBlock m2, BinaryOperator op) {
-		if(m1ret.sparseBlock!=null)
+		//allocation and preparation (note: for correctness and performance, this 
+		//implementation requires the lhs in MCSR and hence we explicitly convert)
+		if( m1ret.sparseBlock!=null )
 			m1ret.allocateSparseRowsBlock(false);
-		if(m2.sparseBlock!=null)
+		if( !(m1ret.sparseBlock instanceof SparseBlockMCSR) )
+			m1ret.sparseBlock = SparseBlockFactory.copySparseBlock(
+				SparseBlock.Type.MCSR, m1ret.sparseBlock, false);
+		if( m2.sparseBlock!=null )
 			m2.allocateSparseRowsBlock(false);
 		SparseBlock c = m1ret.sparseBlock;
 		SparseBlock b = m2.sparseBlock;
-		
 		final int rlen = m1ret.rlen;
 		final int clen = m1ret.clen;
 		
-		if( c!=null && b!=null )
-		{
-			for(int r=0; r<rlen; r++)
-			{
-				if(c.isEmpty(r) && b.isEmpty(r)) continue;
-				
+		if( c!=null && b!=null ) {
+			for(int r=0; r<rlen; r++) {
+				if(c.isEmpty(r) && b.isEmpty(r))
+					continue;
 				if( b.isEmpty(r) ) {
-					int apos = c.pos(r);
-					int alen = c.size(r);
-					double[] values=c.values(r);
-					for(int i=apos; i<apos+alen; i++)
-						values[i]=op.fn.execute(values[i], 0);
+					zeroRightForSparseBinary(op, r, m1ret);
+				}
+				else if( c.isEmpty(r) ) {
+					appendRightForSparseBinary(op, b.values(r), b.indexes(r), b.pos(r), b.size(r), r, m1ret);
 				}
 				else {
+					//this approach w/ single copy only works with the MCSR format
 					int estimateSize = Math.min(clen, (!c.isEmpty(r) ?
 						c.size(r) : 0) + (!b.isEmpty(r) ? b.size(r) : 0));
-					SparseRow thisRow = c.get(r);
-					c.set(r, new SparseRowVector(estimateSize, clen), false);
-					
-					if(thisRow!=null) {
-						m1ret.nonZeros-=thisRow.size();
-						mergeForSparseBinary(op, thisRow.values(), thisRow.indexes(), 0, 
-								thisRow.size(), b.values(r), b.indexes(r), b.pos(r), b.size(r), r, m1ret);
-					}
-					else {
-						appendRightForSparseBinary(op, b.values(r), b.indexes(r), b.pos(r), b.size(r), 0, r, m1ret);
-					}
+					SparseRow old = c.get(r);
+					c.set(r, new SparseRowVector(estimateSize), false);
+					m1ret.nonZeros -= old.size();
+					mergeForSparseBinary(op, old.values(), old.indexes(), 0, 
+						old.size(), b.values(r), b.indexes(r), b.pos(r), b.size(r), r, m1ret);
 				}
 			}
 		}
-		else if( c == null ) {
+		else if( c == null ) { //lhs empty
 			m1ret.sparseBlock = SparseBlockFactory.createSparseBlock(rlen);
-			for(int r=0; r<rlen; r++) {
-				if( !b.isEmpty(r) ) {
-					SparseRow tmp = new SparseRowVector( b.size(r), clen );
-					appendRightForSparseBinary(op, b.values(r), b.indexes(r), b.pos(r), b.size(r), 0, r, m1ret);
-					m1ret.sparseBlock.set(r, tmp, false);
-				}
+			for( int r=0; r<rlen; r++) {
+				if( b.isEmpty(r) ) continue;
+				appendRightForSparseBinary(op, b.values(r),
+					b.indexes(r), b.pos(r), b.size(r), r, m1ret);
 			}
 		}
-		else //that.sparseRows==null
-		{
-			if( !(op.fn instanceof Plus || op.fn instanceof Minus || op.fn instanceof Or) ) {
-				for(int r=0; r<rlen; r++){
-					if( !c.isEmpty(r) )
-					{
-						SparseRow tmp = c.get(r);
-						int alen = tmp.size();
-						double[] avals = tmp.values();
-						for( int j=0; j<alen; j++ )
-							avals[j] = op.fn.execute(avals[j], 0);
-						tmp.compact(); //handle removed entries (e.g., mult, and)
-						c.set(r, tmp, false);
-						
-						//NOTE: for left in-place, we cannot use append because it would create duplicates
-						//appendLeftForSparseBinary(op, arow.getValueContainer(), arow.getIndexContainer(), arow.size(), 0, r, m1ret);
-					}
-				}
+		else { //rhs empty
+			for(int r=0; r<rlen; r++) {
+				if( c.isEmpty(r) ) continue;
+				zeroRightForSparseBinary(op, r, m1ret);
 			}
 		}
 		
@@ -1354,11 +1328,29 @@ public class LibMatrixBincell
 		}
 	}
 
+	private static void appendRightForSparseBinary(BinaryOperator op, double[] vals, int[] ix, int pos, int size, int r, MatrixBlock ret) {
+		appendRightForSparseBinary(op, vals, ix, pos, size, 0, r, ret);
+	}
+	
 	private static void appendRightForSparseBinary(BinaryOperator op, double[] values2, int[] cols2, int pos2, int size2, 
-			int pos, int resultRow, MatrixBlock result) {
+			int pos, int r, MatrixBlock result) {
 		for( int j=pos2+pos; j<pos2+size2; j++ ) {
 			double v = op.fn.execute(0, values2[j]);
-			result.appendValue(resultRow, cols2[j], v);
+			result.appendValue(r, cols2[j], v);
 		}
 	}
+	
+	private static void zeroRightForSparseBinary(BinaryOperator op, int r, MatrixBlock ret) {
+		if( op.fn instanceof Plus || op.fn instanceof Minus )
+			return;
+		SparseBlock c = ret.sparseBlock;
+		int apos = c.pos(r);
+		int alen = c.size(r);
+		double[] values = c.values(r);
+		boolean zero = false;
+		for(int i=apos; i<apos+alen; i++)
+			zero |= ((values[i] = op.fn.execute(values[i], 0)) == 0);
+		if( zero )
+			c.compact(r);
+	}
 }