You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2018/10/06 21:33:07 UTC

systemml git commit: [SYSTEMML-2479] Extended matrix histogram sketch propagation (misc ops)

Repository: systemml
Updated Branches:
  refs/heads/master 30cff5e22 -> ef15e582b


[SYSTEMML-2479] Extended matrix histogram sketch propagation (misc ops)

This patch extends the matrix histogram sparsity estimator by sketch
propagation for intermediates of remaining operations (comparison,
transpose, diag, reshape).

Furthermore, this also includes some minor performance improvements for
sparsity estimation of element-wise addition and multiplication, as well
as accuracy improvements for element-wise addition.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/ef15e582
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/ef15e582
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/ef15e582

Branch: refs/heads/master
Commit: ef15e582b6d6cae1aa8206279b4cc063d717e287
Parents: 30cff5e
Author: Matthias Boehm <mb...@gmail.com>
Authored: Sat Oct 6 21:44:13 2018 +0200
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Sat Oct 6 23:32:41 2018 +0200

----------------------------------------------------------------------
 .../hops/estim/EstimatorMatrixHistogram.java    | 144 +++++++++++++++----
 1 file changed, 118 insertions(+), 26 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/ef15e582/src/main/java/org/apache/sysml/hops/estim/EstimatorMatrixHistogram.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/estim/EstimatorMatrixHistogram.java b/src/main/java/org/apache/sysml/hops/estim/EstimatorMatrixHistogram.java
index cd4b397..9f9075d 100644
--- a/src/main/java/org/apache/sysml/hops/estim/EstimatorMatrixHistogram.java
+++ b/src/main/java/org/apache/sysml/hops/estim/EstimatorMatrixHistogram.java
@@ -105,22 +105,20 @@ public class EstimatorMatrixHistogram extends SparsityEstimator
 			case MM:
 				return estimInternMM(h1, h2);
 			case MULT: {
-				final double N1 = h1.getNonZeros();
-				final double N2 = h2.getNonZeros();
-				final long scale = IntStream.range(0, h1.getCols())
-					.mapToLong(j -> (long)h1.cNnz[j] * h2.cNnz[j]).sum();
+				final double scale = IntStream.range(0, h1.getCols())
+					.mapToDouble(j -> (double)h1.cNnz[j] * h2.cNnz[j]).sum()
+					/ h1.getNonZeros() / h2.getNonZeros();
 				return IntStream.range(0, h1.getRows())
-					.mapToDouble(i -> (long)h1.rNnz[i] * h2.rNnz[i] * scale / N1 / N2) //collisions
+					.mapToDouble(i -> (double)h1.rNnz[i] * h2.rNnz[i] * scale) //collisions
 					.sum() / msize;
 			}
 			case PLUS: {
-				final double N1 = h1.getNonZeros();
-				final double N2 = h2.getNonZeros();
-				final long scale = IntStream.range(0, h1.getCols())
-					.mapToLong(j -> (long)h1.cNnz[j] * h2.cNnz[j]).sum();
+				final double scale = IntStream.range(0, h1.getCols())
+					.mapToDouble(j -> (double)h1.cNnz[j] * h2.cNnz[j]).sum()
+					/ h1.getNonZeros() / h2.getNonZeros();
 				return IntStream.range(0, h1.getRows())
-					.mapToDouble(i -> (long)h1.rNnz[i] + h2.rNnz[i] //all minus collisions
-						- (long)h1.rNnz[i] * h2.rNnz[i] * scale / N1 / N2)
+					.mapToDouble(i -> (double)h1.rNnz[i] + h2.rNnz[i] //all minus collisions
+						- (double)h1.rNnz[i] * h2.rNnz[i] * scale)
 					.sum() / msize;
 			}
 			case EQZERO:
@@ -315,12 +313,17 @@ public class EstimatorMatrixHistogram extends SparsityEstimator
 		
 		public static MatrixHistogram deriveOutputHistogram(MatrixHistogram h1, MatrixHistogram h2, double spOut, OpCode op) {
 			switch(op) {
-				case MM:    return deriveMMHistogram(h1, h2, spOut);
-				case MULT:  return deriveMultHistogram(h1, h2);
-				case PLUS:  return derivePlusHistogram(h1, h2);
-				case RBIND: return deriveRbindHistogram(h1, h2);
-				case CBIND: return deriveCbindHistogram(h1, h2);
-				//TODO add missing unary operations
+				case MM:      return deriveMMHistogram(h1, h2, spOut);
+				case MULT:    return deriveMultHistogram(h1, h2);
+				case PLUS:    return derivePlusHistogram(h1, h2);
+				case RBIND:   return deriveRbindHistogram(h1, h2);
+				case CBIND:   return deriveCbindHistogram(h1, h2);
+				case NEQZERO: return h1;
+				case EQZERO:  return deriveEq0Histogram(h1);
+				case DIAG:    return deriveDiagHistogram(h1);
+				case TRANS:   return deriveTransHistogram(h1);
+				case RESHAPE: return deriveReshapeHistogram(h1, h1.getRows(), h1.getCols());
+				//FIXME: reshape requires additional meta data from MM node
 				default:
 					throw new NotImplementedException();
 			}
@@ -356,39 +359,44 @@ public class EstimatorMatrixHistogram extends SparsityEstimator
 		}
 		
 		private static MatrixHistogram deriveMultHistogram(MatrixHistogram h1, MatrixHistogram h2) {
-			final double N1 = h1.getNonZeros();
-			final double N2 = h2.getNonZeros();
 			final double scaler = IntStream.range(0, h1.getCols())
-				.mapToDouble(j -> (long)h1.cNnz[j] * h2.cNnz[j]).sum();
+				.mapToDouble(j -> (double)h1.cNnz[j] * h2.cNnz[j])
+				.sum() / h1.getNonZeros() / h2.getNonZeros();
 			final double scalec = IntStream.range(0, h1.getRows())
-				.mapToDouble(j -> (long)h1.rNnz[j] * h2.rNnz[j]).sum();
+				.mapToDouble(j -> (double)h1.rNnz[j] * h2.rNnz[j])
+				.sum() / h1.getNonZeros() / h2.getNonZeros();
 			int rMaxNnz = 0, cMaxNnz = 0;
 			Random rn = new Random();
 			int[] rNnz = new int[h1.getRows()];
 			for(int i=0; i<h1.getRows(); i++) {
-				rNnz[i] = probRound(h1.rNnz[i] * h2.rNnz[i] * scaler / N1 / N2, rn);
+				rNnz[i] = probRound((double)h1.rNnz[i] * h2.rNnz[i] * scaler, rn);
 				rMaxNnz = Math.max(rMaxNnz, rNnz[i]);
 			}
 			int[] cNnz = new int[h1.getCols()];
 			for(int i=0; i<h1.getCols(); i++) {
-				cNnz[i] = probRound(h1.cNnz[i] * h2.cNnz[i] * scalec / N1 / N2, rn);
+				cNnz[i] = probRound((double)h1.cNnz[i] * h2.cNnz[i] * scalec, rn);
 				cMaxNnz = Math.max(cMaxNnz, cNnz[i]);
 			}
 			return new MatrixHistogram(rNnz, null, cNnz, null, rMaxNnz, cMaxNnz);
 		}
 		
 		private static MatrixHistogram derivePlusHistogram(MatrixHistogram h1, MatrixHistogram h2) {
-			double msize = (double)h1.getRows()*h1.getCols();
+			final double scaler = IntStream.range(0, h1.getCols())
+				.mapToDouble(j -> (double)h1.cNnz[j] * h2.cNnz[j])
+				.sum() / h1.getNonZeros() / h2.getNonZeros();
+			final double scalec = IntStream.range(0, h1.getRows())
+				.mapToDouble(j -> (double)h1.rNnz[j] * h2.rNnz[j])
+				.sum() / h1.getNonZeros() / h2.getNonZeros();
 			int rMaxNnz = 0, cMaxNnz = 0;
 			Random rn = new Random();
 			int[] rNnz = new int[h1.getRows()];
 			for(int i=0; i<h1.getRows(); i++) {
-				rNnz[i] = probRound(h1.rNnz[i]/msize + h2.rNnz[i]/msize - h1.rNnz[i]/msize * h2.rNnz[i]/msize, rn);
+				rNnz[i] = probRound(h1.rNnz[i] + h2.rNnz[i] - (double)h1.rNnz[i] * h2.rNnz[i] * scaler, rn);
 				rMaxNnz = Math.max(rMaxNnz, rNnz[i]);
 			}
 			int[] cNnz = new int[h1.getCols()];
 			for(int i=0; i<h1.getCols(); i++) {
-				cNnz[i] = probRound(h1.cNnz[i]/msize + h2.cNnz[i]/msize - h1.cNnz[i]/msize * h2.cNnz[i]/msize, rn);
+				cNnz[i] = probRound(h1.cNnz[i] + h2.cNnz[i] - (double)h1.cNnz[i] * h2.cNnz[i] * scalec, rn);
 				cMaxNnz = Math.max(cMaxNnz, cNnz[i]);
 			}
 			return new MatrixHistogram(rNnz, null, cNnz, null, rMaxNnz, cMaxNnz);
@@ -418,6 +426,90 @@ public class EstimatorMatrixHistogram extends SparsityEstimator
 			return new MatrixHistogram(rNnz, null, cNnz, null, rMaxNnz, cMaxNnz);
 		}
 		
+		private static MatrixHistogram deriveEq0Histogram(MatrixHistogram h1) {
+			final int m = h1.getRows(), n = h1.getCols();
+			int[] rNnz = new int[m], cNnz = new int[n];
+			int rMaxNnz = 0, cMaxNnz = 0;
+			for(int i=0; i<m; i++) {
+				rNnz[i] = n - h1.rNnz[i];
+				rMaxNnz = Math.max(rMaxNnz, rNnz[i]);
+			}
+			for(int j=0; j<n; j++) {
+				cNnz[j] = m - h1.cNnz[j];
+				cMaxNnz = Math.max(cMaxNnz, cNnz[j]);
+			}
+			return new MatrixHistogram(rNnz, null, cNnz, null, rMaxNnz, cMaxNnz);
+		}
+		
+		private static MatrixHistogram deriveDiagHistogram(MatrixHistogram h1) {
+			if( h1.getCols() == 1 ) { //vector-matrix
+				//shallow copy as row count vector is preserved for rows/cols
+				return new MatrixHistogram(h1.rNnz, null,
+					h1.rNnz, null, h1.rMaxNnz, h1.rMaxNnz);
+			}
+			else { //matrix-vector
+				final int m = h1.getRows(), n = h1.getCols();
+				int[] rNnz = new int[m], cNnz = new int[1];
+				int rMaxNnz = 0; Random rand = new Random();
+				for(int i=0; i<m; i++) {
+					rNnz[i] = probRound((double)h1.getNonZeros()/n, rand);
+					rMaxNnz = Math.max(rMaxNnz, rNnz[i]);
+					cNnz[0] += rNnz[i];
+				}
+				return new MatrixHistogram(rNnz, null, cNnz, null, rMaxNnz, cNnz[0]);
+			}
+		}
+		
+		private static MatrixHistogram deriveTransHistogram(MatrixHistogram h1) {
+			return new MatrixHistogram(h1.cNnz, h1.cNnz1e, h1.rNnz, h1.rNnz1e, h1.cMaxNnz, h1.rMaxNnz);
+		}
+		
+		private static MatrixHistogram deriveReshapeHistogram(MatrixHistogram h1, int rows, int cols) {
+			if( h1.getRows() == rows )
+				return h1;
+			else if( h1.getCols() % cols != 0 
+				&& h1.getRows() % rows != 0 )
+				return null;
+			
+			//limitation: only applies to scenarios where each input row
+			//maps to N output rows, or N input rows map to 1 output row.
+			//TODO generalize implementation for partial fractions
+			final int m = h1.getRows(), n = h1.getCols();
+			int[] rNnz = new int[rows], cNnz = new int[cols];
+			int rMaxNnz = 0, cMaxNnz = 0;
+			if( h1.getCols() % cols == 0 ) { //1->N rows
+				//scale and replicate row counts
+				int scale = h1.getCols()/cols;
+				for(int i=0, pos=0; i<m; i++, pos+=scale) {
+					for(int j=0; j<scale; j++)
+						rNnz[pos+j] = h1.rNnz[i]/scale;
+					rMaxNnz = Math.max(rMaxNnz, h1.rNnz[i]/scale);
+				}
+				//aggregate column counts
+				for(int j=0; j<n; j+=scale)
+					for(int j2=0; j2<scale; j2++)
+						cNnz[j2] += h1.cNnz[j];
+				for(int j2=0; j2<scale; j2++)
+					cMaxNnz = Math.max(cMaxNnz, cNnz[j2]);
+			}
+			else if ( h1.getRows() % rows == 0 ) { //N->1 rows
+				int scale = h1.getRows()/rows;
+				//scale and replicate column counts
+				for(int i=0, pos=0; i<n; i++, pos+=scale) {
+					for(int j=0; j<scale; j++)
+						cNnz[pos+j] = h1.cNnz[i]/scale;
+					cMaxNnz = Math.max(cMaxNnz, h1.cNnz[i]/scale);
+				}
+				//aggregate row counts
+				for(int j=0; j<m; j+=scale)
+					for(int j2=0; j2<scale; j2++)
+						rNnz[j2] += h1.rNnz[j];
+				for(int j2=0; j2<scale; j2++)
+					rMaxNnz = Math.max(rMaxNnz, rNnz[j2]);
+			}
+			return new MatrixHistogram(rNnz, null, cNnz, null, rMaxNnz, cMaxNnz);
+		}
+		
 		private static int probRound(double inNnz, Random rand) {
 			double temp = Math.floor(inNnz);
 			double f = inNnz - temp; //non-int fraction [0,1)