You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2018/05/03 01:20:37 UTC

[2/2] systemml git commit: [SYSTEMML-2296] Improved matrix histograms (histograms of intermediate)

[SYSTEMML-2296] Improved matrix histograms (histograms of intermediate)

This patch adds the functionality to compute estimated matrix histograms
for intermediates of matrix multiplication chains based on the
histograms of the inputs. This is important for sparsity estimation of
intermediates for plan alternatives in advanced optimizers.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/e7d948f9
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/e7d948f9
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/e7d948f9

Branch: refs/heads/master
Commit: e7d948f9c41a107f04a3f9fe565d8c7528573613
Parents: d17a2e2
Author: Matthias Boehm <mb...@gmail.com>
Authored: Wed May 2 18:17:10 2018 -0700
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Wed May 2 18:17:10 2018 -0700

----------------------------------------------------------------------
 .../hops/estim/EstimatorMatrixHistogram.java    | 70 ++++++++++++++++++--
 1 file changed, 64 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/e7d948f9/src/main/java/org/apache/sysml/hops/estim/EstimatorMatrixHistogram.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/estim/EstimatorMatrixHistogram.java b/src/main/java/org/apache/sysml/hops/estim/EstimatorMatrixHistogram.java
index 6fbb5f0..18c4a22 100644
--- a/src/main/java/org/apache/sysml/hops/estim/EstimatorMatrixHistogram.java
+++ b/src/main/java/org/apache/sysml/hops/estim/EstimatorMatrixHistogram.java
@@ -20,7 +20,6 @@
 package org.apache.sysml.hops.estim;
 
 import org.apache.sysml.hops.OptimizerUtils;
-import org.apache.sysml.runtime.DMLRuntimeException;
 import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
 import org.apache.sysml.runtime.matrix.data.DenseBlock;
 import org.apache.sysml.runtime.matrix.data.LibMatrixAgg;
@@ -37,8 +36,23 @@ public class EstimatorMatrixHistogram extends SparsityEstimator
 {
 	@Override
 	public double estim(MMNode root) {
-		throw new DMLRuntimeException("Estimation of "
-			+ "intermediate matrix histograms not supported yet.");
+		//recursive histogram computation of non-leaf nodes
+		if( !root.getLeft().isLeaf() )
+			estim(root.getLeft()); //obtain synopsis
+		if( !root.getRight().isLeaf() )
+			estim(root.getLeft()); //obtain synopsis
+		MatrixHistogram h1 = !root.getLeft().isLeaf() ?
+			(MatrixHistogram)root.getLeft().getSynopsis() : new MatrixHistogram(root.getLeft().getData());
+		MatrixHistogram h2 = !root.getRight().isLeaf() ?
+			(MatrixHistogram)root.getRight().getSynopsis() : new MatrixHistogram(root.getRight().getData());
+		
+		//estimate output sparsity based on input histograms
+		double ret = estimIntern(h1, h2);
+		
+		//derive and memoize output histogram
+		root.setSynopsis(MatrixHistogram.deriveOutputHistogram(h1, h2, ret));
+		
+		return ret;
 	}
 
 	@Override
@@ -83,6 +97,7 @@ public class EstimatorMatrixHistogram extends SparsityEstimator
 		private final int[] rNnz;
 		private final int[] cNnz;
 		private int rMaxNnz = 0;
+		@SuppressWarnings("unused")
 		private int cMaxNnz = 0;
 		
 		public MatrixHistogram(MatrixBlock in) {
@@ -117,9 +132,14 @@ public class EstimatorMatrixHistogram extends SparsityEstimator
 					rMaxNnz = Math.max(rMaxNnz, lnnz);
 				}
 			}
-			
-			for(int j=0; j<in.getNumColumns(); j++)
-				cMaxNnz = Math.max(cMaxNnz, cNnz[j]);
+			cMaxNnz = max(cNnz, 0, in.getNumColumns());
+		}
+		
+		public MatrixHistogram(int[] r, int[] c, int rmax, int cmax) {
+			rNnz = r;
+			cNnz = c;
+			rMaxNnz = rmax;
+			cMaxNnz = cmax;
 		}
 		
 		public int getRows() {
@@ -129,5 +149,43 @@ public class EstimatorMatrixHistogram extends SparsityEstimator
 		public int getCols() {
 			return cNnz.length;
 		}
+		
+		public static MatrixHistogram deriveOutputHistogram(MatrixHistogram h1, MatrixHistogram h2, double spOut) {
+			//get input/output nnz for scaling
+			long nnz1 = sum(h1.rNnz, 0, h1.getRows());
+			long nnz2 = sum(h2.cNnz, 0, h2.getCols());
+			double nnzOut = spOut * h1.getRows() * h2.getCols();
+			
+			//propagate h1.r and h2.c to output via simple scaling
+			//(this implies 0s propagate and distribution is preserved)
+			int rMaxNnz = 0, cMaxNnz = 0;
+			int[] rNnz = new int[h1.getRows()];
+			for( int i=0; i<h1.getRows(); i++ ) {
+				rNnz[i] = (int) Math.round(nnzOut/nnz1 * h1.rNnz[i]);
+				rMaxNnz = Math.max(rMaxNnz, rNnz[i]);
+			}
+			int[] cNnz = new int[h2.getCols()];
+			for( int i=0; i<h2.getCols(); i++ ) {
+				cNnz[i] = (int) Math.round(nnzOut/nnz2 * h2.cNnz[i]);
+				cMaxNnz = Math.max(cMaxNnz, cNnz[i]);
+			}
+			
+			//construct new histogram object
+			return new MatrixHistogram(rNnz, cNnz, rMaxNnz, cMaxNnz);
+		}
+		
+		private static int max(int[] a, int ai, int alen) {
+			int ret = Integer.MIN_VALUE;
+			for(int i=ai; i<ai+alen; i++)
+				ret = Math.max(ret, a[i]);
+			return ret;
+		}
+		
+		private static long sum(int[] a, int ai, int alen) {
+			int ret = 0;
+			for(int i=ai; i<ai+alen; i++)
+				ret += a[i];
+			return ret;
+		}
 	}
 }