You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2018/04/29 21:47:45 UTC

[2/2] systemml git commit: [SYSTEMML-2288] New sparsity estimator based on density maps

[SYSTEMML-2288] New sparsity estimator based on density maps

This patch adds a sparsity estimator based on density maps with
configurable block size for better sparsity estimates in the presence of
sparsity skew. In addition this fixes and extends the related tests and
makes necessary extensions to the dense block abstraction.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/b1c8aa5d
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/b1c8aa5d
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/b1c8aa5d

Branch: refs/heads/master
Commit: b1c8aa5de0f2923ea3d8c75846e442f2401bf945
Parents: db35d99
Author: Matthias Boehm <mb...@gmail.com>
Authored: Sun Apr 29 14:44:07 2018 -0700
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Sun Apr 29 14:47:24 2018 -0700

----------------------------------------------------------------------
 .../sysml/hops/estim/EstimatorBasicAvg.java     |   1 +
 .../sysml/hops/estim/EstimatorBasicWorst.java   |   1 +
 .../sysml/hops/estim/EstimatorDensityMap.java   | 177 +++++++++++++++++++
 .../org/apache/sysml/hops/estim/MMNode.java     |  21 ++-
 .../sysml/hops/estim/SparsityEstimator.java     |   4 +
 .../sysml/runtime/matrix/data/DenseBlock.java   |  18 ++
 .../runtime/matrix/data/DenseBlockDRB.java      |  10 ++
 .../runtime/matrix/data/DenseBlockLDRB.java     |  10 ++
 .../functions/estim/OuterProductTest.java       |  29 ++-
 9 files changed, 263 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/b1c8aa5d/src/main/java/org/apache/sysml/hops/estim/EstimatorBasicAvg.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/estim/EstimatorBasicAvg.java b/src/main/java/org/apache/sysml/hops/estim/EstimatorBasicAvg.java
index a621295..efd4195 100644
--- a/src/main/java/org/apache/sysml/hops/estim/EstimatorBasicAvg.java
+++ b/src/main/java/org/apache/sysml/hops/estim/EstimatorBasicAvg.java
@@ -31,6 +31,7 @@ public class EstimatorBasicAvg extends SparsityEstimator
 {
 	@Override
 	public double estim(MMNode root) {
+		//recursive sparsity evaluation of non-leaf nodes
 		double sp1 = !root.getLeft().isLeaf() ? estim(root.getLeft()) :
 			OptimizerUtils.getSparsity(root.getLeft().getMatrixCharacteristics());
 		double sp2 = !root.getRight().isLeaf() ? estim(root.getRight()) :

http://git-wip-us.apache.org/repos/asf/systemml/blob/b1c8aa5d/src/main/java/org/apache/sysml/hops/estim/EstimatorBasicWorst.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/estim/EstimatorBasicWorst.java b/src/main/java/org/apache/sysml/hops/estim/EstimatorBasicWorst.java
index 5803ebe..2ca32a7 100644
--- a/src/main/java/org/apache/sysml/hops/estim/EstimatorBasicWorst.java
+++ b/src/main/java/org/apache/sysml/hops/estim/EstimatorBasicWorst.java
@@ -35,6 +35,7 @@ public class EstimatorBasicWorst extends SparsityEstimator
 {
 	@Override
 	public double estim(MMNode root) {
+		//recursive sparsity evaluation of non-leaf nodes
 		double sp1 = !root.getLeft().isLeaf() ? estim(root.getLeft()) :
 			OptimizerUtils.getSparsity(root.getLeft().getMatrixCharacteristics());
 		double sp2 = !root.getRight().isLeaf() ? estim(root.getRight()) :

http://git-wip-us.apache.org/repos/asf/systemml/blob/b1c8aa5d/src/main/java/org/apache/sysml/hops/estim/EstimatorDensityMap.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/estim/EstimatorDensityMap.java b/src/main/java/org/apache/sysml/hops/estim/EstimatorDensityMap.java
new file mode 100644
index 0000000..5e8d880
--- /dev/null
+++ b/src/main/java/org/apache/sysml/hops/estim/EstimatorDensityMap.java
@@ -0,0 +1,177 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.hops.estim;
+
+import org.apache.sysml.hops.OptimizerUtils;
+import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
+import org.apache.sysml.runtime.matrix.data.DenseBlock;
+import org.apache.sysml.runtime.matrix.data.MatrixBlock;
+import org.apache.sysml.runtime.matrix.data.SparseBlock;
+import org.apache.sysml.runtime.util.UtilFunctions;
+
+/**
+ * This estimator implements an approach called density maps, as introduced in
+ * David Kernert, Frank Köhler, Wolfgang Lehner: SpMacho - Optimizing Sparse 
+ * Linear Algebra Expressions with Probabilistic Density Estimation. EDBT 2015: 289-300.
+ * 
+ * The basic idea is to maintain a density matrix per input, i.e., non-zero counts per 
+ * bxb cells of the matrix, and compute the output density map based on a matrix-multiply-like
+ * aggregation of average estimates per cell. The output sparsity is then derived as an
+ * aggregate of the output density map, but this density map can also be used for 
+ * intermediates in chains of matrix multiplications.
+ */
+public class EstimatorDensityMap extends SparsityEstimator
+{
+	private static final int BLOCK_SIZE = 256;
+	
+	private final int _b;
+	
+	public EstimatorDensityMap() {
+		this(BLOCK_SIZE);
+	}
+	
+	public EstimatorDensityMap(int blocksize) {
+		_b = blocksize;
+	}
+	
+	@Override
+	public double estim(MMNode root) {
+		//recursive density map computation of non-leaf nodes
+		if( !root.getLeft().isLeaf() )
+			estim(root.getLeft()); //obtain synopsis
+		if( !root.getRight().isLeaf() )
+			estim(root.getLeft()); //obtain synopsis
+		MatrixBlock m1Map = !root.getLeft().isLeaf() ?
+			(MatrixBlock)root.getLeft().getSynopsis() : computeDensityMap(root.getLeft().getData());
+		MatrixBlock m2Map = !root.getRight().isLeaf() ?
+			(MatrixBlock)root.getLeft().getSynopsis() : computeDensityMap(root.getLeft().getData());
+		
+		//estimate output density map and sparsity
+		MatrixBlock outMap = estimIntern(m1Map, m2Map,
+			true, root.getRows(), root.getCols());
+		root.setSynopsis(outMap); //memoize density map
+		return OptimizerUtils.getSparsity( //aggregate output histogram
+			root.getRows(), root.getCols(), (long)outMap.sum());
+	}
+
+	@Override
+	public double estim(MatrixBlock m1, MatrixBlock m2) {
+		MatrixBlock m1Map = computeDensityMap(m1);
+		MatrixBlock m2Map = computeDensityMap(m2);
+		MatrixBlock outMap = estimIntern(m1Map, m2Map,
+			true, m1.getNumRows(), m2.getNumColumns());
+		return OptimizerUtils.getSparsity( //aggregate output histogram
+			m1.getNumRows(), m2.getNumColumns(), (long)outMap.sum());
+	}
+
+	@Override
+	public double estim(MatrixCharacteristics mc1, MatrixCharacteristics mc2) {
+		LOG.warn("Meta-data-only estimates not supported in EstimatorDensityMap, falling back to EstimatorBasicAvg.");
+		return new EstimatorBasicAvg().estim(mc1, mc2);
+	}
+
+	private MatrixBlock computeDensityMap(MatrixBlock in) {
+		int rlen = (int)Math.ceil((double)in.getNumRows()/_b);
+		int clen = (int)Math.ceil((double)in.getNumColumns()/_b);
+		MatrixBlock out = new MatrixBlock(rlen, clen, false);
+		if( in.isEmptyBlock(false) )
+			return out;
+		
+		//compute nnz histogram
+		DenseBlock c = out.allocateBlock().getDenseBlock();
+		if( in.isInSparseFormat() ) {
+			SparseBlock sblock = in.getSparseBlock();
+			for(int i=0; i<in.getNumRows(); i++) {
+				if( sblock.isEmpty(i) ) continue;
+				int alen = sblock.size(i);
+				int apos = sblock.pos(i);
+				int[] aix = sblock.indexes(i);
+				for(int k=apos; k<apos+alen; k++)
+					c.incr(i/_b, aix[k]/_b);
+			}
+		}
+		else {
+			for(int i=0; i<in.getNumRows(); i++) {
+				for(int j=0; j<in.getNumColumns(); j++) {
+					double aval = in.quickGetValue(i, j);
+					if( aval != 0 )
+						c.incr(i/_b, j/_b);
+				}
+			}
+		}
+		
+		//scale histogram by block size, w/ awareness of boundary blocks
+		for(int i=0; i<rlen; i++){
+			int lrlen = UtilFunctions.computeBlockSize(in.getNumRows(), i+1, _b);
+			for(int j=0; j<clen; j++) {
+				int lclen = UtilFunctions.computeBlockSize(in.getNumColumns(), j+1, _b);
+				c.set(i, j, c.get(i, j)/lrlen/lclen);
+			}
+		}
+		out.recomputeNonZeros();
+		return out;
+	}
+	
+	/**
+	 * Computes the output density map given the density maps of the input operands.
+	 * 
+	 * @param m1Map density map left-hand-side operand
+	 * @param m2Map density map right-hand-side operand
+	 * @param retNnz return number of non-zeros instead of sparsity per cell
+	 * @param rlen number of rows of output matrix, required for returning nnz
+	 * @param clen number of columns of output matrix, required for returning nnz
+	 * @return density map
+	 */
+	private MatrixBlock estimIntern(MatrixBlock m1Map, MatrixBlock m2Map, boolean retNnz, int rlen, int clen) {
+		final int m = m1Map.getNumRows();
+		final int cd = m1Map.getNumColumns();
+		final int n = m2Map.getNumColumns();
+		MatrixBlock out = new MatrixBlock(m, n, false);
+		if( m1Map.isEmptyBlock(false) || m2Map.isEmptyBlock(false) )
+			return out;
+		
+		//compute output density map with IKJ schedule
+		DenseBlock c = out.allocateBlock().getDenseBlock();
+		for(int i=0; i<m; i++) {
+			for(int k=0; k<cd; k++) {
+				int lbk = UtilFunctions.computeBlockSize(cd, k+1, _b);
+				double sp1 = m1Map.quickGetValue(i, k);
+				for(int j=0; j<n; j++) {
+					double sp2 = m2Map.quickGetValue(k, j);
+					//custom multiply for scalar sparsity
+					double tmp1 = 1 - Math.pow(1-sp1*sp2, lbk);
+					//custom add for scalar sparsity
+					double tmp2 = c.get(i, j);
+					c.set(i, j, tmp1+tmp2 - tmp1*tmp2);
+				}
+			}
+			//scale to non-zeros instead of sparsity if needed
+			if( retNnz ) {
+				int lbm = UtilFunctions.computeBlockSize(rlen, i+1, _b);
+				for( int j=0; j<n; j++ ) {
+					int lbn = UtilFunctions.computeBlockSize(clen, j+1, _b);
+					c.set(i, j, c.get(i, j) * lbm * lbn);
+				}
+			}
+		}
+		out.recomputeNonZeros();
+		return out;
+	}
+}

http://git-wip-us.apache.org/repos/asf/systemml/blob/b1c8aa5d/src/main/java/org/apache/sysml/hops/estim/MMNode.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/estim/MMNode.java b/src/main/java/org/apache/sysml/hops/estim/MMNode.java
index d3ce392..2209943 100644
--- a/src/main/java/org/apache/sysml/hops/estim/MMNode.java
+++ b/src/main/java/org/apache/sysml/hops/estim/MMNode.java
@@ -32,6 +32,7 @@ public class MMNode
 	private final MMNode _m2;
 	private final MatrixBlock _data;
 	private final MatrixCharacteristics _mc;
+	private Object _synops = null;
 	
 	public MMNode(MMNode left, MMNode right) {
 		_m1 = left;
@@ -42,12 +43,12 @@ public class MMNode
 			_data.getNumColumns(), -1, -1);
 	}
 	
-	public long getRows() {
-		return _mc.getRows();
+	public int getRows() {
+		return (int)_mc.getRows();
 	}
 	
-	public long getCols() {
-		return _mc.getCols();
+	public int getCols() {
+		return (int)_mc.getCols();
 	}
 	
 	public MatrixCharacteristics getMatrixCharacteristics() {
@@ -65,4 +66,16 @@ public class MMNode
 	public boolean isLeaf() {
 		return _data != null;
 	}
+	
+	public MatrixBlock getData() {
+		return _data;
+	}
+	
+	public void setSynopsis(Object obj) {
+		_synops = obj;
+	}
+	
+	public Object getSynopsis() {
+		return _synops;
+	}
 }

http://git-wip-us.apache.org/repos/asf/systemml/blob/b1c8aa5d/src/main/java/org/apache/sysml/hops/estim/SparsityEstimator.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/estim/SparsityEstimator.java b/src/main/java/org/apache/sysml/hops/estim/SparsityEstimator.java
index fa2769e..ddfed0e 100644
--- a/src/main/java/org/apache/sysml/hops/estim/SparsityEstimator.java
+++ b/src/main/java/org/apache/sysml/hops/estim/SparsityEstimator.java
@@ -19,11 +19,15 @@
 
 package org.apache.sysml.hops.estim;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
 import org.apache.sysml.runtime.matrix.data.MatrixBlock;
 
 public abstract class SparsityEstimator 
 {
+	protected static final Log LOG = LogFactory.getLog(SparsityEstimator.class.getName());
+	
 	/**
 	 * Estimates the output sparsity of a DAG of matrix multiplications
 	 * for the given operator graph of a single root node.

http://git-wip-us.apache.org/repos/asf/systemml/blob/b1c8aa5d/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlock.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlock.java b/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlock.java
index 2489f16..4c16594 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlock.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlock.java
@@ -222,6 +222,24 @@ public abstract class DenseBlock implements Serializable
 	public abstract int pos(int r, int c);
 	
 	/**
+	 * Increments the given value for a given row and column.
+	 * 
+	 * @param r row index
+	 * @param c column index
+	 */
+	public abstract void incr(int r, int c);
+	
+	/**
+	 * Increments the given value for a given row and column
+	 * by delta.
+	 * 
+	 * @param r row index
+	 * @param c column index
+	 * @param delta increment value
+	 */
+	public abstract void incr(int r, int c, double delta);
+	
+	/**
 	 * Set the given value for the entire dense block (fill).
 	 * 
 	 * @param v value

http://git-wip-us.apache.org/repos/asf/systemml/blob/b1c8aa5d/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlockDRB.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlockDRB.java b/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlockDRB.java
index 979f233..90fbcfe 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlockDRB.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlockDRB.java
@@ -171,6 +171,16 @@ public class DenseBlockDRB extends DenseBlock
 	}
 
 	@Override
+	public void incr(int r, int c) {
+		data[pos(r, c)] ++;
+	}
+	
+	@Override
+	public void incr(int r, int c, double delta) {
+		data[pos(r, c)] += delta;
+	}
+	
+	@Override
 	public DenseBlock set(double v) {
 		Arrays.fill(data, 0, rlen*clen, v);
 		return this;

http://git-wip-us.apache.org/repos/asf/systemml/blob/b1c8aa5d/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlockLDRB.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlockLDRB.java b/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlockLDRB.java
index 4662c69..245ecb4 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlockLDRB.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlockLDRB.java
@@ -198,6 +198,16 @@ public class DenseBlockLDRB extends DenseBlock
 	public int pos(int r, int c) {
 		return (r % blen) * clen + c;
 	}
+	
+	@Override
+	public void incr(int r, int c) {
+		data[index(r)][pos(r, c)] ++;
+	}
+	
+	@Override
+	public void incr(int r, int c, double delta) {
+		data[index(r)][pos(r, c)] += delta;
+	}
 
 	@Override
 	public DenseBlock set(double v) {

http://git-wip-us.apache.org/repos/asf/systemml/blob/b1c8aa5d/src/test/java/org/apache/sysml/test/integration/functions/estim/OuterProductTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/estim/OuterProductTest.java b/src/test/java/org/apache/sysml/test/integration/functions/estim/OuterProductTest.java
index 47a30f3..0392c7b 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/estim/OuterProductTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/estim/OuterProductTest.java
@@ -22,6 +22,7 @@ package org.apache.sysml.test.integration.functions.estim;
 import org.junit.Test;
 import org.apache.sysml.hops.estim.EstimatorBasicAvg;
 import org.apache.sysml.hops.estim.EstimatorBasicWorst;
+import org.apache.sysml.hops.estim.EstimatorDensityMap;
 import org.apache.sysml.hops.estim.SparsityEstimator;
 import org.apache.sysml.runtime.instructions.InstructionUtils;
 import org.apache.sysml.runtime.matrix.data.MatrixBlock;
@@ -47,22 +48,42 @@ public class OuterProductTest extends AutomatedTestBase
 	
 	@Test
 	public void testBasicAvgCase1() {
-		runSparsityEstimateTest(new EstimatorBasicAvg(), m, n, k, case1);
+		runSparsityEstimateTest(new EstimatorBasicAvg(), m, k, n, case1);
 	}
 	
 	@Test
 	public void testBasicAvgCase2() {
-		runSparsityEstimateTest(new EstimatorBasicAvg(), m, n, k, case2);
+		runSparsityEstimateTest(new EstimatorBasicAvg(), m, k, n, case2);
 	}
 	
 	@Test
 	public void testBasicWorstCase1() {
-		runSparsityEstimateTest(new EstimatorBasicWorst(), m, n, k, case1);
+		runSparsityEstimateTest(new EstimatorBasicWorst(), m, k, n, case1);
 	}
 	
 	@Test
 	public void testBasicWorstCase2() {
-		runSparsityEstimateTest(new EstimatorBasicWorst(), m, n, k, case2);
+		runSparsityEstimateTest(new EstimatorBasicWorst(), m, k, n, case2);
+	}
+	
+	@Test
+	public void testDensityMapCase1() {
+		runSparsityEstimateTest(new EstimatorDensityMap(), m, k, n, case1);
+	}
+	
+	@Test
+	public void testDensityMapCase2() {
+		runSparsityEstimateTest(new EstimatorDensityMap(), m, k, n, case2);
+	}
+	
+	@Test
+	public void testDensityMap7Case1() {
+		runSparsityEstimateTest(new EstimatorDensityMap(7), m, k, n, case1);
+	}
+	
+	@Test
+	public void testDensityMap7Case2() {
+		runSparsityEstimateTest(new EstimatorDensityMap(7), m, k, n, case2);
 	}
 	
 	private void runSparsityEstimateTest(SparsityEstimator estim, int m, int k, int n, double[] sp) {