You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2018/05/02 21:34:17 UTC

systemml git commit: [SYSTEMML-2296] New sparsity estimator based on matrix histograms

Repository: systemml
Updated Branches:
  refs/heads/master aa253eb7e -> 78bfb7712


[SYSTEMML-2296] New sparsity estimator based on matrix histograms

This patch introduces a new sparsity estimator based on matrix
histograms, i.e., a synopsis that stores the number of non-zeros per
row/column for a matrix. This extremely simply yet effective synopsis
allows to exploit structural properties of permutation and selection
matrices. For example, if all rows have <=1 non-zeros the exact number
of non-zeros in the output can be computed as the simple dot product of
lhs columns counts times rhs row counts.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/78bfb771
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/78bfb771
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/78bfb771

Branch: refs/heads/master
Commit: 78bfb77120e5952db42e5471b43c7d5d5335f455
Parents: aa253eb
Author: Matthias Boehm <mb...@gmail.com>
Authored: Wed May 2 14:35:21 2018 -0700
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Wed May 2 14:35:21 2018 -0700

----------------------------------------------------------------------
 .../hops/estim/EstimatorMatrixHistogram.java    | 133 +++++++++++++++++++
 .../sysml/runtime/matrix/data/LibMatrixAgg.java |   2 +-
 .../functions/estim/OuterProductTest.java       |  11 ++
 .../functions/estim/SquaredProductTest.java     |  11 ++
 4 files changed, 156 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/78bfb771/src/main/java/org/apache/sysml/hops/estim/EstimatorMatrixHistogram.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/estim/EstimatorMatrixHistogram.java b/src/main/java/org/apache/sysml/hops/estim/EstimatorMatrixHistogram.java
new file mode 100644
index 0000000..6fbb5f0
--- /dev/null
+++ b/src/main/java/org/apache/sysml/hops/estim/EstimatorMatrixHistogram.java
@@ -0,0 +1,133 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.hops.estim;
+
+import org.apache.sysml.hops.OptimizerUtils;
+import org.apache.sysml.runtime.DMLRuntimeException;
+import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
+import org.apache.sysml.runtime.matrix.data.DenseBlock;
+import org.apache.sysml.runtime.matrix.data.LibMatrixAgg;
+import org.apache.sysml.runtime.matrix.data.MatrixBlock;
+import org.apache.sysml.runtime.matrix.data.SparseBlock;
+
+/**
+ * This estimator implements a remarkably simple yet effective
+ * approach for incorporating structural properties into sparsity
+ * estimation. The key idea is to maintain row and column nnz per
+ * matrix, along with additional meta data.
+ */
+public class EstimatorMatrixHistogram extends SparsityEstimator
+{
+	@Override
+	public double estim(MMNode root) {
+		throw new DMLRuntimeException("Estimation of "
+			+ "intermediate matrix histograms not supported yet.");
+	}
+
+	@Override
+	public double estim(MatrixBlock m1, MatrixBlock m2) {
+		MatrixHistogram h1 = new MatrixHistogram(m1);
+		MatrixHistogram h2 = new MatrixHistogram(m2);
+		return estimIntern(h1, h2);
+	}
+
+	@Override
+	public double estim(MatrixCharacteristics mc1, MatrixCharacteristics mc2) {
+		LOG.warn("Meta-data-only estimates not supported in "
+			+ "EstimatorMatrixHistogram, falling back to EstimatorBasicAvg.");
+		return new EstimatorBasicAvg().estim(mc1, mc2);
+	}
+	
+	private double estimIntern(MatrixHistogram h1, MatrixHistogram h2) {
+		long nnz = 0;
+		//special case, with exact sparsity estimate, where the dot product
+		//dot(h1.cNnz,h2rNnz) gives the exact number of non-zeros in the output
+		if( h1.rMaxNnz <= 1 ) {
+			for( int j=0; j<h1.getCols(); j++ )
+				nnz += h1.cNnz[j] * h2.rNnz[j];
+		}
+		//general case with approximate output
+		else {
+			int mnOut = h1.getRows()*h2.getCols();
+			double spOut = 0;
+			for( int j=0; j<h1.getCols(); j++ ) {
+				double lsp = (double) h1.cNnz[j] * h2.rNnz[j] / mnOut;
+				spOut = spOut + lsp - spOut*lsp;
+			}
+			nnz = (long)(spOut * mnOut);
+		}
+		
+		//compute final sparsity
+		return OptimizerUtils.getSparsity(
+			h1.getRows(), h2.getCols(), nnz);
+	}
+	
+	private static class MatrixHistogram {
+		private final int[] rNnz;
+		private final int[] cNnz;
+		private int rMaxNnz = 0;
+		private int cMaxNnz = 0;
+		
+		public MatrixHistogram(MatrixBlock in) {
+			rNnz = new int[in.getNumRows()];
+			cNnz = new int[in.getNumColumns()];
+			if( in.isEmptyBlock(false) )
+				return;
+			
+			if( in.isInSparseFormat() ) {
+				SparseBlock sblock = in.getSparseBlock();
+				for( int i=0; i<in.getNumRows(); i++ ) {
+					if( sblock.isEmpty(i) ) continue;
+					int alen = sblock.size(i);
+					rNnz[i] = alen;
+					rMaxNnz = Math.max(rMaxNnz, alen);
+					LibMatrixAgg.countAgg(sblock.values(i),
+						cNnz, sblock.indexes(i), sblock.pos(i), alen);
+				}
+			}
+			else {
+				DenseBlock dblock = in.getDenseBlock();
+				for( int i=0; i<in.getNumRows(); i++ ) {
+					double[] avals = dblock.values(i);
+					int lnnz = 0, aix = dblock.pos(i);
+					for( int j=0; j<in.getNumColumns(); j++ ) {
+						if( avals[aix+j] != 0 ) {
+							cNnz[j] ++;
+							lnnz ++;
+						}
+					}
+					rNnz[i] = lnnz;
+					rMaxNnz = Math.max(rMaxNnz, lnnz);
+				}
+			}
+			
+			for(int j=0; j<in.getNumColumns(); j++)
+				cMaxNnz = Math.max(cMaxNnz, cNnz[j]);
+		}
+		
+		public int getRows() {
+			return rNnz.length;
+		}
+		
+		public int getCols() {
+			return cNnz.length;
+		}
+	}
+}

http://git-wip-us.apache.org/repos/asf/systemml/blob/78bfb771/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java
index 5dfddbd..db73ce2 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java
@@ -3098,7 +3098,7 @@ public class LibMatrixAgg
 		return minindex;
 	}
 
-	private static void countAgg( double[] a, int[] c, int[] aix, int ai, final int len ) {
+	public static void countAgg( double[] a, int[] c, int[] aix, int ai, final int len ) {
 		final int bn = len%8;
 		//compute rest, not aligned to 8-block
 		for( int i=ai; i<ai+bn; i++ )

http://git-wip-us.apache.org/repos/asf/systemml/blob/78bfb771/src/test/java/org/apache/sysml/test/integration/functions/estim/OuterProductTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/estim/OuterProductTest.java b/src/test/java/org/apache/sysml/test/integration/functions/estim/OuterProductTest.java
index 90f316c..f58a0c4 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/estim/OuterProductTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/estim/OuterProductTest.java
@@ -24,6 +24,7 @@ import org.apache.sysml.hops.estim.EstimatorBasicAvg;
 import org.apache.sysml.hops.estim.EstimatorBasicWorst;
 import org.apache.sysml.hops.estim.EstimatorBitsetMM;
 import org.apache.sysml.hops.estim.EstimatorDensityMap;
+import org.apache.sysml.hops.estim.EstimatorMatrixHistogram;
 import org.apache.sysml.hops.estim.SparsityEstimator;
 import org.apache.sysml.runtime.instructions.InstructionUtils;
 import org.apache.sysml.runtime.matrix.data.MatrixBlock;
@@ -97,6 +98,16 @@ public class OuterProductTest extends AutomatedTestBase
 		runSparsityEstimateTest(new EstimatorBitsetMM(), m, k, n, case2);
 	}
 	
+	@Test
+	public void testMatrixHistogramCase1() {
+		runSparsityEstimateTest(new EstimatorMatrixHistogram(), m, k, n, case1);
+	}
+	
+	@Test
+	public void testMatrixHistogramCase2() {
+		runSparsityEstimateTest(new EstimatorMatrixHistogram(), m, k, n, case2);
+	}
+	
 	private void runSparsityEstimateTest(SparsityEstimator estim, int m, int k, int n, double[] sp) {
 		MatrixBlock m1 = MatrixBlock.randOperations(m, k, sp[0], 1, 1, "uniform", 3);
 		MatrixBlock m2 = MatrixBlock.randOperations(k, n, sp[1], 1, 1, "uniform", 3);

http://git-wip-us.apache.org/repos/asf/systemml/blob/78bfb771/src/test/java/org/apache/sysml/test/integration/functions/estim/SquaredProductTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/estim/SquaredProductTest.java b/src/test/java/org/apache/sysml/test/integration/functions/estim/SquaredProductTest.java
index 99b1b3b..4cbd06c 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/estim/SquaredProductTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/estim/SquaredProductTest.java
@@ -24,6 +24,7 @@ import org.apache.sysml.hops.estim.EstimatorBasicAvg;
 import org.apache.sysml.hops.estim.EstimatorBasicWorst;
 import org.apache.sysml.hops.estim.EstimatorBitsetMM;
 import org.apache.sysml.hops.estim.EstimatorDensityMap;
+import org.apache.sysml.hops.estim.EstimatorMatrixHistogram;
 import org.apache.sysml.hops.estim.SparsityEstimator;
 import org.apache.sysml.runtime.instructions.InstructionUtils;
 import org.apache.sysml.runtime.matrix.data.MatrixBlock;
@@ -102,6 +103,16 @@ public class SquaredProductTest extends AutomatedTestBase
 		runSparsityEstimateTest(new EstimatorBitsetMM(), m, k, n, case2);
 	}
 	
+	@Test
+	public void testMatrixHistogramCase1() {
+		runSparsityEstimateTest(new EstimatorMatrixHistogram(), m, k, n, case1);
+	}
+	
+	@Test
+	public void testMatrixHistogramCase2() {
+		runSparsityEstimateTest(new EstimatorMatrixHistogram(), m, k, n, case2);
+	}
+	
 	private void runSparsityEstimateTest(SparsityEstimator estim, int m, int k, int n, double[] sp) {
 		MatrixBlock m1 = MatrixBlock.randOperations(m, k, sp[0], 1, 1, "uniform", 3);
 		MatrixBlock m2 = MatrixBlock.randOperations(k, n, sp[1], 1, 1, "uniform", 3);