You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2018/05/02 21:34:17 UTC
systemml git commit: [SYSTEMML-2296] New sparsity estimator based on
matrix histograms
Repository: systemml
Updated Branches:
refs/heads/master aa253eb7e -> 78bfb7712
[SYSTEMML-2296] New sparsity estimator based on matrix histograms
This patch introduces a new sparsity estimator based on matrix
histograms, i.e., a synopsis that stores the number of non-zeros per
row/column for a matrix. This extremely simply yet effective synopsis
allows to exploit structural properties of permutation and selection
matrices. For example, if all rows have <=1 non-zeros the exact number
of non-zeros in the output can be computed as the simple dot product of
lhs columns counts times rhs row counts.
Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/78bfb771
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/78bfb771
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/78bfb771
Branch: refs/heads/master
Commit: 78bfb77120e5952db42e5471b43c7d5d5335f455
Parents: aa253eb
Author: Matthias Boehm <mb...@gmail.com>
Authored: Wed May 2 14:35:21 2018 -0700
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Wed May 2 14:35:21 2018 -0700
----------------------------------------------------------------------
.../hops/estim/EstimatorMatrixHistogram.java | 133 +++++++++++++++++++
.../sysml/runtime/matrix/data/LibMatrixAgg.java | 2 +-
.../functions/estim/OuterProductTest.java | 11 ++
.../functions/estim/SquaredProductTest.java | 11 ++
4 files changed, 156 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/systemml/blob/78bfb771/src/main/java/org/apache/sysml/hops/estim/EstimatorMatrixHistogram.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/estim/EstimatorMatrixHistogram.java b/src/main/java/org/apache/sysml/hops/estim/EstimatorMatrixHistogram.java
new file mode 100644
index 0000000..6fbb5f0
--- /dev/null
+++ b/src/main/java/org/apache/sysml/hops/estim/EstimatorMatrixHistogram.java
@@ -0,0 +1,133 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.hops.estim;
+
+import org.apache.sysml.hops.OptimizerUtils;
+import org.apache.sysml.runtime.DMLRuntimeException;
+import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
+import org.apache.sysml.runtime.matrix.data.DenseBlock;
+import org.apache.sysml.runtime.matrix.data.LibMatrixAgg;
+import org.apache.sysml.runtime.matrix.data.MatrixBlock;
+import org.apache.sysml.runtime.matrix.data.SparseBlock;
+
+/**
+ * This estimator implements a remarkably simple yet effective
+ * approach for incorporating structural properties into sparsity
+ * estimation. The key idea is to maintain row and column nnz per
+ * matrix, along with additional meta data.
+ */
+public class EstimatorMatrixHistogram extends SparsityEstimator
+{
+ @Override
+ public double estim(MMNode root) {
+ throw new DMLRuntimeException("Estimation of "
+ + "intermediate matrix histograms not supported yet.");
+ }
+
+ @Override
+ public double estim(MatrixBlock m1, MatrixBlock m2) {
+ MatrixHistogram h1 = new MatrixHistogram(m1);
+ MatrixHistogram h2 = new MatrixHistogram(m2);
+ return estimIntern(h1, h2);
+ }
+
+ @Override
+ public double estim(MatrixCharacteristics mc1, MatrixCharacteristics mc2) {
+ LOG.warn("Meta-data-only estimates not supported in "
+ + "EstimatorMatrixHistogram, falling back to EstimatorBasicAvg.");
+ return new EstimatorBasicAvg().estim(mc1, mc2);
+ }
+
+ private double estimIntern(MatrixHistogram h1, MatrixHistogram h2) {
+ long nnz = 0;
+ //special case, with exact sparsity estimate, where the dot product
+ //dot(h1.cNnz,h2rNnz) gives the exact number of non-zeros in the output
+ if( h1.rMaxNnz <= 1 ) {
+ for( int j=0; j<h1.getCols(); j++ )
+ nnz += h1.cNnz[j] * h2.rNnz[j];
+ }
+ //general case with approximate output
+ else {
+ int mnOut = h1.getRows()*h2.getCols();
+ double spOut = 0;
+ for( int j=0; j<h1.getCols(); j++ ) {
+ double lsp = (double) h1.cNnz[j] * h2.rNnz[j] / mnOut;
+ spOut = spOut + lsp - spOut*lsp;
+ }
+ nnz = (long)(spOut * mnOut);
+ }
+
+ //compute final sparsity
+ return OptimizerUtils.getSparsity(
+ h1.getRows(), h2.getCols(), nnz);
+ }
+
+ private static class MatrixHistogram {
+ private final int[] rNnz;
+ private final int[] cNnz;
+ private int rMaxNnz = 0;
+ private int cMaxNnz = 0;
+
+ public MatrixHistogram(MatrixBlock in) {
+ rNnz = new int[in.getNumRows()];
+ cNnz = new int[in.getNumColumns()];
+ if( in.isEmptyBlock(false) )
+ return;
+
+ if( in.isInSparseFormat() ) {
+ SparseBlock sblock = in.getSparseBlock();
+ for( int i=0; i<in.getNumRows(); i++ ) {
+ if( sblock.isEmpty(i) ) continue;
+ int alen = sblock.size(i);
+ rNnz[i] = alen;
+ rMaxNnz = Math.max(rMaxNnz, alen);
+ LibMatrixAgg.countAgg(sblock.values(i),
+ cNnz, sblock.indexes(i), sblock.pos(i), alen);
+ }
+ }
+ else {
+ DenseBlock dblock = in.getDenseBlock();
+ for( int i=0; i<in.getNumRows(); i++ ) {
+ double[] avals = dblock.values(i);
+ int lnnz = 0, aix = dblock.pos(i);
+ for( int j=0; j<in.getNumColumns(); j++ ) {
+ if( avals[aix+j] != 0 ) {
+ cNnz[j] ++;
+ lnnz ++;
+ }
+ }
+ rNnz[i] = lnnz;
+ rMaxNnz = Math.max(rMaxNnz, lnnz);
+ }
+ }
+
+ for(int j=0; j<in.getNumColumns(); j++)
+ cMaxNnz = Math.max(cMaxNnz, cNnz[j]);
+ }
+
+ public int getRows() {
+ return rNnz.length;
+ }
+
+ public int getCols() {
+ return cNnz.length;
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/systemml/blob/78bfb771/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java
index 5dfddbd..db73ce2 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java
@@ -3098,7 +3098,7 @@ public class LibMatrixAgg
return minindex;
}
- private static void countAgg( double[] a, int[] c, int[] aix, int ai, final int len ) {
+ public static void countAgg( double[] a, int[] c, int[] aix, int ai, final int len ) {
final int bn = len%8;
//compute rest, not aligned to 8-block
for( int i=ai; i<ai+bn; i++ )
http://git-wip-us.apache.org/repos/asf/systemml/blob/78bfb771/src/test/java/org/apache/sysml/test/integration/functions/estim/OuterProductTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/estim/OuterProductTest.java b/src/test/java/org/apache/sysml/test/integration/functions/estim/OuterProductTest.java
index 90f316c..f58a0c4 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/estim/OuterProductTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/estim/OuterProductTest.java
@@ -24,6 +24,7 @@ import org.apache.sysml.hops.estim.EstimatorBasicAvg;
import org.apache.sysml.hops.estim.EstimatorBasicWorst;
import org.apache.sysml.hops.estim.EstimatorBitsetMM;
import org.apache.sysml.hops.estim.EstimatorDensityMap;
+import org.apache.sysml.hops.estim.EstimatorMatrixHistogram;
import org.apache.sysml.hops.estim.SparsityEstimator;
import org.apache.sysml.runtime.instructions.InstructionUtils;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;
@@ -97,6 +98,16 @@ public class OuterProductTest extends AutomatedTestBase
runSparsityEstimateTest(new EstimatorBitsetMM(), m, k, n, case2);
}
+ @Test
+ public void testMatrixHistogramCase1() {
+ runSparsityEstimateTest(new EstimatorMatrixHistogram(), m, k, n, case1);
+ }
+
+ @Test
+ public void testMatrixHistogramCase2() {
+ runSparsityEstimateTest(new EstimatorMatrixHistogram(), m, k, n, case2);
+ }
+
private void runSparsityEstimateTest(SparsityEstimator estim, int m, int k, int n, double[] sp) {
MatrixBlock m1 = MatrixBlock.randOperations(m, k, sp[0], 1, 1, "uniform", 3);
MatrixBlock m2 = MatrixBlock.randOperations(k, n, sp[1], 1, 1, "uniform", 3);
http://git-wip-us.apache.org/repos/asf/systemml/blob/78bfb771/src/test/java/org/apache/sysml/test/integration/functions/estim/SquaredProductTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/estim/SquaredProductTest.java b/src/test/java/org/apache/sysml/test/integration/functions/estim/SquaredProductTest.java
index 99b1b3b..4cbd06c 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/estim/SquaredProductTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/estim/SquaredProductTest.java
@@ -24,6 +24,7 @@ import org.apache.sysml.hops.estim.EstimatorBasicAvg;
import org.apache.sysml.hops.estim.EstimatorBasicWorst;
import org.apache.sysml.hops.estim.EstimatorBitsetMM;
import org.apache.sysml.hops.estim.EstimatorDensityMap;
+import org.apache.sysml.hops.estim.EstimatorMatrixHistogram;
import org.apache.sysml.hops.estim.SparsityEstimator;
import org.apache.sysml.runtime.instructions.InstructionUtils;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;
@@ -102,6 +103,16 @@ public class SquaredProductTest extends AutomatedTestBase
runSparsityEstimateTest(new EstimatorBitsetMM(), m, k, n, case2);
}
+ @Test
+ public void testMatrixHistogramCase1() {
+ runSparsityEstimateTest(new EstimatorMatrixHistogram(), m, k, n, case1);
+ }
+
+ @Test
+ public void testMatrixHistogramCase2() {
+ runSparsityEstimateTest(new EstimatorMatrixHistogram(), m, k, n, case2);
+ }
+
private void runSparsityEstimateTest(SparsityEstimator estim, int m, int k, int n, double[] sp) {
MatrixBlock m1 = MatrixBlock.randOperations(m, k, sp[0], 1, 1, "uniform", 3);
MatrixBlock m2 = MatrixBlock.randOperations(k, n, sp[1], 1, 1, "uniform", 3);