You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2018/05/02 03:54:22 UTC

[2/3] systemml git commit: [SYSTEMML-2295] New sparsity estimator based on bitsets (exact est)

[SYSTEMML-2295] New sparsity estimator based on bitsets (exact est)

This patch adds a naive yet not too uncommon sparsity estimator based on
boolean matrix multiplication, which allows to get the exact non-zero
pattern of the output (e.g., for sparse output preallocation). Compared
to the double precision matrix multiply this bitset multiply uses
implicit ANDs and bitwise ORs of entire rhs input and output rows.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/e21f840e
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/e21f840e
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/e21f840e

Branch: refs/heads/master
Commit: e21f840eda9eef8a61f37717ac0b8240e4e27bb4
Parents: b65f8ba
Author: Matthias Boehm <mb...@gmail.com>
Authored: Tue May 1 20:13:02 2018 -0700
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Tue May 1 20:13:02 2018 -0700

----------------------------------------------------------------------
 .../sysml/hops/estim/EstimatorBitsetMM.java     | 149 +++++++++++++++++++
 1 file changed, 149 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/e21f840e/src/main/java/org/apache/sysml/hops/estim/EstimatorBitsetMM.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/estim/EstimatorBitsetMM.java b/src/main/java/org/apache/sysml/hops/estim/EstimatorBitsetMM.java
new file mode 100644
index 0000000..c90fbfc
--- /dev/null
+++ b/src/main/java/org/apache/sysml/hops/estim/EstimatorBitsetMM.java
@@ -0,0 +1,149 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.hops.estim;
+
+import java.util.BitSet;
+
+import org.apache.sysml.hops.OptimizerUtils;
+import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
+import org.apache.sysml.runtime.matrix.data.DenseBlock;
+import org.apache.sysml.runtime.matrix.data.MatrixBlock;
+import org.apache.sysml.runtime.matrix.data.SparseBlock;
+
+/**
+ * This estimator implements naive but rather common approach of boolean matrix
+ * multiplies which allows to infer the exact non-zero structure and thus is also
+ * useful for sparse result preallocation.
+ * 
+ */
+public class EstimatorBitsetMM extends SparsityEstimator
+{
+	@Override
+	public double estim(MMNode root) {
+		//recursive density map computation of non-leaf nodes
+		if( !root.getLeft().isLeaf() )
+			estim(root.getLeft()); //obtain synopsis
+		if( !root.getRight().isLeaf() )
+			estim(root.getLeft()); //obtain synopsis
+		BitsetMatrix m1Map = !root.getLeft().isLeaf() ?
+			(BitsetMatrix)root.getLeft().getSynopsis() : new BitsetMatrix(root.getLeft().getData());
+		BitsetMatrix m2Map = !root.getRight().isLeaf() ?
+			(BitsetMatrix)root.getLeft().getSynopsis() : new BitsetMatrix(root.getLeft().getData());
+		
+		//estimate output density map and sparsity via boolean matrix mult
+		BitsetMatrix outMap = m1Map.matMult(m2Map);
+		root.setSynopsis(outMap); //memoize boolean matrix
+		return OptimizerUtils.getSparsity(
+			outMap.getNumRows(), outMap.getNumColumns(), outMap.getNonZeros());
+	}
+
+	@Override
+	public double estim(MatrixBlock m1, MatrixBlock m2) {
+		BitsetMatrix m1Map = new BitsetMatrix(m1);
+		BitsetMatrix m2Map = new BitsetMatrix(m2);
+		BitsetMatrix outMap = m1Map.matMult(m2Map);
+		return OptimizerUtils.getSparsity( //aggregate output histogram
+			outMap.getNumRows(), outMap.getNumColumns(), outMap.getNonZeros());
+	}
+
+	@Override
+	public double estim(MatrixCharacteristics mc1, MatrixCharacteristics mc2) {
+		LOG.warn("Meta-data-only estimates not supported in EstimatorBitsetMM, falling back to EstimatorBasicAvg.");
+		return new EstimatorBasicAvg().estim(mc1, mc2);
+	}
+	
+	private static class BitsetMatrix {
+		private final int _rlen;
+		private final int _clen;
+		private long _nonZeros;
+		private BitSet[] _data;
+		
+		public BitsetMatrix(int rlen, int clen) {
+			_rlen = rlen;
+			_clen = clen;
+			_data = new BitSet[_rlen];
+			for(int i=0; i<_rlen; i++)
+				_data[i] = new BitSet(_clen);
+			_nonZeros = 0;
+		}
+		
+		public BitsetMatrix(MatrixBlock in) {
+			this(in.getNumRows(), in.getNumColumns());
+			init(in);
+		}
+		
+		public int getNumRows() {
+			return _rlen;
+		}
+		
+		public int getNumColumns() {
+			return _clen;
+		}
+		
+		public long getNonZeros() {
+			return _nonZeros;
+		}
+		
+		private void init(MatrixBlock in) {
+			if( in.isInSparseFormat() ) {
+				SparseBlock sblock = in.getSparseBlock();
+				for(int i=0; i<in.getNumRows(); i++) {
+					if(sblock.isEmpty(i)) continue;
+					BitSet lbs = _data[i];
+					int alen = sblock.size(i);
+					int apos = sblock.pos(i);
+					int[] aix = sblock.indexes(i);
+					for(int k=apos; k<apos+alen; k++)
+						lbs.set(aix[k]);
+				}
+			}
+			else {
+				DenseBlock dblock = in.getDenseBlock();
+				for(int i=0; i<in.getNumRows(); i++) {
+					BitSet lbs = _data[i];
+					double[] avals = dblock.values(i);
+					int aix = dblock.pos(i);
+					for(int j=0; j<in.getNumColumns(); j++)
+						if( avals[aix+j] != 0 )
+							lbs.set(j);
+				}
+			}
+			_nonZeros = in.getNonZeros();
+		}
+		
+		public BitsetMatrix matMult(BitsetMatrix m2) {
+			final int m = this._rlen;
+			final int cd = this._clen;
+			final int n = m2._clen;
+			//matrix multiply with IKJ schedule and pure OR ops in inner loop
+			BitsetMatrix out = new BitsetMatrix(m, n);
+			for(int i=0; i<m; i++) {
+				BitSet a = this._data[i], c = out._data[i];
+				for(int k=0; k<cd; k++) {
+					if( a.get(k) )
+						c.or(m2._data[k]);
+				}
+				//maintain nnz 
+				out._nonZeros += c.cardinality();
+			}
+			return out;
+		}
+	}
+}