You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2016/01/24 01:09:18 UTC

[1/5] incubator-systemml git commit: [SYSTEMML-380] Fix CSR primitives (init from CSR / resize on row insert)

Repository: incubator-systemml
Updated Branches:
  refs/heads/master 444e2b03a -> 9c4228215


[SYSTEMML-380] Fix CSR primitives (init from CSR / resize on row insert)

Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/205cfd3f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/205cfd3f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/205cfd3f

Branch: refs/heads/master
Commit: 205cfd3f84da991b052c5b8ffa191c9299ec01bf
Parents: 444e2b0
Author: Matthias Boehm <mb...@us.ibm.com>
Authored: Fri Jan 22 16:42:36 2016 -0800
Committer: Matthias Boehm <mb...@us.ibm.com>
Committed: Sat Jan 23 16:07:52 2016 -0800

----------------------------------------------------------------------
 .../sysml/runtime/matrix/data/SparseBlockCSR.java    | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/205cfd3f/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java
index 139cec3..876b0ed 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java
@@ -72,7 +72,7 @@ public class SparseBlockCSR extends SparseBlock
 		//special case SparseBlockCSR
 		if( sblock instanceof SparseBlockCSR ) { 
 			SparseBlockCSR ocsr = (SparseBlockCSR)sblock;
-			_ptr = Arrays.copyOf(ocsr._ptr, ocsr.numRows());
+			_ptr = Arrays.copyOf(ocsr._ptr, ocsr.numRows()+1);
 			_indexes = Arrays.copyOf(ocsr._indexes, ocsr._size);
 			_values = Arrays.copyOf(ocsr._values, ocsr._size);
 			_size = ocsr._size;
@@ -260,11 +260,22 @@ public class SparseBlockCSR extends SparseBlock
 	@Override
 	public void set(int r, SparseRow row) {
 		int pos = pos(r);
+		int len = size(r);		
 		int alen = row.size();
 		int[] aix = row.indexes();
 		double[] avals = row.values();
-		deleteIndexRange(r, aix[0], aix[alen-1]+1);
+		
+		//delete existing values if necessary
+		if( len > 0 )
+			deleteIndexRange(r, aix[0], aix[alen-1]+1);
+		
+		//prepare free space (allocate and shift)
+		int lsize = _size+alen;
+		if( _values.length < lsize )
+			resize(lsize);				
 		shiftRightByN(pos, alen);
+		
+		//copy input row into internal representation
 		System.arraycopy(aix, 0, _indexes, pos, alen);
 		System.arraycopy(avals, 0, _values, pos, alen);
 		_size+=alen;


[2/5] incubator-systemml git commit: [SYSTEMML-382] Fix sparse-safe scalar operations over sparse blocks

Posted by mb...@apache.org.
[SYSTEMML-382] Fix sparse-safe scalar operations over sparse blocks

Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/9cb7d553
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/9cb7d553
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/9cb7d553

Branch: refs/heads/master
Commit: 9cb7d553fc0d91798a063e0ccceb2d4a51f8f52b
Parents: 205cfd3
Author: Matthias Boehm <mb...@us.ibm.com>
Authored: Fri Jan 22 16:44:55 2016 -0800
Committer: Matthias Boehm <mb...@us.ibm.com>
Committed: Sat Jan 23 16:08:00 2016 -0800

----------------------------------------------------------------------
 .../apache/sysml/runtime/matrix/data/LibMatrixBincell.java   | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9cb7d553/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixBincell.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixBincell.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixBincell.java
index 5428c96..47a46d4 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixBincell.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixBincell.java
@@ -924,7 +924,7 @@ public class LibMatrixBincell
 			SparseBlock a = m1.sparseBlock;
 			SparseBlock c = ret.sparseBlock;
 			
-			for(int r=0; r<Math.min(m1.rlen, m1.sparseBlock.numRows()); r++) {
+			for(int r=0; r<Math.min(m1.rlen, a.numRows()); r++) {
 				if( !a.isEmpty(r) )
 				{
 					int apos = a.pos(r);
@@ -955,12 +955,10 @@ public class LibMatrixBincell
 						if( op.fn instanceof Multiply || op.fn instanceof Multiply2 
 							|| op.fn instanceof Power2  )
 						{
-							c.allocate(r, alen, -1);
-							//TODO perf sparse block
-							//c[r] = new SparseRow(alen);
+							c.allocate(r, alen, ret.clen);
 						}
 						
-						for(int j=0; j<alen; j++) {
+						for(int j=apos; j<apos+alen; j++) {
 							double val = op.executeScalar(avals[j]);
 							ret.appendValue(r, aix[j], val);
 						}


[5/5] incubator-systemml git commit: [SYSTEMML-383] Integrated sparse block memory estimates / valid nnz

Posted by mb...@apache.org.
[SYSTEMML-383] Integrated sparse block memory estimates / valid nnz

Incl new sparse block test for relative memory estimates between MCSR,
CSR, COO, and dense formats.

Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/9c422821
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/9c422821
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/9c422821

Branch: refs/heads/master
Commit: 9c4228215a2cb9b2df351a4660f36d9b63d252c8
Parents: a19a14c
Author: Matthias Boehm <mb...@us.ibm.com>
Authored: Sat Jan 23 01:02:32 2016 -0800
Committer: Matthias Boehm <mb...@us.ibm.com>
Committed: Sat Jan 23 16:08:22 2016 -0800

----------------------------------------------------------------------
 .../org/apache/sysml/hops/OptimizerUtils.java   |  9 +-
 .../sysml/runtime/matrix/data/MatrixBlock.java  | 15 +--
 .../runtime/matrix/data/SparseBlockCOO.java     | 25 +++++
 .../runtime/matrix/data/SparseBlockCSR.java     | 25 +++++
 .../runtime/matrix/data/SparseBlockFactory.java | 18 ++++
 .../runtime/matrix/data/SparseBlockMCSR.java    | 26 +++++
 .../sparse/SparseBlockMemEstimate.java          | 99 ++++++++++++++++++++
 .../functions/sparse/ZPackageSuite.java         |  1 +
 8 files changed, 203 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9c422821/src/main/java/org/apache/sysml/hops/OptimizerUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/OptimizerUtils.java b/src/main/java/org/apache/sysml/hops/OptimizerUtils.java
index 19acbd1..57d38d7 100644
--- a/src/main/java/org/apache/sysml/hops/OptimizerUtils.java
+++ b/src/main/java/org/apache/sysml/hops/OptimizerUtils.java
@@ -23,7 +23,6 @@ import java.util.HashMap;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-
 import org.apache.sysml.api.DMLScript;
 import org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM;
 import org.apache.sysml.conf.ConfigurationManager;
@@ -42,6 +41,7 @@ import org.apache.sysml.runtime.instructions.cp.ScalarObject;
 import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
 import org.apache.sysml.runtime.matrix.data.MatrixBlock;
 import org.apache.sysml.runtime.matrix.data.OutputInfo;
+import org.apache.sysml.runtime.matrix.data.SparseBlock;
 import org.apache.sysml.runtime.matrix.data.SparseRow;
 import org.apache.sysml.runtime.util.UtilFunctions;
 import org.apache.sysml.yarn.ropt.YarnClusterAnalyzer;
@@ -73,7 +73,10 @@ public class OptimizerUtils
 	public static final double BIT_SIZE = (double)1/8;
 	public static final double INVALID_SIZE = -1d; // memory estimate not computed
 
+	//constants for valid CP matrix dimension sizes / nnz (dense/sparse)
 	public static final long MAX_NUMCELLS_CP_DENSE = Integer.MAX_VALUE;
+	public static final long MAX_NNZ_CP_SPARSE = (MatrixBlock.DEFAULT_SPARSEBLOCK == 
+			SparseBlock.Type.MCSR) ? Long.MAX_VALUE : Integer.MAX_VALUE;
 	
 	/**
 	 * Enables/disables dynamic re-compilation of lops/instructions.
@@ -862,8 +865,8 @@ public class OptimizerUtils
 		
 		if( sparse ) //SPARSE
 		{
-			//check max nnz
-			ret = (nnz <= Long.MAX_VALUE);
+			//check max nnz (dependent on sparse block format)
+			ret = (nnz <= MAX_NNZ_CP_SPARSE);
 		}
 		else //DENSE
 		{

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9c422821/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
index 22c6347..9a7bfcb 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
@@ -2556,18 +2556,9 @@ public class MatrixBlock extends MatrixValue implements Externalizable
 		// basic variables and references sizes
 		double size = 44;
 		
-		//NOTES:
-		// * Each sparse row has a fixed overhead of 8B (reference) + 32B (object) +
-		//   12B (3 int members), 32B (overhead int array), 32B (overhead double array),
-		// * Each non-zero value requires 12B for the column-index/value pair.
-		// * Overheads for arrays, objects, and references refer to 64bit JVMs
-		// * If nnz < than rows we have only also empty rows.
-		
-		// account for sparsity and initial capacity
-		double cnnz = Math.max(SparseRow.initialCapacity, Math.ceil(sparsity*ncols));
-		double rlen = Math.min(nrows, Math.ceil(sparsity*nrows*ncols));
-		size += rlen * ( 116 + 12 * cnnz ); //sparse row
-		size += nrows * 8d; //empty rows
+		// delegate memory estimate to individual sparse blocks
+		size += SparseBlockFactory.estimateSizeSparseInMemory(
+			DEFAULT_SPARSEBLOCK, nrows, ncols, sparsity);
 		
 		// robustness for long overflows
 		return (long) Math.min(size, Long.MAX_VALUE);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9c422821/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCOO.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCOO.java b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCOO.java
index eea0754..5643850 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCOO.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCOO.java
@@ -125,6 +125,31 @@ public class SparseBlockCOO extends SparseBlock
 			}
 		}
 	}
+		
+	/**
+	 * Get the estimated in-memory size of the sparse block in COO 
+	 * with the given dimensions w/o accounting for overallocation. 
+	 * 
+	 * @param nrows
+	 * @param ncols
+	 * @param sparsity
+	 * @return
+	 */
+	public static long estimateMemory(long nrows, long ncols, double sparsity) {
+		double lnnz = Math.max(INIT_CAPACITY, Math.ceil(sparsity*nrows*ncols));
+		
+		//32B overhead per array, int/int/double arr in nnz 
+		double size = 16 + 8;   //object + 2 int fields
+		size += 32 + lnnz * 4d; //rindexes array (row indexes)
+		size += 32 + lnnz * 4d; //cindexes array (column indexes)
+		size += 32 + lnnz * 8d; //values array (non-zero values)
+		
+		//robustness for long overflows
+		return (long) Math.min(size, Long.MAX_VALUE);
+	}
+	
+	///////////////////
+	//SparseBlock implementation
 	
 	@Override
 	public void allocate(int r) {

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9c422821/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java
index 876b0ed..c599753 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java
@@ -128,6 +128,31 @@ public class SparseBlockCSR extends SparseBlock
 		}
 	}
 	
+	/**
+	 * Get the estimated in-memory size of the sparse block in CSR 
+	 * with the given dimensions w/o accounting for overallocation. 
+	 * 
+	 * @param nrows
+	 * @param ncols
+	 * @param sparsity
+	 * @return
+	 */
+	public static long estimateMemory(long nrows, long ncols, double sparsity) {
+		double lnnz = Math.max(INIT_CAPACITY, Math.ceil(sparsity*nrows*ncols));
+		
+		//32B overhead per array, int arr in nrows, int/double arr in nnz 
+		double size = 16 + 4;        //object + int field
+		size += 32 + (nrows+1) * 4d; //ptr array (row pointers)
+		size += 32 + lnnz * 4d;      //indexes array (column indexes)
+		size += 32 + lnnz * 8d;      //values array (non-zero values)
+		
+		//robustness for long overflows
+		return (long) Math.min(size, Long.MAX_VALUE);
+	}
+	
+	///////////////////
+	//SparseBlock implementation
+
 	@Override
 	public void allocate(int r) {
 		//do nothing everything preallocated

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9c422821/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockFactory.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockFactory.java b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockFactory.java
index 1ac8f16..67b3eae 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockFactory.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockFactory.java
@@ -87,4 +87,22 @@ public abstract class SparseBlockFactory
 				throw new RuntimeException("Unexpected sparse block type: "+type.toString());
 		}
 	}
+	
+	/**
+	 * 
+	 * @param type
+	 * @param nrows
+	 * @param ncols
+	 * @param sparsity
+	 * @return
+	 */
+	public static long estimateSizeSparseInMemory(SparseBlock.Type type, long nrows, long ncols, double sparsity) {
+		switch( type ) {
+			case MCSR: return SparseBlockMCSR.estimateMemory(nrows, ncols, sparsity);
+			case CSR: return SparseBlockCSR.estimateMemory(nrows, ncols, sparsity);
+			case COO: return SparseBlockCOO.estimateMemory(nrows, ncols, sparsity);
+			default:
+				throw new RuntimeException("Unexpected sparse block type: "+type.toString());
+		}
+	}
 }

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9c422821/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java
index 378a3f4..6e1cded 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java
@@ -80,6 +80,32 @@ public class SparseBlockMCSR extends SparseBlock
 	public SparseBlockMCSR(int rlen, int clen) {
 		_rows = new SparseRow[rlen];
 	}
+	
+	/**
+	 * Get the estimated in-memory size of the sparse block in MCSR 
+	 * with the given dimensions w/o accounting for overallocation. 
+	 * 
+	 * @param nrows
+	 * @param ncols
+	 * @param sparsity
+	 * @return
+	 */
+	public static long estimateMemory(long nrows, long ncols, double sparsity) {
+		double cnnz = Math.max(SparseRow.initialCapacity, Math.ceil(sparsity*ncols));
+		double rlen = Math.min(nrows, Math.ceil(sparsity*nrows*ncols));
+		
+		//Each sparse row has a fixed overhead of 8B (reference) + 32B (object) +
+		//12B (3 int members), 32B (overhead int array), 32B (overhead double array),
+		//Each non-zero value requires 12B for the column-index/value pair.
+		//Overheads for arrays, objects, and references refer to 64bit JVMs
+		//If nnz < than rows we have only also empty rows.
+		double size = 16;                 //object
+		size += rlen * (116 + cnnz * 12); //sparse rows
+		size += 32 + nrows * 8d;          //references
+		
+		// robustness for long overflows
+		return (long) Math.min(size, Long.MAX_VALUE);
+	}
 
 	///////////////////
 	//SparseBlock implementation

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9c422821/src/test/java/org/apache/sysml/test/integration/functions/sparse/SparseBlockMemEstimate.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/sparse/SparseBlockMemEstimate.java b/src/test/java/org/apache/sysml/test/integration/functions/sparse/SparseBlockMemEstimate.java
new file mode 100644
index 0000000..8d2fbd0
--- /dev/null
+++ b/src/test/java/org/apache/sysml/test/integration/functions/sparse/SparseBlockMemEstimate.java
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.test.integration.functions.sparse;
+
+import org.junit.Assert;
+import org.junit.Test;
+import org.apache.sysml.runtime.matrix.data.MatrixBlock;
+import org.apache.sysml.runtime.matrix.data.SparseBlock;
+import org.apache.sysml.runtime.matrix.data.SparseBlockFactory;
+import org.apache.sysml.test.integration.AutomatedTestBase;
+import org.apache.sysml.test.utils.TestUtils;
+
+/**
+ * This is a sparse matrix block component test for sparse block memory
+ * estimation functionality.
+ * 
+ */
+public class SparseBlockMemEstimate extends AutomatedTestBase 
+{
+	private final static int rows = 662;
+	private final static int cols = 444;	
+	private final static double sparsity1 = 0.39;
+	private final static double sparsity2 = 0.0001;
+	
+	@Override
+	public void setUp() {
+		TestUtils.clearAssertionInformation();
+	}
+
+	@Test
+	public void testSparseBlockSparse()  {
+		runSparseBlockMemoryTest(sparsity1);
+	}
+	
+	@Test
+	public void testSparseBlockUltraSparse()  {
+		runSparseBlockMemoryTest(sparsity2);
+	}
+		
+	/**
+	 * 
+	 * @param btype
+	 * @param sparsity
+	 */
+	private void runSparseBlockMemoryTest( double sparsity)
+	{
+		double memMCSR = SparseBlockFactory.estimateSizeSparseInMemory(SparseBlock.Type.MCSR, rows, cols, sparsity);
+		double memCSR = SparseBlockFactory.estimateSizeSparseInMemory(SparseBlock.Type.CSR, rows, cols, sparsity);
+		double memCOO = SparseBlockFactory.estimateSizeSparseInMemory(SparseBlock.Type.COO, rows, cols, sparsity);
+		double memDense = MatrixBlock.estimateSizeDenseInMemory(rows, cols);
+		
+		//check negative estimate
+		if( memMCSR <= 0 )
+			Assert.fail("SparseBlockMCSR memory estimate <= 0.");
+		if( memCSR  <= 0 )
+			Assert.fail("SparseBlockCSR memory estimate <= 0.");
+		if( memCOO  <= 0 )
+			Assert.fail("SparseBlockCOO memory estimate <= 0.");
+		
+		//check dense estimate
+		if( memMCSR > memDense )
+			Assert.fail("SparseBlockMCSR memory estimate larger than dense estimate.");
+		if( memCSR > memDense )
+			Assert.fail("SparseBlockCSR memory estimate larger than dense estimate.");
+		if( memCOO > memDense )
+			Assert.fail("SparseBlockCOO memory estimate larger than dense estimate.");
+		
+		//check sparse estimates relations
+		if( sparsity == sparsity1 ) { //sparse (pref CSR)
+			if( memMCSR < memCSR )
+				Assert.fail("SparseBlockMCSR memory estimate smaller than SparseBlockCSR estimate.");
+			if( memCOO < memCSR )
+				Assert.fail("SparseBlockCOO memory estimate smaller than SparseBlockCSR estimate.");
+		}
+		else { //ultra-sparse (pref COO)
+			if( memMCSR < memCOO )
+				Assert.fail("SparseBlockMCSR memory estimate smaller than SparseBlockCOO estimate.");
+			if( memCSR < memCOO )
+				Assert.fail("SparseBlockCSR memory estimate smaller than SparseBlockCOO estimate.");	
+		}
+	}
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9c422821/src/test_suites/java/org/apache/sysml/test/integration/functions/sparse/ZPackageSuite.java
----------------------------------------------------------------------
diff --git a/src/test_suites/java/org/apache/sysml/test/integration/functions/sparse/ZPackageSuite.java b/src/test_suites/java/org/apache/sysml/test/integration/functions/sparse/ZPackageSuite.java
index 633e152..01fc461 100644
--- a/src/test_suites/java/org/apache/sysml/test/integration/functions/sparse/ZPackageSuite.java
+++ b/src/test_suites/java/org/apache/sysml/test/integration/functions/sparse/ZPackageSuite.java
@@ -32,6 +32,7 @@ import org.junit.runners.Suite;
 	SparseBlockGetSet.class,
 	SparseBlockIndexRange.class,
 	SparseBlockIterator.class,
+	SparseBlockMemEstimate.class,
 	SparseBlockScan.class,
 	SparseBlockSize.class,
 })


[3/5] incubator-systemml git commit: Fix error handling ultra-sparse data generation (int overflow)

Posted by mb...@apache.org.
Fix error handling ultra-sparse data generation (int overflow)

Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/10d1afc9
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/10d1afc9
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/10d1afc9

Branch: refs/heads/master
Commit: 10d1afc9dac5fe2c4615019e52113de8f9400754
Parents: 9cb7d55
Author: Matthias Boehm <mb...@us.ibm.com>
Authored: Fri Jan 22 20:54:05 2016 -0800
Committer: Matthias Boehm <mb...@us.ibm.com>
Committed: Sat Jan 23 16:08:07 2016 -0800

----------------------------------------------------------------------
 .../runtime/matrix/data/LibMatrixDatagen.java   | 27 +++++++++++---------
 1 file changed, 15 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/10d1afc9/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDatagen.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDatagen.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDatagen.java
index dcaaf1a..c190e50 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDatagen.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDatagen.java
@@ -127,19 +127,22 @@ public class LibMatrixDatagen
 	 * @return
 	 * @throws DMLRuntimeException
 	 */
-	public static long[] computeNNZperBlock(long nrow, long ncol, int brlen, int bclen, double sparsity) throws DMLRuntimeException {
-		int numBlocks = (int) (Math.ceil((double)nrow/brlen) * Math.ceil((double)ncol/bclen));
-		
-		// CURRENT: 
-		// 		Total #of NNZ is set to the expected value (nrow*ncol*sparsity).
-		// TODO: 
-		//		Instead of using the expected value, one should actually 
-		// 		treat NNZ as a random variable and accordingly generate a random value.
-		long nnz = (long) Math.ceil (nrow * (ncol*sparsity));
-
-		if ( numBlocks > Integer.MAX_VALUE ) {
-			throw new DMLRuntimeException("A random matrix of size [" + nrow + "," + ncol + "] can not be created. Number of blocks (" +  numBlocks + ") exceeds the maximum integer size. Try to increase the block size.");
+	public static long[] computeNNZperBlock(long nrow, long ncol, int brlen, int bclen, double sparsity) 
+		throws DMLRuntimeException 
+	{
+		long lnumBlocks = (long) (Math.ceil((double)nrow/brlen) * Math.ceil((double)ncol/bclen));
+		
+		//sanity check max number of blocks (before cast to avoid overflow)
+		if ( lnumBlocks > Integer.MAX_VALUE ) {
+			throw new DMLRuntimeException("A random matrix of size [" + nrow + "," + ncol + "] can not be created. "
+					+ "Number of blocks ("+lnumBlocks+") exceeds the maximum integer size. Try to increase the block size.");
 		}
+
+		// NOTE: Total #of NNZ is set to the expected value (nrow*ncol*sparsity).
+		// TODO: Instead of using the expected value, NNZ should be random variable 
+		
+		int numBlocks = (int) lnumBlocks;
+		long nnz = (long) Math.ceil (nrow * (ncol*sparsity));
 		
 		// Compute block-level NNZ
 		long[] ret  = new long[numBlocks];


[4/5] incubator-systemml git commit: Performance spark wsloss/wcemm ultra-sparse (prefilter empty blocks)

Posted by mb...@apache.org.
Performance spark wsloss/wcemm ultra-sparse (prefilter empty blocks)

Ultra-sparse matrices are a common case for factorization algorithms.
Accordingly, this change introduces a prefilter for empty blocks on
wsloss and wcemm because the full aggregate ensures result correctness.
In a scenario of wsloss over KDD2010 (15M x 30M, sparsity 9.4e-7), this  
achieved a total runtime reduction from 70s to 39s despite inputs from
HDFS.

Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/a19a14c0
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/a19a14c0
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/a19a14c0

Branch: refs/heads/master
Commit: a19a14c05e8034d5abf7f5c9ffbaea96f05b8017
Parents: 10d1afc
Author: Matthias Boehm <mb...@us.ibm.com>
Authored: Fri Jan 22 22:40:29 2016 -0800
Committer: Matthias Boehm <mb...@us.ibm.com>
Committed: Sat Jan 23 16:08:14 2016 -0800

----------------------------------------------------------------------
 .../runtime/instructions/spark/QuaternarySPInstruction.java   | 7 +++++++
 1 file changed, 7 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/a19a14c0/src/main/java/org/apache/sysml/runtime/instructions/spark/QuaternarySPInstruction.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/spark/QuaternarySPInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/spark/QuaternarySPInstruction.java
index af65a9e..500cc01 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/spark/QuaternarySPInstruction.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/spark/QuaternarySPInstruction.java
@@ -53,6 +53,7 @@ import org.apache.sysml.runtime.instructions.cp.CPOperand;
 import org.apache.sysml.runtime.instructions.cp.DoubleObject;
 import org.apache.sysml.runtime.instructions.spark.data.LazyIterableIterator;
 import org.apache.sysml.runtime.instructions.spark.data.PartitionedBroadcastMatrix;
+import org.apache.sysml.runtime.instructions.spark.functions.FilterNonEmptyBlocksFunction;
 import org.apache.sysml.runtime.instructions.spark.utils.RDDAggregateUtils;
 import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
 import org.apache.sysml.runtime.matrix.data.MatrixBlock;
@@ -196,6 +197,12 @@ public class QuaternarySPInstruction extends ComputationSPInstruction
 		int brlen = inMc.getRowsPerBlock();
 		int bclen = inMc.getColsPerBlock();
 		
+		//pre-filter empty blocks (ultra-sparse matrices) for full aggregates
+		//(map/redwsloss, map/redwcemm); safe because theses ops produce a scalar
+		if( qop.wtype1 != null || qop.wtype4 != null ) {
+			in = in.filter(new FilterNonEmptyBlocksFunction());
+		}
+		
 		//map-side only operation (one rdd input, two broadcasts)
 		if(    WeightedSquaredLoss.OPCODE.equalsIgnoreCase(getOpcode())  
 			|| WeightedSigmoid.OPCODE.equalsIgnoreCase(getOpcode())