You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemds.apache.org by mb...@apache.org on 2021/03/23 22:46:32 UTC

[systemds] branch master updated: [SYSTEMDS-2907] Fix memory estimates dense and sparse matrices, part 2

This is an automated email from the ASF dual-hosted git repository.

mboehm7 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/master by this push:
     new 3d36d98  [SYSTEMDS-2907] Fix memory estimates dense and sparse matrices, part 2
3d36d98 is described below

commit 3d36d9856eb6eac6eef5d09c6293d8d7659c93ae
Author: Matthias Boehm <mb...@gmail.com>
AuthorDate: Tue Mar 23 23:44:27 2021 +0100

    [SYSTEMDS-2907] Fix memory estimates dense and sparse matrices, part 2
    
    This patch applies some additional fixes to create consistency and avoid
    biased estimates that again created problems with ultra-sparse,
    distributed matrices (w/ billions of blocks):
    
    * Fix inconsistent header size for empty (non-allocated) blocks
    * Fix missing nnz attribute in matrix block header size
    * Fix biased estimate of MCSR sparse rows (underestimated nnz per sparse
    row if #sparserows < rows, and underestimated sparse row array sizes due
    to a systematic cast to long, although nnz are balanced across sparse rows)
---
 .../java/org/apache/sysds/runtime/data/SparseBlockMCSR.java | 12 +++++++-----
 .../org/apache/sysds/runtime/matrix/data/MatrixBlock.java   | 13 +++++++------
 2 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/src/main/java/org/apache/sysds/runtime/data/SparseBlockMCSR.java b/src/main/java/org/apache/sysds/runtime/data/SparseBlockMCSR.java
index ddab780..77caaec 100644
--- a/src/main/java/org/apache/sysds/runtime/data/SparseBlockMCSR.java
+++ b/src/main/java/org/apache/sysds/runtime/data/SparseBlockMCSR.java
@@ -100,8 +100,9 @@ public class SparseBlockMCSR extends SparseBlock
 	 * @return memory estimate
 	 */
 	public static long estimateSizeInMemory(long nrows, long ncols, double sparsity) {
-		double cnnz = Math.max(SparseRowVector.initialCapacity, Math.ceil(sparsity*ncols));
-		double rlen = Math.min(nrows, Math.ceil(sparsity*nrows*ncols));
+		double nnz = Math.ceil(sparsity*nrows*ncols);
+		double rlen = Math.min(nrows, nnz); // num sparse row objects
+		double cnnz = Math.max(SparseRowVector.initialCapacity, nnz/rlen);
 		
 		//Each sparse row has a fixed overhead of 16B (object) + 12B (3 ints),
 		//24B (int array), 24B (double array), i.e., in total 76B
@@ -111,11 +112,12 @@ public class SparseBlockMCSR extends SparseBlock
 		double size = 16; //object
 		size += MemoryEstimates.objectArrayCost((long)rlen); //references
 		long sparseRowSize = 16; // object
-		sparseRowSize += MemoryEstimates.intArrayCost((long)cnnz);
-		sparseRowSize += MemoryEstimates.doubleArrayCost((long)cnnz);
 		sparseRowSize += 4*4; // 3 integers + padding
+		sparseRowSize += MemoryEstimates.intArrayCost(0);
+		sparseRowSize += MemoryEstimates.doubleArrayCost(0);
+		sparseRowSize += 12*Math.max(1, cnnz); //avoid bias by down cast for ultra-sparse
 		size += rlen * sparseRowSize; //sparse rows
-		
+
 		// robustness for long overflows
 		return (long) Math.min(size, Long.MAX_VALUE);
 	}
diff --git a/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java b/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java
index ab83d12..615b28d 100644
--- a/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java
+++ b/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java
@@ -2427,10 +2427,11 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab
 	public static long getHeaderSize() {
 		// basic variables and references sizes
 		long size = 16; // header
-		size += 12; // ints
-		size += 1; // boolean
+		size += 12; // 3 x ints (rlen, clen, ennz/row)
+		size += 1; // boolean (sparse)
 		size += 3; // padding
-		size += 8 * 2; // object references
+		size += 8; // nonZeros
+		size += 2 * 8; // object references
 		return size;
 	}
 	
@@ -2462,8 +2463,8 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab
 	}
 	
 	public static long estimateSizeSparseInMemory(long nrows, long ncols, double sparsity, SparseBlock.Type stype) {
-		double size = getHeaderSize()
-			+ SparseBlockFactory.estimateSizeSparseInMemory(stype, nrows, ncols, sparsity);
+		double size = getHeaderSize() + ((sparsity == 0) ? 0 : //allocated on demand
+			SparseBlockFactory.estimateSizeSparseInMemory(stype, nrows, ncols, sparsity));
 		// robustness for long overflows
 		return (long) Math.min(size, Long.MAX_VALUE);
 	}
@@ -2618,7 +2619,7 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab
 	public long getInMemorySize() {
 		//in-memory size given by header if not allocated
 		if( !isAllocated() ) 
-			return 44;
+			return getHeaderSize();
 		//in-memory size of dense/sparse representation
 		return !sparse ? estimateSizeDenseInMemory(rlen, clen) :
 			estimateSizeSparseInMemory(rlen, clen, getSparsity(),