You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemds.apache.org by mb...@apache.org on 2021/03/23 22:46:32 UTC
[systemds] branch master updated: [SYSTEMDS-2907] Fix memory
estimates dense and sparse matrices, part 2
This is an automated email from the ASF dual-hosted git repository.
mboehm7 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/master by this push:
new 3d36d98 [SYSTEMDS-2907] Fix memory estimates dense and sparse matrices, part 2
3d36d98 is described below
commit 3d36d9856eb6eac6eef5d09c6293d8d7659c93ae
Author: Matthias Boehm <mb...@gmail.com>
AuthorDate: Tue Mar 23 23:44:27 2021 +0100
[SYSTEMDS-2907] Fix memory estimates dense and sparse matrices, part 2
This patch applies some additional fixes to make the estimates consistent
and avoid biased estimates that again caused problems with ultra-sparse,
distributed matrices (w/ billions of blocks):
* Fix inconsistent header size for empty (non-allocated) blocks
* Fix missing nnz attribute in matrix block header size
* Fix biased estimate of MCSR sparse rows (underestimated nnz per sparse
row if #sparserows < rows, and underestimated sparse row array sizes due
to a systematic down-cast of the per-row nnz to long, even though the
non-zeros are balanced across sparse rows)
---
.../java/org/apache/sysds/runtime/data/SparseBlockMCSR.java | 12 +++++++-----
.../org/apache/sysds/runtime/matrix/data/MatrixBlock.java | 13 +++++++------
2 files changed, 14 insertions(+), 11 deletions(-)
diff --git a/src/main/java/org/apache/sysds/runtime/data/SparseBlockMCSR.java b/src/main/java/org/apache/sysds/runtime/data/SparseBlockMCSR.java
index ddab780..77caaec 100644
--- a/src/main/java/org/apache/sysds/runtime/data/SparseBlockMCSR.java
+++ b/src/main/java/org/apache/sysds/runtime/data/SparseBlockMCSR.java
@@ -100,8 +100,9 @@ public class SparseBlockMCSR extends SparseBlock
* @return memory estimate
*/
public static long estimateSizeInMemory(long nrows, long ncols, double sparsity) {
- double cnnz = Math.max(SparseRowVector.initialCapacity, Math.ceil(sparsity*ncols));
- double rlen = Math.min(nrows, Math.ceil(sparsity*nrows*ncols));
+ double nnz = Math.ceil(sparsity*nrows*ncols);
+ double rlen = Math.min(nrows, nnz); // num sparse row objects
+ double cnnz = Math.max(SparseRowVector.initialCapacity, nnz/rlen);
//Each sparse row has a fixed overhead of 16B (object) + 12B (3 ints),
//24B (int array), 24B (double array), i.e., in total 76B
@@ -111,11 +112,12 @@ public class SparseBlockMCSR extends SparseBlock
double size = 16; //object
size += MemoryEstimates.objectArrayCost((long)rlen); //references
long sparseRowSize = 16; // object
- sparseRowSize += MemoryEstimates.intArrayCost((long)cnnz);
- sparseRowSize += MemoryEstimates.doubleArrayCost((long)cnnz);
sparseRowSize += 4*4; // 3 integers + padding
+ sparseRowSize += MemoryEstimates.intArrayCost(0);
+ sparseRowSize += MemoryEstimates.doubleArrayCost(0);
+ sparseRowSize += 12*Math.max(1, cnnz); //avoid bias by down cast for ultra-sparse
size += rlen * sparseRowSize; //sparse rows
-
+
// robustness for long overflows
return (long) Math.min(size, Long.MAX_VALUE);
}
diff --git a/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java b/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java
index ab83d12..615b28d 100644
--- a/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java
+++ b/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java
@@ -2427,10 +2427,11 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab
public static long getHeaderSize() {
// basic variables and references sizes
long size = 16; // header
- size += 12; // ints
- size += 1; // boolean
+ size += 12; // 3 x ints (rlen, clen, ennz/row)
+ size += 1; // boolean (sparse)
size += 3; // padding
- size += 8 * 2; // object references
+ size += 8; // nonZeros
+ size += 2 * 8; // object references
return size;
}
@@ -2462,8 +2463,8 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab
}
public static long estimateSizeSparseInMemory(long nrows, long ncols, double sparsity, SparseBlock.Type stype) {
- double size = getHeaderSize()
- + SparseBlockFactory.estimateSizeSparseInMemory(stype, nrows, ncols, sparsity);
+ double size = getHeaderSize() + ((sparsity == 0) ? 0 : //allocated on demand
+ SparseBlockFactory.estimateSizeSparseInMemory(stype, nrows, ncols, sparsity));
// robustness for long overflows
return (long) Math.min(size, Long.MAX_VALUE);
}
@@ -2618,7 +2619,7 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab
public long getInMemorySize() {
//in-memory size given by header if not allocated
if( !isAllocated() )
- return 44;
+ return getHeaderSize();
//in-memory size of dense/sparse representation
return !sparse ? estimateSizeDenseInMemory(rlen, clen) :
estimateSizeSparseInMemory(rlen, clen, getSparsity(),