You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2016/01/24 01:09:18 UTC
[1/5] incubator-systemml git commit: [SYSTEMML-380] Fix CSR
primitives (init from CSR / resize on row insert)
Repository: incubator-systemml
Updated Branches:
refs/heads/master 444e2b03a -> 9c4228215
[SYSTEMML-380] Fix CSR primitives (init from CSR / resize on row insert)
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/205cfd3f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/205cfd3f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/205cfd3f
Branch: refs/heads/master
Commit: 205cfd3f84da991b052c5b8ffa191c9299ec01bf
Parents: 444e2b0
Author: Matthias Boehm <mb...@us.ibm.com>
Authored: Fri Jan 22 16:42:36 2016 -0800
Committer: Matthias Boehm <mb...@us.ibm.com>
Committed: Sat Jan 23 16:07:52 2016 -0800
----------------------------------------------------------------------
.../sysml/runtime/matrix/data/SparseBlockCSR.java | 15 +++++++++++++--
1 file changed, 13 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/205cfd3f/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java
index 139cec3..876b0ed 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java
@@ -72,7 +72,7 @@ public class SparseBlockCSR extends SparseBlock
//special case SparseBlockCSR
if( sblock instanceof SparseBlockCSR ) {
SparseBlockCSR ocsr = (SparseBlockCSR)sblock;
- _ptr = Arrays.copyOf(ocsr._ptr, ocsr.numRows());
+ _ptr = Arrays.copyOf(ocsr._ptr, ocsr.numRows()+1);
_indexes = Arrays.copyOf(ocsr._indexes, ocsr._size);
_values = Arrays.copyOf(ocsr._values, ocsr._size);
_size = ocsr._size;
@@ -260,11 +260,22 @@ public class SparseBlockCSR extends SparseBlock
@Override
public void set(int r, SparseRow row) {
int pos = pos(r);
+ int len = size(r);
int alen = row.size();
int[] aix = row.indexes();
double[] avals = row.values();
- deleteIndexRange(r, aix[0], aix[alen-1]+1);
+
+ //delete existing values if necessary
+ if( len > 0 )
+ deleteIndexRange(r, aix[0], aix[alen-1]+1);
+
+ //prepare free space (allocate and shift)
+ int lsize = _size+alen;
+ if( _values.length < lsize )
+ resize(lsize);
shiftRightByN(pos, alen);
+
+ //copy input row into internal representation
System.arraycopy(aix, 0, _indexes, pos, alen);
System.arraycopy(avals, 0, _values, pos, alen);
_size+=alen;
[2/5] incubator-systemml git commit: [SYSTEMML-382] Fix sparse-safe
scalar operations over sparse blocks
Posted by mb...@apache.org.
[SYSTEMML-382] Fix sparse-safe scalar operations over sparse blocks
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/9cb7d553
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/9cb7d553
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/9cb7d553
Branch: refs/heads/master
Commit: 9cb7d553fc0d91798a063e0ccceb2d4a51f8f52b
Parents: 205cfd3
Author: Matthias Boehm <mb...@us.ibm.com>
Authored: Fri Jan 22 16:44:55 2016 -0800
Committer: Matthias Boehm <mb...@us.ibm.com>
Committed: Sat Jan 23 16:08:00 2016 -0800
----------------------------------------------------------------------
.../apache/sysml/runtime/matrix/data/LibMatrixBincell.java | 8 +++-----
1 file changed, 3 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9cb7d553/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixBincell.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixBincell.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixBincell.java
index 5428c96..47a46d4 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixBincell.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixBincell.java
@@ -924,7 +924,7 @@ public class LibMatrixBincell
SparseBlock a = m1.sparseBlock;
SparseBlock c = ret.sparseBlock;
- for(int r=0; r<Math.min(m1.rlen, m1.sparseBlock.numRows()); r++) {
+ for(int r=0; r<Math.min(m1.rlen, a.numRows()); r++) {
if( !a.isEmpty(r) )
{
int apos = a.pos(r);
@@ -955,12 +955,10 @@ public class LibMatrixBincell
if( op.fn instanceof Multiply || op.fn instanceof Multiply2
|| op.fn instanceof Power2 )
{
- c.allocate(r, alen, -1);
- //TODO perf sparse block
- //c[r] = new SparseRow(alen);
+ c.allocate(r, alen, ret.clen);
}
- for(int j=0; j<alen; j++) {
+ for(int j=apos; j<apos+alen; j++) {
double val = op.executeScalar(avals[j]);
ret.appendValue(r, aix[j], val);
}
[5/5] incubator-systemml git commit: [SYSTEMML-383] Integrated sparse
block memory estimates / valid nnz
Posted by mb...@apache.org.
[SYSTEMML-383] Integrated sparse block memory estimates / valid nnz
Includes a new sparse block test that checks the relative memory estimates
of the MCSR, CSR, COO, and dense formats.
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/9c422821
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/9c422821
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/9c422821
Branch: refs/heads/master
Commit: 9c4228215a2cb9b2df351a4660f36d9b63d252c8
Parents: a19a14c
Author: Matthias Boehm <mb...@us.ibm.com>
Authored: Sat Jan 23 01:02:32 2016 -0800
Committer: Matthias Boehm <mb...@us.ibm.com>
Committed: Sat Jan 23 16:08:22 2016 -0800
----------------------------------------------------------------------
.../org/apache/sysml/hops/OptimizerUtils.java | 9 +-
.../sysml/runtime/matrix/data/MatrixBlock.java | 15 +--
.../runtime/matrix/data/SparseBlockCOO.java | 25 +++++
.../runtime/matrix/data/SparseBlockCSR.java | 25 +++++
.../runtime/matrix/data/SparseBlockFactory.java | 18 ++++
.../runtime/matrix/data/SparseBlockMCSR.java | 26 +++++
.../sparse/SparseBlockMemEstimate.java | 99 ++++++++++++++++++++
.../functions/sparse/ZPackageSuite.java | 1 +
8 files changed, 203 insertions(+), 15 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9c422821/src/main/java/org/apache/sysml/hops/OptimizerUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/OptimizerUtils.java b/src/main/java/org/apache/sysml/hops/OptimizerUtils.java
index 19acbd1..57d38d7 100644
--- a/src/main/java/org/apache/sysml/hops/OptimizerUtils.java
+++ b/src/main/java/org/apache/sysml/hops/OptimizerUtils.java
@@ -23,7 +23,6 @@ import java.util.HashMap;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
-
import org.apache.sysml.api.DMLScript;
import org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM;
import org.apache.sysml.conf.ConfigurationManager;
@@ -42,6 +41,7 @@ import org.apache.sysml.runtime.instructions.cp.ScalarObject;
import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;
import org.apache.sysml.runtime.matrix.data.OutputInfo;
+import org.apache.sysml.runtime.matrix.data.SparseBlock;
import org.apache.sysml.runtime.matrix.data.SparseRow;
import org.apache.sysml.runtime.util.UtilFunctions;
import org.apache.sysml.yarn.ropt.YarnClusterAnalyzer;
@@ -73,7 +73,10 @@ public class OptimizerUtils
public static final double BIT_SIZE = (double)1/8;
public static final double INVALID_SIZE = -1d; // memory estimate not computed
+ //constants for valid CP matrix dimension sizes / nnz (dense/sparse)
public static final long MAX_NUMCELLS_CP_DENSE = Integer.MAX_VALUE;
+ public static final long MAX_NNZ_CP_SPARSE = (MatrixBlock.DEFAULT_SPARSEBLOCK ==
+ SparseBlock.Type.MCSR) ? Long.MAX_VALUE : Integer.MAX_VALUE;
/**
* Enables/disables dynamic re-compilation of lops/instructions.
@@ -862,8 +865,8 @@ public class OptimizerUtils
if( sparse ) //SPARSE
{
- //check max nnz
- ret = (nnz <= Long.MAX_VALUE);
+ //check max nnz (dependent on sparse block format)
+ ret = (nnz <= MAX_NNZ_CP_SPARSE);
}
else //DENSE
{
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9c422821/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
index 22c6347..9a7bfcb 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
@@ -2556,18 +2556,9 @@ public class MatrixBlock extends MatrixValue implements Externalizable
// basic variables and references sizes
double size = 44;
- //NOTES:
- // * Each sparse row has a fixed overhead of 8B (reference) + 32B (object) +
- // 12B (3 int members), 32B (overhead int array), 32B (overhead double array),
- // * Each non-zero value requires 12B for the column-index/value pair.
- // * Overheads for arrays, objects, and references refer to 64bit JVMs
- // * If nnz < than rows we have only also empty rows.
-
- // account for sparsity and initial capacity
- double cnnz = Math.max(SparseRow.initialCapacity, Math.ceil(sparsity*ncols));
- double rlen = Math.min(nrows, Math.ceil(sparsity*nrows*ncols));
- size += rlen * ( 116 + 12 * cnnz ); //sparse row
- size += nrows * 8d; //empty rows
+ // delegate memory estimate to individual sparse blocks
+ size += SparseBlockFactory.estimateSizeSparseInMemory(
+ DEFAULT_SPARSEBLOCK, nrows, ncols, sparsity);
// robustness for long overflows
return (long) Math.min(size, Long.MAX_VALUE);
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9c422821/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCOO.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCOO.java b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCOO.java
index eea0754..5643850 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCOO.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCOO.java
@@ -125,6 +125,31 @@ public class SparseBlockCOO extends SparseBlock
}
}
}
+
+ /**
+ * Get the estimated in-memory size of the sparse block in COO
+ * with the given dimensions w/o accounting for overallocation.
+ *
+ * @param nrows number of rows
+ * @param ncols number of columns
+ * @param sparsity fraction of non-zero cells (nnz is taken as sparsity*nrows*ncols)
+ * @return estimated size in bytes, capped at Long.MAX_VALUE
+ */
+ public static long estimateMemory(long nrows, long ncols, double sparsity) {
+ double lnnz = Math.max(INIT_CAPACITY, Math.ceil(sparsity*nrows*ncols)); //expected nnz, but at least INIT_CAPACITY
+
+ //32B overhead per array; two int arrays and one double array, each with nnz entries
+ double size = 16 + 8; //object + 2 int fields
+ size += 32 + lnnz * 4d; //rindexes array (row indexes)
+ size += 32 + lnnz * 4d; //cindexes array (column indexes)
+ size += 32 + lnnz * 8d; //values array (non-zero values)
+
+ //clamp the double estimate to Long.MAX_VALUE before the cast to long
+ return (long) Math.min(size, Long.MAX_VALUE);
+ }
+
+ ///////////////////
+ //SparseBlock implementation
@Override
public void allocate(int r) {
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9c422821/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java
index 876b0ed..c599753 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java
@@ -128,6 +128,31 @@ public class SparseBlockCSR extends SparseBlock
}
}
+ /**
+ * Get the estimated in-memory size of the sparse block in CSR
+ * with the given dimensions w/o accounting for overallocation.
+ *
+ * @param nrows number of rows
+ * @param ncols number of columns
+ * @param sparsity fraction of non-zero cells (nnz is taken as sparsity*nrows*ncols)
+ * @return estimated size in bytes, capped at Long.MAX_VALUE
+ */
+ public static long estimateMemory(long nrows, long ncols, double sparsity) {
+ double lnnz = Math.max(INIT_CAPACITY, Math.ceil(sparsity*nrows*ncols)); //expected nnz, but at least INIT_CAPACITY
+
+ //32B overhead per array; one int array with nrows+1 entries, int/double arrays with nnz entries
+ double size = 16 + 4; //object + int field
+ size += 32 + (nrows+1) * 4d; //ptr array (row pointers)
+ size += 32 + lnnz * 4d; //indexes array (column indexes)
+ size += 32 + lnnz * 8d; //values array (non-zero values)
+
+ //clamp the double estimate to Long.MAX_VALUE before the cast to long
+ return (long) Math.min(size, Long.MAX_VALUE);
+ }
+
+ ///////////////////
+ //SparseBlock implementation
+
@Override
public void allocate(int r) {
//do nothing everything preallocated
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9c422821/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockFactory.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockFactory.java b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockFactory.java
index 1ac8f16..67b3eae 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockFactory.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockFactory.java
@@ -87,4 +87,22 @@ public abstract class SparseBlockFactory
throw new RuntimeException("Unexpected sparse block type: "+type.toString());
}
}
+
+ /**
+ * Estimates the in-memory size of a sparse block by delegating to the estimateMemory method of the given format.
+ * @param type sparse block format (MCSR, CSR, or COO); any other value raises a RuntimeException
+ * @param nrows number of rows, passed through to the format-specific estimator
+ * @param ncols number of columns, passed through to the format-specific estimator
+ * @param sparsity sparsity, passed through to the format-specific estimator
+ * @return the estimate returned by the selected format's estimateMemory
+ */
+ public static long estimateSizeSparseInMemory(SparseBlock.Type type, long nrows, long ncols, double sparsity) {
+ switch( type ) {
+ case MCSR: return SparseBlockMCSR.estimateMemory(nrows, ncols, sparsity);
+ case CSR: return SparseBlockCSR.estimateMemory(nrows, ncols, sparsity);
+ case COO: return SparseBlockCOO.estimateMemory(nrows, ncols, sparsity);
+ default:
+ throw new RuntimeException("Unexpected sparse block type: "+type.toString());
+ }
+ }
}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9c422821/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java
index 378a3f4..6e1cded 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java
@@ -80,6 +80,32 @@ public class SparseBlockMCSR extends SparseBlock
public SparseBlockMCSR(int rlen, int clen) {
_rows = new SparseRow[rlen];
}
+
+ /**
+ * Get the estimated in-memory size of the sparse block in MCSR
+ * with the given dimensions w/o accounting for overallocation.
+ *
+ * @param nrows number of rows
+ * @param ncols number of columns
+ * @param sparsity fraction of non-zero cells (nnz is taken as sparsity*nrows*ncols)
+ * @return estimated size in bytes, capped at Long.MAX_VALUE
+ */
+ public static long estimateMemory(long nrows, long ncols, double sparsity) {
+ double cnnz = Math.max(SparseRow.initialCapacity, Math.ceil(sparsity*ncols)); //expected nnz per row, at least SparseRow.initialCapacity
+ double rlen = Math.min(nrows, Math.ceil(sparsity*nrows*ncols)); //number of non-empty rows, at most nnz
+
+ //Each sparse row has a fixed overhead of 8B (reference) + 32B (object) +
+ //12B (3 int members) + 32B (overhead int array) + 32B (overhead double array) = 116B.
+ //Each non-zero value requires 12B for the column-index/value pair.
+ //Overheads for arrays, objects, and references refer to 64bit JVMs.
+ //If nnz is smaller than the number of rows, only nnz rows are counted as non-empty (see rlen).
+ double size = 16; //object
+ size += rlen * (116 + cnnz * 12); //sparse rows
+ size += 32 + nrows * 8d; //references
+
+ //clamp the double estimate to Long.MAX_VALUE before the cast to long
+ return (long) Math.min(size, Long.MAX_VALUE);
+ }
///////////////////
//SparseBlock implementation
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9c422821/src/test/java/org/apache/sysml/test/integration/functions/sparse/SparseBlockMemEstimate.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/sparse/SparseBlockMemEstimate.java b/src/test/java/org/apache/sysml/test/integration/functions/sparse/SparseBlockMemEstimate.java
new file mode 100644
index 0000000..8d2fbd0
--- /dev/null
+++ b/src/test/java/org/apache/sysml/test/integration/functions/sparse/SparseBlockMemEstimate.java
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.test.integration.functions.sparse;
+
+import org.junit.Assert;
+import org.junit.Test;
+import org.apache.sysml.runtime.matrix.data.MatrixBlock;
+import org.apache.sysml.runtime.matrix.data.SparseBlock;
+import org.apache.sysml.runtime.matrix.data.SparseBlockFactory;
+import org.apache.sysml.test.integration.AutomatedTestBase;
+import org.apache.sysml.test.utils.TestUtils;
+
+/**
+ * This is a sparse matrix block component test for sparse block memory
+ * estimation functionality. It compares the MCSR, CSR, and COO estimates
+ * against each other and against the dense estimate for two sparsities.
+ */
+public class SparseBlockMemEstimate extends AutomatedTestBase
+{
+ private final static int rows = 662;
+ private final static int cols = 444;
+ private final static double sparsity1 = 0.39; //sparse case
+ private final static double sparsity2 = 0.0001; //ultra-sparse case
+
+ @Override
+ public void setUp() {
+ TestUtils.clearAssertionInformation();
+ }
+
+ @Test
+ public void testSparseBlockSparse() {
+ runSparseBlockMemoryTest(sparsity1);
+ }
+
+ @Test
+ public void testSparseBlockUltraSparse() {
+ runSparseBlockMemoryTest(sparsity2);
+ }
+
+ /**
+ * Checks that all sparse estimates are positive, do not exceed the dense estimate, and are ordered as expected.
+ *
+ * @param sparsity sparsity passed to the estimators; callers pass the constants sparsity1 or sparsity2
+ */
+ private void runSparseBlockMemoryTest( double sparsity)
+ {
+ double memMCSR = SparseBlockFactory.estimateSizeSparseInMemory(SparseBlock.Type.MCSR, rows, cols, sparsity);
+ double memCSR = SparseBlockFactory.estimateSizeSparseInMemory(SparseBlock.Type.CSR, rows, cols, sparsity);
+ double memCOO = SparseBlockFactory.estimateSizeSparseInMemory(SparseBlock.Type.COO, rows, cols, sparsity);
+ double memDense = MatrixBlock.estimateSizeDenseInMemory(rows, cols);
+
+ //check for non-positive estimates (every estimate must be > 0)
+ if( memMCSR <= 0 )
+ Assert.fail("SparseBlockMCSR memory estimate <= 0.");
+ if( memCSR <= 0 )
+ Assert.fail("SparseBlockCSR memory estimate <= 0.");
+ if( memCOO <= 0 )
+ Assert.fail("SparseBlockCOO memory estimate <= 0.");
+
+ //check against dense estimate (no sparse estimate may exceed it)
+ if( memMCSR > memDense )
+ Assert.fail("SparseBlockMCSR memory estimate larger than dense estimate.");
+ if( memCSR > memDense )
+ Assert.fail("SparseBlockCSR memory estimate larger than dense estimate.");
+ if( memCOO > memDense )
+ Assert.fail("SparseBlockCOO memory estimate larger than dense estimate.");
+
+ //check relations between the sparse estimates (exact == is intended: callers pass the constants)
+ if( sparsity == sparsity1 ) { //sparse: CSR must have the smallest estimate
+ if( memMCSR < memCSR )
+ Assert.fail("SparseBlockMCSR memory estimate smaller than SparseBlockCSR estimate.");
+ if( memCOO < memCSR )
+ Assert.fail("SparseBlockCOO memory estimate smaller than SparseBlockCSR estimate.");
+ }
+ else { //ultra-sparse: COO must have the smallest estimate
+ if( memMCSR < memCOO )
+ Assert.fail("SparseBlockMCSR memory estimate smaller than SparseBlockCOO estimate.");
+ if( memCSR < memCOO )
+ Assert.fail("SparseBlockCSR memory estimate smaller than SparseBlockCOO estimate.");
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9c422821/src/test_suites/java/org/apache/sysml/test/integration/functions/sparse/ZPackageSuite.java
----------------------------------------------------------------------
diff --git a/src/test_suites/java/org/apache/sysml/test/integration/functions/sparse/ZPackageSuite.java b/src/test_suites/java/org/apache/sysml/test/integration/functions/sparse/ZPackageSuite.java
index 633e152..01fc461 100644
--- a/src/test_suites/java/org/apache/sysml/test/integration/functions/sparse/ZPackageSuite.java
+++ b/src/test_suites/java/org/apache/sysml/test/integration/functions/sparse/ZPackageSuite.java
@@ -32,6 +32,7 @@ import org.junit.runners.Suite;
SparseBlockGetSet.class,
SparseBlockIndexRange.class,
SparseBlockIterator.class,
+ SparseBlockMemEstimate.class,
SparseBlockScan.class,
SparseBlockSize.class,
})
[3/5] incubator-systemml git commit: Fix error handling ultra-sparse
data generation (int overflow)
Posted by mb...@apache.org.
Fix error handling ultra-sparse data generation (int overflow)
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/10d1afc9
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/10d1afc9
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/10d1afc9
Branch: refs/heads/master
Commit: 10d1afc9dac5fe2c4615019e52113de8f9400754
Parents: 9cb7d55
Author: Matthias Boehm <mb...@us.ibm.com>
Authored: Fri Jan 22 20:54:05 2016 -0800
Committer: Matthias Boehm <mb...@us.ibm.com>
Committed: Sat Jan 23 16:08:07 2016 -0800
----------------------------------------------------------------------
.../runtime/matrix/data/LibMatrixDatagen.java | 27 +++++++++++---------
1 file changed, 15 insertions(+), 12 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/10d1afc9/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDatagen.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDatagen.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDatagen.java
index dcaaf1a..c190e50 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDatagen.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDatagen.java
@@ -127,19 +127,22 @@ public class LibMatrixDatagen
* @return
* @throws DMLRuntimeException
*/
- public static long[] computeNNZperBlock(long nrow, long ncol, int brlen, int bclen, double sparsity) throws DMLRuntimeException {
- int numBlocks = (int) (Math.ceil((double)nrow/brlen) * Math.ceil((double)ncol/bclen));
-
- // CURRENT:
- // Total #of NNZ is set to the expected value (nrow*ncol*sparsity).
- // TODO:
- // Instead of using the expected value, one should actually
- // treat NNZ as a random variable and accordingly generate a random value.
- long nnz = (long) Math.ceil (nrow * (ncol*sparsity));
-
- if ( numBlocks > Integer.MAX_VALUE ) {
- throw new DMLRuntimeException("A random matrix of size [" + nrow + "," + ncol + "] can not be created. Number of blocks (" + numBlocks + ") exceeds the maximum integer size. Try to increase the block size.");
+ public static long[] computeNNZperBlock(long nrow, long ncol, int brlen, int bclen, double sparsity)
+ throws DMLRuntimeException
+ {
+ long lnumBlocks = (long) (Math.ceil((double)nrow/brlen) * Math.ceil((double)ncol/bclen));
+
+ //sanity check max number of blocks (before cast to avoid overflow)
+ if ( lnumBlocks > Integer.MAX_VALUE ) {
+ throw new DMLRuntimeException("A random matrix of size [" + nrow + "," + ncol + "] can not be created. "
+ + "Number of blocks ("+lnumBlocks+") exceeds the maximum integer size. Try to increase the block size.");
}
+
+ // NOTE: Total #of NNZ is set to the expected value (nrow*ncol*sparsity).
+ // TODO: Instead of using the expected value, NNZ should be treated as a random variable
+
+ int numBlocks = (int) lnumBlocks;
+ long nnz = (long) Math.ceil (nrow * (ncol*sparsity));
// Compute block-level NNZ
long[] ret = new long[numBlocks];
[4/5] incubator-systemml git commit: Performance spark wsloss/wcemm
ultra-sparse (prefilter empty blocks)
Posted by mb...@apache.org.
Performance spark wsloss/wcemm ultra-sparse (prefilter empty blocks)
Ultra-sparse matrices are a common case for factorization algorithms.
Accordingly, this change introduces a prefilter for empty blocks on
wsloss and wcemm, which is safe because the full aggregate ensures result
correctness. In a scenario of wsloss over KDD2010 (15M x 30M, sparsity
9.4e-7), this reduced the total runtime from 70s to 39s, even though the
inputs are read from HDFS.
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/a19a14c0
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/a19a14c0
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/a19a14c0
Branch: refs/heads/master
Commit: a19a14c05e8034d5abf7f5c9ffbaea96f05b8017
Parents: 10d1afc
Author: Matthias Boehm <mb...@us.ibm.com>
Authored: Fri Jan 22 22:40:29 2016 -0800
Committer: Matthias Boehm <mb...@us.ibm.com>
Committed: Sat Jan 23 16:08:14 2016 -0800
----------------------------------------------------------------------
.../runtime/instructions/spark/QuaternarySPInstruction.java | 7 +++++++
1 file changed, 7 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/a19a14c0/src/main/java/org/apache/sysml/runtime/instructions/spark/QuaternarySPInstruction.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/spark/QuaternarySPInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/spark/QuaternarySPInstruction.java
index af65a9e..500cc01 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/spark/QuaternarySPInstruction.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/spark/QuaternarySPInstruction.java
@@ -53,6 +53,7 @@ import org.apache.sysml.runtime.instructions.cp.CPOperand;
import org.apache.sysml.runtime.instructions.cp.DoubleObject;
import org.apache.sysml.runtime.instructions.spark.data.LazyIterableIterator;
import org.apache.sysml.runtime.instructions.spark.data.PartitionedBroadcastMatrix;
+import org.apache.sysml.runtime.instructions.spark.functions.FilterNonEmptyBlocksFunction;
import org.apache.sysml.runtime.instructions.spark.utils.RDDAggregateUtils;
import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;
@@ -196,6 +197,12 @@ public class QuaternarySPInstruction extends ComputationSPInstruction
int brlen = inMc.getRowsPerBlock();
int bclen = inMc.getColsPerBlock();
+ //pre-filter empty blocks (ultra-sparse matrices) for full aggregates
+ //(map/redwsloss, map/redwcemm); safe because these ops produce a scalar
+ if( qop.wtype1 != null || qop.wtype4 != null ) {
+ in = in.filter(new FilterNonEmptyBlocksFunction());
+ }
+
//map-side only operation (one rdd input, two broadcasts)
if( WeightedSquaredLoss.OPCODE.equalsIgnoreCase(getOpcode())
|| WeightedSigmoid.OPCODE.equalsIgnoreCase(getOpcode())