You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2017/05/25 21:49:10 UTC
[1/2] incubator-systemml git commit: [SYSTEMML-1627] Fix guarded
parallelize of matrices collected from rdds
Repository: incubator-systemml
Updated Branches:
refs/heads/master 841a4d030 -> c697c30eb
[SYSTEMML-1627] Fix guarded parallelize of matrices collected from rdds
This patch fixes special cases of rdd construction, where rdds are
created from in-memory matrices that have been previously collected from
rdds. Specifically, this targets guarded parallelize, which exports
matrices and creates the rdd from the hadoop file. So far we only
exported dirty in-memory matrices but not collected matrices (which are
not marked as dirty). Consequently, subsequent rdd operations fail with
file not found exceptions. This happens, for example, in special cases,
where unary operations (1 input, 1 output) run in CP, but binary
operations (2 inputs, 1 output) run in SPARK and we have to fall back to
guarded parallelize.
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/a362bce0
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/a362bce0
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/a362bce0
Branch: refs/heads/master
Commit: a362bce0e11ffcd163f03c0cfdf4598eed6378f3
Parents: 841a4d0
Author: Matthias Boehm <mb...@gmail.com>
Authored: Wed May 24 18:57:07 2017 -0700
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Thu May 25 14:48:57 2017 -0700
----------------------------------------------------------------------
.../sysml/runtime/controlprogram/caching/CacheableData.java | 6 ++----
.../sysml/runtime/controlprogram/caching/MatrixObject.java | 6 ++++--
.../runtime/controlprogram/context/SparkExecutionContext.java | 3 +--
.../functions/misc/ValueTypeMatrixScalarBuiltinTest.java | 2 +-
4 files changed, 8 insertions(+), 9 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/a362bce0/src/main/java/org/apache/sysml/runtime/controlprogram/caching/CacheableData.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/caching/CacheableData.java b/src/main/java/org/apache/sysml/runtime/controlprogram/caching/CacheableData.java
index c1a024a..fd6fa16 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/caching/CacheableData.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/caching/CacheableData.java
@@ -432,10 +432,8 @@ public abstract class CacheableData<T extends CacheBlock> extends Data
_data = readBlobFromRDD( getRDDHandle(), writeStatus );
//mark for initial local write (prevent repeated execution of rdd operations)
- if( writeStatus.booleanValue() )
- _requiresLocalWrite = CACHING_WRITE_CACHE_ON_READ;
- else
- _requiresLocalWrite = true;
+ _requiresLocalWrite = writeStatus.booleanValue() ?
+ CACHING_WRITE_CACHE_ON_READ : true;
}
setDirty(false);
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/a362bce0/src/main/java/org/apache/sysml/runtime/controlprogram/caching/MatrixObject.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/caching/MatrixObject.java b/src/main/java/org/apache/sysml/runtime/controlprogram/caching/MatrixObject.java
index 94bdb2d..4105351 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/caching/MatrixObject.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/caching/MatrixObject.java
@@ -436,10 +436,12 @@ public class MatrixObject extends CacheableData<MatrixBlock>
+ ", dimensions: [" + mc.getRows() + ", " + mc.getCols() + ", " + mc.getNonZeros() + "]");
begin = System.currentTimeMillis();
}
-
- double sparsity = ( mc.getNonZeros() >= 0 ? ((double)mc.getNonZeros())/(mc.getRows()*mc.getCols()) : 1.0d) ;
+
+ //read matrix and maintain meta data
+ double sparsity = (mc.getNonZeros() >= 0 ? ((double)mc.getNonZeros())/(mc.getRows()*mc.getCols()) : 1.0d);
MatrixBlock newData = DataConverter.readMatrixFromHDFS(fname, iimd.getInputInfo(), rlen, clen,
mc.getRowsPerBlock(), mc.getColsPerBlock(), sparsity, getFileFormatProperties());
+ setHDFSFileExists(true);
//sanity check correct output
if( newData == null )
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/a362bce0/src/main/java/org/apache/sysml/runtime/controlprogram/context/SparkExecutionContext.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/context/SparkExecutionContext.java b/src/main/java/org/apache/sysml/runtime/controlprogram/context/SparkExecutionContext.java
index 92946ff..1dd3600 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/context/SparkExecutionContext.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/context/SparkExecutionContext.java
@@ -356,9 +356,8 @@ public class SparkExecutionContext extends ExecutionContext
boolean fromFile = false;
if( !OptimizerUtils.checkSparkCollectMemoryBudget(mc, 0) || !_parRDDs.reserve(
OptimizerUtils.estimatePartitionedSizeExactSparsity(mc))) {
- if( mo.isDirty() ) { //write only if necessary
+ if( mo.isDirty() || !mo.isHDFSFileExists() ) //write if necessary
mo.exportData();
- }
rdd = sc.hadoopFile( mo.getFileName(), inputInfo.inputFormatClass, inputInfo.inputKeyClass, inputInfo.inputValueClass);
rdd = SparkUtils.copyBinaryBlockMatrix((JavaPairRDD<MatrixIndexes, MatrixBlock>)rdd); //cp is workaround for read bug
fromFile = true;
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/a362bce0/src/test/java/org/apache/sysml/test/integration/functions/misc/ValueTypeMatrixScalarBuiltinTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/misc/ValueTypeMatrixScalarBuiltinTest.java b/src/test/java/org/apache/sysml/test/integration/functions/misc/ValueTypeMatrixScalarBuiltinTest.java
index 61ffa7d..e2124b5 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/misc/ValueTypeMatrixScalarBuiltinTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/misc/ValueTypeMatrixScalarBuiltinTest.java
@@ -112,7 +112,7 @@ public class ValueTypeMatrixScalarBuiltinTest extends AutomatedTestBase
loadTestConfiguration(getTestConfiguration(testName));
//setup arguments and run test
- String RI_HOME = SCRIPT_DIR + TEST_DIR;
+ String RI_HOME = SCRIPT_DIR + TEST_DIR;
fullDMLScriptName = RI_HOME + testName + ".dml";
programArgs = new String[]{"-args",
vtIn==ValueType.DOUBLE ? "7.7" : "7", output("R")};
[2/2] incubator-systemml git commit: [MINOR] Fix codegen row template
construction (input ordering w/ vma)
Posted by mb...@apache.org.
[MINOR] Fix codegen row template construction (input ordering w/ vma)
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/c697c30e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/c697c30e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/c697c30e
Branch: refs/heads/master
Commit: c697c30ebbfb24872f379523793a2c121553bb1c
Parents: a362bce
Author: Matthias Boehm <mb...@gmail.com>
Authored: Thu May 25 14:36:38 2017 -0700
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Thu May 25 14:48:58 2017 -0700
----------------------------------------------------------------------
.../java/org/apache/sysml/hops/codegen/template/TemplateRow.java | 1 +
1 file changed, 1 insertion(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/c697c30e/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java
index 3f947c8..3979aae 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java
@@ -200,6 +200,7 @@ public class TemplateRow extends TemplateBase
inHops.add(hop.getInput().get(0).getInput().get(0));
out = new CNodeBinary(cdata1, cdata2, BinType.VECT_MULT_ADD);
+ inHops2.put("X", hop.getInput().get(0).getInput().get(0));
}
else
{