You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemds.apache.org by mb...@apache.org on 2021/06/04 23:21:46 UTC
[systemds] branch master updated: [SYSTEMDS-2981] Fix remaining issues consistent blocksize handling
This is an automated email from the ASF dual-hosted git repository.
mboehm7 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/master by this push:
new 64a38eb [SYSTEMDS-2981] Fix remaining issues consistent blocksize handling
64a38eb is described below
commit 64a38eb6710dbbbca999db2333095de2ecc69e3a
Author: Matthias Boehm <mb...@gmail.com>
AuthorDate: Sat Jun 5 01:21:20 2021 +0200
[SYSTEMDS-2981] Fix remaining issues consistent blocksize handling
---
.../sysds/runtime/controlprogram/caching/CacheableData.java | 12 +++++-------
.../sysds/runtime/controlprogram/caching/FrameObject.java | 2 +-
.../sysds/runtime/controlprogram/caching/MatrixObject.java | 3 ++-
.../functions/transform/TransformCSVFrameEncodeReadTest.java | 3 ++-
4 files changed, 10 insertions(+), 10 deletions(-)
diff --git a/src/main/java/org/apache/sysds/runtime/controlprogram/caching/CacheableData.java b/src/main/java/org/apache/sysds/runtime/controlprogram/caching/CacheableData.java
index eeb48c9..f549d15 100644
--- a/src/main/java/org/apache/sysds/runtime/controlprogram/caching/CacheableData.java
+++ b/src/main/java/org/apache/sysds/runtime/controlprogram/caching/CacheableData.java
@@ -826,7 +826,7 @@ public abstract class CacheableData<T extends CacheBlock> extends Data
boolean eqScheme = IOUtilFunctions.isSameFileScheme(
new Path(_hdfsFileName), new Path(fName));
boolean eqFormat = isEqualOutputFormat(outputFormat);
- boolean eqBlksize = outputFormat.equals("binary")
+ boolean eqBlksize = (outputFormat == null || outputFormat.equals("binary"))
&& ConfigurationManager.getBlocksize() != getBlocksize();
//actual export (note: no direct transfer of local copy in order to ensure blocking (and hence, parallelism))
@@ -841,25 +841,23 @@ public abstract class CacheableData<T extends CacheBlock> extends Data
{
//read data from HDFS if required (never read before), this applies only to pWrite w/ different output formats
//note: for large rdd outputs, we compile dedicated writespinstructions (no need to handle this here)
- try
- {
+ try {
if( getRDDHandle()==null || getRDDHandle().allowsShortCircuitRead() )
_data = readBlobFromHDFS( _hdfsFileName );
else if( getRDDHandle() != null )
_data = readBlobFromRDD( getRDDHandle(), new MutableBoolean() );
else if(!federatedWrite)
_data = readBlobFromFederated( getFedMapping() );
-
setDirty(false);
+ refreshMetaData(); //e.g., after unknown csv read
}
catch (IOException e) {
throw new DMLRuntimeException("Reading of " + _hdfsFileName + " ("+hashCode()+") failed.", e);
}
}
//get object from cache
- if(!federatedWrite){
-
- if( _data == null )
+ if(!federatedWrite) {
+ if( _data == null )
getCache();
acquire( false, _data==null ); //incl. read matrix if evicted
}
diff --git a/src/main/java/org/apache/sysds/runtime/controlprogram/caching/FrameObject.java b/src/main/java/org/apache/sysds/runtime/controlprogram/caching/FrameObject.java
index 388238c..5eae986 100644
--- a/src/main/java/org/apache/sysds/runtime/controlprogram/caching/FrameObject.java
+++ b/src/main/java/org/apache/sysds/runtime/controlprogram/caching/FrameObject.java
@@ -276,7 +276,7 @@ public class FrameObject extends CacheableData<FrameBlock>
FileFormat fmt = (ofmt != null ? FileFormat.safeValueOf(ofmt) : iimd.getFileFormat());
FrameWriter writer = FrameWriterFactory.createFrameWriter(fmt, fprop);
- writer.writeFrameToHDFS(_data, fname, getNumRows(), getNumColumns());
+ writer.writeFrameToHDFS(_data, fname, getNumRows(), getNumColumns());
}
@Override
diff --git a/src/main/java/org/apache/sysds/runtime/controlprogram/caching/MatrixObject.java b/src/main/java/org/apache/sysds/runtime/controlprogram/caching/MatrixObject.java
index 65f533b..04b482a 100644
--- a/src/main/java/org/apache/sysds/runtime/controlprogram/caching/MatrixObject.java
+++ b/src/main/java/org/apache/sysds/runtime/controlprogram/caching/MatrixObject.java
@@ -509,7 +509,8 @@ public class MatrixObject extends CacheableData<MatrixBlock>
//obtain matrix block from RDD
int rlen = (int)mc.getRows();
int clen = (int)mc.getCols();
- int blen = mc.getBlocksize();
+ int blen = mc.getBlocksize() > 0 ? mc.getBlocksize() :
+ ConfigurationManager.getBlocksize();
long nnz = mc.getNonZerosBound();
//guarded rdd collect
diff --git a/src/test/java/org/apache/sysds/test/functions/transform/TransformCSVFrameEncodeReadTest.java b/src/test/java/org/apache/sysds/test/functions/transform/TransformCSVFrameEncodeReadTest.java
index c2f123b..cb4e5e7 100644
--- a/src/test/java/org/apache/sysds/test/functions/transform/TransformCSVFrameEncodeReadTest.java
+++ b/src/test/java/org/apache/sysds/test/functions/transform/TransformCSVFrameEncodeReadTest.java
@@ -126,6 +126,7 @@ public class TransformCSVFrameEncodeReadTest extends AutomatedTestBase
try
{
getAndLoadTestConfiguration(TEST_NAME1);
+ setOutputBuffering(true);
String HOME = SCRIPT_DIR + TEST_DIR;
int nrows = subset ? 4 : 13;
@@ -133,7 +134,7 @@ public class TransformCSVFrameEncodeReadTest extends AutomatedTestBase
programArgs = new String[]{"-args",
DATASET_DIR + DATASET, String.valueOf(nrows), output("R") };
- String stdOut = runTest(null).toString();
+ String stdOut = runTest(null).toString();
//read input/output and compare
FrameReader reader2 = parRead ?