You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemds.apache.org by mb...@apache.org on 2021/06/04 23:21:46 UTC

[systemds] branch master updated: [SYSTEMDS-2981] Fix remaining issues consistent blocksize handling

This is an automated email from the ASF dual-hosted git repository.

mboehm7 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/master by this push:
     new 64a38eb  [SYSTEMDS-2981] Fix remaining issues consistent blocksize handling
64a38eb is described below

commit 64a38eb6710dbbbca999db2333095de2ecc69e3a
Author: Matthias Boehm <mb...@gmail.com>
AuthorDate: Sat Jun 5 01:21:20 2021 +0200

    [SYSTEMDS-2981] Fix remaining issues consistent blocksize handling
---
 .../sysds/runtime/controlprogram/caching/CacheableData.java  | 12 +++++-------
 .../sysds/runtime/controlprogram/caching/FrameObject.java    |  2 +-
 .../sysds/runtime/controlprogram/caching/MatrixObject.java   |  3 ++-
 .../functions/transform/TransformCSVFrameEncodeReadTest.java |  3 ++-
 4 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/main/java/org/apache/sysds/runtime/controlprogram/caching/CacheableData.java b/src/main/java/org/apache/sysds/runtime/controlprogram/caching/CacheableData.java
index eeb48c9..f549d15 100644
--- a/src/main/java/org/apache/sysds/runtime/controlprogram/caching/CacheableData.java
+++ b/src/main/java/org/apache/sysds/runtime/controlprogram/caching/CacheableData.java
@@ -826,7 +826,7 @@ public abstract class CacheableData<T extends CacheBlock> extends Data
 		boolean eqScheme = IOUtilFunctions.isSameFileScheme(
 			new Path(_hdfsFileName), new Path(fName));
 		boolean eqFormat = isEqualOutputFormat(outputFormat);
-		boolean eqBlksize = outputFormat.equals("binary")
+		boolean eqBlksize = (outputFormat == null || outputFormat.equals("binary"))
 			&& ConfigurationManager.getBlocksize() != getBlocksize();
 		
 		//actual export (note: no direct transfer of local copy in order to ensure blocking (and hence, parallelism))
@@ -841,25 +841,23 @@ public abstract class CacheableData<T extends CacheBlock> extends Data
 			{
 				//read data from HDFS if required (never read before), this applies only to pWrite w/ different output formats
 				//note: for large rdd outputs, we compile dedicated writespinstructions (no need to handle this here) 
-				try
-				{
+				try {
 					if( getRDDHandle()==null || getRDDHandle().allowsShortCircuitRead() )
 						_data = readBlobFromHDFS( _hdfsFileName );
 					else if( getRDDHandle() != null )
 						_data = readBlobFromRDD( getRDDHandle(), new MutableBoolean() );
 					else if(!federatedWrite)
 						_data = readBlobFromFederated( getFedMapping() );
-					
 					setDirty(false);
+					refreshMetaData(); //e.g., after unknown csv read
 				}
 				catch (IOException e) {
 					throw new DMLRuntimeException("Reading of " + _hdfsFileName + " ("+hashCode()+") failed.", e);
 				}
 			}
 			//get object from cache
-			if(!federatedWrite){
-
-				if(  _data == null )
+			if(!federatedWrite) {
+				if( _data == null )
 					getCache();
 				acquire( false, _data==null ); //incl. read matrix if evicted
 			}
diff --git a/src/main/java/org/apache/sysds/runtime/controlprogram/caching/FrameObject.java b/src/main/java/org/apache/sysds/runtime/controlprogram/caching/FrameObject.java
index 388238c..5eae986 100644
--- a/src/main/java/org/apache/sysds/runtime/controlprogram/caching/FrameObject.java
+++ b/src/main/java/org/apache/sysds/runtime/controlprogram/caching/FrameObject.java
@@ -276,7 +276,7 @@ public class FrameObject extends CacheableData<FrameBlock>
 		FileFormat fmt = (ofmt != null ? FileFormat.safeValueOf(ofmt) : iimd.getFileFormat());
 		
 		FrameWriter writer = FrameWriterFactory.createFrameWriter(fmt, fprop);
-		writer.writeFrameToHDFS(_data, fname,  getNumRows(), getNumColumns());
+		writer.writeFrameToHDFS(_data, fname, getNumRows(), getNumColumns());
 	}
 
 	@Override
diff --git a/src/main/java/org/apache/sysds/runtime/controlprogram/caching/MatrixObject.java b/src/main/java/org/apache/sysds/runtime/controlprogram/caching/MatrixObject.java
index 65f533b..04b482a 100644
--- a/src/main/java/org/apache/sysds/runtime/controlprogram/caching/MatrixObject.java
+++ b/src/main/java/org/apache/sysds/runtime/controlprogram/caching/MatrixObject.java
@@ -509,7 +509,8 @@ public class MatrixObject extends CacheableData<MatrixBlock>
 			//obtain matrix block from RDD
 			int rlen = (int)mc.getRows();
 			int clen = (int)mc.getCols();
-			int blen = mc.getBlocksize();
+			int blen = mc.getBlocksize() > 0 ? mc.getBlocksize() : 
+				ConfigurationManager.getBlocksize();
 			long nnz = mc.getNonZerosBound();
 			
 			//guarded rdd collect 
diff --git a/src/test/java/org/apache/sysds/test/functions/transform/TransformCSVFrameEncodeReadTest.java b/src/test/java/org/apache/sysds/test/functions/transform/TransformCSVFrameEncodeReadTest.java
index c2f123b..cb4e5e7 100644
--- a/src/test/java/org/apache/sysds/test/functions/transform/TransformCSVFrameEncodeReadTest.java
+++ b/src/test/java/org/apache/sysds/test/functions/transform/TransformCSVFrameEncodeReadTest.java
@@ -126,6 +126,7 @@ public class TransformCSVFrameEncodeReadTest extends AutomatedTestBase
 		try
 		{
 			getAndLoadTestConfiguration(TEST_NAME1);
+			setOutputBuffering(true);
 			
 			String HOME = SCRIPT_DIR + TEST_DIR;
 			int nrows = subset ? 4 : 13;
@@ -133,7 +134,7 @@ public class TransformCSVFrameEncodeReadTest extends AutomatedTestBase
 			programArgs = new String[]{"-args", 
 				DATASET_DIR + DATASET, String.valueOf(nrows), output("R") };
 			
-			String stdOut = runTest(null).toString(); 
+			String stdOut = runTest(null).toString();
 			
 			//read input/output and compare
 			FrameReader reader2 = parRead ?