You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2016/09/11 16:36:46 UTC

[1/2] incubator-systemml git commit: [SYSTEMML-593] Fix mlcontext/jmlc filename construction, incl cleanup

Repository: incubator-systemml
Updated Branches:
  refs/heads/master 4d5fc9d16 -> f0c91ed0d


[SYSTEMML-593] Fix mlcontext/jmlc filename construction, incl cleanup

Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/245488c3
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/245488c3
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/245488c3

Branch: refs/heads/master
Commit: 245488c3b720332178b6c279a72daa75ebf6947f
Parents: 4d5fc9d
Author: Matthias Boehm <mb...@us.ibm.com>
Authored: Sun Sep 11 00:30:00 2016 +0200
Committer: Matthias Boehm <mb...@us.ibm.com>
Committed: Sun Sep 11 00:30:35 2016 +0200

----------------------------------------------------------------------
 .../java/org/apache/sysml/api/MLContext.java    |  10 +-
 .../apache/sysml/api/jmlc/PreparedScript.java   |   8 +-
 .../sysml/api/mlcontext/BinaryBlockFrame.java   |   5 +-
 .../sysml/api/mlcontext/BinaryBlockMatrix.java  |   5 +-
 .../sysml/api/mlcontext/FrameMetadata.java      |   5 +-
 .../api/mlcontext/MLContextConversionUtil.java  | 122 +++++++------------
 .../sysml/api/mlcontext/MLContextUtil.java      |  18 ---
 .../sysml/api/mlcontext/MatrixMetadata.java     |   5 +-
 .../org/apache/sysml/hops/OptimizerUtils.java   |  20 +++
 .../java/org/apache/sysml/lops/compile/Dag.java |  55 +++++----
 10 files changed, 117 insertions(+), 136 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/245488c3/src/main/java/org/apache/sysml/api/MLContext.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/MLContext.java b/src/main/java/org/apache/sysml/api/MLContext.java
index 649ffee..bce0378 100644
--- a/src/main/java/org/apache/sysml/api/MLContext.java
+++ b/src/main/java/org/apache/sysml/api/MLContext.java
@@ -590,7 +590,7 @@ public class MLContext {
 		FrameObject fo = null;
 		if( format.equals("csv") ) {
 			CSVFileFormatProperties csvprops = (props!=null) ? (CSVFileFormatProperties)props: new CSVFileFormatProperties();
-			fo = new FrameObject(null, new MatrixFormatMetaData(mc, OutputInfo.CSVOutputInfo, InputInfo.CSVInputInfo));
+			fo = new FrameObject(OptimizerUtils.getUniqueTempFileName(), new MatrixFormatMetaData(mc, OutputInfo.CSVOutputInfo, InputInfo.CSVInputInfo));
 			fo.setFileFormatProperties(csvprops);
 		}
 		else if( format.equals("text") ) {
@@ -624,7 +624,7 @@ public class MLContext {
 			_inVarnames = new ArrayList<String>();
 		
 		MatrixCharacteristics mc = new MatrixCharacteristics(rlen, clen, OptimizerUtils.DEFAULT_BLOCKSIZE, OptimizerUtils.DEFAULT_BLOCKSIZE, -1);
-		FrameObject fo = new FrameObject(null, new MatrixFormatMetaData(mc, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo));
+		FrameObject fo = new FrameObject(OptimizerUtils.getUniqueTempFileName(), new MatrixFormatMetaData(mc, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo));
 		
 		if(props != null)
 			fo.setFileFormatProperties(props);
@@ -708,7 +708,8 @@ public class MLContext {
 		// Bug in Spark is messing up blocks and indexes due to too eager reuse of data structures
 		JavaPairRDD<MatrixIndexes, MatrixBlock> copyRDD = rdd.mapToPair( new CopyBlockPairFunction() );
 		
-		MatrixObject mo = new MatrixObject(ValueType.DOUBLE, "temp", new MatrixFormatMetaData(mc, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo));
+		MatrixObject mo = new MatrixObject(ValueType.DOUBLE, OptimizerUtils.getUniqueTempFileName(), 
+				new MatrixFormatMetaData(mc, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo));
 		mo.setRDDHandle(new RDDObject(copyRDD, varName));
 		_variables.put(varName, mo);
 		_inVarnames.add(varName);
@@ -725,7 +726,8 @@ public class MLContext {
 			_variables = new LocalVariableMap();
 		if(_inVarnames == null)
 			_inVarnames = new ArrayList<String>();
-		MatrixObject mo = new MatrixObject(ValueType.DOUBLE, "temp", new MatrixFormatMetaData(mc, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo));
+		MatrixObject mo = new MatrixObject(ValueType.DOUBLE, OptimizerUtils.getUniqueTempFileName(), 
+				new MatrixFormatMetaData(mc, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo));
 		mo.acquireModify(mb); 
 		mo.release();
 		_variables.put(varName, mo);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/245488c3/src/main/java/org/apache/sysml/api/jmlc/PreparedScript.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/jmlc/PreparedScript.java b/src/main/java/org/apache/sysml/api/jmlc/PreparedScript.java
index 4ef5add..e06810b 100644
--- a/src/main/java/org/apache/sysml/api/jmlc/PreparedScript.java
+++ b/src/main/java/org/apache/sysml/api/jmlc/PreparedScript.java
@@ -29,6 +29,7 @@ import java.util.Map.Entry;
 
 import org.apache.sysml.api.DMLException;
 import org.apache.sysml.conf.ConfigurationManager;
+import org.apache.sysml.hops.OptimizerUtils;
 import org.apache.sysml.parser.Expression.ValueType;
 import org.apache.sysml.runtime.controlprogram.LocalVariableMap;
 import org.apache.sysml.runtime.controlprogram.Program;
@@ -234,13 +235,12 @@ public class PreparedScript
 		if( !_inVarnames.contains(varname) )
 			throw new DMLException("Unspecified input variable: "+varname);
 				
-		String scratch_space = ConfigurationManager.getScratchSpace();
 		int blocksize = ConfigurationManager.getBlocksize();
 		
 		//create new matrix object
 		MatrixCharacteristics mc = new MatrixCharacteristics(matrix.getNumRows(), matrix.getNumColumns(), blocksize, blocksize);
 		MatrixFormatMetaData meta = new MatrixFormatMetaData(mc, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo);
-		MatrixObject mo = new MatrixObject(ValueType.DOUBLE, scratch_space+"/"+varname, meta);
+		MatrixObject mo = new MatrixObject(ValueType.DOUBLE, OptimizerUtils.getUniqueTempFileName(), meta);
 		mo.acquireModify(matrix); 
 		mo.release();
 		
@@ -342,13 +342,11 @@ public class PreparedScript
 	{
 		if( !_inVarnames.contains(varname) )
 			throw new DMLException("Unspecified input variable: "+varname);
-				
-		String scratch_space = ConfigurationManager.getScratchSpace();
 		
 		//create new frame object
 		MatrixCharacteristics mc = new MatrixCharacteristics(frame.getNumRows(), frame.getNumColumns(), -1, -1);
 		MatrixFormatMetaData meta = new MatrixFormatMetaData(mc, OutputInfo.BinaryCellOutputInfo, InputInfo.BinaryCellInputInfo);
-		FrameObject fo = new FrameObject(scratch_space+"/"+varname, meta);
+		FrameObject fo = new FrameObject(OptimizerUtils.getUniqueTempFileName(), meta);
 		fo.acquireModify(frame);
 		fo.release();
 		

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/245488c3/src/main/java/org/apache/sysml/api/mlcontext/BinaryBlockFrame.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/mlcontext/BinaryBlockFrame.java b/src/main/java/org/apache/sysml/api/mlcontext/BinaryBlockFrame.java
index 88b1b38..29871ad 100644
--- a/src/main/java/org/apache/sysml/api/mlcontext/BinaryBlockFrame.java
+++ b/src/main/java/org/apache/sysml/api/mlcontext/BinaryBlockFrame.java
@@ -21,6 +21,7 @@ package org.apache.sysml.api.mlcontext;
 
 import org.apache.spark.api.java.JavaPairRDD;
 import org.apache.spark.sql.DataFrame;
+import org.apache.sysml.conf.ConfigurationManager;
 import org.apache.sysml.runtime.DMLRuntimeException;
 import org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext;
 import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
@@ -60,8 +61,8 @@ public class BinaryBlockFrame {
 	 *            the number of columns
 	 */
 	public BinaryBlockFrame(DataFrame dataFrame, long numRows, long numCols) {
-		this(dataFrame, new FrameMetadata(numRows, numCols, MLContextUtil.defaultBlockSize(),
-				MLContextUtil.defaultBlockSize()));
+		this(dataFrame, new FrameMetadata(numRows, numCols, 
+				ConfigurationManager.getBlocksize(), ConfigurationManager.getBlocksize()));
 	}
 
 	/**

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/245488c3/src/main/java/org/apache/sysml/api/mlcontext/BinaryBlockMatrix.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/mlcontext/BinaryBlockMatrix.java b/src/main/java/org/apache/sysml/api/mlcontext/BinaryBlockMatrix.java
index ffa8a11..70cb259 100644
--- a/src/main/java/org/apache/sysml/api/mlcontext/BinaryBlockMatrix.java
+++ b/src/main/java/org/apache/sysml/api/mlcontext/BinaryBlockMatrix.java
@@ -21,6 +21,7 @@ package org.apache.sysml.api.mlcontext;
 
 import org.apache.spark.api.java.JavaPairRDD;
 import org.apache.spark.sql.DataFrame;
+import org.apache.sysml.conf.ConfigurationManager;
 import org.apache.sysml.runtime.DMLRuntimeException;
 import org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext;
 import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
@@ -61,8 +62,8 @@ public class BinaryBlockMatrix {
 	 *            the number of columns
 	 */
 	public BinaryBlockMatrix(DataFrame dataFrame, long numRows, long numCols) {
-		this(dataFrame, new MatrixMetadata(numRows, numCols, MLContextUtil.defaultBlockSize(),
-				MLContextUtil.defaultBlockSize()));
+		this(dataFrame, new MatrixMetadata(numRows, numCols, 
+				ConfigurationManager.getBlocksize(), ConfigurationManager.getBlocksize()));
 	}
 
 	/**

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/245488c3/src/main/java/org/apache/sysml/api/mlcontext/FrameMetadata.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/mlcontext/FrameMetadata.java b/src/main/java/org/apache/sysml/api/mlcontext/FrameMetadata.java
index 5aabd80..7dd20f1 100644
--- a/src/main/java/org/apache/sysml/api/mlcontext/FrameMetadata.java
+++ b/src/main/java/org/apache/sysml/api/mlcontext/FrameMetadata.java
@@ -19,6 +19,7 @@
 
 package org.apache.sysml.api.mlcontext;
 
+import org.apache.sysml.conf.ConfigurationManager;
 import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
 
 /**
@@ -632,8 +633,8 @@ public class FrameMetadata extends Metadata {
 
 		long nr = (numRows == null) ? -1 : numRows;
 		long nc = (numColumns == null) ? -1 : numColumns;
-		int nrpb = (numRowsPerBlock == null) ? MLContextUtil.defaultBlockSize() : numRowsPerBlock;
-		int ncpb = (numColumnsPerBlock == null) ? MLContextUtil.defaultBlockSize() : numColumnsPerBlock;
+		int nrpb = (numRowsPerBlock == null) ? ConfigurationManager.getBlocksize() : numRowsPerBlock;
+		int ncpb = (numColumnsPerBlock == null) ? ConfigurationManager.getBlocksize() : numColumnsPerBlock;
 		long nnz = (numNonZeros == null) ? -1 : numNonZeros;
 		MatrixCharacteristics mc = new MatrixCharacteristics(nr, nc, nrpb, ncpb, nnz);
 		return mc;

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/245488c3/src/main/java/org/apache/sysml/api/mlcontext/MLContextConversionUtil.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/mlcontext/MLContextConversionUtil.java b/src/main/java/org/apache/sysml/api/mlcontext/MLContextConversionUtil.java
index 62a4f60..15aa15e 100644
--- a/src/main/java/org/apache/sysml/api/mlcontext/MLContextConversionUtil.java
+++ b/src/main/java/org/apache/sysml/api/mlcontext/MLContextConversionUtil.java
@@ -41,6 +41,8 @@ import org.apache.spark.sql.Row;
 import org.apache.spark.sql.SQLContext;
 import org.apache.spark.sql.types.StructType;
 import org.apache.sysml.api.MLContextProxy;
+import org.apache.sysml.conf.ConfigurationManager;
+import org.apache.sysml.hops.OptimizerUtils;
 import org.apache.sysml.parser.Expression.ValueType;
 import org.apache.sysml.runtime.DMLRuntimeException;
 import org.apache.sysml.runtime.controlprogram.caching.CacheException;
@@ -106,18 +108,13 @@ public class MLContextConversionUtil {
 			MatrixMetadata matrixMetadata) {
 		try {
 			MatrixBlock matrixBlock = DataConverter.convertToMatrixBlock(doubleMatrix);
-			MatrixCharacteristics matrixCharacteristics;
-			if (matrixMetadata != null) {
-				matrixCharacteristics = matrixMetadata.asMatrixCharacteristics();
-			} else {
-				matrixCharacteristics = new MatrixCharacteristics(matrixBlock.getNumRows(), matrixBlock.getNumColumns(),
-						MLContextUtil.defaultBlockSize(), MLContextUtil.defaultBlockSize());
-			}
+			MatrixCharacteristics mc = (matrixMetadata != null) ? 
+					matrixMetadata.asMatrixCharacteristics() : new MatrixCharacteristics(matrixBlock.getNumRows(), 
+					matrixBlock.getNumColumns(), ConfigurationManager.getBlocksize(), ConfigurationManager.getBlocksize());
 
-			MatrixFormatMetaData meta = new MatrixFormatMetaData(matrixCharacteristics,
-					OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo);
-			MatrixObject matrixObject = new MatrixObject(ValueType.DOUBLE,
-					MLContextUtil.scratchSpace() + "/" + variableName, meta);
+			MatrixObject matrixObject = new MatrixObject(ValueType.DOUBLE, OptimizerUtils.getUniqueTempFileName(), 
+					new MatrixFormatMetaData(mc, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo));
+			
 			matrixObject.acquireModify(matrixBlock);
 			matrixObject.release();
 			return matrixObject;
@@ -173,16 +170,10 @@ public class MLContextConversionUtil {
 	public static MatrixObject matrixBlockToMatrixObject(String variableName, MatrixBlock matrixBlock,
 			MatrixMetadata matrixMetadata) {
 		try {
-			MatrixCharacteristics matrixCharacteristics;
-			if (matrixMetadata != null) {
-				matrixCharacteristics = matrixMetadata.asMatrixCharacteristics();
-			} else {
-				matrixCharacteristics = new MatrixCharacteristics();
-			}
-			MatrixFormatMetaData mtd = new MatrixFormatMetaData(matrixCharacteristics, OutputInfo.BinaryBlockOutputInfo,
-					InputInfo.BinaryBlockInputInfo);
-			MatrixObject matrixObject = new MatrixObject(ValueType.DOUBLE,
-					MLContextUtil.scratchSpace() + "/" + variableName, mtd);
+			MatrixCharacteristics mc = (matrixMetadata != null) ? 
+					matrixMetadata.asMatrixCharacteristics() : new MatrixCharacteristics();
+			MatrixObject matrixObject = new MatrixObject(ValueType.DOUBLE, OptimizerUtils.getUniqueTempFileName(), 
+					new MatrixFormatMetaData(mc, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo));
 			matrixObject.acquireModify(matrixBlock);
 			matrixObject.release();
 			return matrixObject;
@@ -213,7 +204,7 @@ public class MLContextConversionUtil {
 			}
 			MatrixFormatMetaData mtd = new MatrixFormatMetaData(matrixCharacteristics, OutputInfo.BinaryBlockOutputInfo,
 					InputInfo.BinaryBlockInputInfo);
-			FrameObject frameObject = new FrameObject(MLContextUtil.scratchSpace() + "/" + variableName, mtd);
+			FrameObject frameObject = new FrameObject(OptimizerUtils.getUniqueTempFileName(), mtd);
 			frameObject.acquireModify(frameBlock);
 			frameObject.release();
 			return frameObject;
@@ -256,18 +247,12 @@ public class MLContextConversionUtil {
 	public static MatrixObject binaryBlocksToMatrixObject(String variableName,
 			JavaPairRDD<MatrixIndexes, MatrixBlock> binaryBlocks, MatrixMetadata matrixMetadata) {
 
-		MatrixCharacteristics matrixCharacteristics;
-		if (matrixMetadata != null) {
-			matrixCharacteristics = matrixMetadata.asMatrixCharacteristics();
-		} else {
-			matrixCharacteristics = new MatrixCharacteristics();
-		}
-
+		MatrixCharacteristics mc = (matrixMetadata != null) ?
+			matrixMetadata.asMatrixCharacteristics() : new MatrixCharacteristics();
 		JavaPairRDD<MatrixIndexes, MatrixBlock> javaPairRdd = binaryBlocks.mapToPair(new CopyBlockPairFunction());
-
-		MatrixObject matrixObject = new MatrixObject(ValueType.DOUBLE,
-				MLContextUtil.scratchSpace() + "/" + "temp_" + System.nanoTime(), new MatrixFormatMetaData(
-						matrixCharacteristics, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo));
+		
+		MatrixObject matrixObject = new MatrixObject(ValueType.DOUBLE, OptimizerUtils.getUniqueTempFileName(),
+				new MatrixFormatMetaData(mc, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo));
 		matrixObject.setRDDHandle(new RDDObject(javaPairRdd, variableName));
 		return matrixObject;
 	}
@@ -304,17 +289,11 @@ public class MLContextConversionUtil {
 	public static FrameObject binaryBlocksToFrameObject(String variableName, JavaPairRDD<Long, FrameBlock> binaryBlocks,
 			FrameMetadata frameMetadata) {
 
-		MatrixCharacteristics matrixCharacteristics;
-		if (frameMetadata != null) {
-			matrixCharacteristics = frameMetadata.asMatrixCharacteristics();
-		} else {
-			matrixCharacteristics = new MatrixCharacteristics();
-		}
+		MatrixCharacteristics mc = (frameMetadata != null) ? 
+				frameMetadata.asMatrixCharacteristics() : new MatrixCharacteristics();
 
-		MatrixFormatMetaData mtd = new MatrixFormatMetaData(matrixCharacteristics, OutputInfo.BinaryBlockOutputInfo,
-				InputInfo.BinaryBlockInputInfo);
-		FrameObject frameObject = new FrameObject(
-				MLContextUtil.scratchSpace() + "/" + "temp_" + System.nanoTime() + variableName, mtd);
+		FrameObject frameObject = new FrameObject(OptimizerUtils.getUniqueTempFileName(), 
+				new MatrixFormatMetaData(mc, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo));
 		frameObject.setRDDHandle(new RDDObject(binaryBlocks, variableName));
 		return frameObject;
 	}
@@ -655,14 +634,11 @@ public class MLContextConversionUtil {
 	public static MatrixObject javaRDDStringCSVToMatrixObject(String variableName, JavaRDD<String> javaRDD,
 			MatrixMetadata matrixMetadata) {
 		JavaPairRDD<LongWritable, Text> javaPairRDD = javaRDD.mapToPair(new ConvertStringToLongTextPair());
-		MatrixCharacteristics matrixCharacteristics;
-		if (matrixMetadata != null) {
-			matrixCharacteristics = matrixMetadata.asMatrixCharacteristics();
-		} else {
-			matrixCharacteristics = new MatrixCharacteristics();
-		}
-		MatrixObject matrixObject = new MatrixObject(ValueType.DOUBLE, null,
-				new MatrixFormatMetaData(matrixCharacteristics, OutputInfo.CSVOutputInfo, InputInfo.CSVInputInfo));
+		MatrixCharacteristics mc = (matrixMetadata != null) ? 
+				matrixMetadata.asMatrixCharacteristics() : new MatrixCharacteristics();
+
+		MatrixObject matrixObject = new MatrixObject(ValueType.DOUBLE, OptimizerUtils.getUniqueTempFileName(),
+				new MatrixFormatMetaData(mc, OutputInfo.CSVOutputInfo, InputInfo.CSVInputInfo));
 		JavaPairRDD<LongWritable, Text> javaPairRDD2 = javaPairRDD.mapToPair(new CopyTextInputFunction());
 		matrixObject.setRDDHandle(new RDDObject(javaPairRDD2, variableName));
 		return matrixObject;
@@ -695,22 +671,17 @@ public class MLContextConversionUtil {
 	public static FrameObject javaRDDStringCSVToFrameObject(String variableName, JavaRDD<String> javaRDD,
 			FrameMetadata frameMetadata) {
 		JavaPairRDD<LongWritable, Text> javaPairRDD = javaRDD.mapToPair(new ConvertStringToLongTextPair());
-		MatrixCharacteristics matrixCharacteristics;
-		if (frameMetadata != null) {
-			matrixCharacteristics = frameMetadata.asMatrixCharacteristics();
-		} else {
-			matrixCharacteristics = new MatrixCharacteristics();
-		}
+		MatrixCharacteristics mc = (frameMetadata != null) ? 
+				frameMetadata.asMatrixCharacteristics() : new MatrixCharacteristics();
 		JavaPairRDD<LongWritable, Text> javaPairRDDText = javaPairRDD.mapToPair(new CopyTextInputFunction());
 
 		JavaSparkContext jsc = MLContextUtil.getJavaSparkContext((MLContext) MLContextProxy.getActiveMLContextForAPI());
 
-		FrameObject frameObject = new FrameObject(null, new MatrixFormatMetaData(matrixCharacteristics,
-				OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo));
+		FrameObject frameObject = new FrameObject(OptimizerUtils.getUniqueTempFileName(), 
+				new MatrixFormatMetaData(mc, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo));
 		JavaPairRDD<Long, FrameBlock> rdd;
 		try {
-			rdd = FrameRDDConverterUtils.csvToBinaryBlock(jsc, javaPairRDDText, matrixCharacteristics, false, ",",
-					false, -1);
+			rdd = FrameRDDConverterUtils.csvToBinaryBlock(jsc, javaPairRDDText, mc, false, ",", false, -1);
 		} catch (DMLRuntimeException e) {
 			e.printStackTrace();
 			return null;
@@ -734,14 +705,11 @@ public class MLContextConversionUtil {
 	public static MatrixObject javaRDDStringIJVToMatrixObject(String variableName, JavaRDD<String> javaRDD,
 			MatrixMetadata matrixMetadata) {
 		JavaPairRDD<LongWritable, Text> javaPairRDD = javaRDD.mapToPair(new ConvertStringToLongTextPair());
-		MatrixCharacteristics matrixCharacteristics;
-		if (matrixMetadata != null) {
-			matrixCharacteristics = matrixMetadata.asMatrixCharacteristics();
-		} else {
-			matrixCharacteristics = new MatrixCharacteristics();
-		}
-		MatrixObject matrixObject = new MatrixObject(ValueType.DOUBLE, null, new MatrixFormatMetaData(
-				matrixCharacteristics, OutputInfo.TextCellOutputInfo, InputInfo.TextCellInputInfo));
+		MatrixCharacteristics mc = (matrixMetadata != null) ? 
+				matrixMetadata.asMatrixCharacteristics() : new MatrixCharacteristics();
+
+		MatrixObject matrixObject = new MatrixObject(ValueType.DOUBLE, OptimizerUtils.getUniqueTempFileName(), 
+				new MatrixFormatMetaData(mc, OutputInfo.TextCellOutputInfo, InputInfo.TextCellInputInfo));
 		JavaPairRDD<LongWritable, Text> javaPairRDD2 = javaPairRDD.mapToPair(new CopyTextInputFunction());
 		matrixObject.setRDDHandle(new RDDObject(javaPairRDD2, variableName));
 		return matrixObject;
@@ -762,25 +730,21 @@ public class MLContextConversionUtil {
 	public static FrameObject javaRDDStringIJVToFrameObject(String variableName, JavaRDD<String> javaRDD,
 			FrameMetadata frameMetadata) {
 		JavaPairRDD<LongWritable, Text> javaPairRDD = javaRDD.mapToPair(new ConvertStringToLongTextPair());
-		MatrixCharacteristics matrixCharacteristics;
-		if (frameMetadata != null) {
-			matrixCharacteristics = frameMetadata.asMatrixCharacteristics();
-		} else {
-			matrixCharacteristics = new MatrixCharacteristics();
-		}
+		MatrixCharacteristics mc = (frameMetadata != null) ? 
+				frameMetadata.asMatrixCharacteristics() : new MatrixCharacteristics();
 
 		JavaPairRDD<LongWritable, Text> javaPairRDDText = javaPairRDD.mapToPair(new CopyTextInputFunction());
 
 		JavaSparkContext jsc = MLContextUtil.getJavaSparkContext((MLContext) MLContextProxy.getActiveMLContextForAPI());
 
-		FrameObject frameObject = new FrameObject(null, new MatrixFormatMetaData(matrixCharacteristics,
-				OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo));
+		FrameObject frameObject = new FrameObject(OptimizerUtils.getUniqueTempFileName(), 
+				new MatrixFormatMetaData(mc, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo));
 		JavaPairRDD<Long, FrameBlock> rdd;
 		try {
 			List<ValueType> lschema = null;
 			if (lschema == null)
-				lschema = Collections.nCopies((int) matrixCharacteristics.getCols(), ValueType.STRING);
-			rdd = FrameRDDConverterUtils.textCellToBinaryBlock(jsc, javaPairRDDText, matrixCharacteristics, lschema);
+				lschema = Collections.nCopies((int) mc.getCols(), ValueType.STRING);
+			rdd = FrameRDDConverterUtils.textCellToBinaryBlock(jsc, javaPairRDDText, mc, lschema);
 		} catch (DMLRuntimeException e) {
 			e.printStackTrace();
 			return null;

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/245488c3/src/main/java/org/apache/sysml/api/mlcontext/MLContextUtil.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/mlcontext/MLContextUtil.java b/src/main/java/org/apache/sysml/api/mlcontext/MLContextUtil.java
index 1331338..5b4e736 100644
--- a/src/main/java/org/apache/sysml/api/mlcontext/MLContextUtil.java
+++ b/src/main/java/org/apache/sysml/api/mlcontext/MLContextUtil.java
@@ -610,24 +610,6 @@ public final class MLContextUtil {
 	}
 
 	/**
-	 * Return the default matrix block size.
-	 * 
-	 * @return the default matrix block size
-	 */
-	public static int defaultBlockSize() {
-		return ConfigurationManager.getBlocksize();
-	}
-
-	/**
-	 * Return the location of the scratch space directory.
-	 * 
-	 * @return the lcoation of the scratch space directory
-	 */
-	public static String scratchSpace() {
-		return ConfigurationManager.getScratchSpace();
-	}
-
-	/**
 	 * Return a double-quoted string with inner single and double quotes
 	 * escaped.
 	 * 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/245488c3/src/main/java/org/apache/sysml/api/mlcontext/MatrixMetadata.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/mlcontext/MatrixMetadata.java b/src/main/java/org/apache/sysml/api/mlcontext/MatrixMetadata.java
index 513b74d..a9f96df 100644
--- a/src/main/java/org/apache/sysml/api/mlcontext/MatrixMetadata.java
+++ b/src/main/java/org/apache/sysml/api/mlcontext/MatrixMetadata.java
@@ -19,6 +19,7 @@
 
 package org.apache.sysml.api.mlcontext;
 
+import org.apache.sysml.conf.ConfigurationManager;
 import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
 
 /**
@@ -478,8 +479,8 @@ public class MatrixMetadata extends Metadata {
 
 		long nr = (numRows == null) ? -1 : numRows;
 		long nc = (numColumns == null) ? -1 : numColumns;
-		int nrpb = (numRowsPerBlock == null) ? MLContextUtil.defaultBlockSize() : numRowsPerBlock;
-		int ncpb = (numColumnsPerBlock == null) ? MLContextUtil.defaultBlockSize() : numColumnsPerBlock;
+		int nrpb = (numRowsPerBlock == null) ? ConfigurationManager.getBlocksize() : numRowsPerBlock;
+		int ncpb = (numColumnsPerBlock == null) ? ConfigurationManager.getBlocksize() : numColumnsPerBlock;
 		long nnz = (numNonZeros == null) ? -1 : numNonZeros;
 		MatrixCharacteristics mc = new MatrixCharacteristics(nr, nc, nrpb, ncpb, nnz);
 		return mc;

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/245488c3/src/main/java/org/apache/sysml/hops/OptimizerUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/OptimizerUtils.java b/src/main/java/org/apache/sysml/hops/OptimizerUtils.java
index 6111830..c19ad57 100644
--- a/src/main/java/org/apache/sysml/hops/OptimizerUtils.java
+++ b/src/main/java/org/apache/sysml/hops/OptimizerUtils.java
@@ -35,7 +35,10 @@ import org.apache.sysml.hops.Hop.FileFormatTypes;
 import org.apache.sysml.hops.Hop.OpOp2;
 import org.apache.sysml.hops.rewrite.HopRewriteUtils;
 import org.apache.sysml.lops.Checkpoint;
+import org.apache.sysml.lops.Lop;
 import org.apache.sysml.lops.LopProperties.ExecType;
+import org.apache.sysml.lops.compile.Dag;
+import org.apache.sysml.parser.Expression.DataType;
 import org.apache.sysml.parser.Expression.ValueType;
 import org.apache.sysml.runtime.DMLRuntimeException;
 import org.apache.sysml.runtime.controlprogram.LocalVariableMap;
@@ -939,6 +942,23 @@ public class OptimizerUtils
 		return ret;
 	}
 	
+	/**
+	 * Wrapper over internal filename construction for external usage. 
+	 * 
+	 * @return
+	 */
+	public static String getUniqueTempFileName() {
+		return new Dag<Lop>().getNextUniqueFilename();
+	}
+	
+	/**
+	 * Wrapper over internal varname construction for external usage.
+	 * 
+	 * @return
+	 */
+	public static String getUniqueVariableName(DataType dt) {
+		return Dag.getNextUniqueVarname(dt);
+	}
 
 	/**
 	 * 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/245488c3/src/main/java/org/apache/sysml/lops/compile/Dag.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/lops/compile/Dag.java b/src/main/java/org/apache/sysml/lops/compile/Dag.java
index 7831751..ecf7346 100644
--- a/src/main/java/org/apache/sysml/lops/compile/Dag.java
+++ b/src/main/java/org/apache/sysml/lops/compile/Dag.java
@@ -179,19 +179,9 @@ public class Dag<N extends Lop>
 		}
 		public void addLastInstruction(Instruction inst) {
 			lastInstructions.add(inst);
-		}
-		
+		}		
 	}
 	
-	private String getFilePath() {
-		if ( scratchFilePath == null ) {
-			scratchFilePath = scratch + Lop.FILE_SEPARATOR
-								+ Lop.PROCESS_PREFIX + DMLScript.getUUID()
-								+ Lop.FILE_SEPARATOR + Lop.FILE_SEPARATOR
-								+ ProgramConverter.CP_ROOT_THREAD_ID + Lop.FILE_SEPARATOR;
-		}
-		return scratchFilePath;
-	}
 	
 	/**
 	 * Constructor
@@ -205,7 +195,32 @@ public class Dag<N extends Lop>
 		// get number of reducers from dml config
 		total_reducers = ConfigurationManager.getNumReducers();
 	}
+	
+	///////
+	// filename handling
+	
+	private String getFilePath() {
+		if ( scratchFilePath == null ) {
+			scratchFilePath = scratch + Lop.FILE_SEPARATOR
+								+ Lop.PROCESS_PREFIX + DMLScript.getUUID()
+								+ Lop.FILE_SEPARATOR + Lop.FILE_SEPARATOR
+								+ ProgramConverter.CP_ROOT_THREAD_ID + Lop.FILE_SEPARATOR;
+		}
+		return scratchFilePath;
+	}
 
+	public String getNextUniqueFilename() {
+		return getFilePath() + "temp" + job_id.getNextID();
+	}
+	
+	public static String getNextUniqueVarname(DataType dt) {
+		return (dt==DataType.MATRIX ? Lop.MATRIX_VAR_NAME_PREFIX :
+			Lop.FRAME_VAR_NAME_PREFIX) + var_index.getNextID();
+	}
+	
+	///////
+	// Dag modifications
+	
 	/**
 	 * Method to add a node to the DAG.
 	 * 
@@ -2388,8 +2403,8 @@ public class Dag<N extends Lop>
 					// TODO: change it to output binaryblock
 					
 					Data dataInput = (Data) input;
-					oparams.setFile_name(getFilePath() + "temp" + job_id.getNextID());
-					oparams.setLabel(Lop.MATRIX_VAR_NAME_PREFIX + var_index.getNextID());
+					oparams.setFile_name(getNextUniqueFilename());
+					oparams.setLabel(getNextUniqueVarname(DataType.MATRIX));
 
 					// generate an instruction that creates a symbol table entry for the new variable in CSV format
 					Data delimLop = (Data) dataInput.getNamedInputLop(
@@ -2425,9 +2440,8 @@ public class Dag<N extends Lop>
 			{
 				// generate temporary filename and a variable name to hold the
 				// output produced by "rootNode"
-				oparams.setFile_name(getFilePath() + "temp" + job_id.getNextID());
-				oparams.setLabel( (node.getDataType()==DataType.MATRIX ? Lop.MATRIX_VAR_NAME_PREFIX :
-						Lop.FRAME_VAR_NAME_PREFIX) + var_index.getNextID());
+				oparams.setFile_name(getNextUniqueFilename());
+				oparams.setLabel(getNextUniqueVarname(node.getDataType()));
 
 				// generate an instruction that creates a symbol table entry for the new variable
 				//String createInst = prepareVariableInstruction("createvar", node);
@@ -2575,7 +2589,7 @@ public class Dag<N extends Lop>
 						
 						// generate temporary filename & var name
 						String tempVarName = oparams.getLabel() + "temp";
-						String tempFileName = getFilePath() + "temp" + job_id.getNextID();
+						String tempFileName = getNextUniqueFilename();
 						
 						//String createInst = prepareVariableInstruction("createvar", tempVarName, node.getDataType(), node.getValueType(), tempFileName, oparams, out.getOutInfo());
 						//out.addPreInstruction(CPInstructionParser.parseSingleInstruction(createInst));
@@ -2665,8 +2679,7 @@ public class Dag<N extends Lop>
 						// part MM format file on hdfs.
 						if (oparams.getFormat() == Format.CSV)  {
 							
-							String tempFileName = getFilePath() + "temp" + job_id.getNextID();
-							
+							String tempFileName = getNextUniqueFilename();
 							String createInst = node.getInstructions(tempFileName);
 							createvarInst= CPInstructionParser.parseSingleInstruction(createInst);
 						
@@ -2690,11 +2703,9 @@ public class Dag<N extends Lop>
 						} 
 						else if (oparams.getFormat() == Format.MM )  {
 							
-							String tempFileName = getFilePath() + "temp" + job_id.getNextID();
-							
 							createvarInst= VariableCPInstruction.prepareCreateVariableInstruction(
 													oparams.getLabel(), 
-													tempFileName, 
+													getNextUniqueFilename(), 
 													false, node.getDataType(),
 													OutputInfo.outputInfoToString(getOutputInfo(node, false)), 
 													new MatrixCharacteristics(oparams.getNumRows(), oparams.getNumCols(), rpb, cpb, oparams.getNnz()),


[2/2] incubator-systemml git commit: [SYSTEMML-905] Fix unary aggregates over compressed matrices w/ UC group

Posted by mb...@apache.org.
[SYSTEMML-905] Fix unary aggregates over compressed matrices w/ UC group

This patch fixes incorrect results of unary aggregates over compressed
matrices that contain an uncompressed (UC) column group, where the UC group
resets the partial results of preceding column groups. Furthermore, it
extends the tests so that they always include a UC group.

Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/f0c91ed0
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/f0c91ed0
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/f0c91ed0

Branch: refs/heads/master
Commit: f0c91ed0d115dc727a79ec2c9e0c4a8aa420c727
Parents: 245488c
Author: Matthias Boehm <mb...@us.ibm.com>
Authored: Sun Sep 11 02:03:58 2016 +0200
Committer: Matthias Boehm <mb...@us.ibm.com>
Committed: Sun Sep 11 02:03:58 2016 +0200

----------------------------------------------------------------------
 .../runtime/compress/ColGroupUncompressed.java   | 15 +++++++++++++++
 .../runtime/compress/CompressedMatrixBlock.java  | 19 +++++++++++++------
 .../compress/BasicUnaryAggregateTest.java        |  1 +
 .../compress/ParUnaryAggregateTest.java          |  1 +
 4 files changed, 30 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/f0c91ed0/src/main/java/org/apache/sysml/runtime/compress/ColGroupUncompressed.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/ColGroupUncompressed.java b/src/main/java/org/apache/sysml/runtime/compress/ColGroupUncompressed.java
index 0ac82aa..085244c 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/ColGroupUncompressed.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/ColGroupUncompressed.java
@@ -28,6 +28,7 @@ import java.util.Arrays;
 import java.util.List;
 
 import org.apache.sysml.runtime.DMLRuntimeException;
+import org.apache.sysml.runtime.functionobjects.ReduceRow;
 import org.apache.sysml.runtime.matrix.data.LibMatrixAgg;
 import org.apache.sysml.runtime.matrix.data.LibMatrixMult;
 import org.apache.sysml.runtime.matrix.data.MatrixBlock;
@@ -325,6 +326,20 @@ public class ColGroupUncompressed extends ColGroup
 	{
 		//execute unary aggregate operations
 		LibMatrixAgg.aggregateUnaryMatrix(_data, ret, op);
+		
+		//shift result into correct column indexes
+		if( op.indexFn instanceof ReduceRow ) {
+			//clear corrections
+			for( int i=0; i<_colIndexes.length; i++ )
+				if( op.aggOp.correctionExists )
+					ret.quickSetValue(0, i+_colIndexes.length, 0);
+			//shift partial results
+			for( int i=_colIndexes.length-1; i>=0; i-- ) {
+				double val = ret.quickGetValue(0, i);
+				ret.quickSetValue(0, i, 0);
+				ret.quickSetValue(0, _colIndexes[i], val);
+			}
+		}
 	}
 
 	@Override

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/f0c91ed0/src/main/java/org/apache/sysml/runtime/compress/CompressedMatrixBlock.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/CompressedMatrixBlock.java b/src/main/java/org/apache/sysml/runtime/compress/CompressedMatrixBlock.java
index fac2461..7ff3a8a 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/CompressedMatrixBlock.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/CompressedMatrixBlock.java
@@ -1049,6 +1049,9 @@ public class CompressedMatrixBlock extends MatrixBlock implements Externalizable
 			ArrayList<ColGroup>[] grpParts = createStaticTaskPartitioning(op.getNumThreads(), false);
 			ColGroupUncompressed uc = getUncompressedColGroup();
 			try {
+				//compute uncompressed column group in parallel (otherwise bottleneck)
+				if( uc != null )
+					 ret = (MatrixBlock)uc.getData().aggregateUnaryOperations(op, ret, blockingFactorRow, blockingFactorCol, indexesIn, false);					
 				//compute all compressed column groups
 				ExecutorService pool = Executors.newFixedThreadPool( op.getNumThreads() );
 				ArrayList<UnaryAggregateTask> tasks = new ArrayList<UnaryAggregateTask>();
@@ -1056,9 +1059,6 @@ public class CompressedMatrixBlock extends MatrixBlock implements Externalizable
 					tasks.add(new UnaryAggregateTask(grp, ret, op));
 				pool.invokeAll(tasks);	
 				pool.shutdown();
-				//compute uncompressed column group in parallel (otherwise bottleneck)
-				if( uc != null )
-					 ret = (MatrixBlock)uc.getData().aggregateUnaryOperations(op, ret, blockingFactorRow, blockingFactorCol, indexesIn, false);					
 				//aggregate partial results
 				if( !(op.indexFn instanceof ReduceRow) ){
 					KahanObject kbuff = new KahanObject(0,0);
@@ -1077,9 +1077,16 @@ public class CompressedMatrixBlock extends MatrixBlock implements Externalizable
 			}
 		}
 		else {
-			for (ColGroup grp : _colGroups) {
-				grp.unaryAggregateOperations(op, ret);
-			}
+			//process UC column group
+			for (ColGroup grp : _colGroups)
+				if( grp instanceof ColGroupUncompressed )
+					grp.unaryAggregateOperations(op, ret);
+			
+			//process OLE/RLE column groups
+			for (ColGroup grp : _colGroups)
+				if( !(grp instanceof ColGroupUncompressed) )
+					grp.unaryAggregateOperations(op, ret);
+			
 		}
 		
 		//drop correction if necessary

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/f0c91ed0/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicUnaryAggregateTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicUnaryAggregateTest.java b/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicUnaryAggregateTest.java
index 960008d..767ca03 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicUnaryAggregateTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicUnaryAggregateTest.java
@@ -507,6 +507,7 @@ public class BasicUnaryAggregateTest extends AutomatedTestBase
 			if( vtype==ValueType.RAND_ROUND )
 				input = TestUtils.round(input);
 			MatrixBlock mb = DataConverter.convertToMatrixBlock(input);
+			mb = mb.appendOperations(MatrixBlock.seqOperations(0.1, rows-0.1, 1), new MatrixBlock()); //uc group
 			
 			//prepare unary aggregate operator
 			AggregateUnaryOperator auop = null;

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/f0c91ed0/src/test/java/org/apache/sysml/test/integration/functions/compress/ParUnaryAggregateTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/compress/ParUnaryAggregateTest.java b/src/test/java/org/apache/sysml/test/integration/functions/compress/ParUnaryAggregateTest.java
index 7d65418..5224298 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/compress/ParUnaryAggregateTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/compress/ParUnaryAggregateTest.java
@@ -509,6 +509,7 @@ public class ParUnaryAggregateTest extends AutomatedTestBase
 			if( vtype==ValueType.RAND_ROUND )
 				input = TestUtils.round(input);
 			MatrixBlock mb = DataConverter.convertToMatrixBlock(input);
+			mb = mb.appendOperations(MatrixBlock.seqOperations(0.1, rows-0.1, 1), new MatrixBlock()); //uc group
 			
 			//prepare unary aggregate operator
 			AggregateUnaryOperator auop = null;