You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2016/09/11 16:36:46 UTC
[1/2] incubator-systemml git commit: [SYSTEMML-593] Fix mlcontext/jmlc filename construction, incl cleanup
Repository: incubator-systemml
Updated Branches:
refs/heads/master 4d5fc9d16 -> f0c91ed0d
[SYSTEMML-593] Fix mlcontext/jmlc filename construction, incl cleanup
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/245488c3
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/245488c3
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/245488c3
Branch: refs/heads/master
Commit: 245488c3b720332178b6c279a72daa75ebf6947f
Parents: 4d5fc9d
Author: Matthias Boehm <mb...@us.ibm.com>
Authored: Sun Sep 11 00:30:00 2016 +0200
Committer: Matthias Boehm <mb...@us.ibm.com>
Committed: Sun Sep 11 00:30:35 2016 +0200
----------------------------------------------------------------------
.../java/org/apache/sysml/api/MLContext.java | 10 +-
.../apache/sysml/api/jmlc/PreparedScript.java | 8 +-
.../sysml/api/mlcontext/BinaryBlockFrame.java | 5 +-
.../sysml/api/mlcontext/BinaryBlockMatrix.java | 5 +-
.../sysml/api/mlcontext/FrameMetadata.java | 5 +-
.../api/mlcontext/MLContextConversionUtil.java | 122 +++++++------------
.../sysml/api/mlcontext/MLContextUtil.java | 18 ---
.../sysml/api/mlcontext/MatrixMetadata.java | 5 +-
.../org/apache/sysml/hops/OptimizerUtils.java | 20 +++
.../java/org/apache/sysml/lops/compile/Dag.java | 55 +++++----
10 files changed, 117 insertions(+), 136 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/245488c3/src/main/java/org/apache/sysml/api/MLContext.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/MLContext.java b/src/main/java/org/apache/sysml/api/MLContext.java
index 649ffee..bce0378 100644
--- a/src/main/java/org/apache/sysml/api/MLContext.java
+++ b/src/main/java/org/apache/sysml/api/MLContext.java
@@ -590,7 +590,7 @@ public class MLContext {
FrameObject fo = null;
if( format.equals("csv") ) {
CSVFileFormatProperties csvprops = (props!=null) ? (CSVFileFormatProperties)props: new CSVFileFormatProperties();
- fo = new FrameObject(null, new MatrixFormatMetaData(mc, OutputInfo.CSVOutputInfo, InputInfo.CSVInputInfo));
+ fo = new FrameObject(OptimizerUtils.getUniqueTempFileName(), new MatrixFormatMetaData(mc, OutputInfo.CSVOutputInfo, InputInfo.CSVInputInfo));
fo.setFileFormatProperties(csvprops);
}
else if( format.equals("text") ) {
@@ -624,7 +624,7 @@ public class MLContext {
_inVarnames = new ArrayList<String>();
MatrixCharacteristics mc = new MatrixCharacteristics(rlen, clen, OptimizerUtils.DEFAULT_BLOCKSIZE, OptimizerUtils.DEFAULT_BLOCKSIZE, -1);
- FrameObject fo = new FrameObject(null, new MatrixFormatMetaData(mc, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo));
+ FrameObject fo = new FrameObject(OptimizerUtils.getUniqueTempFileName(), new MatrixFormatMetaData(mc, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo));
if(props != null)
fo.setFileFormatProperties(props);
@@ -708,7 +708,8 @@ public class MLContext {
// Bug in Spark is messing up blocks and indexes due to too eager reuse of data structures
JavaPairRDD<MatrixIndexes, MatrixBlock> copyRDD = rdd.mapToPair( new CopyBlockPairFunction() );
- MatrixObject mo = new MatrixObject(ValueType.DOUBLE, "temp", new MatrixFormatMetaData(mc, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo));
+ MatrixObject mo = new MatrixObject(ValueType.DOUBLE, OptimizerUtils.getUniqueTempFileName(),
+ new MatrixFormatMetaData(mc, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo));
mo.setRDDHandle(new RDDObject(copyRDD, varName));
_variables.put(varName, mo);
_inVarnames.add(varName);
@@ -725,7 +726,8 @@ public class MLContext {
_variables = new LocalVariableMap();
if(_inVarnames == null)
_inVarnames = new ArrayList<String>();
- MatrixObject mo = new MatrixObject(ValueType.DOUBLE, "temp", new MatrixFormatMetaData(mc, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo));
+ MatrixObject mo = new MatrixObject(ValueType.DOUBLE, OptimizerUtils.getUniqueTempFileName(),
+ new MatrixFormatMetaData(mc, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo));
mo.acquireModify(mb);
mo.release();
_variables.put(varName, mo);
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/245488c3/src/main/java/org/apache/sysml/api/jmlc/PreparedScript.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/jmlc/PreparedScript.java b/src/main/java/org/apache/sysml/api/jmlc/PreparedScript.java
index 4ef5add..e06810b 100644
--- a/src/main/java/org/apache/sysml/api/jmlc/PreparedScript.java
+++ b/src/main/java/org/apache/sysml/api/jmlc/PreparedScript.java
@@ -29,6 +29,7 @@ import java.util.Map.Entry;
import org.apache.sysml.api.DMLException;
import org.apache.sysml.conf.ConfigurationManager;
+import org.apache.sysml.hops.OptimizerUtils;
import org.apache.sysml.parser.Expression.ValueType;
import org.apache.sysml.runtime.controlprogram.LocalVariableMap;
import org.apache.sysml.runtime.controlprogram.Program;
@@ -234,13 +235,12 @@ public class PreparedScript
if( !_inVarnames.contains(varname) )
throw new DMLException("Unspecified input variable: "+varname);
- String scratch_space = ConfigurationManager.getScratchSpace();
int blocksize = ConfigurationManager.getBlocksize();
//create new matrix object
MatrixCharacteristics mc = new MatrixCharacteristics(matrix.getNumRows(), matrix.getNumColumns(), blocksize, blocksize);
MatrixFormatMetaData meta = new MatrixFormatMetaData(mc, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo);
- MatrixObject mo = new MatrixObject(ValueType.DOUBLE, scratch_space+"/"+varname, meta);
+ MatrixObject mo = new MatrixObject(ValueType.DOUBLE, OptimizerUtils.getUniqueTempFileName(), meta);
mo.acquireModify(matrix);
mo.release();
@@ -342,13 +342,11 @@ public class PreparedScript
{
if( !_inVarnames.contains(varname) )
throw new DMLException("Unspecified input variable: "+varname);
-
- String scratch_space = ConfigurationManager.getScratchSpace();
//create new frame object
MatrixCharacteristics mc = new MatrixCharacteristics(frame.getNumRows(), frame.getNumColumns(), -1, -1);
MatrixFormatMetaData meta = new MatrixFormatMetaData(mc, OutputInfo.BinaryCellOutputInfo, InputInfo.BinaryCellInputInfo);
- FrameObject fo = new FrameObject(scratch_space+"/"+varname, meta);
+ FrameObject fo = new FrameObject(OptimizerUtils.getUniqueTempFileName(), meta);
fo.acquireModify(frame);
fo.release();
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/245488c3/src/main/java/org/apache/sysml/api/mlcontext/BinaryBlockFrame.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/mlcontext/BinaryBlockFrame.java b/src/main/java/org/apache/sysml/api/mlcontext/BinaryBlockFrame.java
index 88b1b38..29871ad 100644
--- a/src/main/java/org/apache/sysml/api/mlcontext/BinaryBlockFrame.java
+++ b/src/main/java/org/apache/sysml/api/mlcontext/BinaryBlockFrame.java
@@ -21,6 +21,7 @@ package org.apache.sysml.api.mlcontext;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.sql.DataFrame;
+import org.apache.sysml.conf.ConfigurationManager;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext;
import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
@@ -60,8 +61,8 @@ public class BinaryBlockFrame {
* the number of columns
*/
public BinaryBlockFrame(DataFrame dataFrame, long numRows, long numCols) {
- this(dataFrame, new FrameMetadata(numRows, numCols, MLContextUtil.defaultBlockSize(),
- MLContextUtil.defaultBlockSize()));
+ this(dataFrame, new FrameMetadata(numRows, numCols,
+ ConfigurationManager.getBlocksize(), ConfigurationManager.getBlocksize()));
}
/**
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/245488c3/src/main/java/org/apache/sysml/api/mlcontext/BinaryBlockMatrix.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/mlcontext/BinaryBlockMatrix.java b/src/main/java/org/apache/sysml/api/mlcontext/BinaryBlockMatrix.java
index ffa8a11..70cb259 100644
--- a/src/main/java/org/apache/sysml/api/mlcontext/BinaryBlockMatrix.java
+++ b/src/main/java/org/apache/sysml/api/mlcontext/BinaryBlockMatrix.java
@@ -21,6 +21,7 @@ package org.apache.sysml.api.mlcontext;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.sql.DataFrame;
+import org.apache.sysml.conf.ConfigurationManager;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext;
import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
@@ -61,8 +62,8 @@ public class BinaryBlockMatrix {
* the number of columns
*/
public BinaryBlockMatrix(DataFrame dataFrame, long numRows, long numCols) {
- this(dataFrame, new MatrixMetadata(numRows, numCols, MLContextUtil.defaultBlockSize(),
- MLContextUtil.defaultBlockSize()));
+ this(dataFrame, new MatrixMetadata(numRows, numCols,
+ ConfigurationManager.getBlocksize(), ConfigurationManager.getBlocksize()));
}
/**
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/245488c3/src/main/java/org/apache/sysml/api/mlcontext/FrameMetadata.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/mlcontext/FrameMetadata.java b/src/main/java/org/apache/sysml/api/mlcontext/FrameMetadata.java
index 5aabd80..7dd20f1 100644
--- a/src/main/java/org/apache/sysml/api/mlcontext/FrameMetadata.java
+++ b/src/main/java/org/apache/sysml/api/mlcontext/FrameMetadata.java
@@ -19,6 +19,7 @@
package org.apache.sysml.api.mlcontext;
+import org.apache.sysml.conf.ConfigurationManager;
import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
/**
@@ -632,8 +633,8 @@ public class FrameMetadata extends Metadata {
long nr = (numRows == null) ? -1 : numRows;
long nc = (numColumns == null) ? -1 : numColumns;
- int nrpb = (numRowsPerBlock == null) ? MLContextUtil.defaultBlockSize() : numRowsPerBlock;
- int ncpb = (numColumnsPerBlock == null) ? MLContextUtil.defaultBlockSize() : numColumnsPerBlock;
+ int nrpb = (numRowsPerBlock == null) ? ConfigurationManager.getBlocksize() : numRowsPerBlock;
+ int ncpb = (numColumnsPerBlock == null) ? ConfigurationManager.getBlocksize() : numColumnsPerBlock;
long nnz = (numNonZeros == null) ? -1 : numNonZeros;
MatrixCharacteristics mc = new MatrixCharacteristics(nr, nc, nrpb, ncpb, nnz);
return mc;
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/245488c3/src/main/java/org/apache/sysml/api/mlcontext/MLContextConversionUtil.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/mlcontext/MLContextConversionUtil.java b/src/main/java/org/apache/sysml/api/mlcontext/MLContextConversionUtil.java
index 62a4f60..15aa15e 100644
--- a/src/main/java/org/apache/sysml/api/mlcontext/MLContextConversionUtil.java
+++ b/src/main/java/org/apache/sysml/api/mlcontext/MLContextConversionUtil.java
@@ -41,6 +41,8 @@ import org.apache.spark.sql.Row;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.sql.types.StructType;
import org.apache.sysml.api.MLContextProxy;
+import org.apache.sysml.conf.ConfigurationManager;
+import org.apache.sysml.hops.OptimizerUtils;
import org.apache.sysml.parser.Expression.ValueType;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.controlprogram.caching.CacheException;
@@ -106,18 +108,13 @@ public class MLContextConversionUtil {
MatrixMetadata matrixMetadata) {
try {
MatrixBlock matrixBlock = DataConverter.convertToMatrixBlock(doubleMatrix);
- MatrixCharacteristics matrixCharacteristics;
- if (matrixMetadata != null) {
- matrixCharacteristics = matrixMetadata.asMatrixCharacteristics();
- } else {
- matrixCharacteristics = new MatrixCharacteristics(matrixBlock.getNumRows(), matrixBlock.getNumColumns(),
- MLContextUtil.defaultBlockSize(), MLContextUtil.defaultBlockSize());
- }
+ MatrixCharacteristics mc = (matrixMetadata != null) ?
+ matrixMetadata.asMatrixCharacteristics() : new MatrixCharacteristics(matrixBlock.getNumRows(),
+ matrixBlock.getNumColumns(), ConfigurationManager.getBlocksize(), ConfigurationManager.getBlocksize());
- MatrixFormatMetaData meta = new MatrixFormatMetaData(matrixCharacteristics,
- OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo);
- MatrixObject matrixObject = new MatrixObject(ValueType.DOUBLE,
- MLContextUtil.scratchSpace() + "/" + variableName, meta);
+ MatrixObject matrixObject = new MatrixObject(ValueType.DOUBLE, OptimizerUtils.getUniqueTempFileName(),
+ new MatrixFormatMetaData(mc, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo));
+
matrixObject.acquireModify(matrixBlock);
matrixObject.release();
return matrixObject;
@@ -173,16 +170,10 @@ public class MLContextConversionUtil {
public static MatrixObject matrixBlockToMatrixObject(String variableName, MatrixBlock matrixBlock,
MatrixMetadata matrixMetadata) {
try {
- MatrixCharacteristics matrixCharacteristics;
- if (matrixMetadata != null) {
- matrixCharacteristics = matrixMetadata.asMatrixCharacteristics();
- } else {
- matrixCharacteristics = new MatrixCharacteristics();
- }
- MatrixFormatMetaData mtd = new MatrixFormatMetaData(matrixCharacteristics, OutputInfo.BinaryBlockOutputInfo,
- InputInfo.BinaryBlockInputInfo);
- MatrixObject matrixObject = new MatrixObject(ValueType.DOUBLE,
- MLContextUtil.scratchSpace() + "/" + variableName, mtd);
+ MatrixCharacteristics mc = (matrixMetadata != null) ?
+ matrixMetadata.asMatrixCharacteristics() : new MatrixCharacteristics();
+ MatrixObject matrixObject = new MatrixObject(ValueType.DOUBLE, OptimizerUtils.getUniqueTempFileName(),
+ new MatrixFormatMetaData(mc, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo));
matrixObject.acquireModify(matrixBlock);
matrixObject.release();
return matrixObject;
@@ -213,7 +204,7 @@ public class MLContextConversionUtil {
}
MatrixFormatMetaData mtd = new MatrixFormatMetaData(matrixCharacteristics, OutputInfo.BinaryBlockOutputInfo,
InputInfo.BinaryBlockInputInfo);
- FrameObject frameObject = new FrameObject(MLContextUtil.scratchSpace() + "/" + variableName, mtd);
+ FrameObject frameObject = new FrameObject(OptimizerUtils.getUniqueTempFileName(), mtd);
frameObject.acquireModify(frameBlock);
frameObject.release();
return frameObject;
@@ -256,18 +247,12 @@ public class MLContextConversionUtil {
public static MatrixObject binaryBlocksToMatrixObject(String variableName,
JavaPairRDD<MatrixIndexes, MatrixBlock> binaryBlocks, MatrixMetadata matrixMetadata) {
- MatrixCharacteristics matrixCharacteristics;
- if (matrixMetadata != null) {
- matrixCharacteristics = matrixMetadata.asMatrixCharacteristics();
- } else {
- matrixCharacteristics = new MatrixCharacteristics();
- }
-
+ MatrixCharacteristics mc = (matrixMetadata != null) ?
+ matrixMetadata.asMatrixCharacteristics() : new MatrixCharacteristics();
JavaPairRDD<MatrixIndexes, MatrixBlock> javaPairRdd = binaryBlocks.mapToPair(new CopyBlockPairFunction());
-
- MatrixObject matrixObject = new MatrixObject(ValueType.DOUBLE,
- MLContextUtil.scratchSpace() + "/" + "temp_" + System.nanoTime(), new MatrixFormatMetaData(
- matrixCharacteristics, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo));
+
+ MatrixObject matrixObject = new MatrixObject(ValueType.DOUBLE, OptimizerUtils.getUniqueTempFileName(),
+ new MatrixFormatMetaData(mc, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo));
matrixObject.setRDDHandle(new RDDObject(javaPairRdd, variableName));
return matrixObject;
}
@@ -304,17 +289,11 @@ public class MLContextConversionUtil {
public static FrameObject binaryBlocksToFrameObject(String variableName, JavaPairRDD<Long, FrameBlock> binaryBlocks,
FrameMetadata frameMetadata) {
- MatrixCharacteristics matrixCharacteristics;
- if (frameMetadata != null) {
- matrixCharacteristics = frameMetadata.asMatrixCharacteristics();
- } else {
- matrixCharacteristics = new MatrixCharacteristics();
- }
+ MatrixCharacteristics mc = (frameMetadata != null) ?
+ frameMetadata.asMatrixCharacteristics() : new MatrixCharacteristics();
- MatrixFormatMetaData mtd = new MatrixFormatMetaData(matrixCharacteristics, OutputInfo.BinaryBlockOutputInfo,
- InputInfo.BinaryBlockInputInfo);
- FrameObject frameObject = new FrameObject(
- MLContextUtil.scratchSpace() + "/" + "temp_" + System.nanoTime() + variableName, mtd);
+ FrameObject frameObject = new FrameObject(OptimizerUtils.getUniqueTempFileName(),
+ new MatrixFormatMetaData(mc, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo));
frameObject.setRDDHandle(new RDDObject(binaryBlocks, variableName));
return frameObject;
}
@@ -655,14 +634,11 @@ public class MLContextConversionUtil {
public static MatrixObject javaRDDStringCSVToMatrixObject(String variableName, JavaRDD<String> javaRDD,
MatrixMetadata matrixMetadata) {
JavaPairRDD<LongWritable, Text> javaPairRDD = javaRDD.mapToPair(new ConvertStringToLongTextPair());
- MatrixCharacteristics matrixCharacteristics;
- if (matrixMetadata != null) {
- matrixCharacteristics = matrixMetadata.asMatrixCharacteristics();
- } else {
- matrixCharacteristics = new MatrixCharacteristics();
- }
- MatrixObject matrixObject = new MatrixObject(ValueType.DOUBLE, null,
- new MatrixFormatMetaData(matrixCharacteristics, OutputInfo.CSVOutputInfo, InputInfo.CSVInputInfo));
+ MatrixCharacteristics mc = (matrixMetadata != null) ?
+ matrixMetadata.asMatrixCharacteristics() : new MatrixCharacteristics();
+
+ MatrixObject matrixObject = new MatrixObject(ValueType.DOUBLE, OptimizerUtils.getUniqueTempFileName(),
+ new MatrixFormatMetaData(mc, OutputInfo.CSVOutputInfo, InputInfo.CSVInputInfo));
JavaPairRDD<LongWritable, Text> javaPairRDD2 = javaPairRDD.mapToPair(new CopyTextInputFunction());
matrixObject.setRDDHandle(new RDDObject(javaPairRDD2, variableName));
return matrixObject;
@@ -695,22 +671,17 @@ public class MLContextConversionUtil {
public static FrameObject javaRDDStringCSVToFrameObject(String variableName, JavaRDD<String> javaRDD,
FrameMetadata frameMetadata) {
JavaPairRDD<LongWritable, Text> javaPairRDD = javaRDD.mapToPair(new ConvertStringToLongTextPair());
- MatrixCharacteristics matrixCharacteristics;
- if (frameMetadata != null) {
- matrixCharacteristics = frameMetadata.asMatrixCharacteristics();
- } else {
- matrixCharacteristics = new MatrixCharacteristics();
- }
+ MatrixCharacteristics mc = (frameMetadata != null) ?
+ frameMetadata.asMatrixCharacteristics() : new MatrixCharacteristics();
JavaPairRDD<LongWritable, Text> javaPairRDDText = javaPairRDD.mapToPair(new CopyTextInputFunction());
JavaSparkContext jsc = MLContextUtil.getJavaSparkContext((MLContext) MLContextProxy.getActiveMLContextForAPI());
- FrameObject frameObject = new FrameObject(null, new MatrixFormatMetaData(matrixCharacteristics,
- OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo));
+ FrameObject frameObject = new FrameObject(OptimizerUtils.getUniqueTempFileName(),
+ new MatrixFormatMetaData(mc, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo));
JavaPairRDD<Long, FrameBlock> rdd;
try {
- rdd = FrameRDDConverterUtils.csvToBinaryBlock(jsc, javaPairRDDText, matrixCharacteristics, false, ",",
- false, -1);
+ rdd = FrameRDDConverterUtils.csvToBinaryBlock(jsc, javaPairRDDText, mc, false, ",", false, -1);
} catch (DMLRuntimeException e) {
e.printStackTrace();
return null;
@@ -734,14 +705,11 @@ public class MLContextConversionUtil {
public static MatrixObject javaRDDStringIJVToMatrixObject(String variableName, JavaRDD<String> javaRDD,
MatrixMetadata matrixMetadata) {
JavaPairRDD<LongWritable, Text> javaPairRDD = javaRDD.mapToPair(new ConvertStringToLongTextPair());
- MatrixCharacteristics matrixCharacteristics;
- if (matrixMetadata != null) {
- matrixCharacteristics = matrixMetadata.asMatrixCharacteristics();
- } else {
- matrixCharacteristics = new MatrixCharacteristics();
- }
- MatrixObject matrixObject = new MatrixObject(ValueType.DOUBLE, null, new MatrixFormatMetaData(
- matrixCharacteristics, OutputInfo.TextCellOutputInfo, InputInfo.TextCellInputInfo));
+ MatrixCharacteristics mc = (matrixMetadata != null) ?
+ matrixMetadata.asMatrixCharacteristics() : new MatrixCharacteristics();
+
+ MatrixObject matrixObject = new MatrixObject(ValueType.DOUBLE, OptimizerUtils.getUniqueTempFileName(),
+ new MatrixFormatMetaData(mc, OutputInfo.TextCellOutputInfo, InputInfo.TextCellInputInfo));
JavaPairRDD<LongWritable, Text> javaPairRDD2 = javaPairRDD.mapToPair(new CopyTextInputFunction());
matrixObject.setRDDHandle(new RDDObject(javaPairRDD2, variableName));
return matrixObject;
@@ -762,25 +730,21 @@ public class MLContextConversionUtil {
public static FrameObject javaRDDStringIJVToFrameObject(String variableName, JavaRDD<String> javaRDD,
FrameMetadata frameMetadata) {
JavaPairRDD<LongWritable, Text> javaPairRDD = javaRDD.mapToPair(new ConvertStringToLongTextPair());
- MatrixCharacteristics matrixCharacteristics;
- if (frameMetadata != null) {
- matrixCharacteristics = frameMetadata.asMatrixCharacteristics();
- } else {
- matrixCharacteristics = new MatrixCharacteristics();
- }
+ MatrixCharacteristics mc = (frameMetadata != null) ?
+ frameMetadata.asMatrixCharacteristics() : new MatrixCharacteristics();
JavaPairRDD<LongWritable, Text> javaPairRDDText = javaPairRDD.mapToPair(new CopyTextInputFunction());
JavaSparkContext jsc = MLContextUtil.getJavaSparkContext((MLContext) MLContextProxy.getActiveMLContextForAPI());
- FrameObject frameObject = new FrameObject(null, new MatrixFormatMetaData(matrixCharacteristics,
- OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo));
+ FrameObject frameObject = new FrameObject(OptimizerUtils.getUniqueTempFileName(),
+ new MatrixFormatMetaData(mc, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo));
JavaPairRDD<Long, FrameBlock> rdd;
try {
List<ValueType> lschema = null;
if (lschema == null)
- lschema = Collections.nCopies((int) matrixCharacteristics.getCols(), ValueType.STRING);
- rdd = FrameRDDConverterUtils.textCellToBinaryBlock(jsc, javaPairRDDText, matrixCharacteristics, lschema);
+ lschema = Collections.nCopies((int) mc.getCols(), ValueType.STRING);
+ rdd = FrameRDDConverterUtils.textCellToBinaryBlock(jsc, javaPairRDDText, mc, lschema);
} catch (DMLRuntimeException e) {
e.printStackTrace();
return null;
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/245488c3/src/main/java/org/apache/sysml/api/mlcontext/MLContextUtil.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/mlcontext/MLContextUtil.java b/src/main/java/org/apache/sysml/api/mlcontext/MLContextUtil.java
index 1331338..5b4e736 100644
--- a/src/main/java/org/apache/sysml/api/mlcontext/MLContextUtil.java
+++ b/src/main/java/org/apache/sysml/api/mlcontext/MLContextUtil.java
@@ -610,24 +610,6 @@ public final class MLContextUtil {
}
/**
- * Return the default matrix block size.
- *
- * @return the default matrix block size
- */
- public static int defaultBlockSize() {
- return ConfigurationManager.getBlocksize();
- }
-
- /**
- * Return the location of the scratch space directory.
- *
- * @return the lcoation of the scratch space directory
- */
- public static String scratchSpace() {
- return ConfigurationManager.getScratchSpace();
- }
-
- /**
* Return a double-quoted string with inner single and double quotes
* escaped.
*
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/245488c3/src/main/java/org/apache/sysml/api/mlcontext/MatrixMetadata.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/mlcontext/MatrixMetadata.java b/src/main/java/org/apache/sysml/api/mlcontext/MatrixMetadata.java
index 513b74d..a9f96df 100644
--- a/src/main/java/org/apache/sysml/api/mlcontext/MatrixMetadata.java
+++ b/src/main/java/org/apache/sysml/api/mlcontext/MatrixMetadata.java
@@ -19,6 +19,7 @@
package org.apache.sysml.api.mlcontext;
+import org.apache.sysml.conf.ConfigurationManager;
import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
/**
@@ -478,8 +479,8 @@ public class MatrixMetadata extends Metadata {
long nr = (numRows == null) ? -1 : numRows;
long nc = (numColumns == null) ? -1 : numColumns;
- int nrpb = (numRowsPerBlock == null) ? MLContextUtil.defaultBlockSize() : numRowsPerBlock;
- int ncpb = (numColumnsPerBlock == null) ? MLContextUtil.defaultBlockSize() : numColumnsPerBlock;
+ int nrpb = (numRowsPerBlock == null) ? ConfigurationManager.getBlocksize() : numRowsPerBlock;
+ int ncpb = (numColumnsPerBlock == null) ? ConfigurationManager.getBlocksize() : numColumnsPerBlock;
long nnz = (numNonZeros == null) ? -1 : numNonZeros;
MatrixCharacteristics mc = new MatrixCharacteristics(nr, nc, nrpb, ncpb, nnz);
return mc;
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/245488c3/src/main/java/org/apache/sysml/hops/OptimizerUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/OptimizerUtils.java b/src/main/java/org/apache/sysml/hops/OptimizerUtils.java
index 6111830..c19ad57 100644
--- a/src/main/java/org/apache/sysml/hops/OptimizerUtils.java
+++ b/src/main/java/org/apache/sysml/hops/OptimizerUtils.java
@@ -35,7 +35,10 @@ import org.apache.sysml.hops.Hop.FileFormatTypes;
import org.apache.sysml.hops.Hop.OpOp2;
import org.apache.sysml.hops.rewrite.HopRewriteUtils;
import org.apache.sysml.lops.Checkpoint;
+import org.apache.sysml.lops.Lop;
import org.apache.sysml.lops.LopProperties.ExecType;
+import org.apache.sysml.lops.compile.Dag;
+import org.apache.sysml.parser.Expression.DataType;
import org.apache.sysml.parser.Expression.ValueType;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.controlprogram.LocalVariableMap;
@@ -939,6 +942,23 @@ public class OptimizerUtils
return ret;
}
+ /**
+ * Wrapper over internal filename construction for external usage.
+ *
+ * @return
+ */
+ public static String getUniqueTempFileName() {
+ return new Dag<Lop>().getNextUniqueFilename();
+ }
+
+ /**
+ * Wrapper over internal varname construction for external usage.
+ *
+ * @return
+ */
+ public static String getUniqueVariableName(DataType dt) {
+ return Dag.getNextUniqueVarname(dt);
+ }
/**
*
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/245488c3/src/main/java/org/apache/sysml/lops/compile/Dag.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/lops/compile/Dag.java b/src/main/java/org/apache/sysml/lops/compile/Dag.java
index 7831751..ecf7346 100644
--- a/src/main/java/org/apache/sysml/lops/compile/Dag.java
+++ b/src/main/java/org/apache/sysml/lops/compile/Dag.java
@@ -179,19 +179,9 @@ public class Dag<N extends Lop>
}
public void addLastInstruction(Instruction inst) {
lastInstructions.add(inst);
- }
-
+ }
}
- private String getFilePath() {
- if ( scratchFilePath == null ) {
- scratchFilePath = scratch + Lop.FILE_SEPARATOR
- + Lop.PROCESS_PREFIX + DMLScript.getUUID()
- + Lop.FILE_SEPARATOR + Lop.FILE_SEPARATOR
- + ProgramConverter.CP_ROOT_THREAD_ID + Lop.FILE_SEPARATOR;
- }
- return scratchFilePath;
- }
/**
* Constructor
@@ -205,7 +195,32 @@ public class Dag<N extends Lop>
// get number of reducers from dml config
total_reducers = ConfigurationManager.getNumReducers();
}
+
+ ///////
+ // filename handling
+
+ private String getFilePath() {
+ if ( scratchFilePath == null ) {
+ scratchFilePath = scratch + Lop.FILE_SEPARATOR
+ + Lop.PROCESS_PREFIX + DMLScript.getUUID()
+ + Lop.FILE_SEPARATOR + Lop.FILE_SEPARATOR
+ + ProgramConverter.CP_ROOT_THREAD_ID + Lop.FILE_SEPARATOR;
+ }
+ return scratchFilePath;
+ }
+ public String getNextUniqueFilename() {
+ return getFilePath() + "temp" + job_id.getNextID();
+ }
+
+ public static String getNextUniqueVarname(DataType dt) {
+ return (dt==DataType.MATRIX ? Lop.MATRIX_VAR_NAME_PREFIX :
+ Lop.FRAME_VAR_NAME_PREFIX) + var_index.getNextID();
+ }
+
+ ///////
+ // Dag modifications
+
/**
* Method to add a node to the DAG.
*
@@ -2388,8 +2403,8 @@ public class Dag<N extends Lop>
// TODO: change it to output binaryblock
Data dataInput = (Data) input;
- oparams.setFile_name(getFilePath() + "temp" + job_id.getNextID());
- oparams.setLabel(Lop.MATRIX_VAR_NAME_PREFIX + var_index.getNextID());
+ oparams.setFile_name(getNextUniqueFilename());
+ oparams.setLabel(getNextUniqueVarname(DataType.MATRIX));
// generate an instruction that creates a symbol table entry for the new variable in CSV format
Data delimLop = (Data) dataInput.getNamedInputLop(
@@ -2425,9 +2440,8 @@ public class Dag<N extends Lop>
{
// generate temporary filename and a variable name to hold the
// output produced by "rootNode"
- oparams.setFile_name(getFilePath() + "temp" + job_id.getNextID());
- oparams.setLabel( (node.getDataType()==DataType.MATRIX ? Lop.MATRIX_VAR_NAME_PREFIX :
- Lop.FRAME_VAR_NAME_PREFIX) + var_index.getNextID());
+ oparams.setFile_name(getNextUniqueFilename());
+ oparams.setLabel(getNextUniqueVarname(node.getDataType()));
// generate an instruction that creates a symbol table entry for the new variable
//String createInst = prepareVariableInstruction("createvar", node);
@@ -2575,7 +2589,7 @@ public class Dag<N extends Lop>
// generate temporary filename & var name
String tempVarName = oparams.getLabel() + "temp";
- String tempFileName = getFilePath() + "temp" + job_id.getNextID();
+ String tempFileName = getNextUniqueFilename();
//String createInst = prepareVariableInstruction("createvar", tempVarName, node.getDataType(), node.getValueType(), tempFileName, oparams, out.getOutInfo());
//out.addPreInstruction(CPInstructionParser.parseSingleInstruction(createInst));
@@ -2665,8 +2679,7 @@ public class Dag<N extends Lop>
// part MM format file on hdfs.
if (oparams.getFormat() == Format.CSV) {
- String tempFileName = getFilePath() + "temp" + job_id.getNextID();
-
+ String tempFileName = getNextUniqueFilename();
String createInst = node.getInstructions(tempFileName);
createvarInst= CPInstructionParser.parseSingleInstruction(createInst);
@@ -2690,11 +2703,9 @@ public class Dag<N extends Lop>
}
else if (oparams.getFormat() == Format.MM ) {
- String tempFileName = getFilePath() + "temp" + job_id.getNextID();
-
createvarInst= VariableCPInstruction.prepareCreateVariableInstruction(
oparams.getLabel(),
- tempFileName,
+ getNextUniqueFilename(),
false, node.getDataType(),
OutputInfo.outputInfoToString(getOutputInfo(node, false)),
new MatrixCharacteristics(oparams.getNumRows(), oparams.getNumCols(), rpb, cpb, oparams.getNnz()),
[2/2] incubator-systemml git commit: [SYSTEMML-905] Fix unary aggregates over compressed matrices w/ UC group
Posted by mb...@apache.org.
[SYSTEMML-905] Fix unary aggregates over compressed matrices w/ UC group
This patch fixes result correctness issues of unary aggregates over
compressed matrices w/ UC group, where the UC group resets partial
results from preceding groups. Furthermore, this also includes extended
tests to always include a UC group.
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/f0c91ed0
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/f0c91ed0
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/f0c91ed0
Branch: refs/heads/master
Commit: f0c91ed0d115dc727a79ec2c9e0c4a8aa420c727
Parents: 245488c
Author: Matthias Boehm <mb...@us.ibm.com>
Authored: Sun Sep 11 02:03:58 2016 +0200
Committer: Matthias Boehm <mb...@us.ibm.com>
Committed: Sun Sep 11 02:03:58 2016 +0200
----------------------------------------------------------------------
.../runtime/compress/ColGroupUncompressed.java | 15 +++++++++++++++
.../runtime/compress/CompressedMatrixBlock.java | 19 +++++++++++++------
.../compress/BasicUnaryAggregateTest.java | 1 +
.../compress/ParUnaryAggregateTest.java | 1 +
4 files changed, 30 insertions(+), 6 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/f0c91ed0/src/main/java/org/apache/sysml/runtime/compress/ColGroupUncompressed.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/ColGroupUncompressed.java b/src/main/java/org/apache/sysml/runtime/compress/ColGroupUncompressed.java
index 0ac82aa..085244c 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/ColGroupUncompressed.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/ColGroupUncompressed.java
@@ -28,6 +28,7 @@ import java.util.Arrays;
import java.util.List;
import org.apache.sysml.runtime.DMLRuntimeException;
+import org.apache.sysml.runtime.functionobjects.ReduceRow;
import org.apache.sysml.runtime.matrix.data.LibMatrixAgg;
import org.apache.sysml.runtime.matrix.data.LibMatrixMult;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;
@@ -325,6 +326,20 @@ public class ColGroupUncompressed extends ColGroup
{
//execute unary aggregate operations
LibMatrixAgg.aggregateUnaryMatrix(_data, ret, op);
+
+ //shift result into correct column indexes
+ if( op.indexFn instanceof ReduceRow ) {
+ //clear corrections
+ for( int i=0; i<_colIndexes.length; i++ )
+ if( op.aggOp.correctionExists )
+ ret.quickSetValue(0, i+_colIndexes.length, 0);
+ //shift partial results
+ for( int i=_colIndexes.length-1; i>=0; i-- ) {
+ double val = ret.quickGetValue(0, i);
+ ret.quickSetValue(0, i, 0);
+ ret.quickSetValue(0, _colIndexes[i], val);
+ }
+ }
}
@Override
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/f0c91ed0/src/main/java/org/apache/sysml/runtime/compress/CompressedMatrixBlock.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/CompressedMatrixBlock.java b/src/main/java/org/apache/sysml/runtime/compress/CompressedMatrixBlock.java
index fac2461..7ff3a8a 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/CompressedMatrixBlock.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/CompressedMatrixBlock.java
@@ -1049,6 +1049,9 @@ public class CompressedMatrixBlock extends MatrixBlock implements Externalizable
ArrayList<ColGroup>[] grpParts = createStaticTaskPartitioning(op.getNumThreads(), false);
ColGroupUncompressed uc = getUncompressedColGroup();
try {
+ //compute uncompressed column group in parallel (otherwise bottleneck)
+ if( uc != null )
+ ret = (MatrixBlock)uc.getData().aggregateUnaryOperations(op, ret, blockingFactorRow, blockingFactorCol, indexesIn, false);
//compute all compressed column groups
ExecutorService pool = Executors.newFixedThreadPool( op.getNumThreads() );
ArrayList<UnaryAggregateTask> tasks = new ArrayList<UnaryAggregateTask>();
@@ -1056,9 +1059,6 @@ public class CompressedMatrixBlock extends MatrixBlock implements Externalizable
tasks.add(new UnaryAggregateTask(grp, ret, op));
pool.invokeAll(tasks);
pool.shutdown();
- //compute uncompressed column group in parallel (otherwise bottleneck)
- if( uc != null )
- ret = (MatrixBlock)uc.getData().aggregateUnaryOperations(op, ret, blockingFactorRow, blockingFactorCol, indexesIn, false);
//aggregate partial results
if( !(op.indexFn instanceof ReduceRow) ){
KahanObject kbuff = new KahanObject(0,0);
@@ -1077,9 +1077,16 @@ public class CompressedMatrixBlock extends MatrixBlock implements Externalizable
}
}
else {
- for (ColGroup grp : _colGroups) {
- grp.unaryAggregateOperations(op, ret);
- }
+ //process UC column group
+ for (ColGroup grp : _colGroups)
+ if( grp instanceof ColGroupUncompressed )
+ grp.unaryAggregateOperations(op, ret);
+
+ //process OLE/RLE column groups
+ for (ColGroup grp : _colGroups)
+ if( !(grp instanceof ColGroupUncompressed) )
+ grp.unaryAggregateOperations(op, ret);
+
}
//drop correction if necessary
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/f0c91ed0/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicUnaryAggregateTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicUnaryAggregateTest.java b/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicUnaryAggregateTest.java
index 960008d..767ca03 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicUnaryAggregateTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/compress/BasicUnaryAggregateTest.java
@@ -507,6 +507,7 @@ public class BasicUnaryAggregateTest extends AutomatedTestBase
if( vtype==ValueType.RAND_ROUND )
input = TestUtils.round(input);
MatrixBlock mb = DataConverter.convertToMatrixBlock(input);
+ mb = mb.appendOperations(MatrixBlock.seqOperations(0.1, rows-0.1, 1), new MatrixBlock()); //uc group
//prepare unary aggregate operator
AggregateUnaryOperator auop = null;
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/f0c91ed0/src/test/java/org/apache/sysml/test/integration/functions/compress/ParUnaryAggregateTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/compress/ParUnaryAggregateTest.java b/src/test/java/org/apache/sysml/test/integration/functions/compress/ParUnaryAggregateTest.java
index 7d65418..5224298 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/compress/ParUnaryAggregateTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/compress/ParUnaryAggregateTest.java
@@ -509,6 +509,7 @@ public class ParUnaryAggregateTest extends AutomatedTestBase
if( vtype==ValueType.RAND_ROUND )
input = TestUtils.round(input);
MatrixBlock mb = DataConverter.convertToMatrixBlock(input);
+ mb = mb.appendOperations(MatrixBlock.seqOperations(0.1, rows-0.1, 1), new MatrixBlock()); //uc group
//prepare unary aggregate operator
AggregateUnaryOperator auop = null;