You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2016/09/15 20:04:11 UTC
incubator-systemml git commit: [SYSTEMML-909] Robustness dataframe index column handling, cleanup apis
Repository: incubator-systemml
Updated Branches:
refs/heads/master 19a68541c -> ad65dfa10
[SYSTEMML-909] Robustness dataframe index column handling, cleanup apis
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/ad65dfa1
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/ad65dfa1
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/ad65dfa1
Branch: refs/heads/master
Commit: ad65dfa10ac5ea359f21319384011090b2c35fb6
Parents: 19a6854
Author: Matthias Boehm <mb...@us.ibm.com>
Authored: Thu Sep 15 22:02:28 2016 +0200
Committer: Matthias Boehm <mb...@us.ibm.com>
Committed: Thu Sep 15 22:03:01 2016 +0200
----------------------------------------------------------------------
.../java/org/apache/sysml/api/MLOutput.java | 2 +-
.../sysml/api/mlcontext/BinaryBlockFrame.java | 3 +-
.../sysml/api/mlcontext/BinaryBlockMatrix.java | 3 +-
.../org/apache/sysml/api/mlcontext/Frame.java | 27 ++---
.../apache/sysml/api/mlcontext/MLContext.java | 24 +----
.../api/mlcontext/MLContextConversionUtil.java | 59 ++++-------
.../sysml/api/mlcontext/MLContextUtil.java | 103 ++++++-------------
.../apache/sysml/api/mlcontext/MLResults.java | 83 +++++----------
.../org/apache/sysml/api/mlcontext/Matrix.java | 40 +++----
.../sysml/api/mlcontext/MatrixMetadata.java | 3 +-
.../sysml/api/mlcontext/ScriptFactory.java | 15 +--
.../spark/utils/FrameRDDConverterUtils.java | 2 +-
.../spark/utils/RDDConverterUtils.java | 4 +
.../sysml/api/ml/BaseSystemMLClassifier.scala | 8 +-
.../sysml/api/ml/BaseSystemMLRegressor.scala | 4 +-
.../apache/sysml/api/ml/PredictionUtils.scala | 4 +-
.../integration/mlcontext/MLContextTest.java | 21 ++--
17 files changed, 133 insertions(+), 272 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ad65dfa1/src/main/java/org/apache/sysml/api/MLOutput.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/MLOutput.java b/src/main/java/org/apache/sysml/api/MLOutput.java
index ac1b3f3..ec2f24d 100644
--- a/src/main/java/org/apache/sysml/api/MLOutput.java
+++ b/src/main/java/org/apache/sysml/api/MLOutput.java
@@ -86,7 +86,7 @@ public class MLOutput {
/**
* Note, the output DataFrame has an additional column ID.
- * An easy way to get DataFrame without ID is by df.sort("__INDEX").drop("__INDEX")
+ * An easy way to get DataFrame without ID is by df.drop("__INDEX")
* @param sqlContext
* @param varName
* @return
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ad65dfa1/src/main/java/org/apache/sysml/api/mlcontext/BinaryBlockFrame.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/mlcontext/BinaryBlockFrame.java b/src/main/java/org/apache/sysml/api/mlcontext/BinaryBlockFrame.java
index 29871ad..9d7ab9d 100644
--- a/src/main/java/org/apache/sysml/api/mlcontext/BinaryBlockFrame.java
+++ b/src/main/java/org/apache/sysml/api/mlcontext/BinaryBlockFrame.java
@@ -122,9 +122,8 @@ public class BinaryBlockFrame {
try {
MatrixCharacteristics mc = getMatrixCharacteristics();
FrameSchema frameSchema = frameMetadata.getFrameSchema();
- FrameBlock mb = SparkExecutionContext.toFrameBlock(binaryBlocks, frameSchema.getSchema(),
+ return SparkExecutionContext.toFrameBlock(binaryBlocks, frameSchema.getSchema(),
(int) mc.getRows(), (int) mc.getCols());
- return mb;
} catch (DMLRuntimeException e) {
throw new MLContextException("Exception while getting FrameBlock from binary-block frame", e);
}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ad65dfa1/src/main/java/org/apache/sysml/api/mlcontext/BinaryBlockMatrix.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/mlcontext/BinaryBlockMatrix.java b/src/main/java/org/apache/sysml/api/mlcontext/BinaryBlockMatrix.java
index 70cb259..abbdcc0 100644
--- a/src/main/java/org/apache/sysml/api/mlcontext/BinaryBlockMatrix.java
+++ b/src/main/java/org/apache/sysml/api/mlcontext/BinaryBlockMatrix.java
@@ -109,9 +109,8 @@ public class BinaryBlockMatrix {
public MatrixBlock getMatrixBlock() {
try {
MatrixCharacteristics mc = getMatrixCharacteristics();
- MatrixBlock mb = SparkExecutionContext.toMatrixBlock(binaryBlocks, (int) mc.getRows(), (int) mc.getCols(),
+ return SparkExecutionContext.toMatrixBlock(binaryBlocks, (int) mc.getRows(), (int) mc.getCols(),
mc.getRowsPerBlock(), mc.getColsPerBlock(), mc.getNonZeros());
- return mb;
} catch (DMLRuntimeException e) {
throw new MLContextException("Exception while getting MatrixBlock from binary-block matrix", e);
}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ad65dfa1/src/main/java/org/apache/sysml/api/mlcontext/Frame.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/mlcontext/Frame.java b/src/main/java/org/apache/sysml/api/mlcontext/Frame.java
index c8c2c97..85dca64 100644
--- a/src/main/java/org/apache/sysml/api/mlcontext/Frame.java
+++ b/src/main/java/org/apache/sysml/api/mlcontext/Frame.java
@@ -24,7 +24,6 @@ import org.apache.spark.rdd.RDD;
import org.apache.spark.sql.DataFrame;
import org.apache.sysml.runtime.controlprogram.caching.FrameObject;
import org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext;
-import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
/**
* Frame encapsulates a SystemML frame.
@@ -55,8 +54,7 @@ public class Frame {
* @return the frame as a two-dimensional String array
*/
public String[][] to2DStringArray() {
- String[][] strArray = MLContextConversionUtil.frameObjectTo2DStringArray(frameObject);
- return strArray;
+ return MLContextConversionUtil.frameObjectTo2DStringArray(frameObject);
}
/**
@@ -65,8 +63,7 @@ public class Frame {
* @return the frame as a {@code JavaRDD<String>} in IJV format
*/
public JavaRDD<String> toJavaRDDStringIJV() {
- JavaRDD<String> javaRDDStringIJV = MLContextConversionUtil.frameObjectToJavaRDDStringIJV(frameObject);
- return javaRDDStringIJV;
+ return MLContextConversionUtil.frameObjectToJavaRDDStringIJV(frameObject);
}
/**
@@ -75,8 +72,7 @@ public class Frame {
* @return the frame as a {@code JavaRDD<String>} in CSV format
*/
public JavaRDD<String> toJavaRDDStringCSV() {
- JavaRDD<String> javaRDDStringCSV = MLContextConversionUtil.frameObjectToJavaRDDStringCSV(frameObject, ",");
- return javaRDDStringCSV;
+ return MLContextConversionUtil.frameObjectToJavaRDDStringCSV(frameObject, ",");
}
/**
@@ -85,8 +81,7 @@ public class Frame {
* @return the frame as a {@code RDD<String>} in CSV format
*/
public RDD<String> toRDDStringCSV() {
- RDD<String> rddStringCSV = MLContextConversionUtil.frameObjectToRDDStringCSV(frameObject, ",");
- return rddStringCSV;
+ return MLContextConversionUtil.frameObjectToRDDStringCSV(frameObject, ",");
}
/**
@@ -95,8 +90,7 @@ public class Frame {
* @return the frame as a {@code RDD<String>} in IJV format
*/
public RDD<String> toRDDStringIJV() {
- RDD<String> rddStringIJV = MLContextConversionUtil.frameObjectToRDDStringIJV(frameObject);
- return rddStringIJV;
+ return MLContextConversionUtil.frameObjectToRDDStringIJV(frameObject);
}
/**
@@ -105,8 +99,7 @@ public class Frame {
* @return the frame as a {@code DataFrame}
*/
public DataFrame toDF() {
- DataFrame df = MLContextConversionUtil.frameObjectToDataFrame(frameObject, sparkExecutionContext);
- return df;
+ return MLContextConversionUtil.frameObjectToDataFrame(frameObject, sparkExecutionContext);
}
/**
@@ -115,9 +108,7 @@ public class Frame {
* @return the matrix as a {@code BinaryBlockFrame}
*/
public BinaryBlockFrame toBinaryBlockFrame() {
- BinaryBlockFrame binaryBlockFrame = MLContextConversionUtil.frameObjectToBinaryBlockFrame(frameObject,
- sparkExecutionContext);
- return binaryBlockFrame;
+ return MLContextConversionUtil.frameObjectToBinaryBlockFrame(frameObject, sparkExecutionContext);
}
/**
@@ -126,9 +117,7 @@ public class Frame {
* @return the frame metadata
*/
public FrameMetadata getFrameMetadata() {
- MatrixCharacteristics matrixCharacteristics = frameObject.getMatrixCharacteristics();
- FrameMetadata frameMetadata = new FrameMetadata(matrixCharacteristics);
- return frameMetadata;
+ return new FrameMetadata(frameObject.getMatrixCharacteristics());
}
@Override
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ad65dfa1/src/main/java/org/apache/sysml/api/mlcontext/MLContext.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/mlcontext/MLContext.java b/src/main/java/org/apache/sysml/api/mlcontext/MLContext.java
index 88c937b..281371d 100644
--- a/src/main/java/org/apache/sysml/api/mlcontext/MLContext.java
+++ b/src/main/java/org/apache/sysml/api/mlcontext/MLContext.java
@@ -31,6 +31,7 @@ import org.apache.spark.api.java.JavaSparkContext;
import org.apache.sysml.api.DMLScript;
import org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM;
import org.apache.sysml.api.MLContextProxy;
+import org.apache.sysml.api.jmlc.JMLCUtils;
import org.apache.sysml.api.monitoring.SparkMonitoringUtil;
import org.apache.sysml.conf.ConfigurationManager;
import org.apache.sysml.conf.DMLConfig;
@@ -46,7 +47,6 @@ import org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext;
import org.apache.sysml.runtime.instructions.Instruction;
import org.apache.sysml.runtime.instructions.cp.Data;
import org.apache.sysml.runtime.instructions.cp.ScalarObject;
-import org.apache.sysml.runtime.instructions.cp.VariableCPInstruction;
import org.apache.sysml.runtime.instructions.spark.functions.SparkListener;
import org.apache.sysml.runtime.matrix.MatrixFormatMetaData;
import org.apache.sysml.runtime.matrix.data.OutputInfo;
@@ -478,27 +478,11 @@ public class MLContext {
}
public ArrayList<Instruction> performCleanupAfterRecompilation(ArrayList<Instruction> instructions) {
- if (executingScript == null) {
+ if (executingScript == null || executingScript.getOutputVariables() == null)
return instructions;
- }
+
Set<String> outputVariableNames = executingScript.getOutputVariables();
- if (outputVariableNames == null) {
- return instructions;
- }
-
- for (int i = 0; i < instructions.size(); i++) {
- Instruction inst = instructions.get(i);
- if (inst instanceof VariableCPInstruction && ((VariableCPInstruction) inst).isRemoveVariable()) {
- VariableCPInstruction varInst = (VariableCPInstruction) inst;
- for (String outputVariableName : outputVariableNames)
- if (varInst.isRemoveVariable(outputVariableName)) {
- instructions.remove(i);
- i--;
- break;
- }
- }
- }
- return instructions;
+ return JMLCUtils.cleanupRuntimeInstructions(instructions, outputVariableNames.toArray(new String[0]));
}
}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ad65dfa1/src/main/java/org/apache/sysml/api/mlcontext/MLContextConversionUtil.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/mlcontext/MLContextConversionUtil.java b/src/main/java/org/apache/sysml/api/mlcontext/MLContextConversionUtil.java
index 0661a93..799225b 100644
--- a/src/main/java/org/apache/sysml/api/mlcontext/MLContextConversionUtil.java
+++ b/src/main/java/org/apache/sysml/api/mlcontext/MLContextConversionUtil.java
@@ -139,11 +139,9 @@ public class MLContextConversionUtil {
JavaSparkContext javaSparkContext = new JavaSparkContext(sparkContext);
JavaRDD<String> javaRDD = javaSparkContext.parallelize(lines);
if ((matrixMetadata == null) || (matrixMetadata.getMatrixFormat() == MatrixFormat.CSV)) {
- MatrixObject matrixObject = javaRDDStringCSVToMatrixObject(variableName, javaRDD, matrixMetadata);
- return matrixObject;
+ return javaRDDStringCSVToMatrixObject(variableName, javaRDD, matrixMetadata);
} else if (matrixMetadata.getMatrixFormat() == MatrixFormat.IJV) {
- MatrixObject matrixObject = javaRDDStringIJVToMatrixObject(variableName, javaRDD, matrixMetadata);
- return matrixObject;
+ return javaRDDStringIJVToMatrixObject(variableName, javaRDD, matrixMetadata);
}
return null;
} catch (Exception e) {
@@ -370,9 +368,7 @@ public class MLContextConversionUtil {
JavaPairRDD<Long, FrameBlock> binaryBlock = FrameRDDConverterUtils.dataFrameToBinaryBlock(javaSparkContext,
dataFrame, matrixCharacteristics, containsID);
- FrameObject frameObject = MLContextConversionUtil.binaryBlocksToFrameObject(variableName, binaryBlock,
- frameMetadata);
- return frameObject;
+ return MLContextConversionUtil.binaryBlocksToFrameObject(variableName, binaryBlock, frameMetadata);
} catch (DMLRuntimeException e) {
throw new MLContextException("Exception converting DataFrame to FrameObject", e);
}
@@ -470,18 +466,14 @@ public class MLContextConversionUtil {
MatrixFormat mf = null;
if (hasID) {
Object object = firstRow.get(1);
- if (object instanceof Vector) {
- mf = MatrixFormat.DF_VECTOR_WITH_INDEX;
- } else {
- mf = MatrixFormat.DF_DOUBLES_WITH_INDEX;
- }
+ mf = (object instanceof Vector) ?
+ MatrixFormat.DF_VECTOR_WITH_INDEX :
+ MatrixFormat.DF_DOUBLES_WITH_INDEX;
} else {
Object object = firstRow.get(0);
- if (object instanceof Vector) {
- mf = MatrixFormat.DF_VECTOR;
- } else {
- mf = MatrixFormat.DF_DOUBLES;
- }
+ mf = (object instanceof Vector) ?
+ MatrixFormat.DF_VECTOR :
+ MatrixFormat.DF_DOUBLES;
}
if (mf == null) {
throw new MLContextException("DataFrame format not recognized as an accepted SystemML MatrixFormat");
@@ -802,8 +794,7 @@ public class MLContextConversionUtil {
*/
public static RDD<String> binaryBlockMatrixToRDDStringIJV(BinaryBlockMatrix binaryBlockMatrix) {
JavaRDD<String> javaRDD = binaryBlockMatrixToJavaRDDStringIJV(binaryBlockMatrix);
- RDD<String> rdd = JavaRDD.toRDD(javaRDD);
- return rdd;
+ return JavaRDD.toRDD(javaRDD);
}
/**
@@ -821,8 +812,7 @@ public class MLContextConversionUtil {
SparkContext sc = activeMLContext.getSparkContext();
@SuppressWarnings("resource")
JavaSparkContext jsc = new JavaSparkContext(sc);
- JavaRDD<String> javaRDDStringCSV = jsc.parallelize(list);
- return javaRDDStringCSV;
+ return jsc.parallelize(list);
}
/**
@@ -836,8 +826,7 @@ public class MLContextConversionUtil {
List<String> list = frameObjectToListStringCSV(frameObject, delimiter);
JavaSparkContext jsc = MLContextUtil.getJavaSparkContext((MLContext) MLContextProxy.getActiveMLContextForAPI());
- JavaRDD<String> javaRDDStringCSV = jsc.parallelize(list);
- return javaRDDStringCSV;
+ return jsc.parallelize(list);
}
/**
@@ -855,8 +844,7 @@ public class MLContextConversionUtil {
SparkContext sc = activeMLContext.getSparkContext();
@SuppressWarnings("resource")
JavaSparkContext jsc = new JavaSparkContext(sc);
- JavaRDD<String> javaRDDStringCSV = jsc.parallelize(list);
- return javaRDDStringCSV;
+ return jsc.parallelize(list);
}
/**
@@ -870,8 +858,7 @@ public class MLContextConversionUtil {
List<String> list = frameObjectToListStringIJV(frameObject);
JavaSparkContext jsc = MLContextUtil.getJavaSparkContext((MLContext) MLContextProxy.getActiveMLContextForAPI());
- JavaRDD<String> javaRDDStringIJV = jsc.parallelize(list);
- return javaRDDStringIJV;
+ return jsc.parallelize(list);
}
/**
@@ -898,8 +885,7 @@ public class MLContextConversionUtil {
MLContext activeMLContext = (MLContext) MLContextProxy.getActiveMLContextForAPI();
SparkContext sc = activeMLContext.getSparkContext();
ClassTag<String> tag = scala.reflect.ClassTag$.MODULE$.apply(String.class);
- RDD<String> rddString = sc.parallelize(JavaConversions.asScalaBuffer(list), sc.defaultParallelism(), tag);
- return rddString;
+ return sc.parallelize(JavaConversions.asScalaBuffer(list), sc.defaultParallelism(), tag);
}
/**
@@ -925,8 +911,7 @@ public class MLContextConversionUtil {
SparkContext sc = MLContextUtil.getSparkContext((MLContext) MLContextProxy.getActiveMLContextForAPI());
ClassTag<String> tag = scala.reflect.ClassTag$.MODULE$.apply(String.class);
- RDD<String> rddString = sc.parallelize(JavaConversions.asScalaBuffer(list), sc.defaultParallelism(), tag);
- return rddString;
+ return sc.parallelize(JavaConversions.asScalaBuffer(list), sc.defaultParallelism(), tag);
}
/**
@@ -953,8 +938,7 @@ public class MLContextConversionUtil {
MLContext activeMLContext = (MLContext) MLContextProxy.getActiveMLContextForAPI();
SparkContext sc = activeMLContext.getSparkContext();
ClassTag<String> tag = scala.reflect.ClassTag$.MODULE$.apply(String.class);
- RDD<String> rddString = sc.parallelize(JavaConversions.asScalaBuffer(list), sc.defaultParallelism(), tag);
- return rddString;
+ return sc.parallelize(JavaConversions.asScalaBuffer(list), sc.defaultParallelism(), tag);
}
/**
@@ -980,8 +964,7 @@ public class MLContextConversionUtil {
SparkContext sc = MLContextUtil.getSparkContext((MLContext) MLContextProxy.getActiveMLContextForAPI());
ClassTag<String> tag = scala.reflect.ClassTag$.MODULE$.apply(String.class);
- RDD<String> rddString = sc.parallelize(JavaConversions.asScalaBuffer(list), sc.defaultParallelism(), tag);
- return rddString;
+ return sc.parallelize(JavaConversions.asScalaBuffer(list), sc.defaultParallelism(), tag);
}
/**
@@ -1257,8 +1240,7 @@ public class MLContextConversionUtil {
JavaPairRDD<MatrixIndexes, MatrixBlock> binaryBlock = (JavaPairRDD<MatrixIndexes, MatrixBlock>) sparkExecutionContext
.getRDDHandleForMatrixObject(matrixObject, InputInfo.BinaryBlockInputInfo);
MatrixCharacteristics matrixCharacteristics = matrixObject.getMatrixCharacteristics();
- BinaryBlockMatrix binaryBlockMatrix = new BinaryBlockMatrix(binaryBlock, matrixCharacteristics);
- return binaryBlockMatrix;
+ return new BinaryBlockMatrix(binaryBlock, matrixCharacteristics);
} catch (DMLRuntimeException e) {
throw new MLContextException("DMLRuntimeException while converting matrix object to BinaryBlockMatrix", e);
}
@@ -1282,8 +1264,7 @@ public class MLContextConversionUtil {
MatrixCharacteristics matrixCharacteristics = frameObject.getMatrixCharacteristics();
FrameSchema fs = new FrameSchema(frameObject.getSchema());
FrameMetadata fm = new FrameMetadata(fs, matrixCharacteristics);
- BinaryBlockFrame binaryBlockFrame = new BinaryBlockFrame(binaryBlock, fm);
- return binaryBlockFrame;
+ return new BinaryBlockFrame(binaryBlock, fm);
} catch (DMLRuntimeException e) {
throw new MLContextException("DMLRuntimeException while converting frame object to BinaryBlockFrame", e);
}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ad65dfa1/src/main/java/org/apache/sysml/api/mlcontext/MLContextUtil.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/mlcontext/MLContextUtil.java b/src/main/java/org/apache/sysml/api/mlcontext/MLContextUtil.java
index 6c75048..8c2ba78 100644
--- a/src/main/java/org/apache/sysml/api/mlcontext/MLContextUtil.java
+++ b/src/main/java/org/apache/sysml/api/mlcontext/MLContextUtil.java
@@ -140,11 +140,7 @@ public final class MLContextUtil {
* @return {@code true} if Spark version supported; otherwise {@code false}.
*/
public static boolean isSparkVersionSupported(String sparkVersion) {
- if (compareVersion(sparkVersion, MLContext.SYSTEMML_MINIMUM_SPARK_VERSION) < 0) {
- return false;
- } else {
- return true;
- }
+ return compareVersion(sparkVersion, MLContext.SYSTEMML_MINIMUM_SPARK_VERSION) >= 0;
}
/**
@@ -409,33 +405,25 @@ public final class MLContextUtil {
if (hasMatrixMetadata) {
MatrixMetadata matrixMetadata = (MatrixMetadata) metadata;
- MatrixObject matrixObject;
if (matrixMetadata.getMatrixFormat() == MatrixFormat.IJV) {
- matrixObject = MLContextConversionUtil.javaRDDStringIJVToMatrixObject(name, javaRDD,
- matrixMetadata);
+ return MLContextConversionUtil.javaRDDStringIJVToMatrixObject(name, javaRDD, matrixMetadata);
} else {
- matrixObject = MLContextConversionUtil.javaRDDStringCSVToMatrixObject(name, javaRDD,
- matrixMetadata);
+ return MLContextConversionUtil.javaRDDStringCSVToMatrixObject(name, javaRDD, matrixMetadata);
}
- return matrixObject;
} else if (hasFrameMetadata) {
FrameMetadata frameMetadata = (FrameMetadata) metadata;
- FrameObject frameObject;
if (frameMetadata.getFrameFormat() == FrameFormat.IJV) {
- frameObject = MLContextConversionUtil.javaRDDStringIJVToFrameObject(name, javaRDD, frameMetadata);
+ return MLContextConversionUtil.javaRDDStringIJVToFrameObject(name, javaRDD, frameMetadata);
} else {
- frameObject = MLContextConversionUtil.javaRDDStringCSVToFrameObject(name, javaRDD, frameMetadata);
+ return MLContextConversionUtil.javaRDDStringCSVToFrameObject(name, javaRDD, frameMetadata);
}
- return frameObject;
} else if (!hasMetadata) {
String firstLine = javaRDD.first();
boolean isAllNumbers = isCSVLineAllNumbers(firstLine);
if (isAllNumbers) {
- MatrixObject matrixObject = MLContextConversionUtil.javaRDDStringCSVToMatrixObject(name, javaRDD);
- return matrixObject;
+ return MLContextConversionUtil.javaRDDStringCSVToMatrixObject(name, javaRDD);
} else {
- FrameObject frameObject = MLContextConversionUtil.javaRDDStringCSVToFrameObject(name, javaRDD);
- return frameObject;
+ return MLContextConversionUtil.javaRDDStringCSVToFrameObject(name, javaRDD);
}
}
@@ -445,63 +433,47 @@ public final class MLContextUtil {
if (hasMatrixMetadata) {
MatrixMetadata matrixMetadata = (MatrixMetadata) metadata;
- MatrixObject matrixObject;
if (matrixMetadata.getMatrixFormat() == MatrixFormat.IJV) {
- matrixObject = MLContextConversionUtil.rddStringIJVToMatrixObject(name, rdd, matrixMetadata);
+ return MLContextConversionUtil.rddStringIJVToMatrixObject(name, rdd, matrixMetadata);
} else {
- matrixObject = MLContextConversionUtil.rddStringCSVToMatrixObject(name, rdd, matrixMetadata);
+ return MLContextConversionUtil.rddStringCSVToMatrixObject(name, rdd, matrixMetadata);
}
- return matrixObject;
} else if (hasFrameMetadata) {
FrameMetadata frameMetadata = (FrameMetadata) metadata;
- FrameObject frameObject;
if (frameMetadata.getFrameFormat() == FrameFormat.IJV) {
- frameObject = MLContextConversionUtil.rddStringIJVToFrameObject(name, rdd, frameMetadata);
+ return MLContextConversionUtil.rddStringIJVToFrameObject(name, rdd, frameMetadata);
} else {
- frameObject = MLContextConversionUtil.rddStringCSVToFrameObject(name, rdd, frameMetadata);
+ return MLContextConversionUtil.rddStringCSVToFrameObject(name, rdd, frameMetadata);
}
- return frameObject;
} else if (!hasMetadata) {
String firstLine = rdd.first();
boolean isAllNumbers = isCSVLineAllNumbers(firstLine);
if (isAllNumbers) {
- MatrixObject matrixObject = MLContextConversionUtil.rddStringCSVToMatrixObject(name, rdd);
- return matrixObject;
+ return MLContextConversionUtil.rddStringCSVToMatrixObject(name, rdd);
} else {
- FrameObject frameObject = MLContextConversionUtil.rddStringCSVToFrameObject(name, rdd);
- return frameObject;
+ return MLContextConversionUtil.rddStringCSVToFrameObject(name, rdd);
}
}
} else if (value instanceof MatrixBlock) {
MatrixBlock matrixBlock = (MatrixBlock) value;
- MatrixObject matrixObject = MLContextConversionUtil.matrixBlockToMatrixObject(name, matrixBlock,
- (MatrixMetadata) metadata);
- return matrixObject;
+ return MLContextConversionUtil.matrixBlockToMatrixObject(name, matrixBlock, (MatrixMetadata) metadata);
} else if (value instanceof FrameBlock) {
FrameBlock frameBlock = (FrameBlock) value;
- FrameObject frameObject = MLContextConversionUtil.frameBlockToFrameObject(name, frameBlock,
- (FrameMetadata) metadata);
- return frameObject;
+ return MLContextConversionUtil.frameBlockToFrameObject(name, frameBlock, (FrameMetadata) metadata);
} else if (value instanceof DataFrame) {
DataFrame dataFrame = (DataFrame) value;
if (hasMatrixMetadata) {
- MatrixObject matrixObject = MLContextConversionUtil.dataFrameToMatrixObject(name, dataFrame,
- (MatrixMetadata) metadata);
- return matrixObject;
+ return MLContextConversionUtil.dataFrameToMatrixObject(name, dataFrame, (MatrixMetadata) metadata);
} else if (hasFrameMetadata) {
- FrameObject frameObject = MLContextConversionUtil.dataFrameToFrameObject(name, dataFrame,
- (FrameMetadata) metadata);
- return frameObject;
+ return MLContextConversionUtil.dataFrameToFrameObject(name, dataFrame, (FrameMetadata) metadata);
} else if (!hasMetadata) {
Row firstRow = dataFrame.first();
boolean looksLikeMatrix = doesRowLookLikeMatrixRow(firstRow);
if (looksLikeMatrix) {
- MatrixObject matrixObject = MLContextConversionUtil.dataFrameToMatrixObject(name, dataFrame);
- return matrixObject;
+ return MLContextConversionUtil.dataFrameToMatrixObject(name, dataFrame);
} else {
- FrameObject frameObject = MLContextConversionUtil.dataFrameToFrameObject(name, dataFrame);
- return frameObject;
+ return MLContextConversionUtil.dataFrameToFrameObject(name, dataFrame);
}
}
} else if (value instanceof BinaryBlockMatrix) {
@@ -510,51 +482,34 @@ public final class MLContextUtil {
metadata = binaryBlockMatrix.getMatrixMetadata();
}
JavaPairRDD<MatrixIndexes, MatrixBlock> binaryBlocks = binaryBlockMatrix.getBinaryBlocks();
- MatrixObject matrixObject = MLContextConversionUtil.binaryBlocksToMatrixObject(name, binaryBlocks,
- (MatrixMetadata) metadata);
- return matrixObject;
+ return MLContextConversionUtil.binaryBlocksToMatrixObject(name, binaryBlocks, (MatrixMetadata) metadata);
} else if (value instanceof BinaryBlockFrame) {
BinaryBlockFrame binaryBlockFrame = (BinaryBlockFrame) value;
if (metadata == null) {
metadata = binaryBlockFrame.getFrameMetadata();
}
JavaPairRDD<Long, FrameBlock> binaryBlocks = binaryBlockFrame.getBinaryBlocks();
- FrameObject frameObject = MLContextConversionUtil.binaryBlocksToFrameObject(name, binaryBlocks,
- (FrameMetadata) metadata);
- return frameObject;
+ return MLContextConversionUtil.binaryBlocksToFrameObject(name, binaryBlocks, (FrameMetadata) metadata);
} else if (value instanceof Matrix) {
Matrix matrix = (Matrix) value;
- MatrixObject matrixObject = matrix.toMatrixObject();
- return matrixObject;
+ return matrix.toMatrixObject();
} else if (value instanceof Frame) {
Frame frame = (Frame) value;
- FrameObject frameObject = frame.toFrameObject();
- return frameObject;
+ return frame.toFrameObject();
} else if (value instanceof double[][]) {
double[][] doubleMatrix = (double[][]) value;
- MatrixObject matrixObject = MLContextConversionUtil.doubleMatrixToMatrixObject(name, doubleMatrix,
- (MatrixMetadata) metadata);
- return matrixObject;
+ return MLContextConversionUtil.doubleMatrixToMatrixObject(name, doubleMatrix, (MatrixMetadata) metadata);
} else if (value instanceof URL) {
URL url = (URL) value;
- MatrixObject matrixObject = MLContextConversionUtil.urlToMatrixObject(name, url, (MatrixMetadata) metadata);
- return matrixObject;
+ return MLContextConversionUtil.urlToMatrixObject(name, url, (MatrixMetadata) metadata);
} else if (value instanceof Integer) {
- Integer i = (Integer) value;
- IntObject iObj = new IntObject(i);
- return iObj;
+ return new IntObject((Integer) value);
} else if (value instanceof Double) {
- Double d = (Double) value;
- DoubleObject dObj = new DoubleObject(d);
- return dObj;
+ return new DoubleObject((Double) value);
} else if (value instanceof String) {
- String s = (String) value;
- StringObject sObj = new StringObject(s);
- return sObj;
+ return new StringObject((String) value);
} else if (value instanceof Boolean) {
- Boolean b = (Boolean) value;
- BooleanObject bObj = new BooleanObject(b);
- return bObj;
+ return new BooleanObject((Boolean) value);
}
return null;
}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ad65dfa1/src/main/java/org/apache/sysml/api/mlcontext/MLResults.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/mlcontext/MLResults.java b/src/main/java/org/apache/sysml/api/mlcontext/MLResults.java
index 1b05f98..3931c39 100644
--- a/src/main/java/org/apache/sysml/api/mlcontext/MLResults.java
+++ b/src/main/java/org/apache/sysml/api/mlcontext/MLResults.java
@@ -35,6 +35,7 @@ import org.apache.sysml.runtime.instructions.cp.DoubleObject;
import org.apache.sysml.runtime.instructions.cp.IntObject;
import org.apache.sysml.runtime.instructions.cp.ScalarObject;
import org.apache.sysml.runtime.instructions.cp.StringObject;
+import org.apache.sysml.runtime.instructions.spark.utils.RDDConverterUtils;
import scala.Tuple1;
import scala.Tuple10;
@@ -98,8 +99,7 @@ public class MLResults {
if (!keys.contains(outputName)) {
throw new MLContextException("Variable '" + outputName + "' not found");
}
- Data data = symbolTable.get(outputName);
- return data;
+ return symbolTable.get(outputName);
}
/**
@@ -114,8 +114,7 @@ public class MLResults {
if (!(data instanceof MatrixObject)) {
throw new MLContextException("Variable '" + outputName + "' not a matrix");
}
- MatrixObject mo = (MatrixObject) data;
- return mo;
+ return (MatrixObject) data;
}
/**
@@ -142,8 +141,7 @@ public class MLResults {
*/
public double[][] getMatrixAs2DDoubleArray(String outputName) {
MatrixObject mo = getMatrixObject(outputName);
- double[][] doubleMatrix = MLContextConversionUtil.matrixObjectTo2DDoubleArray(mo);
- return doubleMatrix;
+ return MLContextConversionUtil.matrixObjectTo2DDoubleArray(mo);
}
/**
@@ -169,12 +167,10 @@ public class MLResults {
public JavaRDD<String> getJavaRDDStringIJV(String outputName) {
if (isMatrixObject(outputName)) {
MatrixObject mo = getMatrixObject(outputName);
- JavaRDD<String> javaRDDStringIJV = MLContextConversionUtil.matrixObjectToJavaRDDStringIJV(mo);
- return javaRDDStringIJV;
+ return MLContextConversionUtil.matrixObjectToJavaRDDStringIJV(mo);
} else if (isFrameObject(outputName)) {
FrameObject fo = getFrameObject(outputName);
- JavaRDD<String> javaRDDStringIJV = MLContextConversionUtil.frameObjectToJavaRDDStringIJV(fo);
- return javaRDDStringIJV;
+ return MLContextConversionUtil.frameObjectToJavaRDDStringIJV(fo);
}
return null;
}
@@ -200,12 +196,10 @@ public class MLResults {
public JavaRDD<String> getJavaRDDStringCSV(String outputName) {
if (isMatrixObject(outputName)) {
MatrixObject mo = getMatrixObject(outputName);
- JavaRDD<String> javaRDDStringCSV = MLContextConversionUtil.matrixObjectToJavaRDDStringCSV(mo);
- return javaRDDStringCSV;
+ return MLContextConversionUtil.matrixObjectToJavaRDDStringCSV(mo);
} else if (isFrameObject(outputName)) {
FrameObject fo = getFrameObject(outputName);
- JavaRDD<String> javaRDDStringCSV = MLContextConversionUtil.frameObjectToJavaRDDStringCSV(fo, ",");
- return javaRDDStringCSV;
+ return MLContextConversionUtil.frameObjectToJavaRDDStringCSV(fo, ",");
}
return null;
}
@@ -231,12 +225,10 @@ public class MLResults {
public RDD<String> getRDDStringCSV(String outputName) {
if (isMatrixObject(outputName)) {
MatrixObject mo = getMatrixObject(outputName);
- RDD<String> rddStringCSV = MLContextConversionUtil.matrixObjectToRDDStringCSV(mo);
- return rddStringCSV;
+ return MLContextConversionUtil.matrixObjectToRDDStringCSV(mo);
} else if (isFrameObject(outputName)) {
FrameObject fo = getFrameObject(outputName);
- RDD<String> rddStringCSV = MLContextConversionUtil.frameObjectToRDDStringCSV(fo, ",");
- return rddStringCSV;
+ return MLContextConversionUtil.frameObjectToRDDStringCSV(fo, ",");
}
return null;
}
@@ -264,12 +256,10 @@ public class MLResults {
public RDD<String> getRDDStringIJV(String outputName) {
if (isMatrixObject(outputName)) {
MatrixObject mo = getMatrixObject(outputName);
- RDD<String> rddStringIJV = MLContextConversionUtil.matrixObjectToRDDStringIJV(mo);
- return rddStringIJV;
+ return MLContextConversionUtil.matrixObjectToRDDStringIJV(mo);
} else if (isFrameObject(outputName)) {
FrameObject fo = getFrameObject(outputName);
- RDD<String> rddStringIJV = MLContextConversionUtil.frameObjectToRDDStringIJV(fo);
- return rddStringIJV;
+ return MLContextConversionUtil.frameObjectToRDDStringIJV(fo);
}
return null;
}
@@ -296,12 +286,10 @@ public class MLResults {
public DataFrame getDataFrame(String outputName) {
if (isMatrixObject(outputName)) {
MatrixObject mo = getMatrixObject(outputName);
- DataFrame df = MLContextConversionUtil.matrixObjectToDataFrame(mo, sparkExecutionContext, false);
- return df;
+ return MLContextConversionUtil.matrixObjectToDataFrame(mo, sparkExecutionContext, false);
} else if (isFrameObject(outputName)) {
FrameObject mo = getFrameObject(outputName);
- DataFrame df = MLContextConversionUtil.frameObjectToDataFrame(mo, sparkExecutionContext);
- return df;
+ return MLContextConversionUtil.frameObjectToDataFrame(mo, sparkExecutionContext);
}
return null;
}
@@ -316,11 +304,7 @@ public class MLResults {
*/
private boolean isMatrixObject(String outputName) {
Data data = getData(outputName);
- if (data instanceof MatrixObject) {
- return true;
- } else {
- return false;
- }
+ return (data instanceof MatrixObject);
}
/**
@@ -333,11 +317,7 @@ public class MLResults {
*/
private boolean isFrameObject(String outputName) {
Data data = getData(outputName);
- if (data instanceof FrameObject) {
- return true;
- } else {
- return false;
- }
+ return (data instanceof FrameObject);
}
/**
@@ -374,8 +354,7 @@ public class MLResults {
throw new MLContextException("This method currently supports only matrices");
}
MatrixObject mo = getMatrixObject(outputName);
- DataFrame df = MLContextConversionUtil.matrixObjectToDataFrame(mo, sparkExecutionContext, isVectorDF);
- return df;
+ return MLContextConversionUtil.matrixObjectToDataFrame(mo, sparkExecutionContext, isVectorDF);
}
/**
@@ -401,8 +380,7 @@ public class MLResults {
throw new MLContextException("This method currently supports only matrices");
}
MatrixObject mo = getMatrixObject(outputName);
- DataFrame df = MLContextConversionUtil.matrixObjectToDataFrame(mo, sparkExecutionContext, false);
- return df;
+ return MLContextConversionUtil.matrixObjectToDataFrame(mo, sparkExecutionContext, false);
}
/**
@@ -428,8 +406,7 @@ public class MLResults {
throw new MLContextException("This method currently supports only matrices");
}
MatrixObject mo = getMatrixObject(outputName);
- DataFrame df = MLContextConversionUtil.matrixObjectToDataFrame(mo, sparkExecutionContext, true);
- return df;
+ return MLContextConversionUtil.matrixObjectToDataFrame(mo, sparkExecutionContext, true);
}
/**
@@ -456,8 +433,7 @@ public class MLResults {
}
MatrixObject mo = getMatrixObject(outputName);
DataFrame df = MLContextConversionUtil.matrixObjectToDataFrame(mo, sparkExecutionContext, false);
- df = df.sort("__INDEX").drop("__INDEX");
- return df;
+ return df.drop(RDDConverterUtils.DF_ID_COLUMN);
}
/**
@@ -484,8 +460,7 @@ public class MLResults {
}
MatrixObject mo = getMatrixObject(outputName);
DataFrame df = MLContextConversionUtil.matrixObjectToDataFrame(mo, sparkExecutionContext, true);
- df = df.sort("__INDEX").drop("__INDEX");
- return df;
+ return df.drop(RDDConverterUtils.DF_ID_COLUMN);
}
/**
@@ -497,8 +472,7 @@ public class MLResults {
*/
public Matrix getMatrix(String outputName) {
MatrixObject mo = getMatrixObject(outputName);
- Matrix matrix = new Matrix(mo, sparkExecutionContext);
- return matrix;
+ return new Matrix(mo, sparkExecutionContext);
}
/**
@@ -510,8 +484,7 @@ public class MLResults {
*/
public Frame getFrame(String outputName) {
FrameObject fo = getFrameObject(outputName);
- Frame frame = new Frame(fo, sparkExecutionContext);
- return frame;
+ return new Frame(fo, sparkExecutionContext);
}
/**
@@ -523,9 +496,7 @@ public class MLResults {
*/
public BinaryBlockMatrix getBinaryBlockMatrix(String outputName) {
MatrixObject mo = getMatrixObject(outputName);
- BinaryBlockMatrix binaryBlockMatrix = MLContextConversionUtil.matrixObjectToBinaryBlockMatrix(mo,
- sparkExecutionContext);
- return binaryBlockMatrix;
+ return MLContextConversionUtil.matrixObjectToBinaryBlockMatrix(mo, sparkExecutionContext);
}
/**
@@ -537,8 +508,7 @@ public class MLResults {
*/
public String[][] getFrameAs2DStringArray(String outputName) {
FrameObject frameObject = getFrameObject(outputName);
- String[][] frame = MLContextConversionUtil.frameObjectTo2DStringArray(frameObject);
- return frame;
+ return MLContextConversionUtil.frameObjectTo2DStringArray(frameObject);
}
/**
@@ -587,8 +557,7 @@ public class MLResults {
if (!(data instanceof ScalarObject)) {
throw new MLContextException("Variable '" + outputName + "' not a scalar");
}
- ScalarObject so = (ScalarObject) data;
- return so;
+ return (ScalarObject) data;
}
/**
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ad65dfa1/src/main/java/org/apache/sysml/api/mlcontext/Matrix.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/mlcontext/Matrix.java b/src/main/java/org/apache/sysml/api/mlcontext/Matrix.java
index 1ff18f0..aa8033d 100644
--- a/src/main/java/org/apache/sysml/api/mlcontext/Matrix.java
+++ b/src/main/java/org/apache/sysml/api/mlcontext/Matrix.java
@@ -24,7 +24,7 @@ import org.apache.spark.rdd.RDD;
import org.apache.spark.sql.DataFrame;
import org.apache.sysml.runtime.controlprogram.caching.MatrixObject;
import org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext;
-import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
+import org.apache.sysml.runtime.instructions.spark.utils.RDDConverterUtils;
/**
* Matrix encapsulates a SystemML matrix. It allows for easy conversion to
@@ -58,8 +58,7 @@ public class Matrix {
* @return the matrix as a two-dimensional double array
*/
public double[][] to2DDoubleArray() {
- double[][] doubleMatrix = MLContextConversionUtil.matrixObjectTo2DDoubleArray(matrixObject);
- return doubleMatrix;
+ return MLContextConversionUtil.matrixObjectTo2DDoubleArray(matrixObject);
}
/**
@@ -68,8 +67,7 @@ public class Matrix {
* @return the matrix as a {@code JavaRDD<String>} in IJV format
*/
public JavaRDD<String> toJavaRDDStringIJV() {
- JavaRDD<String> javaRDDStringIJV = MLContextConversionUtil.matrixObjectToJavaRDDStringIJV(matrixObject);
- return javaRDDStringIJV;
+ return MLContextConversionUtil.matrixObjectToJavaRDDStringIJV(matrixObject);
}
/**
@@ -78,8 +76,7 @@ public class Matrix {
* @return the matrix as a {@code JavaRDD<String>} in CSV format
*/
public JavaRDD<String> toJavaRDDStringCSV() {
- JavaRDD<String> javaRDDStringCSV = MLContextConversionUtil.matrixObjectToJavaRDDStringCSV(matrixObject);
- return javaRDDStringCSV;
+ return MLContextConversionUtil.matrixObjectToJavaRDDStringCSV(matrixObject);
}
/**
@@ -88,8 +85,7 @@ public class Matrix {
* @return the matrix as a {@code RDD<String>} in CSV format
*/
public RDD<String> toRDDStringCSV() {
- RDD<String> rddStringCSV = MLContextConversionUtil.matrixObjectToRDDStringCSV(matrixObject);
- return rddStringCSV;
+ return MLContextConversionUtil.matrixObjectToRDDStringCSV(matrixObject);
}
/**
@@ -98,8 +94,7 @@ public class Matrix {
* @return the matrix as a {@code RDD<String>} in IJV format
*/
public RDD<String> toRDDStringIJV() {
- RDD<String> rddStringIJV = MLContextConversionUtil.matrixObjectToRDDStringIJV(matrixObject);
- return rddStringIJV;
+ return MLContextConversionUtil.matrixObjectToRDDStringIJV(matrixObject);
}
/**
@@ -108,8 +103,7 @@ public class Matrix {
* @return the matrix as a {@code DataFrame} of doubles with an ID column
*/
public DataFrame toDF() {
- DataFrame df = MLContextConversionUtil.matrixObjectToDataFrame(matrixObject, sparkExecutionContext, false);
- return df;
+ return MLContextConversionUtil.matrixObjectToDataFrame(matrixObject, sparkExecutionContext, false);
}
/**
@@ -118,8 +112,7 @@ public class Matrix {
* @return the matrix as a {@code DataFrame} of doubles with an ID column
*/
public DataFrame toDFDoubleWithIDColumn() {
- DataFrame df = MLContextConversionUtil.matrixObjectToDataFrame(matrixObject, sparkExecutionContext, false);
- return df;
+ return MLContextConversionUtil.matrixObjectToDataFrame(matrixObject, sparkExecutionContext, false);
}
/**
@@ -129,8 +122,7 @@ public class Matrix {
*/
public DataFrame toDFDoubleNoIDColumn() {
DataFrame df = MLContextConversionUtil.matrixObjectToDataFrame(matrixObject, sparkExecutionContext, false);
- df = df.sort("__INDEX").drop("__INDEX");
- return df;
+ return df.drop(RDDConverterUtils.DF_ID_COLUMN);
}
/**
@@ -139,8 +131,7 @@ public class Matrix {
* @return the matrix as a {@code DataFrame} of vectors with an ID column
*/
public DataFrame toDFVectorWithIDColumn() {
- DataFrame df = MLContextConversionUtil.matrixObjectToDataFrame(matrixObject, sparkExecutionContext, true);
- return df;
+ return MLContextConversionUtil.matrixObjectToDataFrame(matrixObject, sparkExecutionContext, true);
}
/**
@@ -150,8 +141,7 @@ public class Matrix {
*/
public DataFrame toDFVectorNoIDColumn() {
DataFrame df = MLContextConversionUtil.matrixObjectToDataFrame(matrixObject, sparkExecutionContext, true);
- df = df.sort("__INDEX").drop("__INDEX");
- return df;
+ return df.drop(RDDConverterUtils.DF_ID_COLUMN);
}
/**
@@ -160,9 +150,7 @@ public class Matrix {
* @return the matrix as a {@code BinaryBlockMatrix}
*/
public BinaryBlockMatrix toBinaryBlockMatrix() {
- BinaryBlockMatrix binaryBlockMatrix = MLContextConversionUtil.matrixObjectToBinaryBlockMatrix(matrixObject,
- sparkExecutionContext);
- return binaryBlockMatrix;
+ return MLContextConversionUtil.matrixObjectToBinaryBlockMatrix(matrixObject, sparkExecutionContext);
}
/**
@@ -171,9 +159,7 @@ public class Matrix {
* @return the matrix metadata
*/
public MatrixMetadata getMatrixMetadata() {
- MatrixCharacteristics matrixCharacteristics = matrixObject.getMatrixCharacteristics();
- MatrixMetadata matrixMetadata = new MatrixMetadata(matrixCharacteristics);
- return matrixMetadata;
+ return new MatrixMetadata(matrixObject.getMatrixCharacteristics());
}
@Override
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ad65dfa1/src/main/java/org/apache/sysml/api/mlcontext/MatrixMetadata.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/mlcontext/MatrixMetadata.java b/src/main/java/org/apache/sysml/api/mlcontext/MatrixMetadata.java
index a9f96df..a583e90 100644
--- a/src/main/java/org/apache/sysml/api/mlcontext/MatrixMetadata.java
+++ b/src/main/java/org/apache/sysml/api/mlcontext/MatrixMetadata.java
@@ -482,8 +482,7 @@ public class MatrixMetadata extends Metadata {
int nrpb = (numRowsPerBlock == null) ? ConfigurationManager.getBlocksize() : numRowsPerBlock;
int ncpb = (numColumnsPerBlock == null) ? ConfigurationManager.getBlocksize() : numColumnsPerBlock;
long nnz = (numNonZeros == null) ? -1 : numNonZeros;
- MatrixCharacteristics mc = new MatrixCharacteristics(nr, nc, nrpb, ncpb, nnz);
- return mc;
+ return new MatrixCharacteristics(nr, nc, nrpb, ncpb, nnz);
}
@Override
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ad65dfa1/src/main/java/org/apache/sysml/api/mlcontext/ScriptFactory.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/mlcontext/ScriptFactory.java b/src/main/java/org/apache/sysml/api/mlcontext/ScriptFactory.java
index 3fc797a..db89bd1 100644
--- a/src/main/java/org/apache/sysml/api/mlcontext/ScriptFactory.java
+++ b/src/main/java/org/apache/sysml/api/mlcontext/ScriptFactory.java
@@ -252,8 +252,7 @@ public class ScriptFactory {
* @return DML or PYDML Script object
*/
private static Script scriptFromString(String scriptString, ScriptType scriptType) {
- Script script = new Script(scriptString, scriptType);
- return script;
+ return new Script(scriptString, scriptType);
}
/**
@@ -360,15 +359,13 @@ public class ScriptFactory {
FileSystem fs = FileSystem.get(ConfigurationManager.getCachedJobConf());
Path path = new Path(scriptFilePath);
FSDataInputStream fsdis = fs.open(path);
- String scriptString = IOUtils.toString(fsdis);
- return scriptString;
+ return IOUtils.toString(fsdis);
} else {// from local file system
if (!LocalFileUtils.validateExternalFilename(scriptFilePath, false)) {
throw new MLContextException("Invalid (non-trustworthy) local filename: " + scriptFilePath);
}
File scriptFile = new File(scriptFilePath);
- String scriptString = FileUtils.readFileToString(scriptFile);
- return scriptString;
+ return FileUtils.readFileToString(scriptFile);
}
} catch (IllegalArgumentException e) {
throw new MLContextException("Error trying to read script string from file: " + scriptFilePath, e);
@@ -391,8 +388,7 @@ public class ScriptFactory {
throw new MLContextException("InputStream is null");
}
try {
- String scriptString = IOUtils.toString(inputStream);
- return scriptString;
+ return IOUtils.toString(inputStream);
} catch (IOException e) {
throw new MLContextException("Error trying to read script string from InputStream", e);
}
@@ -438,8 +434,7 @@ public class ScriptFactory {
}
try {
InputStream is = url.openStream();
- String scriptString = IOUtils.toString(is);
- return scriptString;
+ return IOUtils.toString(is);
} catch (IOException e) {
throw new MLContextException("Error trying to read script string from URL: " + url, e);
}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ad65dfa1/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/FrameRDDConverterUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/FrameRDDConverterUtils.java b/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/FrameRDDConverterUtils.java
index 85e5711..211f814 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/FrameRDDConverterUtils.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/FrameRDDConverterUtils.java
@@ -331,7 +331,7 @@ public class FrameRDDConverterUtils
{
if(containsID)
- df = df.drop("__INDEX");
+ df = df.drop(RDDConverterUtils.DF_ID_COLUMN);
//determine unknown dimensions if required
if( !mcOut.dimsKnown(true) ) {
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ad65dfa1/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/RDDConverterUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/RDDConverterUtils.java b/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/RDDConverterUtils.java
index 17bdea8..6fe4a50 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/RDDConverterUtils.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/RDDConverterUtils.java
@@ -1014,6 +1014,10 @@ public class RDDConverterUtils
public Tuple2<Row, Long> call(Row arg0) throws Exception {
//extract 1-based IDs and convert to 0-based positions
long id = UtilFunctions.toLong(UtilFunctions.getDouble(arg0.get(0)));
+ if( id <= 0 ) {
+ throw new DMLRuntimeException("ID Column '" + DF_ID_COLUMN
+ + "' expected to be 1-based, but found value: "+id);
+ }
return new Tuple2<Row,Long>(arg0, id-1);
}
}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ad65dfa1/src/main/scala/org/apache/sysml/api/ml/BaseSystemMLClassifier.scala
----------------------------------------------------------------------
diff --git a/src/main/scala/org/apache/sysml/api/ml/BaseSystemMLClassifier.scala b/src/main/scala/org/apache/sysml/api/ml/BaseSystemMLClassifier.scala
index 7415109..67926a5 100644
--- a/src/main/scala/org/apache/sysml/api/ml/BaseSystemMLClassifier.scala
+++ b/src/main/scala/org/apache/sysml/api/ml/BaseSystemMLClassifier.scala
@@ -147,14 +147,14 @@ trait BaseSystemMLClassifierModel extends BaseSystemMLEstimatorModel {
val Xin_bin = new BinaryBlockMatrix(Xin, mcXin)
val modelPredict = ml.execute(script._1.in(script._2, Xin_bin))
val predLabelOut = PredictionUtils.computePredictedClassLabelsFromProbability(modelPredict, isSingleNode, sc, probVar)
- val predictedDF = PredictionUtils.updateLabels(isSingleNode, predLabelOut.getDataFrame("Prediction"), null, "C1", labelMapping).select("__INDEX", "prediction")
+ val predictedDF = PredictionUtils.updateLabels(isSingleNode, predLabelOut.getDataFrame("Prediction"), null, "C1", labelMapping).select(RDDConverterUtils.DF_ID_COLUMN, "prediction")
if(outputProb) {
- val prob = modelPredict.getDataFrame(probVar, true).withColumnRenamed("C1", "probability").select("__INDEX", "probability")
- val dataset = RDDConverterUtilsExt.addIDToDataFrame(df.asInstanceOf[DataFrame], df.sqlContext, "__INDEX")
+ val prob = modelPredict.getDataFrame(probVar, true).withColumnRenamed("C1", "probability").select(RDDConverterUtils.DF_ID_COLUMN, "probability")
+ val dataset = RDDConverterUtilsExt.addIDToDataFrame(df.asInstanceOf[DataFrame], df.sqlContext, RDDConverterUtils.DF_ID_COLUMN)
return PredictionUtils.joinUsingID(dataset, PredictionUtils.joinUsingID(prob, predictedDF))
}
else {
- val dataset = RDDConverterUtilsExt.addIDToDataFrame(df.asInstanceOf[DataFrame], df.sqlContext, "__INDEX")
+ val dataset = RDDConverterUtilsExt.addIDToDataFrame(df.asInstanceOf[DataFrame], df.sqlContext, RDDConverterUtils.DF_ID_COLUMN)
return PredictionUtils.joinUsingID(dataset, predictedDF)
}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ad65dfa1/src/main/scala/org/apache/sysml/api/ml/BaseSystemMLRegressor.scala
----------------------------------------------------------------------
diff --git a/src/main/scala/org/apache/sysml/api/ml/BaseSystemMLRegressor.scala b/src/main/scala/org/apache/sysml/api/ml/BaseSystemMLRegressor.scala
index ed0fabb..d0445d2 100644
--- a/src/main/scala/org/apache/sysml/api/ml/BaseSystemMLRegressor.scala
+++ b/src/main/scala/org/apache/sysml/api/ml/BaseSystemMLRegressor.scala
@@ -79,8 +79,8 @@ trait BaseSystemMLRegressorModel extends BaseSystemMLEstimatorModel {
val script = getPredictionScript(mloutput, isSingleNode)
val Xin_bin = new BinaryBlockMatrix(Xin, mcXin)
val modelPredict = ml.execute(script._1.in(script._2, Xin_bin))
- val predictedDF = modelPredict.getDataFrame(predictionVar).select("__INDEX", "C1").withColumnRenamed("C1", "prediction")
- val dataset = RDDConverterUtilsExt.addIDToDataFrame(df.asInstanceOf[DataFrame], df.sqlContext, "__INDEX")
+ val predictedDF = modelPredict.getDataFrame(predictionVar).select(RDDConverterUtils.DF_ID_COLUMN, "C1").withColumnRenamed("C1", "prediction")
+ val dataset = RDDConverterUtilsExt.addIDToDataFrame(df.asInstanceOf[DataFrame], df.sqlContext, RDDConverterUtils.DF_ID_COLUMN)
return PredictionUtils.joinUsingID(dataset, predictedDF)
}
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ad65dfa1/src/main/scala/org/apache/sysml/api/ml/PredictionUtils.scala
----------------------------------------------------------------------
diff --git a/src/main/scala/org/apache/sysml/api/ml/PredictionUtils.scala b/src/main/scala/org/apache/sysml/api/ml/PredictionUtils.scala
index e2a1c0d..0811b1b 100644
--- a/src/main/scala/org/apache/sysml/api/ml/PredictionUtils.scala
+++ b/src/main/scala/org/apache/sysml/api/ml/PredictionUtils.scala
@@ -26,7 +26,7 @@ import org.apache.spark.SparkContext
import org.apache.sysml.runtime.matrix.data.MatrixBlock
import org.apache.sysml.runtime.DMLRuntimeException
import org.apache.sysml.runtime.matrix.MatrixCharacteristics
-import org.apache.sysml.runtime.instructions.spark.utils.{ RDDConverterUtilsExt => RDDConverterUtils }
+import org.apache.sysml.runtime.instructions.spark.utils.RDDConverterUtils
import org.apache.sysml.api.mlcontext.MLResults
import org.apache.sysml.api.mlcontext.ScriptFactory._
import org.apache.sysml.api.mlcontext.Script
@@ -131,7 +131,7 @@ object PredictionUtils {
}
def joinUsingID(df1:DataFrame, df2:DataFrame):DataFrame = {
- df1.join(df2, "__INDEX")
+ df1.join(df2, RDDConverterUtils.DF_ID_COLUMN)
}
def computePredictedClassLabelsFromProbability(mlscoreoutput:MLResults, isSingleNode:Boolean, sc:SparkContext, inProbVar:String): MLResults = {
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ad65dfa1/src/test/java/org/apache/sysml/test/integration/mlcontext/MLContextTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/mlcontext/MLContextTest.java b/src/test/java/org/apache/sysml/test/integration/mlcontext/MLContextTest.java
index 61f44e5..484a777 100644
--- a/src/test/java/org/apache/sysml/test/integration/mlcontext/MLContextTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/mlcontext/MLContextTest.java
@@ -67,6 +67,7 @@ import org.apache.sysml.api.mlcontext.MatrixMetadata;
import org.apache.sysml.api.mlcontext.Script;
import org.apache.sysml.api.mlcontext.ScriptExecutor;
import org.apache.sysml.runtime.controlprogram.caching.MatrixObject;
+import org.apache.sysml.runtime.instructions.spark.utils.RDDConverterUtils;
import org.apache.sysml.test.integration.AutomatedTestBase;
import org.junit.After;
import org.junit.AfterClass;
@@ -564,7 +565,7 @@ public class MLContextTest extends AutomatedTestBase {
JavaRDD<Row> javaRddRow = javaRddString.map(new CommaSeparatedValueStringToRow());
SQLContext sqlContext = new SQLContext(sc);
List<StructField> fields = new ArrayList<StructField>();
- fields.add(DataTypes.createStructField("__INDEX", DataTypes.StringType, true));
+ fields.add(DataTypes.createStructField(RDDConverterUtils.DF_ID_COLUMN, DataTypes.StringType, true));
fields.add(DataTypes.createStructField("C1", DataTypes.StringType, true));
fields.add(DataTypes.createStructField("C2", DataTypes.StringType, true));
fields.add(DataTypes.createStructField("C3", DataTypes.StringType, true));
@@ -591,7 +592,7 @@ public class MLContextTest extends AutomatedTestBase {
JavaRDD<Row> javaRddRow = javaRddString.map(new CommaSeparatedValueStringToRow());
SQLContext sqlContext = new SQLContext(sc);
List<StructField> fields = new ArrayList<StructField>();
- fields.add(DataTypes.createStructField("__INDEX", DataTypes.StringType, true));
+ fields.add(DataTypes.createStructField(RDDConverterUtils.DF_ID_COLUMN, DataTypes.StringType, true));
fields.add(DataTypes.createStructField("C1", DataTypes.StringType, true));
fields.add(DataTypes.createStructField("C2", DataTypes.StringType, true));
fields.add(DataTypes.createStructField("C3", DataTypes.StringType, true));
@@ -618,7 +619,7 @@ public class MLContextTest extends AutomatedTestBase {
JavaRDD<Row> javaRddRow = javaRddString.map(new CommaSeparatedValueStringToRow());
SQLContext sqlContext = new SQLContext(sc);
List<StructField> fields = new ArrayList<StructField>();
- fields.add(DataTypes.createStructField("__INDEX", DataTypes.StringType, true));
+ fields.add(DataTypes.createStructField(RDDConverterUtils.DF_ID_COLUMN, DataTypes.StringType, true));
fields.add(DataTypes.createStructField("C1", DataTypes.StringType, true));
fields.add(DataTypes.createStructField("C2", DataTypes.StringType, true));
fields.add(DataTypes.createStructField("C3", DataTypes.StringType, true));
@@ -645,7 +646,7 @@ public class MLContextTest extends AutomatedTestBase {
JavaRDD<Row> javaRddRow = javaRddString.map(new CommaSeparatedValueStringToRow());
SQLContext sqlContext = new SQLContext(sc);
List<StructField> fields = new ArrayList<StructField>();
- fields.add(DataTypes.createStructField("__INDEX", DataTypes.StringType, true));
+ fields.add(DataTypes.createStructField(RDDConverterUtils.DF_ID_COLUMN, DataTypes.StringType, true));
fields.add(DataTypes.createStructField("C1", DataTypes.StringType, true));
fields.add(DataTypes.createStructField("C2", DataTypes.StringType, true));
fields.add(DataTypes.createStructField("C3", DataTypes.StringType, true));
@@ -672,7 +673,7 @@ public class MLContextTest extends AutomatedTestBase {
JavaRDD<Row> javaRddRow = javaRddTuple.map(new DoubleVectorRow());
SQLContext sqlContext = new SQLContext(sc);
List<StructField> fields = new ArrayList<StructField>();
- fields.add(DataTypes.createStructField("__INDEX", DataTypes.StringType, true));
+ fields.add(DataTypes.createStructField(RDDConverterUtils.DF_ID_COLUMN, DataTypes.StringType, true));
fields.add(DataTypes.createStructField("C1", new VectorUDT(), true));
StructType schema = DataTypes.createStructType(fields);
DataFrame dataFrame = sqlContext.createDataFrame(javaRddRow, schema);
@@ -697,7 +698,7 @@ public class MLContextTest extends AutomatedTestBase {
JavaRDD<Row> javaRddRow = javaRddTuple.map(new DoubleVectorRow());
SQLContext sqlContext = new SQLContext(sc);
List<StructField> fields = new ArrayList<StructField>();
- fields.add(DataTypes.createStructField("__INDEX", DataTypes.StringType, true));
+ fields.add(DataTypes.createStructField(RDDConverterUtils.DF_ID_COLUMN, DataTypes.StringType, true));
fields.add(DataTypes.createStructField("C1", new VectorUDT(), true));
StructType schema = DataTypes.createStructType(fields);
DataFrame dataFrame = sqlContext.createDataFrame(javaRddRow, schema);
@@ -2102,7 +2103,7 @@ public class MLContextTest extends AutomatedTestBase {
JavaRDD<Row> javaRddRow = javaRddString.map(new CommaSeparatedValueStringToRow());
SQLContext sqlContext = new SQLContext(sc);
List<StructField> fields = new ArrayList<StructField>();
- fields.add(DataTypes.createStructField("__INDEX", DataTypes.StringType, true));
+ fields.add(DataTypes.createStructField(RDDConverterUtils.DF_ID_COLUMN, DataTypes.StringType, true));
fields.add(DataTypes.createStructField("C1", DataTypes.StringType, true));
fields.add(DataTypes.createStructField("C2", DataTypes.StringType, true));
fields.add(DataTypes.createStructField("C3", DataTypes.StringType, true));
@@ -2127,7 +2128,7 @@ public class MLContextTest extends AutomatedTestBase {
JavaRDD<Row> javaRddRow = javaRddString.map(new CommaSeparatedValueStringToRow());
SQLContext sqlContext = new SQLContext(sc);
List<StructField> fields = new ArrayList<StructField>();
- fields.add(DataTypes.createStructField("__INDEX", DataTypes.StringType, true));
+ fields.add(DataTypes.createStructField(RDDConverterUtils.DF_ID_COLUMN, DataTypes.StringType, true));
fields.add(DataTypes.createStructField("C1", DataTypes.StringType, true));
fields.add(DataTypes.createStructField("C2", DataTypes.StringType, true));
fields.add(DataTypes.createStructField("C3", DataTypes.StringType, true));
@@ -2152,7 +2153,7 @@ public class MLContextTest extends AutomatedTestBase {
JavaRDD<Row> javaRddRow = javaRddTuple.map(new DoubleVectorRow());
SQLContext sqlContext = new SQLContext(sc);
List<StructField> fields = new ArrayList<StructField>();
- fields.add(DataTypes.createStructField("__INDEX", DataTypes.StringType, true));
+ fields.add(DataTypes.createStructField(RDDConverterUtils.DF_ID_COLUMN, DataTypes.StringType, true));
fields.add(DataTypes.createStructField("C1", new VectorUDT(), true));
StructType schema = DataTypes.createStructType(fields);
DataFrame dataFrame = sqlContext.createDataFrame(javaRddRow, schema);
@@ -2175,7 +2176,7 @@ public class MLContextTest extends AutomatedTestBase {
JavaRDD<Row> javaRddRow = javaRddTuple.map(new DoubleVectorRow());
SQLContext sqlContext = new SQLContext(sc);
List<StructField> fields = new ArrayList<StructField>();
- fields.add(DataTypes.createStructField("__INDEX", DataTypes.StringType, true));
+ fields.add(DataTypes.createStructField(RDDConverterUtils.DF_ID_COLUMN, DataTypes.StringType, true));
fields.add(DataTypes.createStructField("C1", new VectorUDT(), true));
StructType schema = DataTypes.createStructType(fields);
DataFrame dataFrame = sqlContext.createDataFrame(javaRddRow, schema);