You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by de...@apache.org on 2016/09/09 20:40:17 UTC
incubator-systemml git commit: [SYSTEMML-900] Rename DataFrame ID column
Repository: incubator-systemml
Updated Branches:
refs/heads/master 6bfccb712 -> 7fa318674
[SYSTEMML-900] Rename DataFrame ID column
Closes #236.
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/7fa31867
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/7fa31867
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/7fa31867
Branch: refs/heads/master
Commit: 7fa31867479ff4be66fc97394ae8edfb4ffbbeb8
Parents: 6bfccb7
Author: Deron Eriksson <de...@us.ibm.com>
Authored: Fri Sep 9 13:37:17 2016 -0700
Committer: Deron Eriksson <de...@us.ibm.com>
Committed: Fri Sep 9 13:37:17 2016 -0700
----------------------------------------------------------------------
.../java/org/apache/sysml/api/MLOutput.java | 4 +-
.../api/mlcontext/MLContextConversionUtil.java | 12 +++---
.../apache/sysml/api/mlcontext/MLResults.java | 4 +-
.../org/apache/sysml/api/mlcontext/Matrix.java | 4 +-
.../sysml/api/mlcontext/MatrixFormat.java | 28 +++++++-------
.../spark/utils/FrameRDDConverterUtils.java | 2 +-
.../spark/utils/RDDConverterUtilsExt.java | 10 ++---
src/main/python/systemml/mlcontext.py | 2 +-
src/main/python/tests/test_mlcontext.py | 2 +-
src/main/python/tests/test_mllearn.py | 4 +-
.../sysml/api/ml/BaseSystemMLClassifier.scala | 8 ++--
.../sysml/api/ml/BaseSystemMLRegressor.scala | 4 +-
.../apache/sysml/api/ml/PredictionUtils.scala | 4 +-
.../integration/mlcontext/MLContextTest.java | 40 ++++++++++----------
14 files changed, 64 insertions(+), 64 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7fa31867/src/main/java/org/apache/sysml/api/MLOutput.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/MLOutput.java b/src/main/java/org/apache/sysml/api/MLOutput.java
index a18ab60..f41c479 100644
--- a/src/main/java/org/apache/sysml/api/MLOutput.java
+++ b/src/main/java/org/apache/sysml/api/MLOutput.java
@@ -101,7 +101,7 @@ public class MLOutput {
/**
* Note, the output DataFrame has an additional column ID.
- * An easy way to get DataFrame without ID is by df.sort("ID").drop("ID")
+ * An easy way to get DataFrame without ID is by df.sort("__INDEX").drop("__INDEX")
* @param sqlContext
* @param varName
* @return
@@ -181,7 +181,7 @@ public class MLOutput {
List<StructField> fields = new ArrayList<StructField>();
// LongTypes throw an error: java.lang.Double incompatible with java.lang.Long
- fields.add(DataTypes.createStructField("ID", DataTypes.DoubleType, false));
+ fields.add(DataTypes.createStructField("__INDEX", DataTypes.DoubleType, false));
for(int k = 0; k < alRange.size(); k++) {
String colName = alRange.get(k)._1;
long low = alRange.get(k)._2._1;
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7fa31867/src/main/java/org/apache/sysml/api/mlcontext/MLContextConversionUtil.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/mlcontext/MLContextConversionUtil.java b/src/main/java/org/apache/sysml/api/mlcontext/MLContextConversionUtil.java
index 9b15852..9d9ee5a 100644
--- a/src/main/java/org/apache/sysml/api/mlcontext/MLContextConversionUtil.java
+++ b/src/main/java/org/apache/sysml/api/mlcontext/MLContextConversionUtil.java
@@ -454,7 +454,7 @@ public class MLContextConversionUtil {
}
if (isDataFrameWithIDColumn(matrixMetadata)) {
- dataFrame = dataFrame.sort("ID").drop("ID");
+ dataFrame = dataFrame.sort("__INDEX").drop("__INDEX");
}
boolean isVectorBasedDataFrame = isVectorBasedDataFrame(matrixMetadata);
@@ -507,7 +507,7 @@ public class MLContextConversionUtil {
StructType schema = dataFrame.schema();
boolean hasID = false;
try {
- schema.fieldIndex("ID");
+ schema.fieldIndex("__INDEX");
hasID = true;
} catch (IllegalArgumentException iae) {
}
@@ -516,16 +516,16 @@ public class MLContextConversionUtil {
if (hasID) {
Object object = firstRow.get(1);
if (object instanceof Vector) {
- mf = MatrixFormat.DF_VECTOR_WITH_ID_COLUMN;
+ mf = MatrixFormat.DF_VECTOR_WITH_INDEX;
} else {
- mf = MatrixFormat.DF_DOUBLES_WITH_ID_COLUMN;
+ mf = MatrixFormat.DF_DOUBLES_WITH_INDEX;
}
} else {
Object object = firstRow.get(0);
if (object instanceof Vector) {
- mf = MatrixFormat.DF_VECTOR_WITH_NO_ID_COLUMN;
+ mf = MatrixFormat.DF_VECTOR;
} else {
- mf = MatrixFormat.DF_DOUBLES_WITH_NO_ID_COLUMN;
+ mf = MatrixFormat.DF_DOUBLES;
}
}
if (mf == null) {
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7fa31867/src/main/java/org/apache/sysml/api/mlcontext/MLResults.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/mlcontext/MLResults.java b/src/main/java/org/apache/sysml/api/mlcontext/MLResults.java
index 5ae882f..1b05f98 100644
--- a/src/main/java/org/apache/sysml/api/mlcontext/MLResults.java
+++ b/src/main/java/org/apache/sysml/api/mlcontext/MLResults.java
@@ -456,7 +456,7 @@ public class MLResults {
}
MatrixObject mo = getMatrixObject(outputName);
DataFrame df = MLContextConversionUtil.matrixObjectToDataFrame(mo, sparkExecutionContext, false);
- df = df.sort("ID").drop("ID");
+ df = df.sort("__INDEX").drop("__INDEX");
return df;
}
@@ -484,7 +484,7 @@ public class MLResults {
}
MatrixObject mo = getMatrixObject(outputName);
DataFrame df = MLContextConversionUtil.matrixObjectToDataFrame(mo, sparkExecutionContext, true);
- df = df.sort("ID").drop("ID");
+ df = df.sort("__INDEX").drop("__INDEX");
return df;
}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7fa31867/src/main/java/org/apache/sysml/api/mlcontext/Matrix.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/mlcontext/Matrix.java b/src/main/java/org/apache/sysml/api/mlcontext/Matrix.java
index 1e8e2eb..1ff18f0 100644
--- a/src/main/java/org/apache/sysml/api/mlcontext/Matrix.java
+++ b/src/main/java/org/apache/sysml/api/mlcontext/Matrix.java
@@ -129,7 +129,7 @@ public class Matrix {
*/
public DataFrame toDFDoubleNoIDColumn() {
DataFrame df = MLContextConversionUtil.matrixObjectToDataFrame(matrixObject, sparkExecutionContext, false);
- df = df.sort("ID").drop("ID");
+ df = df.sort("__INDEX").drop("__INDEX");
return df;
}
@@ -150,7 +150,7 @@ public class Matrix {
*/
public DataFrame toDFVectorNoIDColumn() {
DataFrame df = MLContextConversionUtil.matrixObjectToDataFrame(matrixObject, sparkExecutionContext, true);
- df = df.sort("ID").drop("ID");
+ df = df.sort("__INDEX").drop("__INDEX");
return df;
}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7fa31867/src/main/java/org/apache/sysml/api/mlcontext/MatrixFormat.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/mlcontext/MatrixFormat.java b/src/main/java/org/apache/sysml/api/mlcontext/MatrixFormat.java
index a7ac395..4f4c7f9 100644
--- a/src/main/java/org/apache/sysml/api/mlcontext/MatrixFormat.java
+++ b/src/main/java/org/apache/sysml/api/mlcontext/MatrixFormat.java
@@ -37,24 +37,24 @@ public enum MatrixFormat {
IJV,
/**
- * DataFrame of doubles with an ID column.
+ * DataFrame of doubles with a row index.
*/
- DF_DOUBLES_WITH_ID_COLUMN,
+ DF_DOUBLES_WITH_INDEX,
/**
- * DataFrame of doubles with no ID column.
+ * DataFrame of doubles with no row index.
*/
- DF_DOUBLES_WITH_NO_ID_COLUMN,
+ DF_DOUBLES,
/**
- * Vector DataFrame with an ID column.
+ * Vector DataFrame with a row index.
*/
- DF_VECTOR_WITH_ID_COLUMN,
+ DF_VECTOR_WITH_INDEX,
/**
- * Vector DataFrame with no ID column.
+ * Vector DataFrame with no row index.
*/
- DF_VECTOR_WITH_NO_ID_COLUMN;
+ DF_VECTOR;
/**
* Is the matrix format vector-based?
@@ -63,9 +63,9 @@ public enum MatrixFormat {
* otherwise.
*/
public boolean isVectorBased() {
- if (this == DF_VECTOR_WITH_ID_COLUMN) {
+ if (this == DF_VECTOR_WITH_INDEX) {
return true;
- } else if (this == DF_VECTOR_WITH_NO_ID_COLUMN) {
+ } else if (this == DF_VECTOR) {
return true;
} else {
return false;
@@ -73,15 +73,15 @@ public enum MatrixFormat {
}
/**
- * Does the DataFrame have an ID column?
+ * Does the DataFrame have a row index?
*
- * @return {@code true} if the DataFrame has an ID column, {@code false}
+ * @return {@code true} if the DataFrame has a row index, {@code false}
* otherwise.
*/
public boolean hasIDColumn() {
- if (this == DF_DOUBLES_WITH_ID_COLUMN) {
+ if (this == DF_DOUBLES_WITH_INDEX) {
return true;
- } else if (this == DF_VECTOR_WITH_ID_COLUMN) {
+ } else if (this == DF_VECTOR_WITH_INDEX) {
return true;
} else {
return false;
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7fa31867/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/FrameRDDConverterUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/FrameRDDConverterUtils.java b/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/FrameRDDConverterUtils.java
index fa8c48f..85e5711 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/FrameRDDConverterUtils.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/FrameRDDConverterUtils.java
@@ -331,7 +331,7 @@ public class FrameRDDConverterUtils
{
if(containsID)
- df = df.drop("ID");
+ df = df.drop("__INDEX");
//determine unknown dimensions if required
if( !mcOut.dimsKnown(true) ) {
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7fa31867/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/RDDConverterUtilsExt.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/RDDConverterUtilsExt.java b/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/RDDConverterUtilsExt.java
index 88dd44c..34e5a91 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/RDDConverterUtilsExt.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/RDDConverterUtilsExt.java
@@ -217,7 +217,7 @@ public class RDDConverterUtilsExt
throws DMLRuntimeException {
if(containsID) {
- inputDF = dropColumn(inputDF.sort("ID"), "ID");
+ inputDF = dropColumn(inputDF.sort("__INDEX"), "__INDEX");
}
DataFrame df = inputDF.select(vectorColumnName);
@@ -276,7 +276,7 @@ public class RDDConverterUtilsExt
throw new DMLRuntimeException("No column other than \"" + column + "\" present in the dataframe.");
}
- // Round about way to do in Java (not exposed in Spark 1.3.0): df = df.drop("ID");
+ // Round about way to do in Java (not exposed in Spark 1.3.0): df = df.drop("__INDEX");
return df.select(firstCol, scala.collection.JavaConversions.asScalaBuffer(columnToSelect).toList());
}
@@ -405,7 +405,7 @@ public class RDDConverterUtilsExt
}
if(containsID) {
- df = dropColumn(df.sort("ID"), "ID");
+ df = dropColumn(df.sort("__INDEX"), "__INDEX");
}
//determine unknown dimensions and sparsity if required
@@ -447,7 +447,7 @@ public class RDDConverterUtilsExt
List<StructField> fields = new ArrayList<StructField>();
// LongTypes throw an error: java.lang.Double incompatible with java.lang.Long
- fields.add(DataTypes.createStructField("ID", DataTypes.DoubleType, false));
+ fields.add(DataTypes.createStructField("__INDEX", DataTypes.DoubleType, false));
fields.add(DataTypes.createStructField("C1", new VectorUDT(), false));
// fields.add(DataTypes.createStructField("C1", DataTypes.createArrayType(DataTypes.DoubleType), false));
@@ -509,7 +509,7 @@ public class RDDConverterUtilsExt
List<StructField> fields = new ArrayList<StructField>();
// LongTypes throw an error: java.lang.Double incompatible with java.lang.Long
- fields.add(DataTypes.createStructField("ID", DataTypes.DoubleType, false));
+ fields.add(DataTypes.createStructField("__INDEX", DataTypes.DoubleType, false));
for(int i = 1; i <= numColumns; i++) {
fields.add(DataTypes.createStructField("C" + i, DataTypes.DoubleType, false));
}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7fa31867/src/main/python/systemml/mlcontext.py
----------------------------------------------------------------------
diff --git a/src/main/python/systemml/mlcontext.py b/src/main/python/systemml/mlcontext.py
index 389f9e1..ae56a46 100644
--- a/src/main/python/systemml/mlcontext.py
+++ b/src/main/python/systemml/mlcontext.py
@@ -120,7 +120,7 @@ class Matrix(object):
-------
df: PySpark SQL DataFrame
A PySpark SQL DataFrame representing the matrix, with
- one "ID" column containing the row index (since Spark
+ one "__INDEX" column containing the row index (since Spark
DataFrames are unordered), followed by columns of doubles
for each column in the matrix.
"""
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7fa31867/src/main/python/tests/test_mlcontext.py
----------------------------------------------------------------------
diff --git a/src/main/python/tests/test_mlcontext.py b/src/main/python/tests/test_mlcontext.py
index 6a6f64e..b9ecb00 100644
--- a/src/main/python/tests/test_mlcontext.py
+++ b/src/main/python/tests/test_mlcontext.py
@@ -62,7 +62,7 @@ class TestAPI(unittest.TestCase):
rdd1 = sc.parallelize(["1.0,2.0", "3.0,4.0"])
script = dml(sums).input(m1=rdd1).output("m2")
m2 = ml.execute(script).get("m2")
- self.assertEqual(repr(m2.toDF()), "DataFrame[ID: double, C1: double, C2: double]")
+ self.assertEqual(repr(m2.toDF()), "DataFrame[__INDEX: double, C1: double, C2: double]")
def test_input_single(self):
script = """
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7fa31867/src/main/python/tests/test_mllearn.py
----------------------------------------------------------------------
diff --git a/src/main/python/tests/test_mllearn.py b/src/main/python/tests/test_mllearn.py
index 9cc6443..1a1a6cb 100644
--- a/src/main/python/tests/test_mllearn.py
+++ b/src/main/python/tests/test_mllearn.py
@@ -79,7 +79,7 @@ class TestMLLearn(unittest.TestCase):
(9, "a e c l", 2.0),
(10, "spark compile", 1.0),
(11, "hadoop software", 2.0)
- ], ["id", "text", "label"])
+ ], ["__INDEX", "text", "label"])
tokenizer = Tokenizer(inputCol="text", outputCol="words")
hashingTF = HashingTF(inputCol="words", outputCol="features", numFeatures=20)
lr = LogisticRegression(sqlCtx)
@@ -89,7 +89,7 @@ class TestMLLearn(unittest.TestCase):
(12, "spark i j k", 1.0),
(13, "l m n", 2.0),
(14, "mapreduce spark", 1.0),
- (15, "apache hadoop", 2.0)], ["id", "text", "label"])
+ (15, "apache hadoop", 2.0)], ["__INDEX", "text", "label"])
result = model.transform(test)
predictionAndLabels = result.select("prediction", "label")
evaluator = MulticlassClassificationEvaluator()
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7fa31867/src/main/scala/org/apache/sysml/api/ml/BaseSystemMLClassifier.scala
----------------------------------------------------------------------
diff --git a/src/main/scala/org/apache/sysml/api/ml/BaseSystemMLClassifier.scala b/src/main/scala/org/apache/sysml/api/ml/BaseSystemMLClassifier.scala
index 98def7c..c9c05e0 100644
--- a/src/main/scala/org/apache/sysml/api/ml/BaseSystemMLClassifier.scala
+++ b/src/main/scala/org/apache/sysml/api/ml/BaseSystemMLClassifier.scala
@@ -147,14 +147,14 @@ trait BaseSystemMLClassifierModel extends BaseSystemMLEstimatorModel {
val Xin_bin = new BinaryBlockMatrix(Xin, mcXin)
val modelPredict = ml.execute(script._1.in(script._2, Xin_bin))
val predLabelOut = PredictionUtils.computePredictedClassLabelsFromProbability(modelPredict, isSingleNode, sc, probVar)
- val predictedDF = PredictionUtils.updateLabels(isSingleNode, predLabelOut.getDataFrame("Prediction"), null, "C1", labelMapping).select("ID", "prediction")
+ val predictedDF = PredictionUtils.updateLabels(isSingleNode, predLabelOut.getDataFrame("Prediction"), null, "C1", labelMapping).select("__INDEX", "prediction")
if(outputProb) {
- val prob = modelPredict.getDataFrame(probVar, true).withColumnRenamed("C1", "probability").select("ID", "probability")
- val dataset = RDDConverterUtils.addIDToDataFrame(df.asInstanceOf[DataFrame], df.sqlContext, "ID")
+ val prob = modelPredict.getDataFrame(probVar, true).withColumnRenamed("C1", "probability").select("__INDEX", "probability")
+ val dataset = RDDConverterUtils.addIDToDataFrame(df.asInstanceOf[DataFrame], df.sqlContext, "__INDEX")
return PredictionUtils.joinUsingID(dataset, PredictionUtils.joinUsingID(prob, predictedDF))
}
else {
- val dataset = RDDConverterUtils.addIDToDataFrame(df.asInstanceOf[DataFrame], df.sqlContext, "ID")
+ val dataset = RDDConverterUtils.addIDToDataFrame(df.asInstanceOf[DataFrame], df.sqlContext, "__INDEX")
return PredictionUtils.joinUsingID(dataset, predictedDF)
}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7fa31867/src/main/scala/org/apache/sysml/api/ml/BaseSystemMLRegressor.scala
----------------------------------------------------------------------
diff --git a/src/main/scala/org/apache/sysml/api/ml/BaseSystemMLRegressor.scala b/src/main/scala/org/apache/sysml/api/ml/BaseSystemMLRegressor.scala
index 5bcde30..73bf9be 100644
--- a/src/main/scala/org/apache/sysml/api/ml/BaseSystemMLRegressor.scala
+++ b/src/main/scala/org/apache/sysml/api/ml/BaseSystemMLRegressor.scala
@@ -79,8 +79,8 @@ trait BaseSystemMLRegressorModel extends BaseSystemMLEstimatorModel {
val script = getPredictionScript(mloutput, isSingleNode)
val Xin_bin = new BinaryBlockMatrix(Xin, mcXin)
val modelPredict = ml.execute(script._1.in(script._2, Xin_bin))
- val predictedDF = modelPredict.getDataFrame(predictionVar).select("ID", "C1").withColumnRenamed("C1", "prediction")
- val dataset = RDDConverterUtils.addIDToDataFrame(df.asInstanceOf[DataFrame], df.sqlContext, "ID")
+ val predictedDF = modelPredict.getDataFrame(predictionVar).select("__INDEX", "C1").withColumnRenamed("C1", "prediction")
+ val dataset = RDDConverterUtils.addIDToDataFrame(df.asInstanceOf[DataFrame], df.sqlContext, "__INDEX")
return PredictionUtils.joinUsingID(dataset, predictedDF)
}
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7fa31867/src/main/scala/org/apache/sysml/api/ml/PredictionUtils.scala
----------------------------------------------------------------------
diff --git a/src/main/scala/org/apache/sysml/api/ml/PredictionUtils.scala b/src/main/scala/org/apache/sysml/api/ml/PredictionUtils.scala
index 8e3893d..895fcd6 100644
--- a/src/main/scala/org/apache/sysml/api/ml/PredictionUtils.scala
+++ b/src/main/scala/org/apache/sysml/api/ml/PredictionUtils.scala
@@ -131,8 +131,8 @@ object PredictionUtils {
}
def joinUsingID(df1:DataFrame, df2:DataFrame):DataFrame = {
- val tempDF1 = df1.withColumnRenamed("ID", "ID1")
- tempDF1.join(df2, tempDF1.col("ID1").equalTo(df2.col("ID"))).drop("ID1")
+ val tempDF1 = df1.withColumnRenamed("__INDEX", "ID1")
+ tempDF1.join(df2, tempDF1.col("ID1").equalTo(df2.col("__INDEX"))).drop("ID1")
}
def computePredictedClassLabelsFromProbability(mlscoreoutput:MLResults, isSingleNode:Boolean, sc:SparkContext, inProbVar:String): MLResults = {
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7fa31867/src/test/java/org/apache/sysml/test/integration/mlcontext/MLContextTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/mlcontext/MLContextTest.java b/src/test/java/org/apache/sysml/test/integration/mlcontext/MLContextTest.java
index 0252b50..61f44e5 100644
--- a/src/test/java/org/apache/sysml/test/integration/mlcontext/MLContextTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/mlcontext/MLContextTest.java
@@ -518,7 +518,7 @@ public class MLContextTest extends AutomatedTestBase {
StructType schema = DataTypes.createStructType(fields);
DataFrame dataFrame = sqlContext.createDataFrame(javaRddRow, schema);
- MatrixMetadata mm = new MatrixMetadata(MatrixFormat.DF_DOUBLES_WITH_NO_ID_COLUMN);
+ MatrixMetadata mm = new MatrixMetadata(MatrixFormat.DF_DOUBLES);
Script script = dml("print('sum: ' + sum(M));").in("M", dataFrame, mm);
setExpectedStdOut("sum: 450.0");
@@ -544,7 +544,7 @@ public class MLContextTest extends AutomatedTestBase {
StructType schema = DataTypes.createStructType(fields);
DataFrame dataFrame = sqlContext.createDataFrame(javaRddRow, schema);
- MatrixMetadata mm = new MatrixMetadata(MatrixFormat.DF_DOUBLES_WITH_NO_ID_COLUMN);
+ MatrixMetadata mm = new MatrixMetadata(MatrixFormat.DF_DOUBLES);
Script script = pydml("print('sum: ' + sum(M))").in("M", dataFrame, mm);
setExpectedStdOut("sum: 450.0");
@@ -564,14 +564,14 @@ public class MLContextTest extends AutomatedTestBase {
JavaRDD<Row> javaRddRow = javaRddString.map(new CommaSeparatedValueStringToRow());
SQLContext sqlContext = new SQLContext(sc);
List<StructField> fields = new ArrayList<StructField>();
- fields.add(DataTypes.createStructField("ID", DataTypes.StringType, true));
+ fields.add(DataTypes.createStructField("__INDEX", DataTypes.StringType, true));
fields.add(DataTypes.createStructField("C1", DataTypes.StringType, true));
fields.add(DataTypes.createStructField("C2", DataTypes.StringType, true));
fields.add(DataTypes.createStructField("C3", DataTypes.StringType, true));
StructType schema = DataTypes.createStructType(fields);
DataFrame dataFrame = sqlContext.createDataFrame(javaRddRow, schema);
- MatrixMetadata mm = new MatrixMetadata(MatrixFormat.DF_DOUBLES_WITH_ID_COLUMN);
+ MatrixMetadata mm = new MatrixMetadata(MatrixFormat.DF_DOUBLES_WITH_INDEX);
Script script = dml("print('sum: ' + sum(M));").in("M", dataFrame, mm);
setExpectedStdOut("sum: 45.0");
@@ -591,14 +591,14 @@ public class MLContextTest extends AutomatedTestBase {
JavaRDD<Row> javaRddRow = javaRddString.map(new CommaSeparatedValueStringToRow());
SQLContext sqlContext = new SQLContext(sc);
List<StructField> fields = new ArrayList<StructField>();
- fields.add(DataTypes.createStructField("ID", DataTypes.StringType, true));
+ fields.add(DataTypes.createStructField("__INDEX", DataTypes.StringType, true));
fields.add(DataTypes.createStructField("C1", DataTypes.StringType, true));
fields.add(DataTypes.createStructField("C2", DataTypes.StringType, true));
fields.add(DataTypes.createStructField("C3", DataTypes.StringType, true));
StructType schema = DataTypes.createStructType(fields);
DataFrame dataFrame = sqlContext.createDataFrame(javaRddRow, schema);
- MatrixMetadata mm = new MatrixMetadata(MatrixFormat.DF_DOUBLES_WITH_ID_COLUMN);
+ MatrixMetadata mm = new MatrixMetadata(MatrixFormat.DF_DOUBLES_WITH_INDEX);
Script script = pydml("print('sum: ' + sum(M))").in("M", dataFrame, mm);
setExpectedStdOut("sum: 45.0");
@@ -618,14 +618,14 @@ public class MLContextTest extends AutomatedTestBase {
JavaRDD<Row> javaRddRow = javaRddString.map(new CommaSeparatedValueStringToRow());
SQLContext sqlContext = new SQLContext(sc);
List<StructField> fields = new ArrayList<StructField>();
- fields.add(DataTypes.createStructField("ID", DataTypes.StringType, true));
+ fields.add(DataTypes.createStructField("__INDEX", DataTypes.StringType, true));
fields.add(DataTypes.createStructField("C1", DataTypes.StringType, true));
fields.add(DataTypes.createStructField("C2", DataTypes.StringType, true));
fields.add(DataTypes.createStructField("C3", DataTypes.StringType, true));
StructType schema = DataTypes.createStructType(fields);
DataFrame dataFrame = sqlContext.createDataFrame(javaRddRow, schema);
- MatrixMetadata mm = new MatrixMetadata(MatrixFormat.DF_DOUBLES_WITH_ID_COLUMN);
+ MatrixMetadata mm = new MatrixMetadata(MatrixFormat.DF_DOUBLES_WITH_INDEX);
Script script = dml("print('M[1,1]: ' + as.scalar(M[1,1]));").in("M", dataFrame, mm);
setExpectedStdOut("M[1,1]: 1.0");
@@ -645,14 +645,14 @@ public class MLContextTest extends AutomatedTestBase {
JavaRDD<Row> javaRddRow = javaRddString.map(new CommaSeparatedValueStringToRow());
SQLContext sqlContext = new SQLContext(sc);
List<StructField> fields = new ArrayList<StructField>();
- fields.add(DataTypes.createStructField("ID", DataTypes.StringType, true));
+ fields.add(DataTypes.createStructField("__INDEX", DataTypes.StringType, true));
fields.add(DataTypes.createStructField("C1", DataTypes.StringType, true));
fields.add(DataTypes.createStructField("C2", DataTypes.StringType, true));
fields.add(DataTypes.createStructField("C3", DataTypes.StringType, true));
StructType schema = DataTypes.createStructType(fields);
DataFrame dataFrame = sqlContext.createDataFrame(javaRddRow, schema);
- MatrixMetadata mm = new MatrixMetadata(MatrixFormat.DF_DOUBLES_WITH_ID_COLUMN);
+ MatrixMetadata mm = new MatrixMetadata(MatrixFormat.DF_DOUBLES_WITH_INDEX);
Script script = pydml("print('M[0,0]: ' + scalar(M[0,0]))").in("M", dataFrame, mm);
setExpectedStdOut("M[0,0]: 1.0");
@@ -672,12 +672,12 @@ public class MLContextTest extends AutomatedTestBase {
JavaRDD<Row> javaRddRow = javaRddTuple.map(new DoubleVectorRow());
SQLContext sqlContext = new SQLContext(sc);
List<StructField> fields = new ArrayList<StructField>();
- fields.add(DataTypes.createStructField("ID", DataTypes.StringType, true));
+ fields.add(DataTypes.createStructField("__INDEX", DataTypes.StringType, true));
fields.add(DataTypes.createStructField("C1", new VectorUDT(), true));
StructType schema = DataTypes.createStructType(fields);
DataFrame dataFrame = sqlContext.createDataFrame(javaRddRow, schema);
- MatrixMetadata mm = new MatrixMetadata(MatrixFormat.DF_VECTOR_WITH_ID_COLUMN);
+ MatrixMetadata mm = new MatrixMetadata(MatrixFormat.DF_VECTOR_WITH_INDEX);
Script script = dml("print('sum: ' + sum(M));").in("M", dataFrame, mm);
setExpectedStdOut("sum: 45.0");
@@ -697,12 +697,12 @@ public class MLContextTest extends AutomatedTestBase {
JavaRDD<Row> javaRddRow = javaRddTuple.map(new DoubleVectorRow());
SQLContext sqlContext = new SQLContext(sc);
List<StructField> fields = new ArrayList<StructField>();
- fields.add(DataTypes.createStructField("ID", DataTypes.StringType, true));
+ fields.add(DataTypes.createStructField("__INDEX", DataTypes.StringType, true));
fields.add(DataTypes.createStructField("C1", new VectorUDT(), true));
StructType schema = DataTypes.createStructType(fields);
DataFrame dataFrame = sqlContext.createDataFrame(javaRddRow, schema);
- MatrixMetadata mm = new MatrixMetadata(MatrixFormat.DF_VECTOR_WITH_ID_COLUMN);
+ MatrixMetadata mm = new MatrixMetadata(MatrixFormat.DF_VECTOR_WITH_INDEX);
Script script = dml("print('sum: ' + sum(M))").in("M", dataFrame, mm);
setExpectedStdOut("sum: 45.0");
@@ -726,7 +726,7 @@ public class MLContextTest extends AutomatedTestBase {
StructType schema = DataTypes.createStructType(fields);
DataFrame dataFrame = sqlContext.createDataFrame(javaRddRow, schema);
- MatrixMetadata mm = new MatrixMetadata(MatrixFormat.DF_VECTOR_WITH_NO_ID_COLUMN);
+ MatrixMetadata mm = new MatrixMetadata(MatrixFormat.DF_VECTOR);
Script script = dml("print('sum: ' + sum(M));").in("M", dataFrame, mm);
setExpectedStdOut("sum: 45.0");
@@ -750,7 +750,7 @@ public class MLContextTest extends AutomatedTestBase {
StructType schema = DataTypes.createStructType(fields);
DataFrame dataFrame = sqlContext.createDataFrame(javaRddRow, schema);
- MatrixMetadata mm = new MatrixMetadata(MatrixFormat.DF_VECTOR_WITH_NO_ID_COLUMN);
+ MatrixMetadata mm = new MatrixMetadata(MatrixFormat.DF_VECTOR);
Script script = dml("print('sum: ' + sum(M))").in("M", dataFrame, mm);
setExpectedStdOut("sum: 45.0");
@@ -2102,7 +2102,7 @@ public class MLContextTest extends AutomatedTestBase {
JavaRDD<Row> javaRddRow = javaRddString.map(new CommaSeparatedValueStringToRow());
SQLContext sqlContext = new SQLContext(sc);
List<StructField> fields = new ArrayList<StructField>();
- fields.add(DataTypes.createStructField("ID", DataTypes.StringType, true));
+ fields.add(DataTypes.createStructField("__INDEX", DataTypes.StringType, true));
fields.add(DataTypes.createStructField("C1", DataTypes.StringType, true));
fields.add(DataTypes.createStructField("C2", DataTypes.StringType, true));
fields.add(DataTypes.createStructField("C3", DataTypes.StringType, true));
@@ -2127,7 +2127,7 @@ public class MLContextTest extends AutomatedTestBase {
JavaRDD<Row> javaRddRow = javaRddString.map(new CommaSeparatedValueStringToRow());
SQLContext sqlContext = new SQLContext(sc);
List<StructField> fields = new ArrayList<StructField>();
- fields.add(DataTypes.createStructField("ID", DataTypes.StringType, true));
+ fields.add(DataTypes.createStructField("__INDEX", DataTypes.StringType, true));
fields.add(DataTypes.createStructField("C1", DataTypes.StringType, true));
fields.add(DataTypes.createStructField("C2", DataTypes.StringType, true));
fields.add(DataTypes.createStructField("C3", DataTypes.StringType, true));
@@ -2152,7 +2152,7 @@ public class MLContextTest extends AutomatedTestBase {
JavaRDD<Row> javaRddRow = javaRddTuple.map(new DoubleVectorRow());
SQLContext sqlContext = new SQLContext(sc);
List<StructField> fields = new ArrayList<StructField>();
- fields.add(DataTypes.createStructField("ID", DataTypes.StringType, true));
+ fields.add(DataTypes.createStructField("__INDEX", DataTypes.StringType, true));
fields.add(DataTypes.createStructField("C1", new VectorUDT(), true));
StructType schema = DataTypes.createStructType(fields);
DataFrame dataFrame = sqlContext.createDataFrame(javaRddRow, schema);
@@ -2175,7 +2175,7 @@ public class MLContextTest extends AutomatedTestBase {
JavaRDD<Row> javaRddRow = javaRddTuple.map(new DoubleVectorRow());
SQLContext sqlContext = new SQLContext(sc);
List<StructField> fields = new ArrayList<StructField>();
- fields.add(DataTypes.createStructField("ID", DataTypes.StringType, true));
+ fields.add(DataTypes.createStructField("__INDEX", DataTypes.StringType, true));
fields.add(DataTypes.createStructField("C1", new VectorUDT(), true));
StructType schema = DataTypes.createStructType(fields);
DataFrame dataFrame = sqlContext.createDataFrame(javaRddRow, schema);