You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by de...@apache.org on 2016/09/09 20:40:17 UTC

incubator-systemml git commit: [SYSTEMML-900] Rename DataFrame ID column

Repository: incubator-systemml
Updated Branches:
  refs/heads/master 6bfccb712 -> 7fa318674


[SYSTEMML-900] Rename DataFrame ID column

Closes #236.


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/7fa31867
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/7fa31867
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/7fa31867

Branch: refs/heads/master
Commit: 7fa31867479ff4be66fc97394ae8edfb4ffbbeb8
Parents: 6bfccb7
Author: Deron Eriksson <de...@us.ibm.com>
Authored: Fri Sep 9 13:37:17 2016 -0700
Committer: Deron Eriksson <de...@us.ibm.com>
Committed: Fri Sep 9 13:37:17 2016 -0700

----------------------------------------------------------------------
 .../java/org/apache/sysml/api/MLOutput.java     |  4 +-
 .../api/mlcontext/MLContextConversionUtil.java  | 12 +++---
 .../apache/sysml/api/mlcontext/MLResults.java   |  4 +-
 .../org/apache/sysml/api/mlcontext/Matrix.java  |  4 +-
 .../sysml/api/mlcontext/MatrixFormat.java       | 28 +++++++-------
 .../spark/utils/FrameRDDConverterUtils.java     |  2 +-
 .../spark/utils/RDDConverterUtilsExt.java       | 10 ++---
 src/main/python/systemml/mlcontext.py           |  2 +-
 src/main/python/tests/test_mlcontext.py         |  2 +-
 src/main/python/tests/test_mllearn.py           |  4 +-
 .../sysml/api/ml/BaseSystemMLClassifier.scala   |  8 ++--
 .../sysml/api/ml/BaseSystemMLRegressor.scala    |  4 +-
 .../apache/sysml/api/ml/PredictionUtils.scala   |  4 +-
 .../integration/mlcontext/MLContextTest.java    | 40 ++++++++++----------
 14 files changed, 64 insertions(+), 64 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7fa31867/src/main/java/org/apache/sysml/api/MLOutput.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/MLOutput.java b/src/main/java/org/apache/sysml/api/MLOutput.java
index a18ab60..f41c479 100644
--- a/src/main/java/org/apache/sysml/api/MLOutput.java
+++ b/src/main/java/org/apache/sysml/api/MLOutput.java
@@ -101,7 +101,7 @@ public class MLOutput {
 	
 	/**
 	 * Note, the output DataFrame has an additional column ID.
-	 * An easy way to get DataFrame without ID is by df.sort("ID").drop("ID")
+	 * An easy way to get DataFrame without ID is by df.sort("__INDEX").drop("__INDEX")
 	 * @param sqlContext
 	 * @param varName
 	 * @return
@@ -181,7 +181,7 @@ public class MLOutput {
 		
 		List<StructField> fields = new ArrayList<StructField>();
 		// LongTypes throw an error: java.lang.Double incompatible with java.lang.Long
-		fields.add(DataTypes.createStructField("ID", DataTypes.DoubleType, false));
+		fields.add(DataTypes.createStructField("__INDEX", DataTypes.DoubleType, false));
 		for(int k = 0; k < alRange.size(); k++) {
 			String colName = alRange.get(k)._1;
 			long low = alRange.get(k)._2._1;

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7fa31867/src/main/java/org/apache/sysml/api/mlcontext/MLContextConversionUtil.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/mlcontext/MLContextConversionUtil.java b/src/main/java/org/apache/sysml/api/mlcontext/MLContextConversionUtil.java
index 9b15852..9d9ee5a 100644
--- a/src/main/java/org/apache/sysml/api/mlcontext/MLContextConversionUtil.java
+++ b/src/main/java/org/apache/sysml/api/mlcontext/MLContextConversionUtil.java
@@ -454,7 +454,7 @@ public class MLContextConversionUtil {
 		}
 
 		if (isDataFrameWithIDColumn(matrixMetadata)) {
-			dataFrame = dataFrame.sort("ID").drop("ID");
+			dataFrame = dataFrame.sort("__INDEX").drop("__INDEX");
 		}
 
 		boolean isVectorBasedDataFrame = isVectorBasedDataFrame(matrixMetadata);
@@ -507,7 +507,7 @@ public class MLContextConversionUtil {
 		StructType schema = dataFrame.schema();
 		boolean hasID = false;
 		try {
-			schema.fieldIndex("ID");
+			schema.fieldIndex("__INDEX");
 			hasID = true;
 		} catch (IllegalArgumentException iae) {
 		}
@@ -516,16 +516,16 @@ public class MLContextConversionUtil {
 		if (hasID) {
 			Object object = firstRow.get(1);
 			if (object instanceof Vector) {
-				mf = MatrixFormat.DF_VECTOR_WITH_ID_COLUMN;
+				mf = MatrixFormat.DF_VECTOR_WITH_INDEX;
 			} else {
-				mf = MatrixFormat.DF_DOUBLES_WITH_ID_COLUMN;
+				mf = MatrixFormat.DF_DOUBLES_WITH_INDEX;
 			}
 		} else {
 			Object object = firstRow.get(0);
 			if (object instanceof Vector) {
-				mf = MatrixFormat.DF_VECTOR_WITH_NO_ID_COLUMN;
+				mf = MatrixFormat.DF_VECTOR;
 			} else {
-				mf = MatrixFormat.DF_DOUBLES_WITH_NO_ID_COLUMN;
+				mf = MatrixFormat.DF_DOUBLES;
 			}
 		}
 		if (mf == null) {

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7fa31867/src/main/java/org/apache/sysml/api/mlcontext/MLResults.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/mlcontext/MLResults.java b/src/main/java/org/apache/sysml/api/mlcontext/MLResults.java
index 5ae882f..1b05f98 100644
--- a/src/main/java/org/apache/sysml/api/mlcontext/MLResults.java
+++ b/src/main/java/org/apache/sysml/api/mlcontext/MLResults.java
@@ -456,7 +456,7 @@ public class MLResults {
 		}
 		MatrixObject mo = getMatrixObject(outputName);
 		DataFrame df = MLContextConversionUtil.matrixObjectToDataFrame(mo, sparkExecutionContext, false);
-		df = df.sort("ID").drop("ID");
+		df = df.sort("__INDEX").drop("__INDEX");
 		return df;
 	}
 
@@ -484,7 +484,7 @@ public class MLResults {
 		}
 		MatrixObject mo = getMatrixObject(outputName);
 		DataFrame df = MLContextConversionUtil.matrixObjectToDataFrame(mo, sparkExecutionContext, true);
-		df = df.sort("ID").drop("ID");
+		df = df.sort("__INDEX").drop("__INDEX");
 		return df;
 	}
 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7fa31867/src/main/java/org/apache/sysml/api/mlcontext/Matrix.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/mlcontext/Matrix.java b/src/main/java/org/apache/sysml/api/mlcontext/Matrix.java
index 1e8e2eb..1ff18f0 100644
--- a/src/main/java/org/apache/sysml/api/mlcontext/Matrix.java
+++ b/src/main/java/org/apache/sysml/api/mlcontext/Matrix.java
@@ -129,7 +129,7 @@ public class Matrix {
 	 */
 	public DataFrame toDFDoubleNoIDColumn() {
 		DataFrame df = MLContextConversionUtil.matrixObjectToDataFrame(matrixObject, sparkExecutionContext, false);
-		df = df.sort("ID").drop("ID");
+		df = df.sort("__INDEX").drop("__INDEX");
 		return df;
 	}
 
@@ -150,7 +150,7 @@ public class Matrix {
 	 */
 	public DataFrame toDFVectorNoIDColumn() {
 		DataFrame df = MLContextConversionUtil.matrixObjectToDataFrame(matrixObject, sparkExecutionContext, true);
-		df = df.sort("ID").drop("ID");
+		df = df.sort("__INDEX").drop("__INDEX");
 		return df;
 	}
 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7fa31867/src/main/java/org/apache/sysml/api/mlcontext/MatrixFormat.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/mlcontext/MatrixFormat.java b/src/main/java/org/apache/sysml/api/mlcontext/MatrixFormat.java
index a7ac395..4f4c7f9 100644
--- a/src/main/java/org/apache/sysml/api/mlcontext/MatrixFormat.java
+++ b/src/main/java/org/apache/sysml/api/mlcontext/MatrixFormat.java
@@ -37,24 +37,24 @@ public enum MatrixFormat {
 	IJV,
 
 	/**
-	 * DataFrame of doubles with an ID column.
+	 * DataFrame of doubles with a row index.
 	 */
-	DF_DOUBLES_WITH_ID_COLUMN,
+	DF_DOUBLES_WITH_INDEX,
 
 	/**
-	 * DataFrame of doubles with no ID column.
+	 * DataFrame of doubles with no row index.
 	 */
-	DF_DOUBLES_WITH_NO_ID_COLUMN,
+	DF_DOUBLES,
 
 	/**
-	 * Vector DataFrame with an ID column.
+	 * Vector DataFrame with a row index.
 	 */
-	DF_VECTOR_WITH_ID_COLUMN,
+	DF_VECTOR_WITH_INDEX,
 
 	/**
-	 * Vector DataFrame with no ID column.
+	 * Vector DataFrame with no row index.
 	 */
-	DF_VECTOR_WITH_NO_ID_COLUMN;
+	DF_VECTOR;
 
 	/**
 	 * Is the matrix format vector-based?
@@ -63,9 +63,9 @@ public enum MatrixFormat {
 	 *         otherwise.
 	 */
 	public boolean isVectorBased() {
-		if (this == DF_VECTOR_WITH_ID_COLUMN) {
+		if (this == DF_VECTOR_WITH_INDEX) {
 			return true;
-		} else if (this == DF_VECTOR_WITH_NO_ID_COLUMN) {
+		} else if (this == DF_VECTOR) {
 			return true;
 		} else {
 			return false;
@@ -73,15 +73,15 @@ public enum MatrixFormat {
 	}
 
 	/**
-	 * Does the DataFrame have an ID column?
+	 * Does the DataFrame have a row index?
 	 * 
-	 * @return {@code true} if the DataFrame has an ID column, {@code false}
+	 * @return {@code true} if the DataFrame has a row index, {@code false}
 	 *         otherwise.
 	 */
 	public boolean hasIDColumn() {
-		if (this == DF_DOUBLES_WITH_ID_COLUMN) {
+		if (this == DF_DOUBLES_WITH_INDEX) {
 			return true;
-		} else if (this == DF_VECTOR_WITH_ID_COLUMN) {
+		} else if (this == DF_VECTOR_WITH_INDEX) {
 			return true;
 		} else {
 			return false;

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7fa31867/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/FrameRDDConverterUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/FrameRDDConverterUtils.java b/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/FrameRDDConverterUtils.java
index fa8c48f..85e5711 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/FrameRDDConverterUtils.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/FrameRDDConverterUtils.java
@@ -331,7 +331,7 @@ public class FrameRDDConverterUtils
 	{
 		
 		if(containsID)
-			df = df.drop("ID");
+			df = df.drop("__INDEX");
 		
 		//determine unknown dimensions if required
 		if( !mcOut.dimsKnown(true) ) {

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7fa31867/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/RDDConverterUtilsExt.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/RDDConverterUtilsExt.java b/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/RDDConverterUtilsExt.java
index 88dd44c..34e5a91 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/RDDConverterUtilsExt.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/RDDConverterUtilsExt.java
@@ -217,7 +217,7 @@ public class RDDConverterUtilsExt
 			throws DMLRuntimeException {
 		
 		if(containsID) {
-			inputDF = dropColumn(inputDF.sort("ID"), "ID");
+			inputDF = dropColumn(inputDF.sort("__INDEX"), "__INDEX");
 		}
 		
 		DataFrame df = inputDF.select(vectorColumnName);
@@ -276,7 +276,7 @@ public class RDDConverterUtilsExt
 			throw new DMLRuntimeException("No column other than \"" + column + "\" present in the dataframe.");
 		}
 		
-		// Round about way to do in Java (not exposed in Spark 1.3.0): df = df.drop("ID");
+		// Round about way to do in Java (not exposed in Spark 1.3.0): df = df.drop("__INDEX");
 		return df.select(firstCol, scala.collection.JavaConversions.asScalaBuffer(columnToSelect).toList());
 	}
 	
@@ -405,7 +405,7 @@ public class RDDConverterUtilsExt
 		}
 		
 		if(containsID) {
-			df = dropColumn(df.sort("ID"), "ID");
+			df = dropColumn(df.sort("__INDEX"), "__INDEX");
 		}
 			
 		//determine unknown dimensions and sparsity if required
@@ -447,7 +447,7 @@ public class RDDConverterUtilsExt
 		
 		List<StructField> fields = new ArrayList<StructField>();
 		// LongTypes throw an error: java.lang.Double incompatible with java.lang.Long
-		fields.add(DataTypes.createStructField("ID", DataTypes.DoubleType, false));
+		fields.add(DataTypes.createStructField("__INDEX", DataTypes.DoubleType, false));
 		fields.add(DataTypes.createStructField("C1", new VectorUDT(), false));
 		// fields.add(DataTypes.createStructField("C1", DataTypes.createArrayType(DataTypes.DoubleType), false));
 		
@@ -509,7 +509,7 @@ public class RDDConverterUtilsExt
 		
 		List<StructField> fields = new ArrayList<StructField>();
 		// LongTypes throw an error: java.lang.Double incompatible with java.lang.Long
-		fields.add(DataTypes.createStructField("ID", DataTypes.DoubleType, false)); 
+		fields.add(DataTypes.createStructField("__INDEX", DataTypes.DoubleType, false));
 		for(int i = 1; i <= numColumns; i++) {
 			fields.add(DataTypes.createStructField("C" + i, DataTypes.DoubleType, false));
 		}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7fa31867/src/main/python/systemml/mlcontext.py
----------------------------------------------------------------------
diff --git a/src/main/python/systemml/mlcontext.py b/src/main/python/systemml/mlcontext.py
index 389f9e1..ae56a46 100644
--- a/src/main/python/systemml/mlcontext.py
+++ b/src/main/python/systemml/mlcontext.py
@@ -120,7 +120,7 @@ class Matrix(object):
         -------
         df: PySpark SQL DataFrame
             A PySpark SQL DataFrame representing the matrix, with
-            one "ID" column containing the row index (since Spark
+            one "__INDEX" column containing the row index (since Spark
             DataFrames are unordered), followed by columns of doubles
             for each column in the matrix.
         """

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7fa31867/src/main/python/tests/test_mlcontext.py
----------------------------------------------------------------------
diff --git a/src/main/python/tests/test_mlcontext.py b/src/main/python/tests/test_mlcontext.py
index 6a6f64e..b9ecb00 100644
--- a/src/main/python/tests/test_mlcontext.py
+++ b/src/main/python/tests/test_mlcontext.py
@@ -62,7 +62,7 @@ class TestAPI(unittest.TestCase):
         rdd1 = sc.parallelize(["1.0,2.0", "3.0,4.0"])
         script = dml(sums).input(m1=rdd1).output("m2")
         m2 = ml.execute(script).get("m2")
-        self.assertEqual(repr(m2.toDF()), "DataFrame[ID: double, C1: double, C2: double]")
+        self.assertEqual(repr(m2.toDF()), "DataFrame[__INDEX: double, C1: double, C2: double]")
 
     def test_input_single(self):
         script = """

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7fa31867/src/main/python/tests/test_mllearn.py
----------------------------------------------------------------------
diff --git a/src/main/python/tests/test_mllearn.py b/src/main/python/tests/test_mllearn.py
index 9cc6443..1a1a6cb 100644
--- a/src/main/python/tests/test_mllearn.py
+++ b/src/main/python/tests/test_mllearn.py
@@ -79,7 +79,7 @@ class TestMLLearn(unittest.TestCase):
             (9, "a e c l", 2.0),
             (10, "spark compile", 1.0),
             (11, "hadoop software", 2.0)
-            ], ["id", "text", "label"])
+            ], ["__INDEX", "text", "label"])
         tokenizer = Tokenizer(inputCol="text", outputCol="words")
         hashingTF = HashingTF(inputCol="words", outputCol="features", numFeatures=20)
         lr = LogisticRegression(sqlCtx)
@@ -89,7 +89,7 @@ class TestMLLearn(unittest.TestCase):
             (12, "spark i j k", 1.0),
             (13, "l m n", 2.0),
             (14, "mapreduce spark", 1.0),
-            (15, "apache hadoop", 2.0)], ["id", "text", "label"])
+            (15, "apache hadoop", 2.0)], ["__INDEX", "text", "label"])
         result = model.transform(test)
         predictionAndLabels = result.select("prediction", "label")
         evaluator = MulticlassClassificationEvaluator()

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7fa31867/src/main/scala/org/apache/sysml/api/ml/BaseSystemMLClassifier.scala
----------------------------------------------------------------------
diff --git a/src/main/scala/org/apache/sysml/api/ml/BaseSystemMLClassifier.scala b/src/main/scala/org/apache/sysml/api/ml/BaseSystemMLClassifier.scala
index 98def7c..c9c05e0 100644
--- a/src/main/scala/org/apache/sysml/api/ml/BaseSystemMLClassifier.scala
+++ b/src/main/scala/org/apache/sysml/api/ml/BaseSystemMLClassifier.scala
@@ -147,14 +147,14 @@ trait BaseSystemMLClassifierModel extends BaseSystemMLEstimatorModel {
     val Xin_bin = new BinaryBlockMatrix(Xin, mcXin)
     val modelPredict = ml.execute(script._1.in(script._2, Xin_bin))
     val predLabelOut = PredictionUtils.computePredictedClassLabelsFromProbability(modelPredict, isSingleNode, sc, probVar)
-    val predictedDF = PredictionUtils.updateLabels(isSingleNode, predLabelOut.getDataFrame("Prediction"), null, "C1", labelMapping).select("ID", "prediction")
+    val predictedDF = PredictionUtils.updateLabels(isSingleNode, predLabelOut.getDataFrame("Prediction"), null, "C1", labelMapping).select("__INDEX", "prediction")
     if(outputProb) {
-      val prob = modelPredict.getDataFrame(probVar, true).withColumnRenamed("C1", "probability").select("ID", "probability")
-      val dataset = RDDConverterUtils.addIDToDataFrame(df.asInstanceOf[DataFrame], df.sqlContext, "ID")
+      val prob = modelPredict.getDataFrame(probVar, true).withColumnRenamed("C1", "probability").select("__INDEX", "probability")
+      val dataset = RDDConverterUtils.addIDToDataFrame(df.asInstanceOf[DataFrame], df.sqlContext, "__INDEX")
       return PredictionUtils.joinUsingID(dataset, PredictionUtils.joinUsingID(prob, predictedDF))
     }
     else {
-      val dataset = RDDConverterUtils.addIDToDataFrame(df.asInstanceOf[DataFrame], df.sqlContext, "ID")
+      val dataset = RDDConverterUtils.addIDToDataFrame(df.asInstanceOf[DataFrame], df.sqlContext, "__INDEX")
       return PredictionUtils.joinUsingID(dataset, predictedDF)
     }
     

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7fa31867/src/main/scala/org/apache/sysml/api/ml/BaseSystemMLRegressor.scala
----------------------------------------------------------------------
diff --git a/src/main/scala/org/apache/sysml/api/ml/BaseSystemMLRegressor.scala b/src/main/scala/org/apache/sysml/api/ml/BaseSystemMLRegressor.scala
index 5bcde30..73bf9be 100644
--- a/src/main/scala/org/apache/sysml/api/ml/BaseSystemMLRegressor.scala
+++ b/src/main/scala/org/apache/sysml/api/ml/BaseSystemMLRegressor.scala
@@ -79,8 +79,8 @@ trait BaseSystemMLRegressorModel extends BaseSystemMLEstimatorModel {
     val script = getPredictionScript(mloutput, isSingleNode)
     val Xin_bin = new BinaryBlockMatrix(Xin, mcXin)
     val modelPredict = ml.execute(script._1.in(script._2, Xin_bin))
-    val predictedDF = modelPredict.getDataFrame(predictionVar).select("ID", "C1").withColumnRenamed("C1", "prediction")
-    val dataset = RDDConverterUtils.addIDToDataFrame(df.asInstanceOf[DataFrame], df.sqlContext, "ID")
+    val predictedDF = modelPredict.getDataFrame(predictionVar).select("__INDEX", "C1").withColumnRenamed("C1", "prediction")
+    val dataset = RDDConverterUtils.addIDToDataFrame(df.asInstanceOf[DataFrame], df.sqlContext, "__INDEX")
     return PredictionUtils.joinUsingID(dataset, predictedDF)
   }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7fa31867/src/main/scala/org/apache/sysml/api/ml/PredictionUtils.scala
----------------------------------------------------------------------
diff --git a/src/main/scala/org/apache/sysml/api/ml/PredictionUtils.scala b/src/main/scala/org/apache/sysml/api/ml/PredictionUtils.scala
index 8e3893d..895fcd6 100644
--- a/src/main/scala/org/apache/sysml/api/ml/PredictionUtils.scala
+++ b/src/main/scala/org/apache/sysml/api/ml/PredictionUtils.scala
@@ -131,8 +131,8 @@ object PredictionUtils {
   }
   
   def joinUsingID(df1:DataFrame, df2:DataFrame):DataFrame = {
-    val tempDF1 = df1.withColumnRenamed("ID", "ID1")
-    tempDF1.join(df2, tempDF1.col("ID1").equalTo(df2.col("ID"))).drop("ID1")
+    val tempDF1 = df1.withColumnRenamed("__INDEX", "ID1")
+    tempDF1.join(df2, tempDF1.col("ID1").equalTo(df2.col("__INDEX"))).drop("ID1")
   }
   
   def computePredictedClassLabelsFromProbability(mlscoreoutput:MLResults, isSingleNode:Boolean, sc:SparkContext, inProbVar:String): MLResults = {

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7fa31867/src/test/java/org/apache/sysml/test/integration/mlcontext/MLContextTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/mlcontext/MLContextTest.java b/src/test/java/org/apache/sysml/test/integration/mlcontext/MLContextTest.java
index 0252b50..61f44e5 100644
--- a/src/test/java/org/apache/sysml/test/integration/mlcontext/MLContextTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/mlcontext/MLContextTest.java
@@ -518,7 +518,7 @@ public class MLContextTest extends AutomatedTestBase {
 		StructType schema = DataTypes.createStructType(fields);
 		DataFrame dataFrame = sqlContext.createDataFrame(javaRddRow, schema);
 
-		MatrixMetadata mm = new MatrixMetadata(MatrixFormat.DF_DOUBLES_WITH_NO_ID_COLUMN);
+		MatrixMetadata mm = new MatrixMetadata(MatrixFormat.DF_DOUBLES);
 
 		Script script = dml("print('sum: ' + sum(M));").in("M", dataFrame, mm);
 		setExpectedStdOut("sum: 450.0");
@@ -544,7 +544,7 @@ public class MLContextTest extends AutomatedTestBase {
 		StructType schema = DataTypes.createStructType(fields);
 		DataFrame dataFrame = sqlContext.createDataFrame(javaRddRow, schema);
 
-		MatrixMetadata mm = new MatrixMetadata(MatrixFormat.DF_DOUBLES_WITH_NO_ID_COLUMN);
+		MatrixMetadata mm = new MatrixMetadata(MatrixFormat.DF_DOUBLES);
 
 		Script script = pydml("print('sum: ' + sum(M))").in("M", dataFrame, mm);
 		setExpectedStdOut("sum: 450.0");
@@ -564,14 +564,14 @@ public class MLContextTest extends AutomatedTestBase {
 		JavaRDD<Row> javaRddRow = javaRddString.map(new CommaSeparatedValueStringToRow());
 		SQLContext sqlContext = new SQLContext(sc);
 		List<StructField> fields = new ArrayList<StructField>();
-		fields.add(DataTypes.createStructField("ID", DataTypes.StringType, true));
+		fields.add(DataTypes.createStructField("__INDEX", DataTypes.StringType, true));
 		fields.add(DataTypes.createStructField("C1", DataTypes.StringType, true));
 		fields.add(DataTypes.createStructField("C2", DataTypes.StringType, true));
 		fields.add(DataTypes.createStructField("C3", DataTypes.StringType, true));
 		StructType schema = DataTypes.createStructType(fields);
 		DataFrame dataFrame = sqlContext.createDataFrame(javaRddRow, schema);
 
-		MatrixMetadata mm = new MatrixMetadata(MatrixFormat.DF_DOUBLES_WITH_ID_COLUMN);
+		MatrixMetadata mm = new MatrixMetadata(MatrixFormat.DF_DOUBLES_WITH_INDEX);
 
 		Script script = dml("print('sum: ' + sum(M));").in("M", dataFrame, mm);
 		setExpectedStdOut("sum: 45.0");
@@ -591,14 +591,14 @@ public class MLContextTest extends AutomatedTestBase {
 		JavaRDD<Row> javaRddRow = javaRddString.map(new CommaSeparatedValueStringToRow());
 		SQLContext sqlContext = new SQLContext(sc);
 		List<StructField> fields = new ArrayList<StructField>();
-		fields.add(DataTypes.createStructField("ID", DataTypes.StringType, true));
+		fields.add(DataTypes.createStructField("__INDEX", DataTypes.StringType, true));
 		fields.add(DataTypes.createStructField("C1", DataTypes.StringType, true));
 		fields.add(DataTypes.createStructField("C2", DataTypes.StringType, true));
 		fields.add(DataTypes.createStructField("C3", DataTypes.StringType, true));
 		StructType schema = DataTypes.createStructType(fields);
 		DataFrame dataFrame = sqlContext.createDataFrame(javaRddRow, schema);
 
-		MatrixMetadata mm = new MatrixMetadata(MatrixFormat.DF_DOUBLES_WITH_ID_COLUMN);
+		MatrixMetadata mm = new MatrixMetadata(MatrixFormat.DF_DOUBLES_WITH_INDEX);
 
 		Script script = pydml("print('sum: ' + sum(M))").in("M", dataFrame, mm);
 		setExpectedStdOut("sum: 45.0");
@@ -618,14 +618,14 @@ public class MLContextTest extends AutomatedTestBase {
 		JavaRDD<Row> javaRddRow = javaRddString.map(new CommaSeparatedValueStringToRow());
 		SQLContext sqlContext = new SQLContext(sc);
 		List<StructField> fields = new ArrayList<StructField>();
-		fields.add(DataTypes.createStructField("ID", DataTypes.StringType, true));
+		fields.add(DataTypes.createStructField("__INDEX", DataTypes.StringType, true));
 		fields.add(DataTypes.createStructField("C1", DataTypes.StringType, true));
 		fields.add(DataTypes.createStructField("C2", DataTypes.StringType, true));
 		fields.add(DataTypes.createStructField("C3", DataTypes.StringType, true));
 		StructType schema = DataTypes.createStructType(fields);
 		DataFrame dataFrame = sqlContext.createDataFrame(javaRddRow, schema);
 
-		MatrixMetadata mm = new MatrixMetadata(MatrixFormat.DF_DOUBLES_WITH_ID_COLUMN);
+		MatrixMetadata mm = new MatrixMetadata(MatrixFormat.DF_DOUBLES_WITH_INDEX);
 
 		Script script = dml("print('M[1,1]: ' + as.scalar(M[1,1]));").in("M", dataFrame, mm);
 		setExpectedStdOut("M[1,1]: 1.0");
@@ -645,14 +645,14 @@ public class MLContextTest extends AutomatedTestBase {
 		JavaRDD<Row> javaRddRow = javaRddString.map(new CommaSeparatedValueStringToRow());
 		SQLContext sqlContext = new SQLContext(sc);
 		List<StructField> fields = new ArrayList<StructField>();
-		fields.add(DataTypes.createStructField("ID", DataTypes.StringType, true));
+		fields.add(DataTypes.createStructField("__INDEX", DataTypes.StringType, true));
 		fields.add(DataTypes.createStructField("C1", DataTypes.StringType, true));
 		fields.add(DataTypes.createStructField("C2", DataTypes.StringType, true));
 		fields.add(DataTypes.createStructField("C3", DataTypes.StringType, true));
 		StructType schema = DataTypes.createStructType(fields);
 		DataFrame dataFrame = sqlContext.createDataFrame(javaRddRow, schema);
 
-		MatrixMetadata mm = new MatrixMetadata(MatrixFormat.DF_DOUBLES_WITH_ID_COLUMN);
+		MatrixMetadata mm = new MatrixMetadata(MatrixFormat.DF_DOUBLES_WITH_INDEX);
 
 		Script script = pydml("print('M[0,0]: ' + scalar(M[0,0]))").in("M", dataFrame, mm);
 		setExpectedStdOut("M[0,0]: 1.0");
@@ -672,12 +672,12 @@ public class MLContextTest extends AutomatedTestBase {
 		JavaRDD<Row> javaRddRow = javaRddTuple.map(new DoubleVectorRow());
 		SQLContext sqlContext = new SQLContext(sc);
 		List<StructField> fields = new ArrayList<StructField>();
-		fields.add(DataTypes.createStructField("ID", DataTypes.StringType, true));
+		fields.add(DataTypes.createStructField("__INDEX", DataTypes.StringType, true));
 		fields.add(DataTypes.createStructField("C1", new VectorUDT(), true));
 		StructType schema = DataTypes.createStructType(fields);
 		DataFrame dataFrame = sqlContext.createDataFrame(javaRddRow, schema);
 
-		MatrixMetadata mm = new MatrixMetadata(MatrixFormat.DF_VECTOR_WITH_ID_COLUMN);
+		MatrixMetadata mm = new MatrixMetadata(MatrixFormat.DF_VECTOR_WITH_INDEX);
 
 		Script script = dml("print('sum: ' + sum(M));").in("M", dataFrame, mm);
 		setExpectedStdOut("sum: 45.0");
@@ -697,12 +697,12 @@ public class MLContextTest extends AutomatedTestBase {
 		JavaRDD<Row> javaRddRow = javaRddTuple.map(new DoubleVectorRow());
 		SQLContext sqlContext = new SQLContext(sc);
 		List<StructField> fields = new ArrayList<StructField>();
-		fields.add(DataTypes.createStructField("ID", DataTypes.StringType, true));
+		fields.add(DataTypes.createStructField("__INDEX", DataTypes.StringType, true));
 		fields.add(DataTypes.createStructField("C1", new VectorUDT(), true));
 		StructType schema = DataTypes.createStructType(fields);
 		DataFrame dataFrame = sqlContext.createDataFrame(javaRddRow, schema);
 
-		MatrixMetadata mm = new MatrixMetadata(MatrixFormat.DF_VECTOR_WITH_ID_COLUMN);
+		MatrixMetadata mm = new MatrixMetadata(MatrixFormat.DF_VECTOR_WITH_INDEX);
 
 		Script script = dml("print('sum: ' + sum(M))").in("M", dataFrame, mm);
 		setExpectedStdOut("sum: 45.0");
@@ -726,7 +726,7 @@ public class MLContextTest extends AutomatedTestBase {
 		StructType schema = DataTypes.createStructType(fields);
 		DataFrame dataFrame = sqlContext.createDataFrame(javaRddRow, schema);
 
-		MatrixMetadata mm = new MatrixMetadata(MatrixFormat.DF_VECTOR_WITH_NO_ID_COLUMN);
+		MatrixMetadata mm = new MatrixMetadata(MatrixFormat.DF_VECTOR);
 
 		Script script = dml("print('sum: ' + sum(M));").in("M", dataFrame, mm);
 		setExpectedStdOut("sum: 45.0");
@@ -750,7 +750,7 @@ public class MLContextTest extends AutomatedTestBase {
 		StructType schema = DataTypes.createStructType(fields);
 		DataFrame dataFrame = sqlContext.createDataFrame(javaRddRow, schema);
 
-		MatrixMetadata mm = new MatrixMetadata(MatrixFormat.DF_VECTOR_WITH_NO_ID_COLUMN);
+		MatrixMetadata mm = new MatrixMetadata(MatrixFormat.DF_VECTOR);
 
 		Script script = dml("print('sum: ' + sum(M))").in("M", dataFrame, mm);
 		setExpectedStdOut("sum: 45.0");
@@ -2102,7 +2102,7 @@ public class MLContextTest extends AutomatedTestBase {
 		JavaRDD<Row> javaRddRow = javaRddString.map(new CommaSeparatedValueStringToRow());
 		SQLContext sqlContext = new SQLContext(sc);
 		List<StructField> fields = new ArrayList<StructField>();
-		fields.add(DataTypes.createStructField("ID", DataTypes.StringType, true));
+		fields.add(DataTypes.createStructField("__INDEX", DataTypes.StringType, true));
 		fields.add(DataTypes.createStructField("C1", DataTypes.StringType, true));
 		fields.add(DataTypes.createStructField("C2", DataTypes.StringType, true));
 		fields.add(DataTypes.createStructField("C3", DataTypes.StringType, true));
@@ -2127,7 +2127,7 @@ public class MLContextTest extends AutomatedTestBase {
 		JavaRDD<Row> javaRddRow = javaRddString.map(new CommaSeparatedValueStringToRow());
 		SQLContext sqlContext = new SQLContext(sc);
 		List<StructField> fields = new ArrayList<StructField>();
-		fields.add(DataTypes.createStructField("ID", DataTypes.StringType, true));
+		fields.add(DataTypes.createStructField("__INDEX", DataTypes.StringType, true));
 		fields.add(DataTypes.createStructField("C1", DataTypes.StringType, true));
 		fields.add(DataTypes.createStructField("C2", DataTypes.StringType, true));
 		fields.add(DataTypes.createStructField("C3", DataTypes.StringType, true));
@@ -2152,7 +2152,7 @@ public class MLContextTest extends AutomatedTestBase {
 		JavaRDD<Row> javaRddRow = javaRddTuple.map(new DoubleVectorRow());
 		SQLContext sqlContext = new SQLContext(sc);
 		List<StructField> fields = new ArrayList<StructField>();
-		fields.add(DataTypes.createStructField("ID", DataTypes.StringType, true));
+		fields.add(DataTypes.createStructField("__INDEX", DataTypes.StringType, true));
 		fields.add(DataTypes.createStructField("C1", new VectorUDT(), true));
 		StructType schema = DataTypes.createStructType(fields);
 		DataFrame dataFrame = sqlContext.createDataFrame(javaRddRow, schema);
@@ -2175,7 +2175,7 @@ public class MLContextTest extends AutomatedTestBase {
 		JavaRDD<Row> javaRddRow = javaRddTuple.map(new DoubleVectorRow());
 		SQLContext sqlContext = new SQLContext(sc);
 		List<StructField> fields = new ArrayList<StructField>();
-		fields.add(DataTypes.createStructField("ID", DataTypes.StringType, true));
+		fields.add(DataTypes.createStructField("__INDEX", DataTypes.StringType, true));
 		fields.add(DataTypes.createStructField("C1", new VectorUDT(), true));
 		StructType schema = DataTypes.createStructType(fields);
 		DataFrame dataFrame = sqlContext.createDataFrame(javaRddRow, schema);