You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by ac...@apache.org on 2016/09/22 05:36:41 UTC

[1/6] incubator-systemml git commit: [SYSTEMML-568] Frame Schema support through MLContext

Repository: incubator-systemml
Updated Branches:
  refs/heads/master 4bc6601d6 -> 0b472b09e


[SYSTEMML-568] Frame Schema support through MLContext

Closes 250


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/3957c0fa
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/3957c0fa
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/3957c0fa

Branch: refs/heads/master
Commit: 3957c0fa6aadb2213e96b6fa0d4f26271c711868
Parents: 61a6dcb
Author: Arvind Surve <ac...@yahoo.com>
Authored: Wed Sep 21 22:18:49 2016 -0700
Committer: Arvind Surve <ac...@yahoo.com>
Committed: Wed Sep 21 22:18:49 2016 -0700

----------------------------------------------------------------------
 .../api/mlcontext/MLContextConversionUtil.java  |  14 ++-
 .../controlprogram/caching/FrameObject.java     |  14 +++
 .../spark/utils/FrameRDDConverterUtils.java     |  11 +-
 .../mlcontext/MLContextFrameTest.java           | 116 +++++++++++++++----
 4 files changed, 124 insertions(+), 31 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/3957c0fa/src/main/java/org/apache/sysml/api/mlcontext/MLContextConversionUtil.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/mlcontext/MLContextConversionUtil.java b/src/main/java/org/apache/sysml/api/mlcontext/MLContextConversionUtil.java
index 1adc089..aa0366d 100644
--- a/src/main/java/org/apache/sysml/api/mlcontext/MLContextConversionUtil.java
+++ b/src/main/java/org/apache/sysml/api/mlcontext/MLContextConversionUtil.java
@@ -193,7 +193,7 @@ public class MLContextConversionUtil {
 					frameMetadata.asMatrixCharacteristics() : new MatrixCharacteristics();
 			MatrixFormatMetaData mtd = new MatrixFormatMetaData(mc, 
 					OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo);
-			FrameObject frameObject = new FrameObject(OptimizerUtils.getUniqueTempFileName(), mtd);
+			FrameObject frameObject = new FrameObject(OptimizerUtils.getUniqueTempFileName(), mtd, frameMetadata.getFrameSchema().getSchema());
 			frameObject.acquireModify(frameBlock);
 			frameObject.release();
 			return frameObject;
@@ -282,7 +282,7 @@ public class MLContextConversionUtil {
 				frameMetadata.asMatrixCharacteristics() : new MatrixCharacteristics();
 
 		FrameObject frameObject = new FrameObject(OptimizerUtils.getUniqueTempFileName(), 
-				new MatrixFormatMetaData(mc, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo));
+				new MatrixFormatMetaData(mc, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo), frameMetadata.getFrameSchema().getSchema());
 		frameObject.setRDDHandle(new RDDObject(binaryBlocks, variableName));
 		return frameObject;
 	}
@@ -365,6 +365,12 @@ public class MLContextConversionUtil {
 				matrixCharacteristics.setDimension(rows, cols);
 				frameMetadata.setMatrixCharacteristics(matrixCharacteristics);
 			}
+			
+			List<String> colnames = new ArrayList<String>();
+			List<ValueType> fschema = new ArrayList<ValueType>();
+			FrameRDDConverterUtils.convertDFSchemaToFrameSchema(dataFrame.schema(), colnames, fschema, containsID);	
+			frameMetadata.setFrameSchema(new FrameSchema(fschema));
+
 			JavaPairRDD<Long, FrameBlock> binaryBlock = FrameRDDConverterUtils.dataFrameToBinaryBlock(javaSparkContext,
 					dataFrame, matrixCharacteristics, containsID);
 
@@ -598,7 +604,7 @@ public class MLContextConversionUtil {
 		JavaSparkContext jsc = MLContextUtil.getJavaSparkContext((MLContext) MLContextProxy.getActiveMLContextForAPI());
 
 		FrameObject frameObject = new FrameObject(OptimizerUtils.getUniqueTempFileName(), 
-				new MatrixFormatMetaData(mc, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo));
+				new MatrixFormatMetaData(mc, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo), frameMetadata.getFrameSchema().getSchema());
 		JavaPairRDD<Long, FrameBlock> rdd;
 		try {
 			rdd = FrameRDDConverterUtils.csvToBinaryBlock(jsc, javaPairRDDText, mc, 
@@ -659,7 +665,7 @@ public class MLContextConversionUtil {
 		JavaSparkContext jsc = MLContextUtil.getJavaSparkContext((MLContext) MLContextProxy.getActiveMLContextForAPI());
 
 		FrameObject frameObject = new FrameObject(OptimizerUtils.getUniqueTempFileName(), 
-				new MatrixFormatMetaData(mc, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo));
+				new MatrixFormatMetaData(mc, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo), frameMetadata.getFrameSchema().getSchema());
 		JavaPairRDD<Long, FrameBlock> rdd;
 		try {
 			List<ValueType> lschema = null;

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/3957c0fa/src/main/java/org/apache/sysml/runtime/controlprogram/caching/FrameObject.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/caching/FrameObject.java b/src/main/java/org/apache/sysml/runtime/controlprogram/caching/FrameObject.java
index 1209064..8c5fe8b 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/caching/FrameObject.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/caching/FrameObject.java
@@ -79,6 +79,20 @@ public class FrameObject extends CacheableData<FrameBlock>
 	}
 	
 	/**
+	 * Creates a frame object with the given file name, meta data, and schema.
+	 * @param fname file name, passed to setFileName
+	 * @param meta meta data, passed to setMetaData
+	 * @param schema value types making up the frame schema, passed to setSchema
+	 * 
+	 */
+	public FrameObject(String fname, MetaData meta, List<ValueType> schema) {
+		this();
+		setFileName(fname);
+		setMetaData(meta);
+		setSchema(schema);
+	}
+	
+	/**
 	 * Copy constructor that copies meta data but NO data.
 	 * 
 	 * @param fo

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/3957c0fa/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/FrameRDDConverterUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/FrameRDDConverterUtils.java b/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/FrameRDDConverterUtils.java
index 351d559..ede0211 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/FrameRDDConverterUtils.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/FrameRDDConverterUtils.java
@@ -326,12 +326,9 @@ public class FrameRDDConverterUtils
 	/**
 	 * 
 	 * @param sc
-	 * @param input
-	 * @param mcOut
-	 * @param hasHeader
-	 * @param delim
-	 * @param fill
-	 * @param missingValue
+	 * @param df
+	 * @param mc
+	 * @param containsID
 	 * @return
 	 * @throws DMLRuntimeException
 	 */
@@ -889,7 +886,7 @@ public class FrameRDDConverterUtils
 			int cols = blk.getNumColumns();
 			for( int i=0; i<rows; i++ ) {
 				Object[] row = new Object[cols+1];
-				row[0] = rowIndex++;
+				row[0] = (double)rowIndex++;
 				for( int j=0; j<cols; j++ )
 					row[j+1] = blk.get(i, j);
 				ret.add(RowFactory.create(row));

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/3957c0fa/src/test/java/org/apache/sysml/test/integration/mlcontext/MLContextFrameTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/mlcontext/MLContextFrameTest.java b/src/test/java/org/apache/sysml/test/integration/mlcontext/MLContextFrameTest.java
index deac382..972e6ea 100644
--- a/src/test/java/org/apache/sysml/test/integration/mlcontext/MLContextFrameTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/mlcontext/MLContextFrameTest.java
@@ -33,12 +33,14 @@ import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.rdd.RDD;
 import org.apache.spark.sql.DataFrame;
 import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
 import org.apache.spark.sql.SQLContext;
 import org.apache.spark.sql.types.DataTypes;
 import org.apache.spark.sql.types.StructField;
 import org.apache.spark.sql.types.StructType;
 import org.apache.sysml.api.mlcontext.FrameFormat;
 import org.apache.sysml.api.mlcontext.FrameMetadata;
+import org.apache.sysml.api.mlcontext.FrameSchema;
 import org.apache.sysml.api.mlcontext.MLContext;
 import org.apache.sysml.api.mlcontext.MLResults;
 import org.apache.sysml.api.mlcontext.MatrixFormat;
@@ -181,6 +183,10 @@ public class MLContextFrameTest extends AutomatedTestBase {
 		List<String> listB = new ArrayList<String>();
 		FrameMetadata fmA = null, fmB = null;
 		Script script = null;
+		List<ValueType> lschemaA = Arrays.asList(ValueType.INT, ValueType.STRING, ValueType.DOUBLE, ValueType.BOOLEAN);
+		FrameSchema fschemaA = new FrameSchema(lschemaA);
+		List<ValueType> lschemaB = Arrays.asList(ValueType.STRING, ValueType.DOUBLE, ValueType.BOOLEAN);
+		FrameSchema fschemaB = new FrameSchema(lschemaB);
 
 		if (inputType != IO_TYPE.FILE) {
 			if (format == FrameFormat.CSV) {
@@ -191,8 +197,8 @@ public class MLContextFrameTest extends AutomatedTestBase {
 				listB.add("Str12,13.0,true");
 				listB.add("Str25,26.0,false");
 
-				fmA = new FrameMetadata(FrameFormat.CSV, 3, 4);
-				fmB = new FrameMetadata(FrameFormat.CSV, 2, 3);
+				fmA = new FrameMetadata(FrameFormat.CSV, fschemaA, 3, 4);
+				fmB = new FrameMetadata(FrameFormat.CSV, fschemaB, 2, 3);
 			} else if (format == FrameFormat.IJV) {
 				listA.add("1 1 1");
 				listA.add("1 2 Str2");
@@ -214,8 +220,8 @@ public class MLContextFrameTest extends AutomatedTestBase {
 				listB.add("2 2 26.0");
 				listB.add("2 3 false");
 
-				fmA = new FrameMetadata(FrameFormat.IJV, 3, 4);
-				fmB = new FrameMetadata(FrameFormat.IJV, 2, 3);
+				fmA = new FrameMetadata(FrameFormat.IJV, fschemaA, 3, 4);
+				fmB = new FrameMetadata(FrameFormat.IJV, fschemaB, 2, 3);
 			}
 			JavaRDD<String> javaRDDA = sc.parallelize(listA);
 			JavaRDD<String> javaRDDB = sc.parallelize(listB);
@@ -224,11 +230,6 @@ public class MLContextFrameTest extends AutomatedTestBase {
 				JavaRDD<Row> javaRddRowA = javaRDDA.map(new MLContextTest.CommaSeparatedValueStringToRow());
 				JavaRDD<Row> javaRddRowB = javaRDDB.map(new MLContextTest.CommaSeparatedValueStringToRow());
 
-				ValueType[] schemaA = { ValueType.INT, ValueType.STRING, ValueType.DOUBLE, ValueType.BOOLEAN };
-				List<ValueType> lschemaA = Arrays.asList(schemaA);
-				ValueType[] schemaB = { ValueType.STRING, ValueType.DOUBLE, ValueType.BOOLEAN };
-				List<ValueType> lschemaB = Arrays.asList(schemaB);
-
 				// Create DataFrame
 				SQLContext sqlContext = new SQLContext(sc);
 				StructType dfSchemaA = FrameRDDConverterUtils.convertFrameSchemaToDFSchema(lschemaA, false);
@@ -302,6 +303,24 @@ public class MLContextFrameTest extends AutomatedTestBase {
 		}
 
 		MLResults mlResults = ml.execute(script);
+		
+		//Validate output schema
+		List<ValueType> lschemaOutA = mlResults.getFrameObject("A").getSchema();
+		List<ValueType> lschemaOutC = mlResults.getFrameObject("C").getSchema();
+		if(inputType != IO_TYPE.FILE) {
+			Assert.assertEquals(ValueType.INT, lschemaOutA.get(0));
+			Assert.assertEquals(ValueType.STRING, lschemaOutA.get(1));
+			Assert.assertEquals(ValueType.DOUBLE, lschemaOutA.get(2));
+			Assert.assertEquals(ValueType.BOOLEAN, lschemaOutA.get(3));
+			
+			Assert.assertEquals(ValueType.STRING, lschemaOutC.get(0));
+			Assert.assertEquals(ValueType.DOUBLE, lschemaOutC.get(1));
+		} else {
+			for (int i=0; i < lschemaOutA.size(); i++)
+				Assert.assertEquals(ValueType.STRING, lschemaOutA.get(i));
+			for (int i=0; i < lschemaOutC.size(); i++)
+				Assert.assertEquals(ValueType.STRING, lschemaOutC.get(i));
+		}
 
 		if (outputType == IO_TYPE.JAVA_RDD_STR_CSV) {
 
@@ -370,30 +389,46 @@ public class MLContextFrameTest extends AutomatedTestBase {
 		} else if (outputType == IO_TYPE.DATAFRAME) {
 
 			DataFrame dataFrameA = mlResults.getDataFrame("A").drop(RDDConverterUtils.DF_ID_COLUMN);
+			StructType dfschemaA = dataFrameA.schema(); 
+			StructField structTypeA = dfschemaA.apply(0);
+			Assert.assertEquals(DataTypes.LongType, structTypeA.dataType());
+			structTypeA = dfschemaA.apply(1);
+			Assert.assertEquals(DataTypes.StringType, structTypeA.dataType());
+			structTypeA = dfschemaA.apply(2);
+			Assert.assertEquals(DataTypes.DoubleType, structTypeA.dataType());
+			structTypeA = dfschemaA.apply(3);
+			Assert.assertEquals(DataTypes.BooleanType, structTypeA.dataType());
+
 			List<Row> listAOut = dataFrameA.collectAsList();
 
 			Row row1 = listAOut.get(0);
-			Assert.assertEquals("Mistmatch with expected value", "1", row1.get(0).toString());
-			Assert.assertEquals("Mistmatch with expected value", "Str2", row1.get(1).toString());
-			Assert.assertEquals("Mistmatch with expected value", "3.0", row1.get(2).toString());
-			Assert.assertEquals("Mistmatch with expected value", "true", row1.get(3).toString());
+			Assert.assertEquals("Mistmatch with expected value", Long.valueOf(1), row1.get(0));
+			Assert.assertEquals("Mistmatch with expected value", "Str2", row1.get(1));
+			Assert.assertEquals("Mistmatch with expected value", 3.0, row1.get(2));
+			Assert.assertEquals("Mistmatch with expected value", true, row1.get(3));
 			
 			Row row2 = listAOut.get(1);
-			Assert.assertEquals("Mistmatch with expected value", "4", row2.get(0).toString());
-			Assert.assertEquals("Mistmatch with expected value", "Str12", row2.get(1).toString());
-			Assert.assertEquals("Mistmatch with expected value", "13.0", row2.get(2).toString());
-			Assert.assertEquals("Mistmatch with expected value", "true", row2.get(3).toString());
+			Assert.assertEquals("Mistmatch with expected value", Long.valueOf(4), row2.get(0));
+			Assert.assertEquals("Mistmatch with expected value", "Str12", row2.get(1));
+			Assert.assertEquals("Mistmatch with expected value", 13.0, row2.get(2));
+			Assert.assertEquals("Mistmatch with expected value", true, row2.get(3));
 
 			DataFrame dataFrameC = mlResults.getDataFrame("C").drop(RDDConverterUtils.DF_ID_COLUMN);
+			StructType dfschemaC = dataFrameC.schema(); 
+			StructField structTypeC = dfschemaC.apply(0);
+			Assert.assertEquals(DataTypes.StringType, structTypeC.dataType());
+			structTypeC = dfschemaC.apply(1);
+			Assert.assertEquals(DataTypes.DoubleType, structTypeC.dataType());
+			
 			List<Row> listCOut = dataFrameC.collectAsList();
 
 			Row row3 = listCOut.get(0);
-			Assert.assertEquals("Mistmatch with expected value", "Str12", row3.get(0).toString());
-			Assert.assertEquals("Mistmatch with expected value", "13.0", row3.get(1).toString());
+			Assert.assertEquals("Mistmatch with expected value", "Str12", row3.get(0));
+			Assert.assertEquals("Mistmatch with expected value", 13.0, row3.get(1));
 
 			Row row4 = listCOut.get(1);
 			Assert.assertEquals("Mistmatch with expected value", "Str25", row4.get(0));
-			Assert.assertEquals("Mistmatch with expected value", "26.0", row4.get(1));
+			Assert.assertEquals("Mistmatch with expected value", 26.0, row4.get(1));
 		} else {
 			String[][] frameA = mlResults.getFrameAs2DStringArray("A");
 			Assert.assertEquals("Str2", frameA[0][1]);
@@ -485,6 +520,47 @@ public class MLContextFrameTest extends AutomatedTestBase {
 		Assert.assertEquals(18.0, matrix[2][0], 0.0);
 	}
 
+	@Test
+	public void testInputFrameAndMatrixOutputMatrixAndFrame() {
+		System.out.println("MLContextFrameTest - input frame and matrix, output matrix and frame");
+		
+		Row[] rowsA = {RowFactory.create("Doc1", "Feat1", 10), RowFactory.create("Doc1", "Feat2", 20), RowFactory.create("Doc2", "Feat1", 31)};
+
+		JavaRDD<Row> javaRddRowA = sc. parallelize( Arrays.asList(rowsA)); 
+
+		SQLContext sqlContext = new SQLContext(sc);
+
+		List<StructField> fieldsA = new ArrayList<StructField>();
+		fieldsA.add(DataTypes.createStructField("myID", DataTypes.StringType, true));
+		fieldsA.add(DataTypes.createStructField("FeatureName", DataTypes.StringType, true));
+		fieldsA.add(DataTypes.createStructField("FeatureValue", DataTypes.IntegerType, true));
+		StructType schemaA = DataTypes.createStructType(fieldsA);
+		DataFrame dataFrameA = sqlContext.createDataFrame(javaRddRowA, schemaA);
+
+		String dmlString = "[tA, tAM] = transformencode (target = A, spec = \"{ids: false ,recode: [ myID, FeatureName ]}\");";
+
+		Script script = dml(dmlString)
+				.in("A", dataFrameA,
+						new FrameMetadata(FrameFormat.CSV, dataFrameA.count(), (long) dataFrameA.columns().length))
+				.out("tA").out("tAM");
+		MLResults results = ml.execute(script);
+
+		double[][] matrixtA = results.getMatrixAs2DDoubleArray("tA");
+		Assert.assertEquals(10.0, matrixtA[0][2], 0.0);
+		Assert.assertEquals(20.0, matrixtA[1][2], 0.0);
+		Assert.assertEquals(31.0, matrixtA[2][2], 0.0);
+
+		DataFrame dataFrame_tA = results.getMatrix("tA").toDF();
+		System.out.println("Number of matrix tA rows = " + dataFrame_tA.count());
+		dataFrame_tA.printSchema();
+		dataFrame_tA.show();
+		
+		DataFrame dataFrame_tAM = results.getFrame("tAM").toDF();
+		System.out.println("Number of frame tAM rows = " + dataFrame_tAM.count());
+		dataFrame_tAM.printSchema();
+		dataFrame_tAM.show();
+	}
+
 	// NOTE: the ordering of the frame values seems to come out differently here
 	// than in the scala shell,
 	// so this should be investigated or explained.


[3/6] incubator-systemml git commit: Merge branch 'master' of https://git-wip-us.apache.org/repos/asf/incubator-systemml

Posted by ac...@apache.org.
Merge branch 'master' of https://git-wip-us.apache.org/repos/asf/incubator-systemml


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/c9b0327b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/c9b0327b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/c9b0327b

Branch: refs/heads/master
Commit: c9b0327b94f12f70a29eb16868c8a32966ab237f
Parents: 3957c0f 4bc6601
Author: Arvind Surve <ac...@yahoo.com>
Authored: Wed Sep 21 22:25:08 2016 -0700
Committer: Arvind Surve <ac...@yahoo.com>
Committed: Wed Sep 21 22:25:08 2016 -0700

----------------------------------------------------------------------
 .../spark/FrameAppendRSPInstruction.java        |   6 +-
 .../spark/FrameIndexingSPInstruction.java       |   6 +-
 .../spark/utils/FrameRDDAggregateUtils.java     |  91 ++++++++++++++++
 .../spark/utils/FrameRDDConverterUtils.java     | 107 ++++++-------------
 .../spark/utils/RDDAggregateUtils.java          |  40 -------
 .../sysml/runtime/matrix/data/FrameBlock.java   |  43 +++++++-
 6 files changed, 170 insertions(+), 123 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/c9b0327b/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/FrameRDDConverterUtils.java
----------------------------------------------------------------------


[6/6] incubator-systemml git commit: Merge branch 'FrameNewMLContext4' [SYSTEMML-930] Warn for result size when call through MLContext

Posted by ac...@apache.org.
Merge branch 'FrameNewMLContext4'
[SYSTEMML-930] Warn for result size when call through MLContext


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/0b472b09
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/0b472b09
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/0b472b09

Branch: refs/heads/master
Commit: 0b472b09e0a984e7f99066ca3dfaa4eb23f296e6
Parents: 8170e87 c8076d0
Author: Arvind Surve <ac...@yahoo.com>
Authored: Wed Sep 21 22:27:25 2016 -0700
Committer: Arvind Surve <ac...@yahoo.com>
Committed: Wed Sep 21 22:27:25 2016 -0700

----------------------------------------------------------------------

----------------------------------------------------------------------



[4/6] incubator-systemml git commit: Merge branch 'FrameNewMLContext4'

Posted by ac...@apache.org.
Merge branch 'FrameNewMLContext4'


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/8170e874
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/8170e874
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/8170e874

Branch: refs/heads/master
Commit: 8170e874aa85396a73e648aab20addb502ad5684
Parents: c9b0327 95a3613
Author: Arvind Surve <ac...@yahoo.com>
Authored: Wed Sep 21 22:26:00 2016 -0700
Committer: Arvind Surve <ac...@yahoo.com>
Committed: Wed Sep 21 22:26:00 2016 -0700

----------------------------------------------------------------------
 .../context/SparkExecutionContext.java            | 10 +++++++++-
 .../apache/sysml/runtime/util/UtilFunctions.java  | 18 ++++++++++++++++++
 2 files changed, 27 insertions(+), 1 deletion(-)
----------------------------------------------------------------------



[2/6] incubator-systemml git commit: [SYSTEMML-930] Warn for result size when call through MLContext

Posted by ac...@apache.org.
[SYSTEMML-930] Warn for result size when call through MLContext


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/95a36136
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/95a36136
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/95a36136

Branch: refs/heads/master
Commit: 95a36136b0c9cfb5942424098c4f3d2643cd0f91
Parents: 61a6dcb
Author: Arvind Surve <ac...@yahoo.com>
Authored: Wed Sep 21 22:22:38 2016 -0700
Committer: Arvind Surve <ac...@yahoo.com>
Committed: Wed Sep 21 22:23:58 2016 -0700

----------------------------------------------------------------------
 .../context/SparkExecutionContext.java            | 10 +++++++++-
 .../apache/sysml/runtime/util/UtilFunctions.java  | 18 ++++++++++++++++++
 2 files changed, 27 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/95a36136/src/main/java/org/apache/sysml/runtime/controlprogram/context/SparkExecutionContext.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/context/SparkExecutionContext.java b/src/main/java/org/apache/sysml/runtime/controlprogram/context/SparkExecutionContext.java
index f95e0d4..a6f99e8 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/context/SparkExecutionContext.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/context/SparkExecutionContext.java
@@ -233,7 +233,15 @@ public class SparkExecutionContext extends ExecutionContext
 				_spctx = new JavaSparkContext(conf);
 			}
 		}
-			
+		
+		// Warn if spark.driver.maxResultSize (read with default "1g") is non-zero and below the local memory budget. It needs to be set before starting Spark Context for CP collect 
+		String strDriverMaxResSize = _spctx.getConf().get("spark.driver.maxResultSize", "1g");
+		long driverMaxResSize = UtilFunctions.parseMemorySize(strDriverMaxResSize); 
+		if (driverMaxResSize != 0 && driverMaxResSize<OptimizerUtils.getLocalMemBudget())
+			LOG.warn("Configuration parameter spark.driver.maxResultSize set to " + UtilFunctions.formatMemorySize(driverMaxResSize) + "."
+					+ " You can set it through Spark default configuration setting either to 0 (unlimited) or to available memory budget of size " 
+					+ UtilFunctions.formatMemorySize((long)OptimizerUtils.getLocalMemBudget()) + ".");
+		
 		//globally add binaryblock serialization framework for all hdfs read/write operations
 		//TODO if spark context passed in from outside (mlcontext), we need to clean this up at the end 
 		if( MRJobConfiguration.USE_BINARYBLOCK_SERIALIZATION )

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/95a36136/src/main/java/org/apache/sysml/runtime/util/UtilFunctions.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/util/UtilFunctions.java b/src/main/java/org/apache/sysml/runtime/util/UtilFunctions.java
index 81a1e8f..e5a792a 100644
--- a/src/main/java/org/apache/sysml/runtime/util/UtilFunctions.java
+++ b/src/main/java/org/apache/sysml/runtime/util/UtilFunctions.java
@@ -556,6 +556,24 @@ public class UtilFunctions
 	}
 	
 	/**
+	 * Format a numeric memory size as a string with a GB/MB/KB suffix,
+	 * using integer division (fractions are truncated); below 1024, no suffix.
+	 * 
+	 * @param arg memory size as a number (bytes, judging by the KB/MB/GB scaling)
+	 * @return formatted string, e.g. "2 GB" for 2L*1024*1024*1024
+	 */
+	public static String formatMemorySize(long arg) {
+		if (arg >= 1024 * 1024 * 1024)
+			return String.format("%d GB", arg/(1024*1024*1024));
+		else if (arg >= 1024 * 1024)
+			return String.format("%d MB", arg/(1024*1024));
+		else if (arg >= 1024)
+			return String.format("%d KB", arg/(1024));
+		else
+			return String.format("%d", arg);
+	}
+	
+	/**
 	 * 
 	 * @param low   lower bound (inclusive)
 	 * @param up    upper bound (inclusive)


[5/6] incubator-systemml git commit: [SYSTEMML-930] Warn for result size when call through MLContext

Posted by ac...@apache.org.
[SYSTEMML-930] Warn for result size when call through MLContext


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/c8076d04
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/c8076d04
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/c8076d04

Branch: refs/heads/master
Commit: c8076d04a2b2512614a91c22b05c8c5599922f5f
Parents: 4bc6601
Author: Arvind Surve <ac...@yahoo.com>
Authored: Wed Sep 21 22:22:38 2016 -0700
Committer: Arvind Surve <ac...@yahoo.com>
Committed: Wed Sep 21 22:26:59 2016 -0700

----------------------------------------------------------------------
 .../context/SparkExecutionContext.java            | 10 +++++++++-
 .../apache/sysml/runtime/util/UtilFunctions.java  | 18 ++++++++++++++++++
 2 files changed, 27 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/c8076d04/src/main/java/org/apache/sysml/runtime/controlprogram/context/SparkExecutionContext.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/context/SparkExecutionContext.java b/src/main/java/org/apache/sysml/runtime/controlprogram/context/SparkExecutionContext.java
index f95e0d4..a6f99e8 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/context/SparkExecutionContext.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/context/SparkExecutionContext.java
@@ -233,7 +233,15 @@ public class SparkExecutionContext extends ExecutionContext
 				_spctx = new JavaSparkContext(conf);
 			}
 		}
-			
+		
+		// Warn if spark.driver.maxResultSize (read with default "1g") is non-zero and below the local memory budget. It needs to be set before starting Spark Context for CP collect 
+		String strDriverMaxResSize = _spctx.getConf().get("spark.driver.maxResultSize", "1g");
+		long driverMaxResSize = UtilFunctions.parseMemorySize(strDriverMaxResSize); 
+		if (driverMaxResSize != 0 && driverMaxResSize<OptimizerUtils.getLocalMemBudget())
+			LOG.warn("Configuration parameter spark.driver.maxResultSize set to " + UtilFunctions.formatMemorySize(driverMaxResSize) + "."
+					+ " You can set it through Spark default configuration setting either to 0 (unlimited) or to available memory budget of size " 
+					+ UtilFunctions.formatMemorySize((long)OptimizerUtils.getLocalMemBudget()) + ".");
+		
 		//globally add binaryblock serialization framework for all hdfs read/write operations
 		//TODO if spark context passed in from outside (mlcontext), we need to clean this up at the end 
 		if( MRJobConfiguration.USE_BINARYBLOCK_SERIALIZATION )

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/c8076d04/src/main/java/org/apache/sysml/runtime/util/UtilFunctions.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/util/UtilFunctions.java b/src/main/java/org/apache/sysml/runtime/util/UtilFunctions.java
index 81a1e8f..e5a792a 100644
--- a/src/main/java/org/apache/sysml/runtime/util/UtilFunctions.java
+++ b/src/main/java/org/apache/sysml/runtime/util/UtilFunctions.java
@@ -556,6 +556,24 @@ public class UtilFunctions
 	}
 	
 	/**
+	 * Format a numeric memory size as a string with a GB/MB/KB suffix,
+	 * using integer division (fractions are truncated); below 1024, no suffix.
+	 * 
+	 * @param arg memory size as a number (bytes, judging by the KB/MB/GB scaling)
+	 * @return formatted string, e.g. "2 GB" for 2L*1024*1024*1024
+	 */
+	public static String formatMemorySize(long arg) {
+		if (arg >= 1024 * 1024 * 1024)
+			return String.format("%d GB", arg/(1024*1024*1024));
+		else if (arg >= 1024 * 1024)
+			return String.format("%d MB", arg/(1024*1024));
+		else if (arg >= 1024)
+			return String.format("%d KB", arg/(1024));
+		else
+			return String.format("%d", arg);
+	}
+	
+	/**
 	 * 
 	 * @param low   lower bound (inclusive)
 	 * @param up    upper bound (inclusive)