You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2016/09/27 18:03:35 UTC

[2/3] incubator-systemml git commit: [SYSTEMML-960] Support for frames as function arguments, tests

[SYSTEMML-960] Support for frames as function arguments, tests

So far, the inputs/outputs of dml-bodied functions did not allow for
variables of type frame (which resulted in parser issues). This patch
fixes this by generalizing the parser.

In addition, this also includes two minor (partially related) fixes:

* Explain call dag with functions of internal namespace (multi-return
builtin functions), like transformencode as used in this new testcase.

* Unnecessary warning of max result size in local environment (with
USE_LOCAL_SPARK_CONFIG enabled).


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/2f7a67d3
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/2f7a67d3
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/2f7a67d3

Branch: refs/heads/master
Commit: 2f7a67d38c9403285a2c5d17b67b4cd4aa76bf39
Parents: 3d7d348
Author: Matthias Boehm <mb...@us.ibm.com>
Authored: Mon Sep 26 18:38:54 2016 -0700
Committer: Matthias Boehm <mb...@us.ibm.com>
Committed: Tue Sep 27 10:59:41 2016 -0700

----------------------------------------------------------------------
 .../parser/common/CommonSyntacticValidator.java |  15 +++
 .../sysml/parser/dml/DmlSyntacticValidator.java |  25 ++--
 .../parser/pydml/PydmlSyntacticValidator.java   |  44 +++----
 .../context/SparkExecutionContext.java          |   2 +-
 .../java/org/apache/sysml/utils/Explain.java    |   2 +-
 .../functions/frame/FrameFunctionTest.java      | 129 +++++++++++++++++++
 .../scripts/functions/frame/FrameFunction.dml   |  42 ++++++
 7 files changed, 214 insertions(+), 45 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2f7a67d3/src/main/java/org/apache/sysml/parser/common/CommonSyntacticValidator.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/parser/common/CommonSyntacticValidator.java b/src/main/java/org/apache/sysml/parser/common/CommonSyntacticValidator.java
index 995937f..83f7823 100644
--- a/src/main/java/org/apache/sysml/parser/common/CommonSyntacticValidator.java
+++ b/src/main/java/org/apache/sysml/parser/common/CommonSyntacticValidator.java
@@ -740,4 +740,19 @@ public abstract class CommonSyntacticValidator {
 	// End of Helper Functions for exit*FunctionCall*AssignmentStatement
 	// -----------------------------------------------------------------
 
+	/**
+	 * Indicates if the given data type string is a valid data type. 
+	 * 
+	 * @param datatype
+	 * @param start
+	 */
+	protected void checkValidDataType(String datatype, Token start) {
+		boolean validMatrixType = 
+				datatype.equals("matrix") || datatype.equals("Matrix") || 
+				datatype.equals("frame") || datatype.equals("Frame") ||
+				datatype.equals("scalar") || datatype.equals("Scalar");
+		if(!validMatrixType	) {
+			notifyErrorListeners("incorrect datatype (expected matrix, frame or scalar)", start);
+		}
+	}
 }

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2f7a67d3/src/main/java/org/apache/sysml/parser/dml/DmlSyntacticValidator.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/parser/dml/DmlSyntacticValidator.java b/src/main/java/org/apache/sysml/parser/dml/DmlSyntacticValidator.java
index 07e0705..e658a7e 100644
--- a/src/main/java/org/apache/sysml/parser/dml/DmlSyntacticValidator.java
+++ b/src/main/java/org/apache/sysml/parser/dml/DmlSyntacticValidator.java
@@ -720,18 +720,15 @@ public class DmlSyntacticValidator extends CommonSyntacticValidator implements D
 				dataType = paramCtx.paramType.dataType().getText();
 			}
 
-			if(dataType.equals("matrix") || dataType.equals("Matrix")) {
-				// matrix
+			
+			//check and assign data type
+			checkValidDataType(dataType, paramCtx.start);
+			if( dataType.equalsIgnoreCase("matrix") )
 				dataId.setDataType(DataType.MATRIX);
-			}
-			else if(dataType.equals("scalar") || dataType.equals("Scalar")) {
-				// scalar
+			else if( dataType.equalsIgnoreCase("frame") )
+				dataId.setDataType(DataType.FRAME);
+			else if( dataType.equalsIgnoreCase("scalar") )
 				dataId.setDataType(DataType.SCALAR);
-			}
-			else {
-				notifyErrorListeners("invalid datatype " + dataType, paramCtx.start);
-				return null;
-			}
 
 			valueType = paramCtx.paramType.valueType().getText();
 			if(valueType.equals("int") || valueType.equals("integer")
@@ -931,13 +928,7 @@ public class DmlSyntacticValidator extends CommonSyntacticValidator implements D
 
 	@Override
 	public void exitMatrixDataTypeCheck(MatrixDataTypeCheckContext ctx) {
-		boolean validMatrixType = ctx.ID().getText().equals("matrix")
-								|| ctx.ID().getText().equals("Matrix")
-								|| ctx.ID().getText().equals("Scalar")
-								|| ctx.ID().getText().equals("scalar");
-		if(!validMatrixType	) {
-			notifyErrorListeners("incorrect datatype (expected matrix or scalar)", ctx.start);
-		}
+		checkValidDataType(ctx.ID().getText(), ctx.start);
 	}
 
 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2f7a67d3/src/main/java/org/apache/sysml/parser/pydml/PydmlSyntacticValidator.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/parser/pydml/PydmlSyntacticValidator.java b/src/main/java/org/apache/sysml/parser/pydml/PydmlSyntacticValidator.java
index c605308..7068589 100644
--- a/src/main/java/org/apache/sysml/parser/pydml/PydmlSyntacticValidator.java
+++ b/src/main/java/org/apache/sysml/parser/pydml/PydmlSyntacticValidator.java
@@ -1391,18 +1391,14 @@ public class PydmlSyntacticValidator extends CommonSyntacticValidator implements
 				dataType = paramCtx.paramType.dataType().getText();
 			}
 
-			if(dataType.equals("matrix")) {
-				// matrix
+			//check and assign data type
+			checkValidDataType(dataType, paramCtx.start);
+			if( dataType.equals("matrix") )
 				dataId.setDataType(DataType.MATRIX);
-			}
-			else if(dataType.equals("scalar")) {
-				// scalar
+			else if( dataType.equals("frame") )
+				dataId.setDataType(DataType.FRAME);
+			else if( dataType.equals("scalar") )
 				dataId.setDataType(DataType.SCALAR);
-			}
-			else {
-				notifyErrorListeners("invalid datatype " + dataType, paramCtx.start);
-				return null;
-			}
 
 			valueType = paramCtx.paramType.valueType().getText();
 			if(valueType.equals("int")) {
@@ -1574,24 +1570,20 @@ public class PydmlSyntacticValidator extends CommonSyntacticValidator implements
 
 	@Override
 	public void exitMatrixDataTypeCheck(MatrixDataTypeCheckContext ctx) {
-		if(		ctx.ID().getText().equals("matrix")
-				|| ctx.ID().getText().equals("scalar")
-				) {
-			// Do nothing
-		}
-		else if(ctx.ID().getText().equals("Matrix"))
+		checkValidDataType(ctx.ID().getText(), ctx.start);
+		
+		//additional error handling (pydml-specific)
+		String datatype = ctx.ID().getText();
+		if(datatype.equals("Matrix"))
 			notifyErrorListeners("incorrect datatype (Hint: use matrix instead of Matrix)", ctx.start);
-		else if(ctx.ID().getText().equals("Scalar"))
+		else if(datatype.equals("Frame"))
+			notifyErrorListeners("incorrect datatype (Hint: use frame instead of Frame)", ctx.start);
+		else if(datatype.equals("Scalar"))
 			notifyErrorListeners("incorrect datatype (Hint: use scalar instead of Scalar)", ctx.start);
-		else if(		ctx.ID().getText().equals("int")
-				|| ctx.ID().getText().equals("str")
-				|| ctx.ID().getText().equals("bool")
-				|| ctx.ID().getText().equals("float")
-				) {
-			notifyErrorListeners("expected datatype but found a valuetype (Hint: use matrix or scalar instead of " + ctx.ID().getText() + ")", ctx.start);
-		}
-		else {
-			notifyErrorListeners("incorrect datatype (expected matrix or scalar)", ctx.start);
+		else if( datatype.equals("int") || datatype.equals("str")
+			|| datatype.equals("bool") || datatype.equals("float") ) {
+			notifyErrorListeners("expected datatype but found a valuetype "
+					+ "(Hint: use matrix, frame or scalar instead of " + datatype + ")", ctx.start);
 		}
 	}
 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2f7a67d3/src/main/java/org/apache/sysml/runtime/controlprogram/context/SparkExecutionContext.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/context/SparkExecutionContext.java b/src/main/java/org/apache/sysml/runtime/controlprogram/context/SparkExecutionContext.java
index 964d2d6..7103b0d 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/context/SparkExecutionContext.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/context/SparkExecutionContext.java
@@ -236,7 +236,7 @@ public class SparkExecutionContext extends ExecutionContext
 		// Set warning if spark.driver.maxResultSize is not set. It needs to be set before starting Spark Context for CP collect 
 		String strDriverMaxResSize = _spctx.getConf().get("spark.driver.maxResultSize", "1g");
 		long driverMaxResSize = UtilFunctions.parseMemorySize(strDriverMaxResSize); 
-		if (driverMaxResSize != 0 && driverMaxResSize<OptimizerUtils.getLocalMemBudget())
+		if (driverMaxResSize != 0 && driverMaxResSize<OptimizerUtils.getLocalMemBudget() && !DMLScript.USE_LOCAL_SPARK_CONFIG)
 			LOG.warn("Configuration parameter spark.driver.maxResultSize set to " + UtilFunctions.formatMemorySize(driverMaxResSize) + "."
 					+ " You can set it through Spark default configuration setting either to 0 (unlimited) or to available memory budget of size " 
 					+ UtilFunctions.formatMemorySize((long)OptimizerUtils.getLocalMemBudget()) + ".");

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2f7a67d3/src/main/java/org/apache/sysml/utils/Explain.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/utils/Explain.java b/src/main/java/org/apache/sysml/utils/Explain.java
index 8ee2822..12c086e 100644
--- a/src/main/java/org/apache/sysml/utils/Explain.java
+++ b/src/main/java/org/apache/sysml/utils/Explain.java
@@ -1146,7 +1146,7 @@ public class Explain
 						FunctionOp fop = (FunctionOp) h;
 						String fkey = DMLProgram.constructFunctionKey(fop.getFunctionNamespace(), fop.getFunctionName());
 						//prevent redundant call edges
-						if( !lfset.contains(fkey) )
+						if( !lfset.contains(fkey) && !fop.getFunctionNamespace().equals(DMLProgram.INTERNAL_NAMESPACE) )
 						{
 							//recursively explain function call dag
 							if( !fstack.contains(fkey) ) {

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2f7a67d3/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameFunctionTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameFunctionTest.java b/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameFunctionTest.java
new file mode 100644
index 0000000..b506444
--- /dev/null
+++ b/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameFunctionTest.java
@@ -0,0 +1,129 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.test.integration.functions.frame;
+
+import org.apache.sysml.api.DMLScript;
+import org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM;
+import org.apache.sysml.hops.OptimizerUtils;
+import org.apache.sysml.lops.LopProperties.ExecType;
+import org.apache.sysml.runtime.io.FrameReaderFactory;
+import org.apache.sysml.runtime.io.FrameWriterFactory;
+import org.apache.sysml.runtime.matrix.data.FrameBlock;
+import org.apache.sysml.runtime.matrix.data.InputInfo;
+import org.apache.sysml.runtime.matrix.data.OutputInfo;
+import org.apache.sysml.runtime.util.DataConverter;
+import org.apache.sysml.test.integration.AutomatedTestBase;
+import org.apache.sysml.test.integration.TestConfiguration;
+import org.apache.sysml.test.utils.TestUtils;
+import org.junit.Test;
+
+public class FrameFunctionTest extends AutomatedTestBase
+{
+	private final static String TEST_DIR = "functions/frame/";
+	private final static String TEST_NAME = "FrameFunction";
+	private final static String TEST_CLASS_DIR = TEST_DIR + FrameFunctionTest.class.getSimpleName() + "/";
+	
+	private final static int rows = 1382;
+	private final static int cols = 5;
+	
+	@Override
+	public void setUp() {
+		TestUtils.clearAssertionInformation();
+		addTestConfiguration(TEST_NAME, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME, new String[] {"F2"}));
+	}
+
+	@Test
+	public void testFrameFunctionIPACP()  {
+		runFrameFunctionTest(ExecType.CP, true);
+	}
+	
+	@Test
+	public void testFrameFunctionIPASpark()  {
+		runFrameFunctionTest(ExecType.SPARK, true);
+	}
+	
+	@Test
+	public void testFrameFunctionNoIPACP()  {
+		runFrameFunctionTest(ExecType.CP, false);
+	}
+	
+	@Test
+	public void testFrameFunctionNoIPASpark()  {
+		runFrameFunctionTest(ExecType.SPARK, false);
+	}
+
+	/**
+	 * 
+	 * @param et
+	 */
+	private void runFrameFunctionTest( ExecType et, boolean IPA )
+	{
+		//rtplatform for MR
+		RUNTIME_PLATFORM platformOld = rtplatform;
+		switch( et ){
+			case SPARK: rtplatform = RUNTIME_PLATFORM.SPARK; break;
+			default: rtplatform = RUNTIME_PLATFORM.HYBRID_SPARK; break;
+		}
+	
+		boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG;
+		if( rtplatform == RUNTIME_PLATFORM.SPARK 
+			|| rtplatform == RUNTIME_PLATFORM.HYBRID_SPARK )
+			DMLScript.USE_LOCAL_SPARK_CONFIG = true;
+	
+		boolean oldIPA = OptimizerUtils.ALLOW_INTER_PROCEDURAL_ANALYSIS;
+		OptimizerUtils.ALLOW_INTER_PROCEDURAL_ANALYSIS = IPA;
+		
+		try
+		{
+			//setup testcase
+			getAndLoadTestConfiguration(TEST_NAME);
+			String HOME = SCRIPT_DIR + TEST_DIR;
+			fullDMLScriptName = HOME + TEST_NAME + ".dml";
+			programArgs = new String[]{"-explain", "-args", 
+					input("F"), output("F2")};
+			
+			//generate input data and write as frame
+			double[][] A = getRandomMatrix(rows, cols, -10, 10, 0.9, 8362);
+			FrameBlock fA = DataConverter.convertToFrameBlock(
+				DataConverter.convertToMatrixBlock(A));
+			FrameWriterFactory.createFrameWriter(OutputInfo.CSVOutputInfo)
+				.writeFrameToHDFS(fA, input("F"), rows, cols);
+			
+			//run test
+			runTest(true, false, null, -1); 
+			
+			//read input/output and compare
+			FrameBlock fB = FrameReaderFactory
+					.createFrameReader(InputInfo.CSVInputInfo)
+					.readFrameFromHDFS(output("F2"), rows, cols);
+			String[][] R1 = DataConverter.convertToStringFrame(fA);
+			String[][] R2 = DataConverter.convertToStringFrame(fB);
+			TestUtils.compareFrames(R1, R2, R1.length, R1[0].length);			
+		}
+		catch(Exception ex) {
+			throw new RuntimeException(ex);
+		}
+		finally {
+			rtplatform = platformOld;
+			DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld;
+			OptimizerUtils.ALLOW_INTER_PROCEDURAL_ANALYSIS = oldIPA;
+		}
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2f7a67d3/src/test/scripts/functions/frame/FrameFunction.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/frame/FrameFunction.dml b/src/test/scripts/functions/frame/FrameFunction.dml
new file mode 100644
index 0000000..591e63b
--- /dev/null
+++ b/src/test/scripts/functions/frame/FrameFunction.dml
@@ -0,0 +1,42 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+foo = function(Frame[String] F, String jspec) 
+  return (Matrix[Double] RX, Frame[String] RM) 
+{
+  #prevent function inlining
+  if( 1==1 ){}
+
+  [RX, RM] = transformencode(target=F, spec=jspec);
+}
+
+F = read($1, data_type="frame", format="csv");
+
+#make size unknown for recompile
+if( sum(rand(rows=10,cols=1))<1 ) {
+   F = rbind(F, F);
+}
+
+jspec = "{\"ids\": true,\"recode\": [1,2,3,4,5]}";
+[X, M] = foo(F, jspec);
+
+F2 = transformdecode(target=X, spec=jspec, meta=M);
+write(F2, $2, format="csv");