You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by du...@apache.org on 2016/01/22 17:34:14 UTC
[38/51] [partial] incubator-systemml git commit: [SYSTEMML-482]
[SYSTEMML-480] Adding a Git attributes file to enforce Unix-styled line
endings, and normalizing all of the line endings.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/scripts/utils/project.dml
----------------------------------------------------------------------
diff --git a/scripts/utils/project.dml b/scripts/utils/project.dml
index dc69bd0..ee6cd80 100644
--- a/scripts/utils/project.dml
+++ b/scripts/utils/project.dml
@@ -1,80 +1,80 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Utility script to project columns from input matrix.
-#
-# Parameters:
-# X : (input) filename of data matrix
-# P : (input) filename of 1-column projection matrix containing columnIDs
-# o : (output) filename of output matrix with projected columns
-# exclude : (default FALSE) TRUE means P contains columnIds to be projected
-# FALSE means P contains columnsIDS to be excluded
-# ofmt : (default binary) format of output matrix
-#
-# Example:
-# hadoop jar SystemML.jar -f algorithms/utils/project.dml -nvargs X="/tmp/M.mtx" P="/tmp/P.mtx" o="/tmp/PX.mtx"
-#
-# Assumptions:
-# The order of colIDs in P is preserved. Order of columns in result is same as order of columns in P.
-# i.e. projecting columns 4 and 2 of X results in a matrix with columns 4 and 2.
-# If P specifies the exclude list, then projected columns are order preserved.
-
-exclude = ifdef ($exclude, FALSE);
-ofmt = ifdef ($ofmt, "binary");
-
-X = read ($X)
-P = read ($P)
-
-# create projection matrix using projection list and sequence matrix, and pad with 0s. The size of
-# PP is nbrOfColsInX x nbrOfColsToKeep
-
-if (exclude==FALSE)
-{
- # create projection matrix using projection list and sequence matrix, and pad with 0s. The size
- # of PP is nbrOfColsInX x nbrOfColsToKeep
- PP = table(P, seq(1, nrow(P), 1), ncol(X), nrow(P))
-
- } else {
- # create new vector P with list of columns to keep using original vector P containing exclude
- # columns. These are all small vector operations.
- C = table(P, seq(1, nrow(P), 1))
- E = rowSums(C);
-
- # Row pad w/ 0s
- EE = matrix (0, rows=ncol(X), cols=1)
- EE[1:nrow(E),1] = E
-
- # Convert exclude column list to include column list, and create column indices
- EE = ppred(EE, 0, "==")
- EE = EE * seq(1, ncol(X), 1)
- P = removeEmpty(target=EE, margin="rows")
-
- PP = table(P, seq(1, nrow(P), 1), ncol(X), nrow(P))
-
-}
-
-# Perform projection using permutation matrix
-PX = X %*% PP
-
-# Write output
-write (PX, $o, format=ofmt)
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Utility script to project columns from input matrix.
+#
+# Parameters:
+# X : (input) filename of data matrix
+# P : (input) filename of 1-column projection matrix containing columnIDs
+# o : (output) filename of output matrix with projected columns
+# exclude : (default FALSE) TRUE means P contains columnIds to be projected
+# FALSE means P contains columnsIDS to be excluded
+# ofmt : (default binary) format of output matrix
+#
+# Example:
+# hadoop jar SystemML.jar -f algorithms/utils/project.dml -nvargs X="/tmp/M.mtx" P="/tmp/P.mtx" o="/tmp/PX.mtx"
+#
+# Assumptions:
+# The order of colIDs in P is preserved. Order of columns in result is same as order of columns in P.
+# i.e. projecting columns 4 and 2 of X results in a matrix with columns 4 and 2.
+# If P specifies the exclude list, then projected columns are order preserved.
+
+exclude = ifdef ($exclude, FALSE);
+ofmt = ifdef ($ofmt, "binary");
+
+X = read ($X)
+P = read ($P)
+
+# create projection matrix using projection list and sequence matrix, and pad with 0s. The size of
+# PP is nbrOfColsInX x nbrOfColsToKeep
+
+if (exclude==FALSE)
+{
+ # create projection matrix using projection list and sequence matrix, and pad with 0s. The size
+ # of PP is nbrOfColsInX x nbrOfColsToKeep
+ PP = table(P, seq(1, nrow(P), 1), ncol(X), nrow(P))
+
+ } else {
+ # create new vector P with list of columns to keep using original vector P containing exclude
+ # columns. These are all small vector operations.
+ C = table(P, seq(1, nrow(P), 1))
+ E = rowSums(C);
+
+ # Row pad w/ 0s
+ EE = matrix (0, rows=ncol(X), cols=1)
+ EE[1:nrow(E),1] = E
+
+ # Convert exclude column list to include column list, and create column indices
+ EE = ppred(EE, 0, "==")
+ EE = EE * seq(1, ncol(X), 1)
+ P = removeEmpty(target=EE, margin="rows")
+
+ PP = table(P, seq(1, nrow(P), 1), ncol(X), nrow(P))
+
+}
+
+# Perform projection using permutation matrix
+PX = X %*% PP
+
+# Write output
+write (PX, $o, format=ofmt)
+
+
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/scripts/utils/rowIndexMax.dml
----------------------------------------------------------------------
diff --git a/scripts/utils/rowIndexMax.dml b/scripts/utils/rowIndexMax.dml
index 80af2e1..1e5cbc7 100644
--- a/scripts/utils/rowIndexMax.dml
+++ b/scripts/utils/rowIndexMax.dml
@@ -1,38 +1,38 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Utility script to return for each row the column nbr with the largest value. If all the values in
-# a row are the same, then the largest column nbr is returned.
-#
-# Parameters:
-# I : (input) filename of input
-# O : (output) filename of output
-# ofmt : default "csv"; format of O: "csv", "binary"
-#
-# Example:
-# hadoop jar SystemML.jar -f algorithms/utils/rowIndexMax.dml -nvargs I="/tmp/X.mtx" O="/tmp/X2.mtx"
-#
-
-ofmt = ifdef($ofmt, "csv")
-
-M = read($I)
-C = rowIndexMax(M)
-write(C, $O, format=ofmt)
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Utility script to return for each row the column nbr with the largest value. If all the values in
+# a row are the same, then the largest column nbr is returned.
+#
+# Parameters:
+# I : (input) filename of input
+# O : (output) filename of output
+# ofmt : default "csv"; format of O: "csv", "binary"
+#
+# Example:
+# hadoop jar SystemML.jar -f algorithms/utils/rowIndexMax.dml -nvargs I="/tmp/X.mtx" O="/tmp/X2.mtx"
+#
+
+ofmt = ifdef($ofmt, "csv")
+
+M = read($I)
+C = rowIndexMax(M)
+write(C, $O, format=ofmt)
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/scripts/utils/splitXY.dml
----------------------------------------------------------------------
diff --git a/scripts/utils/splitXY.dml b/scripts/utils/splitXY.dml
index 7d5fc24..82027a4 100644
--- a/scripts/utils/splitXY.dml
+++ b/scripts/utils/splitXY.dml
@@ -1,62 +1,62 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Utility script to split X into new X and Y.
-#
-# Parameters:
-# X : (input) filename of data matrix
-# y : (default ncol(X)) colIndex
-# OX : (output) filename of output matrix with all columns except y
-# OY : (output) filename of output matrix with y column
-# ofmt : (default binary) format of OX and OY output matrix
-#
-# Example:
-# hadoop jar SystemML.jar -f algorithms/utils/splitXY.dml -nvargs X="/tmp/X.mtx" y=50 OX="/tmp/OX.mtx OY="/tmp/OY.mtx
-#
-
-ofmt = ifdef($ofmt, "binary")
-y = ifdef($y, ncol($X))
-
-X = read ($X)
-
-if (y == 1)
-{
- OX = X[,y+1:ncol(X)]
- OY = X[,y]
-}
-else if (y == ncol(X))
-{
- OX = X[,1:y-1]
- OY = X[,y]
-}
-else
-{
- OX1 = X[,1:y-1]
- OX2 = X[,y+1:ncol(X)]
- OX = append (OX1, OX2)
- OY = X[,y]
-}
-
-# Write output
-write (OX, $OX, format=ofmt)
-write (OY, $OY, format=ofmt)
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Utility script to split X into new X and Y.
+#
+# Parameters:
+# X : (input) filename of data matrix
+# y : (default ncol(X)) colIndex
+# OX : (output) filename of output matrix with all columns except y
+# OY : (output) filename of output matrix with y column
+# ofmt : (default binary) format of OX and OY output matrix
+#
+# Example:
+# hadoop jar SystemML.jar -f algorithms/utils/splitXY.dml -nvargs X="/tmp/X.mtx" y=50 OX="/tmp/OX.mtx OY="/tmp/OY.mtx
+#
+
+ofmt = ifdef($ofmt, "binary")
+y = ifdef($y, ncol($X))
+
+X = read ($X)
+
+if (y == 1)
+{
+ OX = X[,y+1:ncol(X)]
+ OY = X[,y]
+}
+else if (y == ncol(X))
+{
+ OX = X[,1:y-1]
+ OY = X[,y]
+}
+else
+{
+ OX1 = X[,1:y-1]
+ OX2 = X[,y+1:ncol(X)]
+ OX = append (OX1, OX2)
+ OY = X[,y]
+}
+
+# Write output
+write (OX, $OX, format=ofmt)
+write (OY, $OY, format=ofmt)
+
+
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/scripts/utils/write.dml
----------------------------------------------------------------------
diff --git a/scripts/utils/write.dml b/scripts/utils/write.dml
index 7861a3a..f1c81e4 100644
--- a/scripts/utils/write.dml
+++ b/scripts/utils/write.dml
@@ -1,39 +1,39 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Utility script to change format of X.
-#
-# Parameters:
-# I : (input) filename of input
-# O : (output) filename of output
-# ofmt : format of O: "csv", "binary"
-# sep : default ","; CSV separator in output
-# header : default "FALSE"; CSV header: TRUE | FALSE
-#
-# Example:
-# hadoop jar SystemML.jar -f algorithms/utils/write.dml -nvargs I="/tmp/X.mtx" O="/tmp/X2.mtx" ofmt="binary" sep="|" header=TRUE
-#
-
-sep = ifdef($sep, ",")
-header = ifdef($header, FALSE )
-
-M = read($I)
-write(M, $O, format=$ofmt, sep=sep, header=header)
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Utility script to change format of X.
+#
+# Parameters:
+# I : (input) filename of input
+# O : (output) filename of output
+# ofmt : format of O: "csv", "binary"
+# sep : default ","; CSV separator in output
+# header : default "FALSE"; CSV header: TRUE | FALSE
+#
+# Example:
+# hadoop jar SystemML.jar -f algorithms/utils/write.dml -nvargs I="/tmp/X.mtx" O="/tmp/X2.mtx" ofmt="binary" sep="|" header=TRUE
+#
+
+sep = ifdef($sep, ",")
+header = ifdef($header, FALSE )
+
+M = read($I)
+write(M, $O, format=$ofmt, sep=sep, header=header)
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/main/java/org/apache/sysml/lops/BinaryScalar.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/lops/BinaryScalar.java b/src/main/java/org/apache/sysml/lops/BinaryScalar.java
index 0f423e3..f61c145 100644
--- a/src/main/java/org/apache/sysml/lops/BinaryScalar.java
+++ b/src/main/java/org/apache/sysml/lops/BinaryScalar.java
@@ -1,197 +1,197 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.lops;
-
-
-
-import org.apache.sysml.lops.LopProperties.ExecLocation;
-import org.apache.sysml.lops.LopProperties.ExecType;
-import org.apache.sysml.lops.compile.JobType;
-import org.apache.sysml.parser.Expression.*;
-
-/**
- * Lop to perform binary scalar operations. Both inputs must be scalars.
- * Example i = j + k, i = i + 1.
- */
-
-public class BinaryScalar extends Lop
-{
-
- public enum OperationTypes {
- ADD, SUBTRACT, SUBTRACTRIGHT, MULTIPLY, DIVIDE, MODULUS, INTDIV,
- LESS_THAN, LESS_THAN_OR_EQUALS, GREATER_THAN, GREATER_THAN_OR_EQUALS, EQUALS, NOT_EQUALS,
- AND, OR,
- LOG,POW,MAX,MIN,PRINT,
- IQSIZE,
- Over,
- }
-
- OperationTypes operation;
-
- /**
- * This overloaded constructor is used for setting exec type in case of spark backend
- */
- public BinaryScalar(Lop input1, Lop input2, OperationTypes op, DataType dt, ValueType vt, ExecType et)
- {
- super(Lop.Type.BinaryCP, dt, vt);
- operation = op;
- this.addInput(input1);
- this.addInput(input2);
- input1.addOutput(this);
- input2.addOutput(this);
-
- boolean breaksAlignment = false; // this field does not carry any meaning for this lop
- boolean aligner = false;
- boolean definesMRJob = false;
- lps.addCompatibility(JobType.INVALID);
- this.lps.setProperties(inputs, et, ExecLocation.ControlProgram, breaksAlignment, aligner, definesMRJob );
- }
-
- /**
- * Constructor to perform a scalar operation
- * @param input
- * @param op
- */
-
- public BinaryScalar(Lop input1, Lop input2, OperationTypes op, DataType dt, ValueType vt)
- {
- super(Lop.Type.BinaryCP, dt, vt);
- operation = op;
- this.addInput(input1);
- this.addInput(input2);
- input1.addOutput(this);
- input2.addOutput(this);
-
- boolean breaksAlignment = false; // this field does not carry any meaning for this lop
- boolean aligner = false;
- boolean definesMRJob = false;
- lps.addCompatibility(JobType.INVALID);
- this.lps.setProperties(inputs, ExecType.CP, ExecLocation.ControlProgram, breaksAlignment, aligner, definesMRJob );
- }
-
- @Override
- public String toString() {
- return "Operation: " + operation;
- }
-
- public OperationTypes getOperationType(){
- return operation;
- }
-
- @Override
- public String getInstructions(String input1, String input2, String output) throws LopsException
- {
- String opString = getOpcode( operation );
-
-
-
- StringBuilder sb = new StringBuilder();
-
- sb.append(getExecType());
- sb.append(Lop.OPERAND_DELIMITOR);
-
- sb.append( opString );
- sb.append( OPERAND_DELIMITOR );
-
- sb.append( getInputs().get(0).prepScalarInputOperand(getExecType()) );
- sb.append( OPERAND_DELIMITOR );
-
- sb.append( getInputs().get(1).prepScalarInputOperand(getExecType()));
- sb.append( OPERAND_DELIMITOR );
-
- sb.append( prepOutputOperand(output));
-
- return sb.toString();
- }
-
- @Override
- public Lop.SimpleInstType getSimpleInstructionType()
- {
- switch (operation){
-
- default:
- return SimpleInstType.Scalar;
- }
- }
-
- /**
- *
- * @param op
- * @return
- */
- public static String getOpcode( OperationTypes op )
- {
- switch ( op )
- {
- /* Arithmetic */
- case ADD:
- return "+";
- case SUBTRACT:
- return "-";
- case MULTIPLY:
- return "*";
- case DIVIDE:
- return "/";
- case MODULUS:
- return "%%";
- case INTDIV:
- return "%/%";
- case POW:
- return "^";
-
- /* Relational */
- case LESS_THAN:
- return "<";
- case LESS_THAN_OR_EQUALS:
- return "<=";
- case GREATER_THAN:
- return ">";
- case GREATER_THAN_OR_EQUALS:
- return ">=";
- case EQUALS:
- return "==";
- case NOT_EQUALS:
- return "!=";
-
- /* Boolean */
- case AND:
- return "&&";
- case OR:
- return "||";
-
- /* Builtin Functions */
- case LOG:
- return "log";
- case MIN:
- return "min";
- case MAX:
- return "max";
-
- case PRINT:
- return "print";
-
- case IQSIZE:
- return "iqsize";
-
- default:
- throw new UnsupportedOperationException("Instruction is not defined for BinaryScalar operator: " + op);
- }
- }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.lops;
+
+
+
+import org.apache.sysml.lops.LopProperties.ExecLocation;
+import org.apache.sysml.lops.LopProperties.ExecType;
+import org.apache.sysml.lops.compile.JobType;
+import org.apache.sysml.parser.Expression.*;
+
+/**
+ * Lop to perform binary scalar operations. Both inputs must be scalars.
+ * Example i = j + k, i = i + 1.
+ */
+
+public class BinaryScalar extends Lop
+{
+
+ public enum OperationTypes {
+ ADD, SUBTRACT, SUBTRACTRIGHT, MULTIPLY, DIVIDE, MODULUS, INTDIV,
+ LESS_THAN, LESS_THAN_OR_EQUALS, GREATER_THAN, GREATER_THAN_OR_EQUALS, EQUALS, NOT_EQUALS,
+ AND, OR,
+ LOG,POW,MAX,MIN,PRINT,
+ IQSIZE,
+ Over,
+ }
+
+ OperationTypes operation;
+
+ /**
+ * This overloaded constructor is used for setting exec type in case of spark backend
+ */
+ public BinaryScalar(Lop input1, Lop input2, OperationTypes op, DataType dt, ValueType vt, ExecType et)
+ {
+ super(Lop.Type.BinaryCP, dt, vt);
+ operation = op;
+ this.addInput(input1);
+ this.addInput(input2);
+ input1.addOutput(this);
+ input2.addOutput(this);
+
+ boolean breaksAlignment = false; // this field does not carry any meaning for this lop
+ boolean aligner = false;
+ boolean definesMRJob = false;
+ lps.addCompatibility(JobType.INVALID);
+ this.lps.setProperties(inputs, et, ExecLocation.ControlProgram, breaksAlignment, aligner, definesMRJob );
+ }
+
+ /**
+ * Constructor to perform a scalar operation
+ * @param input
+ * @param op
+ */
+
+ public BinaryScalar(Lop input1, Lop input2, OperationTypes op, DataType dt, ValueType vt)
+ {
+ super(Lop.Type.BinaryCP, dt, vt);
+ operation = op;
+ this.addInput(input1);
+ this.addInput(input2);
+ input1.addOutput(this);
+ input2.addOutput(this);
+
+ boolean breaksAlignment = false; // this field does not carry any meaning for this lop
+ boolean aligner = false;
+ boolean definesMRJob = false;
+ lps.addCompatibility(JobType.INVALID);
+ this.lps.setProperties(inputs, ExecType.CP, ExecLocation.ControlProgram, breaksAlignment, aligner, definesMRJob );
+ }
+
+ @Override
+ public String toString() {
+ return "Operation: " + operation;
+ }
+
+ public OperationTypes getOperationType(){
+ return operation;
+ }
+
+ @Override
+ public String getInstructions(String input1, String input2, String output) throws LopsException
+ {
+ String opString = getOpcode( operation );
+
+
+
+ StringBuilder sb = new StringBuilder();
+
+ sb.append(getExecType());
+ sb.append(Lop.OPERAND_DELIMITOR);
+
+ sb.append( opString );
+ sb.append( OPERAND_DELIMITOR );
+
+ sb.append( getInputs().get(0).prepScalarInputOperand(getExecType()) );
+ sb.append( OPERAND_DELIMITOR );
+
+ sb.append( getInputs().get(1).prepScalarInputOperand(getExecType()));
+ sb.append( OPERAND_DELIMITOR );
+
+ sb.append( prepOutputOperand(output));
+
+ return sb.toString();
+ }
+
+ @Override
+ public Lop.SimpleInstType getSimpleInstructionType()
+ {
+ switch (operation){
+
+ default:
+ return SimpleInstType.Scalar;
+ }
+ }
+
+ /**
+ *
+ * @param op
+ * @return
+ */
+ public static String getOpcode( OperationTypes op )
+ {
+ switch ( op )
+ {
+ /* Arithmetic */
+ case ADD:
+ return "+";
+ case SUBTRACT:
+ return "-";
+ case MULTIPLY:
+ return "*";
+ case DIVIDE:
+ return "/";
+ case MODULUS:
+ return "%%";
+ case INTDIV:
+ return "%/%";
+ case POW:
+ return "^";
+
+ /* Relational */
+ case LESS_THAN:
+ return "<";
+ case LESS_THAN_OR_EQUALS:
+ return "<=";
+ case GREATER_THAN:
+ return ">";
+ case GREATER_THAN_OR_EQUALS:
+ return ">=";
+ case EQUALS:
+ return "==";
+ case NOT_EQUALS:
+ return "!=";
+
+ /* Boolean */
+ case AND:
+ return "&&";
+ case OR:
+ return "||";
+
+ /* Builtin Functions */
+ case LOG:
+ return "log";
+ case MIN:
+ return "min";
+ case MAX:
+ return "max";
+
+ case PRINT:
+ return "print";
+
+ case IQSIZE:
+ return "iqsize";
+
+ default:
+ throw new UnsupportedOperationException("Instruction is not defined for BinaryScalar operator: " + op);
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/main/java/org/apache/sysml/parser/dml/Dml.g4
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/parser/dml/Dml.g4 b/src/main/java/org/apache/sysml/parser/dml/Dml.g4
index 9d07dc9..bada11e 100644
--- a/src/main/java/org/apache/sysml/parser/dml/Dml.g4
+++ b/src/main/java/org/apache/sysml/parser/dml/Dml.g4
@@ -1,201 +1,201 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-grammar Dml;
-
-@header
-{
- // Commenting the package name and explicitly passing it in build.xml to maintain compatibility with maven plugin
- // package org.apache.sysml.parser.dml;
-}
-
-// DML Program is a list of expression
-// For now, we only allow global function definitions (not nested or inside a while block)
-dmlprogram: (blocks+=statement | functionBlocks+=functionStatement)* EOF;
-
-statement returns [ StatementInfo info ]
-@init {
- // This actions occurs regardless of how many alternatives in this rule
- $info = new StatementInfo();
-} :
- // ------------------------------------------
- // ImportStatement
- 'source' '(' filePath = STRING ')' 'as' namespace=ID ';'* # ImportStatement
- | 'setwd' '(' pathValue = STRING ')' ';'* # PathStatement
- // ------------------------------------------
- // Treat function call as AssignmentStatement or MultiAssignmentStatement
- // For backward compatibility and also since the behavior of foo() * A + foo() ... where foo returns A
- // Convert FunctionCallIdentifier(paramExprs, ..) -> source
- | // TODO: Throw an informative error if user doesnot provide the optional assignment
- ( targetList+=dataIdentifier ('='|'<-') )? name=ID '(' (paramExprs+=parameterizedExpression (',' paramExprs+=parameterizedExpression)* )? ')' ';'* # FunctionCallAssignmentStatement
- | '[' targetList+=dataIdentifier (',' targetList+=dataIdentifier)* ']' ('='|'<-') name=ID '(' (paramExprs+=parameterizedExpression (',' paramExprs+=parameterizedExpression)* )? ')' ';'* # FunctionCallMultiAssignmentStatement
- // {notifyErrorListeners("Too many parentheses");}
- // ------------------------------------------
- // AssignmentStatement
- | targetList+=dataIdentifier op=('<-'|'=') 'ifdef' '(' commandLineParam=dataIdentifier ',' source=expression ')' ';'* # IfdefAssignmentStatement
- | targetList+=dataIdentifier op=('<-'|'=') source=expression ';'* # AssignmentStatement
- // ------------------------------------------
- // We don't support block statement
- // | '{' body+=expression ';'* ( body+=expression ';'* )* '}' # BlockStatement
- // ------------------------------------------
- // IfStatement
- | 'if' '(' predicate=expression ')' (ifBody+=statement ';'* | '{' (ifBody+=statement ';'*)* '}') ('else' (elseBody+=statement ';'* | '{' (elseBody+=statement ';'*)* '}'))? # IfStatement
- // ------------------------------------------
- // ForStatement & ParForStatement
- | 'for' '(' iterVar=ID 'in' iterPred=iterablePredicate (',' parForParams+=strictParameterizedExpression)* ')' (body+=statement ';'* | '{' (body+=statement ';'* )* '}') # ForStatement
- // Convert strictParameterizedExpression to HashMap<String, String> for parForParams
- | 'parfor' '(' iterVar=ID 'in' iterPred=iterablePredicate (',' parForParams+=strictParameterizedExpression)* ')' (body+=statement ';'* | '{' (body+=statement ';'*)* '}') # ParForStatement
- | 'while' '(' predicate=expression ')' (body+=statement ';'* | '{' (body+=statement ';'*)* '}') # WhileStatement
- // ------------------------------------------
-;
-
-iterablePredicate returns [ ExpressionInfo info ]
- @init {
- // This actions occurs regardless of how many alternatives in this rule
- $info = new ExpressionInfo();
- } :
- from=expression ':' to=expression #IterablePredicateColonExpression
- | ID '(' from=expression ',' to=expression ',' increment=expression ')' #IterablePredicateSeqExpression
- ;
-
-functionStatement returns [ StatementInfo info ]
-@init {
- // This actions occurs regardless of how many alternatives in this rule
- $info = new StatementInfo();
-} :
- // ------------------------------------------
- // FunctionStatement & ExternalFunctionStatement
- // small change: only allow typed arguments here ... instead of data identifier
- name=ID ('<-'|'=') 'function' '(' ( inputParams+=typedArgNoAssign (',' inputParams+=typedArgNoAssign)* )? ')' ( 'return' '(' ( outputParams+=typedArgNoAssign (',' outputParams+=typedArgNoAssign)* )? ')' )? '{' (body+=statement ';'*)* '}' # InternalFunctionDefExpression
- | name=ID ('<-'|'=') 'externalFunction' '(' ( inputParams+=typedArgNoAssign (',' inputParams+=typedArgNoAssign)* )? ')' ( 'return' '(' ( outputParams+=typedArgNoAssign (',' outputParams+=typedArgNoAssign)* )? ')' )? 'implemented' 'in' '(' ( otherParams+=strictParameterizedKeyValueString (',' otherParams+=strictParameterizedKeyValueString)* )? ')' ';'* # ExternalFunctionDefExpression
- // ------------------------------------------
-;
-
-
-// Other data identifiers are typedArgNoAssign, parameterizedExpression and strictParameterizedExpression
-dataIdentifier returns [ ExpressionInfo dataInfo ]
-@init {
- // This actions occurs regardless of how many alternatives in this rule
- $dataInfo = new ExpressionInfo();
- // $dataInfo.expr = new org.apache.sysml.parser.DataIdentifier();
-} :
- // ------------------------------------------
- // IndexedIdentifier
- name=ID '[' (rowLower=expression (':' rowUpper=expression)?)? ',' (colLower=expression (':' colUpper=expression)?)? ']' # IndexedExpression
- // ------------------------------------------
- | ID # SimpleDataIdentifierExpression
- | COMMANDLINE_NAMED_ID # CommandlineParamExpression
- | COMMANDLINE_POSITION_ID # CommandlinePositionExpression
-;
-expression returns [ ExpressionInfo info ]
-@init {
- // This actions occurs regardless of how many alternatives in this rule
- $info = new ExpressionInfo();
- // $info.expr = new org.apache.sysml.parser.BinaryExpression(org.apache.sysml.parser.Expression.BinaryOp.INVALID);
-} :
- // ------------------------------------------
- // BinaryExpression
- // power
- <assoc=right> left=expression op='^' right=expression # PowerExpression
- // unary plus and minus
- | op=('-'|'+') left=expression # UnaryExpression
- // sequence - since we are only using this into for
- //| left=expression op=':' right=expression # SequenceExpression
- // matrix multiply
- | left=expression op='%*%' right=expression # MatrixMulExpression
- // modulus and integer division
- | left=expression op=('%/%' | '%%' ) right=expression # ModIntDivExpression
- // arithmetic multiply and divide
- | left=expression op=('*'|'/') right=expression # MultDivExpression
- // arithmetic addition and subtraction
- | left=expression op=('+'|'-') right=expression # AddSubExpression
- // ------------------------------------------
- // RelationalExpression
- | left=expression op=('>'|'>='|'<'|'<='|'=='|'!=') right=expression # RelationalExpression
- // ------------------------------------------
- // BooleanExpression
- // boolean not
- | op='!' left=expression # BooleanNotExpression
- // boolean and
- | left=expression op=('&'|'&&') right=expression # BooleanAndExpression
- // boolean or
- | left=expression op=('|'|'||') right=expression # BooleanOrExpression
-
- // ---------------------------------
- // only applicable for builtin function expressions
- | name=ID '(' (paramExprs+=parameterizedExpression (',' paramExprs+=parameterizedExpression)* )? ')' ';'* # BuiltinFunctionExpression
-
- // 4. Atomic
- | '(' left=expression ')' # AtomicExpression
-
- // Should you allow indexed expression here ?
- // | '[' targetList+=expression (',' targetList+=expression)* ']' # MultiIdExpression
-
- // | BOOLEAN # ConstBooleanIdExpression
- | 'TRUE' # ConstTrueExpression
- | 'FALSE' # ConstFalseExpression
- | INT # ConstIntIdExpression
- | DOUBLE # ConstDoubleIdExpression
- | STRING # ConstStringIdExpression
- | dataIdentifier # DataIdExpression
- // Special
- // | 'NULL' | 'NA' | 'Inf' | 'NaN'
-;
-
-typedArgNoAssign : paramType=ml_type paramName=ID;
-parameterizedExpression : (paramName=ID '=')? paramVal=expression;
-strictParameterizedExpression : paramName=ID '=' paramVal=expression ;
-strictParameterizedKeyValueString : paramName=ID '=' paramVal=STRING ;
-ID : (ALPHABET (ALPHABET|DIGIT|'_')* '::')? ALPHABET (ALPHABET|DIGIT|'_')*
- // Special ID cases:
- // | 'matrix' // --> This is a special case which causes lot of headache
- | 'as.scalar' | 'as.matrix' | 'as.double' | 'as.integer' | 'as.logical' | 'index.return' | 'lower.tail'
-;
-// Unfortunately, we have datatype name clashing with builtin function name: matrix :(
-// Therefore, ugly work around for checking datatype
-ml_type : valueType | dataType '[' valueType ']';
-// Note to reduce number of keywords, these are case-sensitive,
-// To allow case-insenstive, 'int' becomes: ('i' | 'I') ('n' | 'N') ('t' | 'T')
-valueType: 'int' | 'integer' | 'string' | 'boolean' | 'double'
- | 'Int' | 'Integer' | 'String' | 'Boolean' | 'Double';
-dataType:
- // 'scalar' # ScalarDataTypeDummyCheck
- // |
- ID # MatrixDataTypeCheck //{ if($ID.text.compareTo("matrix") != 0) { notifyErrorListeners("incorrect datatype"); } }
- //| 'matrix' //---> See ID, this causes lot of headache
- ;
-INT : DIGIT+ [Ll]?;
-// BOOLEAN : 'TRUE' | 'FALSE';
-DOUBLE: DIGIT+ '.' DIGIT* EXP? [Ll]?
-| DIGIT+ EXP? [Ll]?
-| '.' DIGIT+ EXP? [Ll]?
-;
-DIGIT: '0'..'9';
-ALPHABET : [a-zA-Z] ;
-fragment EXP : ('E' | 'e') ('+' | '-')? INT ;
-COMMANDLINE_NAMED_ID: '$' ALPHABET (ALPHABET|DIGIT|'_')*;
-COMMANDLINE_POSITION_ID: '$' DIGIT+;
-
-// supports single and double quoted string with escape characters
-STRING: '"' ( ESC | ~[\\"] )*? '"' | '\'' ( ESC | ~[\\'] )*? '\'';
-fragment ESC : '\\' [abtnfrv"'\\] ;
-// Comments, whitespaces and new line
-LINE_COMMENT : '#' .*? '\r'? '\n' -> skip ;
-MULTILINE_BLOCK_COMMENT : '/*' .*? '*/' -> skip ;
-WHITESPACE : (' ' | '\t' | '\r' | '\n')+ -> skip ;
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+grammar Dml;
+
+@header
+{
+ // Commenting the package name and explicitly passing it in build.xml to maintain compatibility with maven plugin
+ // package org.apache.sysml.parser.dml;
+}
+
+// DML Program is a list of expression
+// For now, we only allow global function definitions (not nested or inside a while block)
+dmlprogram: (blocks+=statement | functionBlocks+=functionStatement)* EOF;
+
+statement returns [ StatementInfo info ]
+@init {
+ // This actions occurs regardless of how many alternatives in this rule
+ $info = new StatementInfo();
+} :
+ // ------------------------------------------
+ // ImportStatement
+ 'source' '(' filePath = STRING ')' 'as' namespace=ID ';'* # ImportStatement
+ | 'setwd' '(' pathValue = STRING ')' ';'* # PathStatement
+ // ------------------------------------------
+ // Treat function call as AssignmentStatement or MultiAssignmentStatement
+ // For backward compatibility and also since the behavior of foo() * A + foo() ... where foo returns A
+ // Convert FunctionCallIdentifier(paramExprs, ..) -> source
+ | // TODO: Throw an informative error if user doesnot provide the optional assignment
+ ( targetList+=dataIdentifier ('='|'<-') )? name=ID '(' (paramExprs+=parameterizedExpression (',' paramExprs+=parameterizedExpression)* )? ')' ';'* # FunctionCallAssignmentStatement
+ | '[' targetList+=dataIdentifier (',' targetList+=dataIdentifier)* ']' ('='|'<-') name=ID '(' (paramExprs+=parameterizedExpression (',' paramExprs+=parameterizedExpression)* )? ')' ';'* # FunctionCallMultiAssignmentStatement
+ // {notifyErrorListeners("Too many parentheses");}
+ // ------------------------------------------
+ // AssignmentStatement
+ | targetList+=dataIdentifier op=('<-'|'=') 'ifdef' '(' commandLineParam=dataIdentifier ',' source=expression ')' ';'* # IfdefAssignmentStatement
+ | targetList+=dataIdentifier op=('<-'|'=') source=expression ';'* # AssignmentStatement
+ // ------------------------------------------
+ // We don't support block statement
+ // | '{' body+=expression ';'* ( body+=expression ';'* )* '}' # BlockStatement
+ // ------------------------------------------
+ // IfStatement
+ | 'if' '(' predicate=expression ')' (ifBody+=statement ';'* | '{' (ifBody+=statement ';'*)* '}') ('else' (elseBody+=statement ';'* | '{' (elseBody+=statement ';'*)* '}'))? # IfStatement
+ // ------------------------------------------
+ // ForStatement & ParForStatement
+ | 'for' '(' iterVar=ID 'in' iterPred=iterablePredicate (',' parForParams+=strictParameterizedExpression)* ')' (body+=statement ';'* | '{' (body+=statement ';'* )* '}') # ForStatement
+ // Convert strictParameterizedExpression to HashMap<String, String> for parForParams
+ | 'parfor' '(' iterVar=ID 'in' iterPred=iterablePredicate (',' parForParams+=strictParameterizedExpression)* ')' (body+=statement ';'* | '{' (body+=statement ';'*)* '}') # ParForStatement
+ | 'while' '(' predicate=expression ')' (body+=statement ';'* | '{' (body+=statement ';'*)* '}') # WhileStatement
+ // ------------------------------------------
+;
+
+iterablePredicate returns [ ExpressionInfo info ]
+ @init {
+ // This actions occurs regardless of how many alternatives in this rule
+ $info = new ExpressionInfo();
+ } :
+ from=expression ':' to=expression #IterablePredicateColonExpression
+ | ID '(' from=expression ',' to=expression ',' increment=expression ')' #IterablePredicateSeqExpression
+ ;
+
+functionStatement returns [ StatementInfo info ]
+@init {
+ // This actions occurs regardless of how many alternatives in this rule
+ $info = new StatementInfo();
+} :
+ // ------------------------------------------
+ // FunctionStatement & ExternalFunctionStatement
+ // small change: only allow typed arguments here ... instead of data identifier
+ name=ID ('<-'|'=') 'function' '(' ( inputParams+=typedArgNoAssign (',' inputParams+=typedArgNoAssign)* )? ')' ( 'return' '(' ( outputParams+=typedArgNoAssign (',' outputParams+=typedArgNoAssign)* )? ')' )? '{' (body+=statement ';'*)* '}' # InternalFunctionDefExpression
+ | name=ID ('<-'|'=') 'externalFunction' '(' ( inputParams+=typedArgNoAssign (',' inputParams+=typedArgNoAssign)* )? ')' ( 'return' '(' ( outputParams+=typedArgNoAssign (',' outputParams+=typedArgNoAssign)* )? ')' )? 'implemented' 'in' '(' ( otherParams+=strictParameterizedKeyValueString (',' otherParams+=strictParameterizedKeyValueString)* )? ')' ';'* # ExternalFunctionDefExpression
+ // ------------------------------------------
+;
+
+
+// Other data identifiers are typedArgNoAssign, parameterizedExpression and strictParameterizedExpression
+dataIdentifier returns [ ExpressionInfo dataInfo ]
+@init {
+ // This actions occurs regardless of how many alternatives in this rule
+ $dataInfo = new ExpressionInfo();
+ // $dataInfo.expr = new org.apache.sysml.parser.DataIdentifier();
+} :
+ // ------------------------------------------
+ // IndexedIdentifier
+ name=ID '[' (rowLower=expression (':' rowUpper=expression)?)? ',' (colLower=expression (':' colUpper=expression)?)? ']' # IndexedExpression
+ // ------------------------------------------
+ | ID # SimpleDataIdentifierExpression
+ | COMMANDLINE_NAMED_ID # CommandlineParamExpression
+ | COMMANDLINE_POSITION_ID # CommandlinePositionExpression
+;
+expression returns [ ExpressionInfo info ]
+@init {
+ // This actions occurs regardless of how many alternatives in this rule
+ $info = new ExpressionInfo();
+ // $info.expr = new org.apache.sysml.parser.BinaryExpression(org.apache.sysml.parser.Expression.BinaryOp.INVALID);
+} :
+ // ------------------------------------------
+ // BinaryExpression
+ // power
+ <assoc=right> left=expression op='^' right=expression # PowerExpression
+ // unary plus and minus
+ | op=('-'|'+') left=expression # UnaryExpression
+ // sequence - since we are only using this into for
+ //| left=expression op=':' right=expression # SequenceExpression
+ // matrix multiply
+ | left=expression op='%*%' right=expression # MatrixMulExpression
+ // modulus and integer division
+ | left=expression op=('%/%' | '%%' ) right=expression # ModIntDivExpression
+ // arithmetic multiply and divide
+ | left=expression op=('*'|'/') right=expression # MultDivExpression
+ // arithmetic addition and subtraction
+ | left=expression op=('+'|'-') right=expression # AddSubExpression
+ // ------------------------------------------
+ // RelationalExpression
+ | left=expression op=('>'|'>='|'<'|'<='|'=='|'!=') right=expression # RelationalExpression
+ // ------------------------------------------
+ // BooleanExpression
+ // boolean not
+ | op='!' left=expression # BooleanNotExpression
+ // boolean and
+ | left=expression op=('&'|'&&') right=expression # BooleanAndExpression
+ // boolean or
+ | left=expression op=('|'|'||') right=expression # BooleanOrExpression
+
+ // ---------------------------------
+ // only applicable for builtin function expressions
+ | name=ID '(' (paramExprs+=parameterizedExpression (',' paramExprs+=parameterizedExpression)* )? ')' ';'* # BuiltinFunctionExpression
+
+ // 4. Atomic
+ | '(' left=expression ')' # AtomicExpression
+
+ // Should you allow indexed expression here ?
+ // | '[' targetList+=expression (',' targetList+=expression)* ']' # MultiIdExpression
+
+ // | BOOLEAN # ConstBooleanIdExpression
+ | 'TRUE' # ConstTrueExpression
+ | 'FALSE' # ConstFalseExpression
+ | INT # ConstIntIdExpression
+ | DOUBLE # ConstDoubleIdExpression
+ | STRING # ConstStringIdExpression
+ | dataIdentifier # DataIdExpression
+ // Special
+ // | 'NULL' | 'NA' | 'Inf' | 'NaN'
+;
+
+typedArgNoAssign : paramType=ml_type paramName=ID;
+parameterizedExpression : (paramName=ID '=')? paramVal=expression;
+strictParameterizedExpression : paramName=ID '=' paramVal=expression ;
+strictParameterizedKeyValueString : paramName=ID '=' paramVal=STRING ;
+ID : (ALPHABET (ALPHABET|DIGIT|'_')* '::')? ALPHABET (ALPHABET|DIGIT|'_')*
+ // Special ID cases:
+ // | 'matrix' // --> This is a special case which causes lot of headache
+ | 'as.scalar' | 'as.matrix' | 'as.double' | 'as.integer' | 'as.logical' | 'index.return' | 'lower.tail'
+;
+// Unfortunately, we have datatype name clashing with builtin function name: matrix :(
+// Therefore, ugly work around for checking datatype
+ml_type : valueType | dataType '[' valueType ']';
+// Note to reduce number of keywords, these are case-sensitive,
+// To allow case-insenstive, 'int' becomes: ('i' | 'I') ('n' | 'N') ('t' | 'T')
+valueType: 'int' | 'integer' | 'string' | 'boolean' | 'double'
+ | 'Int' | 'Integer' | 'String' | 'Boolean' | 'Double';
+dataType:
+ // 'scalar' # ScalarDataTypeDummyCheck
+ // |
+ ID # MatrixDataTypeCheck //{ if($ID.text.compareTo("matrix") != 0) { notifyErrorListeners("incorrect datatype"); } }
+ //| 'matrix' //---> See ID, this causes lot of headache
+ ;
+INT : DIGIT+ [Ll]?;
+// BOOLEAN : 'TRUE' | 'FALSE';
+DOUBLE: DIGIT+ '.' DIGIT* EXP? [Ll]?
+| DIGIT+ EXP? [Ll]?
+| '.' DIGIT+ EXP? [Ll]?
+;
+DIGIT: '0'..'9';
+ALPHABET : [a-zA-Z] ;
+fragment EXP : ('E' | 'e') ('+' | '-')? INT ;
+COMMANDLINE_NAMED_ID: '$' ALPHABET (ALPHABET|DIGIT|'_')*;
+COMMANDLINE_POSITION_ID: '$' DIGIT+;
+
+// supports single and double quoted string with escape characters
+STRING: '"' ( ESC | ~[\\"] )*? '"' | '\'' ( ESC | ~[\\'] )*? '\'';
+fragment ESC : '\\' [abtnfrv"'\\] ;
+// Comments, whitespaces and new line
+LINE_COMMENT : '#' .*? '\r'? '\n' -> skip ;
+MULTILINE_BLOCK_COMMENT : '/*' .*? '*/' -> skip ;
+WHITESPACE : (' ' | '\t' | '\r' | '\n')+ -> skip ;
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/RemoteDPParForMR.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/RemoteDPParForMR.java b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/RemoteDPParForMR.java
index b2d37f9..77d5282 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/RemoteDPParForMR.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/RemoteDPParForMR.java
@@ -1,292 +1,292 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.controlprogram.parfor;
-
-import java.io.IOException;
-import java.util.HashMap;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.SequenceFile;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.Counters.Group;
-import org.apache.hadoop.mapred.FileInputFormat;
-import org.apache.hadoop.mapred.FileOutputFormat;
-import org.apache.hadoop.mapred.JobClient;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.RunningJob;
-import org.apache.hadoop.mapred.SequenceFileOutputFormat;
-
-import org.apache.sysml.api.DMLScript;
-import org.apache.sysml.conf.ConfigurationManager;
-import org.apache.sysml.conf.DMLConfig;
-import org.apache.sysml.runtime.DMLRuntimeException;
-import org.apache.sysml.runtime.controlprogram.LocalVariableMap;
-import org.apache.sysml.runtime.controlprogram.ParForProgramBlock;
-import org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat;
-import org.apache.sysml.runtime.controlprogram.caching.CacheStatistics;
-import org.apache.sysml.runtime.controlprogram.caching.CacheableData;
-import org.apache.sysml.runtime.controlprogram.caching.MatrixObject;
-import org.apache.sysml.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer;
-import org.apache.sysml.runtime.controlprogram.parfor.stat.Stat;
-import org.apache.sysml.runtime.controlprogram.parfor.util.PairWritableBlock;
-import org.apache.sysml.runtime.controlprogram.parfor.util.PairWritableCell;
-import org.apache.sysml.runtime.instructions.cp.Data;
-import org.apache.sysml.runtime.io.MatrixReader;
-import org.apache.sysml.runtime.matrix.data.InputInfo;
-import org.apache.sysml.runtime.matrix.data.OutputInfo;
-import org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration;
-import org.apache.sysml.runtime.util.MapReduceTool;
-import org.apache.sysml.utils.Statistics;
-import org.apache.sysml.yarn.DMLAppMasterUtils;
-
-/**
- * MR job class for submitting parfor remote MR jobs, controlling its execution and obtaining results.
- *
- *
- */
-public class RemoteDPParForMR
-{
-
- protected static final Log LOG = LogFactory.getLog(RemoteDPParForMR.class.getName());
-
- /**
- *
- * @param pfid
- * @param program
- * @param taskFile
- * @param resultFile
- * @param enableCPCaching
- * @param mode
- * @param numMappers
- * @param replication
- * @return
- * @throws DMLRuntimeException
- */
- public static RemoteParForJobReturn runJob(long pfid, String itervar, String matrixvar, String program, String resultFile, MatrixObject input,
- PDataPartitionFormat dpf, OutputInfo oi, boolean tSparseCol, //config params
- boolean enableCPCaching, int numReducers, int replication, int max_retry) //opt params
- throws DMLRuntimeException
- {
- RemoteParForJobReturn ret = null;
- String jobname = "ParFor-DPEMR";
- long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
-
- JobConf job;
- job = new JobConf( RemoteDPParForMR.class );
- job.setJobName(jobname+pfid);
-
- //maintain dml script counters
- Statistics.incrementNoOfCompiledMRJobs();
-
- try
- {
- /////
- //configure the MR job
-
- //set arbitrary CP program blocks that will perform in the reducers
- MRJobConfiguration.setProgramBlocks(job, program);
-
- //enable/disable caching
- MRJobConfiguration.setParforCachingConfig(job, enableCPCaching);
-
- //setup input matrix
- Path path = new Path( input.getFileName() );
- long rlen = input.getNumRows();
- long clen = input.getNumColumns();
- int brlen = (int) input.getNumRowsPerBlock();
- int bclen = (int) input.getNumColumnsPerBlock();
- MRJobConfiguration.setPartitioningInfo(job, rlen, clen, brlen, bclen, InputInfo.BinaryBlockInputInfo, oi, dpf, 1, input.getFileName(), itervar, matrixvar, tSparseCol);
- job.setInputFormat(InputInfo.BinaryBlockInputInfo.inputFormatClass);
- FileInputFormat.setInputPaths(job, path);
-
- //set mapper and reducers classes
- job.setMapperClass(DataPartitionerRemoteMapper.class);
- job.setReducerClass(RemoteDPParWorkerReducer.class);
-
- //set output format
- job.setOutputFormat(SequenceFileOutputFormat.class);
-
- //set output path
- MapReduceTool.deleteFileIfExistOnHDFS(resultFile);
- FileOutputFormat.setOutputPath(job, new Path(resultFile));
-
- //set the output key, value schema
-
- //parfor partitioning outputs (intermediates)
- job.setMapOutputKeyClass(LongWritable.class);
- if( oi == OutputInfo.BinaryBlockOutputInfo )
- job.setMapOutputValueClass(PairWritableBlock.class);
- else if( oi == OutputInfo.BinaryCellOutputInfo )
- job.setMapOutputValueClass(PairWritableCell.class);
- else
- throw new DMLRuntimeException("Unsupported intermrediate output info: "+oi);
- //parfor exec output
- job.setOutputKeyClass(LongWritable.class);
- job.setOutputValueClass(Text.class);
-
- //////
- //set optimization parameters
-
- //set the number of mappers and reducers
- job.setNumReduceTasks( numReducers );
-
- //disable automatic tasks timeouts and speculative task exec
- job.setInt("mapred.task.timeout", 0);
- job.setMapSpeculativeExecution(false);
-
- //set up preferred custom serialization framework for binary block format
- if( MRJobConfiguration.USE_BINARYBLOCK_SERIALIZATION )
- MRJobConfiguration.addBinaryBlockSerializationFramework( job );
-
- //set up map/reduce memory configurations (if in AM context)
- DMLConfig config = ConfigurationManager.getConfig();
- DMLAppMasterUtils.setupMRJobRemoteMaxMemory(job, config);
-
- //disable JVM reuse
- job.setNumTasksToExecutePerJvm( 1 ); //-1 for unlimited
-
- //set the replication factor for the results
- job.setInt("dfs.replication", replication);
-
- //set the max number of retries per map task
- //note: currently disabled to use cluster config
- //job.setInt("mapreduce.map.maxattempts", max_retry);
-
- //set unique working dir
- MRJobConfiguration.setUniqueWorkingDir(job);
-
- /////
- // execute the MR job
- RunningJob runjob = JobClient.runJob(job);
-
- // Process different counters
- Statistics.incrementNoOfExecutedMRJobs();
- Group pgroup = runjob.getCounters().getGroup(ParForProgramBlock.PARFOR_COUNTER_GROUP_NAME);
- int numTasks = (int)pgroup.getCounter( Stat.PARFOR_NUMTASKS.toString() );
- int numIters = (int)pgroup.getCounter( Stat.PARFOR_NUMITERS.toString() );
- if( DMLScript.STATISTICS && !InfrastructureAnalyzer.isLocalMode() ) {
- Statistics.incrementJITCompileTime( pgroup.getCounter( Stat.PARFOR_JITCOMPILE.toString() ) );
- Statistics.incrementJVMgcCount( pgroup.getCounter( Stat.PARFOR_JVMGC_COUNT.toString() ) );
- Statistics.incrementJVMgcTime( pgroup.getCounter( Stat.PARFOR_JVMGC_TIME.toString() ) );
- Group cgroup = runjob.getCounters().getGroup(CacheableData.CACHING_COUNTER_GROUP_NAME.toString());
- CacheStatistics.incrementMemHits((int)cgroup.getCounter( CacheStatistics.Stat.CACHE_HITS_MEM.toString() ));
- CacheStatistics.incrementFSBuffHits((int)cgroup.getCounter( CacheStatistics.Stat.CACHE_HITS_FSBUFF.toString() ));
- CacheStatistics.incrementFSHits((int)cgroup.getCounter( CacheStatistics.Stat.CACHE_HITS_FS.toString() ));
- CacheStatistics.incrementHDFSHits((int)cgroup.getCounter( CacheStatistics.Stat.CACHE_HITS_HDFS.toString() ));
- CacheStatistics.incrementFSBuffWrites((int)cgroup.getCounter( CacheStatistics.Stat.CACHE_WRITES_FSBUFF.toString() ));
- CacheStatistics.incrementFSWrites((int)cgroup.getCounter( CacheStatistics.Stat.CACHE_WRITES_FS.toString() ));
- CacheStatistics.incrementHDFSWrites((int)cgroup.getCounter( CacheStatistics.Stat.CACHE_WRITES_HDFS.toString() ));
- CacheStatistics.incrementAcquireRTime(cgroup.getCounter( CacheStatistics.Stat.CACHE_TIME_ACQR.toString() ));
- CacheStatistics.incrementAcquireMTime(cgroup.getCounter( CacheStatistics.Stat.CACHE_TIME_ACQM.toString() ));
- CacheStatistics.incrementReleaseTime(cgroup.getCounter( CacheStatistics.Stat.CACHE_TIME_RLS.toString() ));
- CacheStatistics.incrementExportTime(cgroup.getCounter( CacheStatistics.Stat.CACHE_TIME_EXP.toString() ));
- }
-
- // read all files of result variables and prepare for return
- LocalVariableMap[] results = readResultFile(job, resultFile);
-
- ret = new RemoteParForJobReturn(runjob.isSuccessful(),
- numTasks, numIters,
- results);
- }
- catch(Exception ex)
- {
- throw new DMLRuntimeException(ex);
- }
- finally
- {
- // remove created files
- try
- {
- MapReduceTool.deleteFileIfExistOnHDFS(new Path(resultFile), job);
- }
- catch(IOException ex)
- {
- throw new DMLRuntimeException(ex);
- }
- }
-
- if( DMLScript.STATISTICS ){
- long t1 = System.nanoTime();
- Statistics.maintainCPHeavyHitters("MR-Job_"+jobname, t1-t0);
- }
-
- return ret;
- }
-
-
- /**
- * Result file contains hierarchy of workerID-resultvar(incl filename). We deduplicate
- * on the workerID. Without JVM reuse each task refers to a unique workerID, so we
- * will not find any duplicates. With JVM reuse, however, each slot refers to a workerID,
- * and there are duplicate filenames due to partial aggregation and overwrite of fname
- * (the RemoteParWorkerMapper ensures uniqueness of those files independent of the
- * runtime implementation).
- *
- * @param job
- * @param fname
- * @return
- * @throws DMLRuntimeException
- */
- @SuppressWarnings("deprecation")
- public static LocalVariableMap [] readResultFile( JobConf job, String fname )
- throws DMLRuntimeException, IOException
- {
- HashMap<Long,LocalVariableMap> tmp = new HashMap<Long,LocalVariableMap>();
-
- FileSystem fs = FileSystem.get(job);
- Path path = new Path(fname);
- LongWritable key = new LongWritable(); //workerID
- Text value = new Text(); //serialized var header (incl filename)
-
- int countAll = 0;
- for( Path lpath : MatrixReader.getSequenceFilePaths(fs, path) )
- {
- SequenceFile.Reader reader = new SequenceFile.Reader(FileSystem.get(job),lpath,job);
- try
- {
- while( reader.next(key, value) )
- {
- //System.out.println("key="+key.get()+", value="+value.toString());
- if( !tmp.containsKey( key.get() ) )
- tmp.put(key.get(), new LocalVariableMap ());
- Object[] dat = ProgramConverter.parseDataObject( value.toString() );
- tmp.get( key.get() ).put((String)dat[0], (Data)dat[1]);
- countAll++;
- }
- }
- finally
- {
- if( reader != null )
- reader.close();
- }
- }
-
- LOG.debug("Num remote worker results (before deduplication): "+countAll);
- LOG.debug("Num remote worker results: "+tmp.size());
-
- //create return array
- return tmp.values().toArray(new LocalVariableMap[0]);
- }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.controlprogram.parfor;
+
+import java.io.IOException;
+import java.util.HashMap;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.Counters.Group;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RunningJob;
+import org.apache.hadoop.mapred.SequenceFileOutputFormat;
+
+import org.apache.sysml.api.DMLScript;
+import org.apache.sysml.conf.ConfigurationManager;
+import org.apache.sysml.conf.DMLConfig;
+import org.apache.sysml.runtime.DMLRuntimeException;
+import org.apache.sysml.runtime.controlprogram.LocalVariableMap;
+import org.apache.sysml.runtime.controlprogram.ParForProgramBlock;
+import org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat;
+import org.apache.sysml.runtime.controlprogram.caching.CacheStatistics;
+import org.apache.sysml.runtime.controlprogram.caching.CacheableData;
+import org.apache.sysml.runtime.controlprogram.caching.MatrixObject;
+import org.apache.sysml.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer;
+import org.apache.sysml.runtime.controlprogram.parfor.stat.Stat;
+import org.apache.sysml.runtime.controlprogram.parfor.util.PairWritableBlock;
+import org.apache.sysml.runtime.controlprogram.parfor.util.PairWritableCell;
+import org.apache.sysml.runtime.instructions.cp.Data;
+import org.apache.sysml.runtime.io.MatrixReader;
+import org.apache.sysml.runtime.matrix.data.InputInfo;
+import org.apache.sysml.runtime.matrix.data.OutputInfo;
+import org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration;
+import org.apache.sysml.runtime.util.MapReduceTool;
+import org.apache.sysml.utils.Statistics;
+import org.apache.sysml.yarn.DMLAppMasterUtils;
+
+/**
+ * MR job class for submitting parfor remote MR jobs, controlling its execution and obtaining results.
+ *
+ *
+ */
+public class RemoteDPParForMR
+{
+
+ protected static final Log LOG = LogFactory.getLog(RemoteDPParForMR.class.getName());
+
+ /**
+ *
+ * @param pfid
+ * @param program
+ * @param taskFile
+ * @param resultFile
+ * @param enableCPCaching
+ * @param mode
+ * @param numMappers
+ * @param replication
+ * @return
+ * @throws DMLRuntimeException
+ */
+ public static RemoteParForJobReturn runJob(long pfid, String itervar, String matrixvar, String program, String resultFile, MatrixObject input,
+ PDataPartitionFormat dpf, OutputInfo oi, boolean tSparseCol, //config params
+ boolean enableCPCaching, int numReducers, int replication, int max_retry) //opt params
+ throws DMLRuntimeException
+ {
+ RemoteParForJobReturn ret = null;
+ String jobname = "ParFor-DPEMR";
+ long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
+
+ JobConf job;
+ job = new JobConf( RemoteDPParForMR.class );
+ job.setJobName(jobname+pfid);
+
+ //maintain dml script counters
+ Statistics.incrementNoOfCompiledMRJobs();
+
+ try
+ {
+ /////
+ //configure the MR job
+
+ //set arbitrary CP program blocks that will perform in the reducers
+ MRJobConfiguration.setProgramBlocks(job, program);
+
+ //enable/disable caching
+ MRJobConfiguration.setParforCachingConfig(job, enableCPCaching);
+
+ //setup input matrix
+ Path path = new Path( input.getFileName() );
+ long rlen = input.getNumRows();
+ long clen = input.getNumColumns();
+ int brlen = (int) input.getNumRowsPerBlock();
+ int bclen = (int) input.getNumColumnsPerBlock();
+ MRJobConfiguration.setPartitioningInfo(job, rlen, clen, brlen, bclen, InputInfo.BinaryBlockInputInfo, oi, dpf, 1, input.getFileName(), itervar, matrixvar, tSparseCol);
+ job.setInputFormat(InputInfo.BinaryBlockInputInfo.inputFormatClass);
+ FileInputFormat.setInputPaths(job, path);
+
+ //set mapper and reducers classes
+ job.setMapperClass(DataPartitionerRemoteMapper.class);
+ job.setReducerClass(RemoteDPParWorkerReducer.class);
+
+ //set output format
+ job.setOutputFormat(SequenceFileOutputFormat.class);
+
+ //set output path
+ MapReduceTool.deleteFileIfExistOnHDFS(resultFile);
+ FileOutputFormat.setOutputPath(job, new Path(resultFile));
+
+ //set the output key, value schema
+
+ //parfor partitioning outputs (intermediates)
+ job.setMapOutputKeyClass(LongWritable.class);
+ if( oi == OutputInfo.BinaryBlockOutputInfo )
+ job.setMapOutputValueClass(PairWritableBlock.class);
+ else if( oi == OutputInfo.BinaryCellOutputInfo )
+ job.setMapOutputValueClass(PairWritableCell.class);
+ else
+ throw new DMLRuntimeException("Unsupported intermrediate output info: "+oi);
+ //parfor exec output
+ job.setOutputKeyClass(LongWritable.class);
+ job.setOutputValueClass(Text.class);
+
+ //////
+ //set optimization parameters
+
+ //set the number of mappers and reducers
+ job.setNumReduceTasks( numReducers );
+
+ //disable automatic tasks timeouts and speculative task exec
+ job.setInt("mapred.task.timeout", 0);
+ job.setMapSpeculativeExecution(false);
+
+ //set up preferred custom serialization framework for binary block format
+ if( MRJobConfiguration.USE_BINARYBLOCK_SERIALIZATION )
+ MRJobConfiguration.addBinaryBlockSerializationFramework( job );
+
+ //set up map/reduce memory configurations (if in AM context)
+ DMLConfig config = ConfigurationManager.getConfig();
+ DMLAppMasterUtils.setupMRJobRemoteMaxMemory(job, config);
+
+ //disable JVM reuse
+ job.setNumTasksToExecutePerJvm( 1 ); //-1 for unlimited
+
+ //set the replication factor for the results
+ job.setInt("dfs.replication", replication);
+
+ //set the max number of retries per map task
+ //note: currently disabled to use cluster config
+ //job.setInt("mapreduce.map.maxattempts", max_retry);
+
+ //set unique working dir
+ MRJobConfiguration.setUniqueWorkingDir(job);
+
+ /////
+ // execute the MR job
+ RunningJob runjob = JobClient.runJob(job);
+
+ // Process different counters
+ Statistics.incrementNoOfExecutedMRJobs();
+ Group pgroup = runjob.getCounters().getGroup(ParForProgramBlock.PARFOR_COUNTER_GROUP_NAME);
+ int numTasks = (int)pgroup.getCounter( Stat.PARFOR_NUMTASKS.toString() );
+ int numIters = (int)pgroup.getCounter( Stat.PARFOR_NUMITERS.toString() );
+ if( DMLScript.STATISTICS && !InfrastructureAnalyzer.isLocalMode() ) {
+ Statistics.incrementJITCompileTime( pgroup.getCounter( Stat.PARFOR_JITCOMPILE.toString() ) );
+ Statistics.incrementJVMgcCount( pgroup.getCounter( Stat.PARFOR_JVMGC_COUNT.toString() ) );
+ Statistics.incrementJVMgcTime( pgroup.getCounter( Stat.PARFOR_JVMGC_TIME.toString() ) );
+ Group cgroup = runjob.getCounters().getGroup(CacheableData.CACHING_COUNTER_GROUP_NAME.toString());
+ CacheStatistics.incrementMemHits((int)cgroup.getCounter( CacheStatistics.Stat.CACHE_HITS_MEM.toString() ));
+ CacheStatistics.incrementFSBuffHits((int)cgroup.getCounter( CacheStatistics.Stat.CACHE_HITS_FSBUFF.toString() ));
+ CacheStatistics.incrementFSHits((int)cgroup.getCounter( CacheStatistics.Stat.CACHE_HITS_FS.toString() ));
+ CacheStatistics.incrementHDFSHits((int)cgroup.getCounter( CacheStatistics.Stat.CACHE_HITS_HDFS.toString() ));
+ CacheStatistics.incrementFSBuffWrites((int)cgroup.getCounter( CacheStatistics.Stat.CACHE_WRITES_FSBUFF.toString() ));
+ CacheStatistics.incrementFSWrites((int)cgroup.getCounter( CacheStatistics.Stat.CACHE_WRITES_FS.toString() ));
+ CacheStatistics.incrementHDFSWrites((int)cgroup.getCounter( CacheStatistics.Stat.CACHE_WRITES_HDFS.toString() ));
+ CacheStatistics.incrementAcquireRTime(cgroup.getCounter( CacheStatistics.Stat.CACHE_TIME_ACQR.toString() ));
+ CacheStatistics.incrementAcquireMTime(cgroup.getCounter( CacheStatistics.Stat.CACHE_TIME_ACQM.toString() ));
+ CacheStatistics.incrementReleaseTime(cgroup.getCounter( CacheStatistics.Stat.CACHE_TIME_RLS.toString() ));
+ CacheStatistics.incrementExportTime(cgroup.getCounter( CacheStatistics.Stat.CACHE_TIME_EXP.toString() ));
+ }
+
+ // read all files of result variables and prepare for return
+ LocalVariableMap[] results = readResultFile(job, resultFile);
+
+ ret = new RemoteParForJobReturn(runjob.isSuccessful(),
+ numTasks, numIters,
+ results);
+ }
+ catch(Exception ex)
+ {
+ throw new DMLRuntimeException(ex);
+ }
+ finally
+ {
+ // remove created files
+ try
+ {
+ MapReduceTool.deleteFileIfExistOnHDFS(new Path(resultFile), job);
+ }
+ catch(IOException ex)
+ {
+ throw new DMLRuntimeException(ex);
+ }
+ }
+
+ if( DMLScript.STATISTICS ){
+ long t1 = System.nanoTime();
+ Statistics.maintainCPHeavyHitters("MR-Job_"+jobname, t1-t0);
+ }
+
+ return ret;
+ }
+
+
+ /**
+ * Result file contains hierarchy of workerID-resultvar(incl filename). We deduplicate
+ * on the workerID. Without JVM reuse each task refers to a unique workerID, so we
+ * will not find any duplicates. With JVM reuse, however, each slot refers to a workerID,
+ * and there are duplicate filenames due to partial aggregation and overwrite of fname
+ * (the RemoteParWorkerMapper ensures uniqueness of those files independent of the
+ * runtime implementation).
+ *
+ * @param job
+ * @param fname
+ * @return
+ * @throws DMLRuntimeException
+ */
+ @SuppressWarnings("deprecation")
+ public static LocalVariableMap [] readResultFile( JobConf job, String fname )
+ throws DMLRuntimeException, IOException
+ {
+ HashMap<Long,LocalVariableMap> tmp = new HashMap<Long,LocalVariableMap>();
+
+ FileSystem fs = FileSystem.get(job);
+ Path path = new Path(fname);
+ LongWritable key = new LongWritable(); //workerID
+ Text value = new Text(); //serialized var header (incl filename)
+
+ int countAll = 0;
+ for( Path lpath : MatrixReader.getSequenceFilePaths(fs, path) )
+ {
+ SequenceFile.Reader reader = new SequenceFile.Reader(FileSystem.get(job),lpath,job);
+ try
+ {
+ while( reader.next(key, value) )
+ {
+ //System.out.println("key="+key.get()+", value="+value.toString());
+ if( !tmp.containsKey( key.get() ) )
+ tmp.put(key.get(), new LocalVariableMap ());
+ Object[] dat = ProgramConverter.parseDataObject( value.toString() );
+ tmp.get( key.get() ).put((String)dat[0], (Data)dat[1]);
+ countAll++;
+ }
+ }
+ finally
+ {
+ if( reader != null )
+ reader.close();
+ }
+ }
+
+ LOG.debug("Num remote worker results (before deduplication): "+countAll);
+ LOG.debug("Num remote worker results: "+tmp.size());
+
+ //create return array
+ return tmp.values().toArray(new LocalVariableMap[0]);
+ }
+}