You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by du...@apache.org on 2016/01/21 02:06:22 UTC

[5/8] incubator-systemml git commit: [SYSTEMML-148] Refactored .parser.{antlr4, python} to .parser.{dml, pydml}.

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/c04fc99f/src/main/java/org/apache/sysml/parser/pydml/PyDMLParserWrapper.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/parser/pydml/PyDMLParserWrapper.java b/src/main/java/org/apache/sysml/parser/pydml/PyDMLParserWrapper.java
new file mode 100644
index 0000000..25ee006
--- /dev/null
+++ b/src/main/java/org/apache/sysml/parser/pydml/PyDMLParserWrapper.java
@@ -0,0 +1,283 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.parser.pydml;
+
+import java.io.ByteArrayInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.StringReader;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.antlr.v4.runtime.ANTLRInputStream;
+import org.antlr.v4.runtime.BailErrorStrategy;
+import org.antlr.v4.runtime.CommonTokenStream;
+import org.antlr.v4.runtime.DefaultErrorStrategy;
+import org.antlr.v4.runtime.atn.PredictionMode;
+import org.antlr.v4.runtime.misc.ParseCancellationException;
+import org.antlr.v4.runtime.tree.ParseTree;
+import org.antlr.v4.runtime.tree.ParseTreeWalker;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.sysml.api.DMLScript;
+import org.apache.sysml.parser.AParserWrapper;
+import org.apache.sysml.parser.DMLProgram;
+import org.apache.sysml.parser.ForStatement;
+import org.apache.sysml.parser.ForStatementBlock;
+import org.apache.sysml.parser.FunctionStatementBlock;
+import org.apache.sysml.parser.IfStatement;
+import org.apache.sysml.parser.IfStatementBlock;
+import org.apache.sysml.parser.ImportStatement;
+import org.apache.sysml.parser.LanguageException;
+import org.apache.sysml.parser.ParForStatement;
+import org.apache.sysml.parser.ParForStatementBlock;
+import org.apache.sysml.parser.ParseException;
+import org.apache.sysml.parser.Statement;
+import org.apache.sysml.parser.StatementBlock;
+import org.apache.sysml.parser.WhileStatement;
+import org.apache.sysml.parser.WhileStatementBlock;
+import org.apache.sysml.parser.dml.DMLParserWrapper;
+import org.apache.sysml.parser.pydml.PydmlParser.FunctionStatementContext;
+import org.apache.sysml.parser.pydml.PydmlParser.PmlprogramContext;
+import org.apache.sysml.parser.pydml.PydmlParser.StatementContext;
+import org.apache.sysml.parser.pydml.PydmlSyntacticErrorListener.CustomDmlErrorListener;
+
+/**
+ * Parser front end for PyDML scripts: runs the ANTLR-generated PydmlLexer/PydmlParser,
+ * walks the parse tree with PydmlSyntacticValidator and assembles a DMLProgram from it.
+ * Logic of this wrapper is similar to DMLParserWrapper.
+ * 
+ * Note: ExpressionInfo and StatementInfo are simply wrapper objects and are reused in both DML and PyDML parsers.
+ *
+ */
+public class PyDMLParserWrapper extends AParserWrapper
+{
+	private static final Log LOG = LogFactory.getLog(DMLScript.class.getName());
+
+	/**
+	 * Wraps a single statement into a statement block of the matching kind.
+	 * Used by createDMLProgram; according to the original note it is also called from
+	 * PydmlSyntacticValidator for for, parfor, while, ...
+	 * @param current a statement
+	 * @return corresponding statement block (a plain StatementBlock for every statement
+	 *         that is not a parfor/for/if/while statement, which includes ImportStatement)
+	 */
+	public static StatementBlock getStatementBlock(org.apache.sysml.parser.Statement current) {
+		StatementBlock blk;
+		// NOTE(review): the ParForStatement test comes before the ForStatement test;
+		// presumably ParForStatement is a subtype of ForStatement, so keep this order.
+		if(current instanceof ParForStatement) {
+			blk = new ParForStatementBlock();
+		}
+		else if(current instanceof ForStatement) {
+			blk = new ForStatementBlock();
+		}
+		else if(current instanceof IfStatement) {
+			blk = new IfStatementBlock();
+		}
+		else if(current instanceof WhileStatement) {
+			blk = new WhileStatementBlock();
+		}
+		else {
+			// This includes ImportStatement
+			blk = new StatementBlock();
+		}
+		blk.addStatement(current);
+		return blk;
+	}
+
+	/**
+	 * Parses the passed script text with command line parameters.
+	 * @param fileName either full path or null --> only used for better error handling
+	 * @param dmlScript required; the text of the script (must not be null or blank)
+	 * @param argVals command line arguments, handed to the syntactic validator
+	 * @return the parsed program, never null
+	 * @throws ParseException if dmlScript is null/blank, or if parsing or validation failed
+	 */
+	@Override
+	public DMLProgram parse(String fileName, String dmlScript, HashMap<String,String> argVals) throws ParseException {
+		DMLProgram prog = null;
+		
+		if(dmlScript == null || dmlScript.trim().isEmpty()) {
+			throw new ParseException("Incorrect usage of parse. Please pass dmlScript not just filename");
+		}
+		
+		// Set the pipeline required for ANTLR parsing
+		PyDMLParserWrapper parser = new PyDMLParserWrapper();
+		prog = parser.doParse(fileName, dmlScript, argVals);
+		
+		// doParse reports errors through the error listener and returns null in that case
+		if(prog == null) {
+			throw new ParseException("One or more errors found during parsing. (could not construct AST for file: " + fileName + "). Cannot proceed ahead.");
+		}
+		return prog;
+	}
+
+	/**
+	 * Runs lexer, parser and syntactic validation, then builds the DMLProgram.
+	 * This function is supposed to be called directly only from PydmlSyntacticValidator when it encounters 'import'
+	 * @param fileName path of the script; used in error messages and, when dmlScript is null, read via DMLParserWrapper.readDMLScript
+	 * @param dmlScript text of the script, or null to read it from fileName
+	 * @param argVals command line arguments, handed to the syntactic validator
+	 * @return null if at least one error was reported, otherwise the program
+	 * @throws ParseException if the script cannot be read, parsed or translated
+	 */
+	public DMLProgram doParse(String fileName, String dmlScript, HashMap<String,String> argVals) throws ParseException {
+		DMLProgram dmlPgm = null;
+		
+		ANTLRInputStream in;
+		try {
+			if(dmlScript == null) {
+				dmlScript = DMLParserWrapper.readDMLScript(fileName);
+			}
+			
+			// Hand the characters to ANTLR directly. Going through getBytes() and an
+			// InputStream would encode and decode with the platform default charset and
+			// corrupt every character that charset cannot represent.
+			in = new ANTLRInputStream(new StringReader(dmlScript));
+		} 
+		catch (FileNotFoundException e) {
+			throw new ParseException("ERROR: Cannot find file:" + fileName, e);
+		} 
+		catch (IOException e) {
+			throw new ParseException("ERROR: Cannot open file:" + fileName, e);
+		} 
+		catch (LanguageException e) {
+			throw new ParseException("ERROR: " + e.getMessage(), e);
+		}
+
+		PmlprogramContext ast = null;
+		CustomDmlErrorListener errorListener = new CustomDmlErrorListener();
+		
+		try {
+			PydmlLexer lexer = new PydmlLexer(in);
+			CommonTokenStream tokens = new CommonTokenStream(lexer);
+			PydmlParser antlr4Parser = new PydmlParser(tokens);
+			
+			boolean tryOptimizedParsing = false; // For now no optimization, since it is not able to parse integer value. 
+	
+			if(tryOptimizedParsing) {
+				// NOTE(review): when SLL succeeds no file name is pushed on the listener, so the
+				// popFileName() call further down would hit an empty stack. Harmless while the
+				// flag above stays false; fix before enabling this path.
+				// Try faster and simpler SLL
+				antlr4Parser.getInterpreter().setPredictionMode(PredictionMode.SLL);
+				antlr4Parser.removeErrorListeners();
+				antlr4Parser.setErrorHandler(new BailErrorStrategy());
+				try{
+					ast = antlr4Parser.pmlprogram();
+					// If successful, no need to try out full LL(*) ... SLL was enough
+				}
+				catch(ParseCancellationException ex) {
+					// Error occurred, so now try full LL(*) for better error messages
+					tokens.reset();
+					antlr4Parser.reset();
+					if(fileName != null) {
+						errorListener.pushCurrentFileName(fileName);
+					}
+					else {
+						errorListener.pushCurrentFileName("MAIN_SCRIPT");
+					}
+					// Set our custom error listener
+					antlr4Parser.addErrorListener(errorListener);
+					antlr4Parser.setErrorHandler(new DefaultErrorStrategy());
+					antlr4Parser.getInterpreter().setPredictionMode(PredictionMode.LL);
+					ast = antlr4Parser.pmlprogram();
+				}
+			}
+			else {
+				// Set our custom error listener
+				antlr4Parser.removeErrorListeners();
+				antlr4Parser.addErrorListener(errorListener);
+				errorListener.pushCurrentFileName(fileName);
+	
+				// Now do the parsing
+				ast = antlr4Parser.pmlprogram();
+			}
+		}
+		catch(Exception e) {
+			throw new ParseException("ERROR: Cannot parse the program:" + fileName, e);
+		}
+		
+
+		try {
+			// Now convert the parse tree into DMLProgram
+			// Do syntactic validation while converting 
+			ParseTree tree = ast;
+			// And also do syntactic validation
+			ParseTreeWalker walker = new ParseTreeWalker();
+			PydmlSyntacticValidatorHelper helper = new PydmlSyntacticValidatorHelper(errorListener);
+			PydmlSyntacticValidator validator = new PydmlSyntacticValidator(helper, fileName, argVals);
+			walker.walk(validator, tree);
+			errorListener.popFileName();
+			if(errorListener.isAtleastOneError()) {
+				return null;
+			}
+			dmlPgm = createDMLProgram(ast);
+		}
+		catch(Exception e) {
+			// The separator keeps the cause readable; it used to be glued onto "DMLProgram".
+			throw new ParseException("ERROR: Cannot translate the parse tree into DMLProgram: " + e.getMessage(), e);
+		}
+		
+		return dmlPgm;
+	}
+
+
+	/**
+	 * Builds the DMLProgram from a validated parse tree: functions first, then the
+	 * top-level statements, each wrapped into its own statement block and merged at the end.
+	 * @param ast root of the parse tree, already annotated by the syntactic validator
+	 * @return the program, or null (after logging the location) if a function, statement
+	 *         or import could not be processed
+	 */
+	private DMLProgram createDMLProgram(PmlprogramContext ast) {
+
+		DMLProgram dmlPgm = new DMLProgram();
+
+		// First add all the functions
+		for(FunctionStatementContext fn : ast.functionBlocks) {
+			FunctionStatementBlock functionStmtBlk = new FunctionStatementBlock();
+			functionStmtBlk.addStatement(fn.info.stmt);
+			try {
+				// TODO: currently the logic of nested namespace is not clear.
+				String namespace = DMLProgram.DEFAULT_NAMESPACE;
+				dmlPgm.addFunctionStatementBlock(namespace, fn.info.functionName, functionStmtBlk);
+			} catch (LanguageException e) {
+				LOG.error("line: " + fn.start.getLine() + ":" + fn.start.getCharPositionInLine() + " cannot process the function " + fn.info.functionName);
+				return null;
+			}
+		}
+
+		// Then add all the statements
+		for(StatementContext stmtCtx : ast.blocks) {
+			Statement current = stmtCtx.info.stmt;
+			if(current == null) {
+				LOG.error("line: " + stmtCtx.start.getLine() + ":" + stmtCtx.start.getCharPositionInLine() + " cannot process the statement");
+				return null;
+			}
+			
+			// Ignore Newline logic 
+			if(current.isEmptyNewLineStatement()) {
+				continue;
+			}
+
+			if(current instanceof ImportStatement) {
+				// Handle import statements separately
+				if(stmtCtx.info.namespaces != null) {
+					// Add the DMLProgram entries into current program
+					for(Map.Entry<String, DMLProgram> entry : stmtCtx.info.namespaces.entrySet()) {
+						dmlPgm.getNamespaces().put(entry.getKey(), entry.getValue());
+					}
+				}
+				else {
+					LOG.error("line: " + stmtCtx.start.getLine() + ":" + stmtCtx.start.getCharPositionInLine() + " cannot process the import statement");
+					return null;
+				}
+			}
+
+			// Now wrap statement into individual statement block
+			// merge statement will take care of merging these blocks
+			// (the import statement itself is added as a block as well)
+			dmlPgm.addStatementBlock(getStatementBlock(current));
+		}
+
+		dmlPgm.mergeStatementBlocks();
+		return dmlPgm;
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/c04fc99f/src/main/java/org/apache/sysml/parser/pydml/Pydml.g4
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/parser/pydml/Pydml.g4 b/src/main/java/org/apache/sysml/parser/pydml/Pydml.g4
new file mode 100644
index 0000000..425859c
--- /dev/null
+++ b/src/main/java/org/apache/sysml/parser/pydml/Pydml.g4
@@ -0,0 +1,384 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+grammar Pydml;
+
+// Code in the @header action is injected into the generated recognizer source files.
+// Every line in it is currently commented out, so nothing is injected.
+@header
+{
+    // package org.apache.sysml.parser.pydml;
+    //import org.apache.sysml.parser.dml.StatementInfo;
+    //import org.apache.sysml.parser.dml.ExpressionInfo;
+}
+
+// This antlr grammar is based on Python 3.3 language reference: https://docs.python.org/3.3/reference/grammar.html
+
+// INDENT and DEDENT have no lexer rule of their own: they are created by the lexer
+// code in this grammar (INDENT in the NEWLINE rule action, DEDENT in createDedent()).
+tokens { INDENT, DEDENT }
+
+// Members added to the generated lexer. They implement Python-style layout handling:
+// the NEWLINE rule and nextToken() queue extra NEWLINE / INDENT / DEDENT tokens in `tokens`.
+@lexer::members {
+    // When true, the indentation logic traces its decisions on System.out.
+    private boolean debugIndentRules = false;
+
+    // Indentation level stack
+    private java.util.Stack<Integer> indents = new java.util.Stack<Integer>();
+
+    // Extra tokens queue (see the NEWLINE rule).
+    private java.util.Queue<Token> tokens = new java.util.LinkedList<Token>();
+
+    // Number of opened braces, brackets and parenthesis.
+    private int opened = 0;
+
+    // This is only used to set the line number for dedent
+    private Token lastToken = null;
+
+
+    // Registers t as the lexer's current token and also appends it to the `tokens` queue,
+    // so that several tokens produced by one rule can be handed out one by one.
+    @Override
+    public void emit(Token t) {
+        if(debugIndentRules)
+            System.out.println("Emitted token:" + t);
+
+        super.setToken(t);
+        tokens.offer(t);
+    }
+
+
+    // Returns the next token, serving queued tokens first. When the input is at EOF while
+    // indentation levels are still open, a NEWLINE, one DEDENT per open level and an EOF
+    // token are queued before the regular lexing step runs.
+    @Override
+    public Token nextToken() {
+        if (_input.LA(1) == EOF && !this.indents.isEmpty()) {
+            if(debugIndentRules)
+                System.out.println("EOF reached and expecting some DEDENTS, so emitting them");
+
+            // Drops the oldest queued token, if any.
+            // NOTE(review): presumably this discards a stale token left in the queue — confirm.
+            tokens.poll();
+            this.emit(commonToken(PydmlParser.NEWLINE, "\n"));
+
+            // Now emit as much DEDENT tokens as needed.
+            while (!indents.isEmpty()) {
+                if(debugIndentRules)
+                    System.out.println("Emitting (inserted) DEDENTS");
+
+                this.emit(createDedent());
+                indents.pop();
+            }
+            // Put the EOF back on the token stream.
+            this.emit(commonToken(PydmlParser.EOF, "<EOF>"));
+        }
+        Token next = super.nextToken();
+        if (next.getChannel() == Token.DEFAULT_CHANNEL) {
+            // Keep track of the last token on the default channel.
+            this.lastToken = next;
+        }
+        // Queued tokens take precedence over the token just produced by the lexer.
+        Token retVal = tokens.isEmpty() ? next : tokens.poll();
+
+        if(debugIndentRules)
+            System.out.println("Returning nextToken: [" + retVal + "]<<" + tokens.isEmpty());
+
+        return retVal;
+    }
+
+    // Creates a DEDENT token with empty text whose line is copied from lastToken.
+    // Dereferences lastToken, so it assumes a default-channel token has been seen before.
+    private Token createDedent() {
+        CommonToken dedent = commonToken(PydmlParser.DEDENT, "");
+        dedent.setLine(this.lastToken.getLine());
+        return dedent;
+    }
+
+    // Creates a token that carries only a type and a text; no start/stop character
+    // indices are set (the index-based variant is kept below, commented out).
+    private CommonToken commonToken(int type, String text) {
+        // Nike: Main change: This logic was screwed up and was emitting additional 3 characters, so commenting it for now.
+        // int start = this.getCharIndex();
+        // int stop = start + text.length();
+        // return new CommonToken(this._tokenFactorySourcePair, type, DEFAULT_TOKEN_CHANNEL, start, stop);
+        return new CommonToken(type, text); // Main change
+    }
+
+    // Calculates the indentation level from the spaces:
+    // "Tabs are replaced (from left to right) by one to eight spaces
+    // such that the total number of characters up to and including
+    // the replacement is a multiple of eight [...]"
+    // https://docs.python.org/3.1/reference/lexical_analysis.html#indentation
+    // Every character other than a tab counts as one column.
+    static int getIndentationCount(String spaces) {
+        int count = 0;
+        for (char ch : spaces.toCharArray()) {
+            switch (ch) {
+                case '\t':
+                    // advance to the next multiple of eight
+                    count += 8 - (count % 8);
+                    break;
+                default:
+                    // A normal space char.
+                    count++;
+            }
+        }
+        return count;
+    }
+}
+
+
+// 2. Modify this g4 by comparing it with Java:
+// - https://pythonconquerstheuniverse.wordpress.com/2009/10/03/python-java-a-side-by-side-comparison/
+// - http://www.cs.gordon.edu/courses/cps122/handouts-2014/From%20Python%20to%20Java%20Lecture/A%20Comparison%20of%20the%20Syntax%20of%20Python%20and%20Java.pdf
+// - http://cs.joensuu.fi/~pviktor/python/slides/cheatsheet.pdf
+// - http://www.interfaceware.com/manual/chameleon/scripts/quickreference.pdf
+
+// Start rule. A PyDML program is a sequence of statements and function definitions in any
+// order, followed by optional NEWLINEs and EOF. Statements are collected in `blocks`,
+// function definitions in `functionBlocks`.
+// For now, we only allow global function definitions (not nested or inside a while block)
+pmlprogram: (blocks+=statement | functionBlocks+=functionStatement)*  NEWLINE* EOF;
+
+
+
+// A single PyDML statement. Simple statements end with NEWLINE; compound statements
+// (if, for, parfor, while) end their header with ':' NEWLINE and take a body of one or
+// more statements enclosed in INDENT ... DEDENT. The parentheses around the predicate /
+// loop header of compound statements are optional.
+statement returns [ StatementInfo info ]
+@init {
+       // This action runs before any alternative of this rule is matched
+       $info = new StatementInfo();
+} :
+    // ------------------------------------------
+    // ImportStatement
+    'source' OPEN_PAREN filePath = STRING CLOSE_PAREN  'as' namespace=ID  NEWLINE      # ImportStatement
+    | 'setwd'  OPEN_PAREN pathValue = STRING CLOSE_PAREN NEWLINE                     # PathStatement
+    // ------------------------------------------
+    // AssignmentStatement
+    | targetList+=dataIdentifier '=' 'ifdef' OPEN_PAREN commandLineParam=dataIdentifier ','  source=expression CLOSE_PAREN NEWLINE   # IfdefAssignmentStatement
+    // ------------------------------------------
+    // Treat function call as AssignmentStatement or MultiAssignmentStatement
+    // For backward compatibility and also since the behavior of foo() * A + foo() ... where foo returns A
+    // Convert FunctionCallIdentifier(paramExprs, ..) -> source
+    | // TODO: Throw an informative error if user doesnot provide the optional assignment
+    ( targetList+=dataIdentifier '=' )? name=ID OPEN_PAREN (paramExprs+=parameterizedExpression (',' paramExprs+=parameterizedExpression)* )? CLOSE_PAREN NEWLINE  # FunctionCallAssignmentStatement
+    | OPEN_BRACK targetList+=dataIdentifier (',' targetList+=dataIdentifier)* CLOSE_BRACK '=' name=ID OPEN_PAREN (paramExprs+=parameterizedExpression (',' paramExprs+=parameterizedExpression)* )? CLOSE_PAREN  NEWLINE  # FunctionCallMultiAssignmentStatement
+    // {notifyErrorListeners("Too many parentheses");}
+    // We don't support block statement
+    // | '{' body+=expression ';'* ( body+=expression ';'* )*  '}' # BlockStatement
+    // ------------------------------------------
+    | targetList+=dataIdentifier '=' source=expression NEWLINE   # AssignmentStatement
+    // IfStatement
+    // | 'if' OPEN_PAREN predicate=expression CLOSE_PAREN (ifBody+=statement ';'* |  NEWLINE INDENT (ifBody+=statement)+  DEDENT )  ('else' (elseBody+=statement ';'* | '{' (elseBody+=statement ';'*)*  '}'))?  # IfStatement
+    | 'if' (OPEN_PAREN predicate=expression CLOSE_PAREN | predicate=expression) ':'  NEWLINE INDENT (ifBody+=statement)+  DEDENT   ('else'  ':'  NEWLINE INDENT (elseBody+=statement)+  DEDENT )?  # IfStatement
+    // ------------------------------------------
+    // ForStatement & ParForStatement
+    | 'for' (OPEN_PAREN iterVar=ID 'in' iterPred=iterablePredicate (',' parForParams+=strictParameterizedExpression)* CLOSE_PAREN |  iterVar=ID 'in' iterPred=iterablePredicate (',' parForParams+=strictParameterizedExpression)* ) ':'  NEWLINE INDENT (body+=statement)+  DEDENT  # ForStatement
+    // Convert strictParameterizedExpression to HashMap<String, String> for parForParams
+    | 'parfor' (OPEN_PAREN iterVar=ID 'in' iterPred=iterablePredicate (',' parForParams+=strictParameterizedExpression)* CLOSE_PAREN | iterVar=ID 'in' iterPred=iterablePredicate (',' parForParams+=strictParameterizedExpression)* ) ':' NEWLINE INDENT (body+=statement)+  DEDENT  # ParForStatement
+    | 'while' ( OPEN_PAREN predicate=expression CLOSE_PAREN | predicate=expression ) ':' NEWLINE INDENT (body+=statement)+  DEDENT  # WhileStatement
+    // ------------------------------------------
+    // A bare NEWLINE is accepted as an (empty) statement
+    | NEWLINE #IgnoreNewLine
+;
+
+// Range of a for/parfor loop: either `from:to`, or a call-style form
+// ID(from, to, increment). The grammar itself does not restrict which ID is used
+// in the call-style form.
+iterablePredicate returns [ ExpressionInfo info ]
+  @init {
+         // This action runs before any alternative of this rule is matched
+         $info = new ExpressionInfo();
+  } :
+    from=expression ':' to=expression #IterablePredicateColonExpression
+    | ID OPEN_PAREN from=expression ',' to=expression ',' increment=expression CLOSE_PAREN #IterablePredicateSeqExpression
+    ;
+
+// Function definitions. 'def' declares a function with an indented body;
+// 'defExternal' declares a function without a body, followed by
+// 'implemented' 'in' ( key="value", ... ). In both forms the parameters are typed
+// (name ':' type) and the '->' ( ... ) list of outputs is optional.
+functionStatement returns [ StatementInfo info ]
+@init {
+       // This action runs before any alternative of this rule is matched
+       $info = new StatementInfo();
+} :
+    // ------------------------------------------
+    // FunctionStatement & ExternalFunctionStatement
+    // small change: only allow typed arguments here ... instead of data identifier
+    'def' name=ID  OPEN_PAREN ( inputParams+=typedArgNoAssign (',' inputParams+=typedArgNoAssign)* )? CLOSE_PAREN  ( '->' OPEN_PAREN ( outputParams+=typedArgNoAssign (',' outputParams+=typedArgNoAssign)* )? CLOSE_PAREN )? ':' NEWLINE INDENT (body+=statement)+ DEDENT # InternalFunctionDefExpression
+    | 'defExternal' name=ID  OPEN_PAREN ( inputParams+=typedArgNoAssign (',' inputParams+=typedArgNoAssign)* )? CLOSE_PAREN  ( '->' OPEN_PAREN ( outputParams+=typedArgNoAssign (',' outputParams+=typedArgNoAssign)* )? CLOSE_PAREN )?   'implemented' 'in' OPEN_PAREN ( otherParams+=strictParameterizedKeyValueString (',' otherParams+=strictParameterizedKeyValueString)* )? CLOSE_PAREN NEWLINE    # ExternalFunctionDefExpression
+    // ------------------------------------------
+;
+
+
+// Something that names data: a plain identifier, an indexed identifier or a
+// command line parameter ($name or $position).
+// Other data identifiers are typedArgNoAssign, parameterizedExpression and strictParameterizedExpression
+dataIdentifier returns [ ExpressionInfo dataInfo ]
+@init {
+       // This action runs before any alternative of this rule is matched
+       $dataInfo = new ExpressionInfo();
+       // $dataInfo.expr = new org.apache.sysml.parser.DataIdentifier();
+} :
+    // ------------------------------------------
+    // IndexedIdentifier
+    // The ',' between the row part and the column part is mandatory; each part is
+    // optional and is either a single expression or a lower:upper range.
+    name=ID OPEN_BRACK (rowLower=expression (':' rowUpper=expression)?)? ',' (colLower=expression (':' colUpper=expression)?)? CLOSE_BRACK # IndexedExpression
+    // ------------------------------------------
+    | ID                                            # SimpleDataIdentifierExpression
+    | COMMANDLINE_NAMED_ID                          # CommandlineParamExpression
+    | COMMANDLINE_POSITION_ID                       # CommandlinePositionExpression
+;
+// Expressions. This is a left-recursive rule: ANTLR 4 gives earlier alternatives higher
+// precedence, so the operator alternatives below are listed from tightest to loosest
+// binding. Do not reorder them without intending to change operator precedence.
+expression returns [ ExpressionInfo info ]
+@init {
+       // This action runs before any alternative of this rule is matched
+       $info = new ExpressionInfo();
+       // $info.expr = new org.apache.sysml.parser.BinaryExpression(org.apache.sysml.parser.Expression.BinaryOp.INVALID);
+} :
+    // ------------------------------------------
+    // BinaryExpression
+    // power (right-associative)
+    <assoc=right> left=expression op='**' right=expression  # PowerExpression
+    // unary plus and minus
+    | op=('-'|'+') left=expression                        # UnaryExpression
+    // sequence - since we are only using this into for loop => Array not supported
+    //| left=expression op=':' right=expression             # SequenceExpression
+    // matrix multiply
+    // | left=expression op='*' right=expression           # MatrixMulExpression
+    // modulus and integer division
+    | left=expression op=('//' | '%' ) right=expression # ModIntDivExpression
+    // arithmetic multiply and divide
+    | left=expression op=('*'|'/') right=expression       # MultDivExpression
+    // arithmetic addition and subtraction
+    | left=expression op=('+'|'-') right=expression       # AddSubExpression
+    // ------------------------------------------
+    // RelationalExpression
+    | left=expression op=('>'|'>='|'<'|'<='|'=='|'!=') right=expression # RelationalExpression
+    // ------------------------------------------
+    // BooleanExpression
+    // boolean not
+    | op='!' left=expression # BooleanNotExpression
+    // boolean and
+    | left=expression op=('&'|'and') right=expression # BooleanAndExpression
+    // boolean or
+    | left=expression op=('|'|'or') right=expression # BooleanOrExpression
+
+    // ---------------------------------
+    // only applicable for builtin function expressions
+    // Add following additional functions and check number of parameters:
+    // power, full, matrix, reshape, dot
+    // Also take care of dotted calls such as y.transpose(): the part before the '.' is
+    // sometimes a matrix object and sometimes a namespace (see the ID rule)
+    | name=ID OPEN_PAREN (paramExprs+=parameterizedExpression (',' paramExprs+=parameterizedExpression)* )? CLOSE_PAREN ';'*  # BuiltinFunctionExpression
+
+    // 4. Atomic
+    | OPEN_PAREN left=expression CLOSE_PAREN                       # AtomicExpression
+
+    // Should you allow indexed expression here ?
+    // | OPEN_BRACK targetList+=expression (',' targetList+=expression)* CLOSE_BRACK  # MultiIdExpression
+
+    // | BOOLEAN                                       # ConstBooleanIdExpression
+    | 'True'                                        # ConstTrueExpression
+    | 'False'                                       # ConstFalseExpression
+    | INT                                           # ConstIntIdExpression
+    | DOUBLE                                        # ConstDoubleIdExpression
+    | STRING                                        # ConstStringIdExpression
+    | dataIdentifier                                # DataIdExpression
+    // Special
+    // | 'NULL' | 'NA' | 'Inf' | 'NaN'
+;
+
+// Typed function parameter: name ':' type
+typedArgNoAssign : paramName=ID ':' paramType=ml_type ;
+// Call argument, optionally named: [name '='] value
+parameterizedExpression : (paramName=ID '=')? paramVal=expression;
+// Named argument whose name is mandatory (used for the parfor parameters)
+strictParameterizedExpression : paramName=ID '=' paramVal=expression ;
+// Named argument whose value must be a string literal (used by defExternal)
+strictParameterizedKeyValueString : paramName=ID '=' paramVal=STRING ;
+// Identifier with at most one '.': [prefix '.'] name, where both parts start with a letter
+// and continue with letters, digits or '_'.
+// sometimes this is matrix object and sometimes its namespace
+ID : (ALPHABET (ALPHABET|DIGIT|'_')*  '.')? ALPHABET (ALPHABET|DIGIT|'_')*
+    // Special ID cases:
+   // | 'matrix' // --> This is a special case which causes lot of headache
+   // | 'scalar' |  'float' | 'int' | 'bool' // corresponds to as.scalar, as.double, as.integer and as.logical
+   // NOTE(review): 'index.return' is already matched by the first alternative
+   | 'index.return'
+;
+// Unfortunately, we have datatype name clashing with builtin function name: matrix :(
+// Therefore, ugly work around for checking datatype
+// A type is either a bare value type or dataType '[' valueType ']'. Both parts are plain
+// IDs here; the grammar does not check which names are used.
+ml_type :  valueType | dataType OPEN_BRACK valueType CLOSE_BRACK;
+// Note to reduce number of keywords, these are case-sensitive,
+// To allow case-insenstive,  'int' becomes: ('i' | 'I') ('n' | 'N') ('t' | 'T')
+valueType:
+    ID # ValueDataTypeCheck
+   //  'int' | 'str' | 'bool' | 'float'
+;
+dataType:
+        // 'scalar' # ScalarDataTypeDummyCheck
+        // |
+        ID # MatrixDataTypeCheck //{ if($ID.text.compareTo("matrix") != 0) { notifyErrorListeners("incorrect datatype"); } }
+        //|  'matrix' //---> See ID, this causes lot of headache
+        ;
+// Integer literal: digits with an optional L/l suffix.
+// INT is declared before DOUBLE, and DOUBLE's second alternative also matches plain
+// digits; on such a tie ANTLR picks the rule declared first, i.e. INT.
+INT : DIGIT+  [Ll]?;
+// BOOLEAN : 'TRUE' | 'FALSE';
+// Floating point literal: digits with a '.', digits with an optional exponent, or a
+// leading '.' followed by digits; each form takes an optional exponent and L/l suffix.
+DOUBLE: DIGIT+ '.' DIGIT* EXP? [Ll]?
+| DIGIT+ EXP? [Ll]?
+| '.' DIGIT+ EXP? [Ll]?
+;
+// NOTE(review): DIGIT and ALPHABET are declared as tokens, not fragments, although they
+// are only used as building blocks of other lexer rules in this grammar.
+DIGIT: '0'..'9';
+ALPHABET : [a-zA-Z] ;
+// Exponent part; reuses INT, so the exponent digits may carry an L/l suffix too.
+fragment EXP : ('E' | 'e') ('+' | '-')? INT ;
+// Command line parameters: $name and $<digits>
+COMMANDLINE_NAMED_ID: '$' ALPHABET (ALPHABET|DIGIT|'_')*;
+COMMANDLINE_POSITION_ID: '$' DIGIT+;
+
+// supports single and double quoted string with escape characters
+STRING: '"' ( ESC | ~[\\"] )*? '"' | '\'' ( ESC | ~[\\'] )*? '\'';
+// Backslash followed by one of: a b t n f r v " ' \
+fragment ESC : '\\' [abtnfrv"'\\] ;
+// Comments, whitespaces and new line
+// LINE_COMMENT : '#' .*? '\r'? '\n' -> skip ;
+// MULTILINE_BLOCK_COMMENT : '/*' .*? '*/' -> skip ;
+// WHITESPACE : (' ' | '\r' | '\n')+ -> skip ;
+
+// Bracket tokens keep the `opened` counter up to date; while it is greater than zero the
+// NEWLINE rule skips line breaks, so a bracketed expression may span several lines.
+OPEN_BRACK : '[' {opened++;};
+CLOSE_BRACK : ']' {opened--;};
+OPEN_PAREN : '(' {opened++;};
+CLOSE_PAREN : ')' {opened--;};
+// OPEN_BRACE : '{' {opened++;};
+// CLOSE_BRACE : '}' {opened--;};
+
+// Runs of blanks and tabs
+fragment SPACES : [ \t]+ ;
+// '#' up to, but not including, the end of the line
+fragment COMMENT : '#' ~[\r\n]* ;
+// Backslash, optional blanks, then a line break: explicit line continuation
+fragment LINE_JOINING : '\\' SPACES? ( '\r'? '\n' | '\r' ) ;
+
+// A line break plus the leading blanks/tabs of the following line. The action turns the
+// change in indentation into NEWLINE / INDENT / DEDENT tokens, which are queued through
+// emit(); it relies on the lexer members indents, opened, tokens, getIndentationCount()
+// and createDedent().
+NEWLINE : ( '\r'? '\n' | '\r' ) SPACES?
+{
+    // Split the matched text into its line-break part and its indentation part.
+    String newLine = getText().replaceAll("[^\r\n]+", "");
+    String spaces = getText().replaceAll("[\r\n]+", "");
+    int next = _input.LA(1);
+    if (opened > 0 || next == '\r' || next == '\n' || next == '#') {
+        // If we're inside a list or on a blank line, ignore all indents,
+        // dedents and line breaks.
+        // (a line that only holds a comment is treated the same way)
+        skip();
+        if(debugIndentRules) {
+            if(next == '\r' || next == '\n') {
+                    System.out.println("4.1 Skipping (blank lines)");
+            }
+            else if(next == '#') {
+                System.out.println("4.2 Skipping (comment)");
+            }
+            else {
+                System.out.println("4.2 Skipping something else");
+            }
+        }
+    }
+    else {
+        // The statement-terminating NEWLINE is queued in every remaining case.
+        emit(commonToken(NEWLINE, newLine));
+
+        int indent = getIndentationCount(spaces);
+        // An empty stack stands for indentation level 0.
+        int previous = indents.isEmpty() ? 0 : indents.peek();
+        if (indent == previous) {
+            if(debugIndentRules)
+                System.out.println("3. Skipping identation as of same size:" + next);
+
+            // skip indents of the same size as the present indent-size
+            skip();
+        }
+        else if (indent > previous) {
+            if(debugIndentRules)
+                System.out.println("1. Indent:" + next);
+
+            // Deeper than before: open a new level and queue one INDENT.
+            indents.push(indent);
+            emit(commonToken(PydmlParser.INDENT, spaces));
+        }
+        else {
+            // Possibly emit more than 1 DEDENT token.
+            // Levels deeper than the new indentation are closed; the loop does not check
+            // that the new indentation equals one of the remaining levels.
+            while(!indents.isEmpty() && indents.peek() > indent) {
+                if(debugIndentRules)
+                    System.out.println("2. Dedent:" + next);
+
+                this.emit(createDedent());
+                indents.pop();
+            }
+        }
+    }
+}
+;
+
+// Blanks/tabs, '#' comments and backslash line continuations produce no token.
+SKIP : ( SPACES | COMMENT | LINE_JOINING ) -> skip ;

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/c04fc99f/src/main/java/org/apache/sysml/parser/pydml/PydmlSyntacticErrorListener.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/parser/pydml/PydmlSyntacticErrorListener.java b/src/main/java/org/apache/sysml/parser/pydml/PydmlSyntacticErrorListener.java
new file mode 100644
index 0000000..8b2cc34
--- /dev/null
+++ b/src/main/java/org/apache/sysml/parser/pydml/PydmlSyntacticErrorListener.java
@@ -0,0 +1,114 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.parser.pydml;
+
+import java.util.Stack;
+
+import org.antlr.v4.runtime.BaseErrorListener;
+import org.antlr.v4.runtime.RecognitionException;
+import org.antlr.v4.runtime.Recognizer;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+import org.apache.sysml.api.DMLScript;
+
+/**
+ * Holder class for the error listener used while parsing and validating PyDML scripts.
+ */
+public class PydmlSyntacticErrorListener 
+{	
+	private static final Log LOG = LogFactory.getLog(DMLScript.class.getName());
+	
+	/**
+	 * ANTLR error listener that logs syntax errors, validation errors and validation
+	 * warnings, prefixed with the name of the file currently being processed, and
+	 * remembers whether at least one error has been reported.
+	 */
+	public static class CustomDmlErrorListener extends BaseErrorListener {
+	
+		private boolean atleastOneError = false;
+		// File names of the scripts being processed; the top entry is used in messages
+		private final Stack<String> currentFileName = new Stack<String>();
+		
+		/**
+		 * Makes the given file the current one.
+		 * @param currentFilePath name used as message prefix; may be null, in which case
+		 *        messages are logged without a file prefix
+		 */
+		public void pushCurrentFileName(String currentFilePath) {
+			currentFileName.push(currentFilePath);
+		}
+		
+		/**
+		 * @return the current file name without removing it
+		 * @throws java.util.EmptyStackException if no file name has been pushed
+		 */
+		public String peekFileName() {
+			return currentFileName.peek();
+		}
+		
+		/**
+		 * Removes the current file name.
+		 * @return the removed file name
+		 * @throws java.util.EmptyStackException if no file name has been pushed
+		 */
+		public String popFileName() {
+			return currentFileName.pop();
+		}
+		
+		/**
+		 * Builds the text "[file ]line L:C msg". The file prefix is left out when no file
+		 * name is available: nothing pushed, or a null name pushed. A null name used to
+		 * be printed literally as "null".
+		 */
+		private String describe(int line, int charPositionInLine, String msg) {
+			String location = "line "+line+":"+charPositionInLine+" "+msg;
+			String fileName = currentFileName.empty() ? null : currentFileName.peek();
+			return (fileName == null) ? location : fileName + " " + location;
+		}
+		
+		/**
+		 * Reports an error found by the syntactic validator and marks the parse as failed.
+		 * @param line line of the offending construct
+		 * @param charPositionInLine column of the offending construct
+		 * @param msg error description
+		 */
+		public void validationError(int line, int charPositionInLine, String msg) {
+			try {
+				setAtleastOneError(true);
+				// Print error messages with file name
+				LOG.error(describe(line, charPositionInLine, msg));
+			}
+			catch(Exception e1) {
+				// reporting must never fail itself
+				LOG.error("ERROR: while customizing error message:" + e1);
+			}
+		}
+		
+		/**
+		 * Reports a warning found by the syntactic validator; the error flag is not touched.
+		 * @param line line of the construct
+		 * @param charPositionInLine column of the construct
+		 * @param msg warning description
+		 */
+		public void validationWarning(int line, int charPositionInLine, String msg) {
+			try {
+				//atleastOneError = true; ---> not an error, just warning
+				// Print error messages with file name
+				LOG.warn(describe(line, charPositionInLine, msg));
+			}
+			catch(Exception e1) {
+				// reporting must never fail itself
+				LOG.warn("ERROR: while customizing error message:" + e1);
+			}
+		}
+		
+		/**
+		 * Called by ANTLR for every syntax error; logs it and marks the parse as failed.
+		 */
+		@Override
+		public void syntaxError(Recognizer<?, ?> recognizer, Object offendingSymbol,
+				int line, int charPositionInLine,
+				String msg, RecognitionException e)
+		{	
+			try {
+				setAtleastOneError(true);
+				// Print error messages with file name
+				LOG.error(describe(line, charPositionInLine, msg));
+			}
+			catch(Exception e1) {
+				// reporting must never fail itself
+				LOG.error("ERROR: while customizing error message:" + e1);
+			}
+		}
+
+		/**
+		 * @return true once a syntax or validation error has been reported
+		 */
+		public boolean isAtleastOneError() {
+			return atleastOneError;
+		}
+
+		/**
+		 * @param atleastOneError new value of the error flag
+		 */
+		public void setAtleastOneError(boolean atleastOneError) {
+			this.atleastOneError = atleastOneError;
+		}
+	}
+}