You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by du...@apache.org on 2016/01/21 02:06:22 UTC
[5/8] incubator-systemml git commit: [SYSTEMML-148] Refactored
.parser.{antlr4, python} to .parser.{dml, pydml}.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/c04fc99f/src/main/java/org/apache/sysml/parser/pydml/PyDMLParserWrapper.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/parser/pydml/PyDMLParserWrapper.java b/src/main/java/org/apache/sysml/parser/pydml/PyDMLParserWrapper.java
new file mode 100644
index 0000000..25ee006
--- /dev/null
+++ b/src/main/java/org/apache/sysml/parser/pydml/PyDMLParserWrapper.java
@@ -0,0 +1,283 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.parser.pydml;
+
+import java.io.ByteArrayInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.antlr.v4.runtime.ANTLRInputStream;
+import org.antlr.v4.runtime.BailErrorStrategy;
+import org.antlr.v4.runtime.CommonTokenStream;
+import org.antlr.v4.runtime.DefaultErrorStrategy;
+import org.antlr.v4.runtime.atn.PredictionMode;
+import org.antlr.v4.runtime.misc.ParseCancellationException;
+import org.antlr.v4.runtime.tree.ParseTree;
+import org.antlr.v4.runtime.tree.ParseTreeWalker;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.sysml.api.DMLScript;
+import org.apache.sysml.parser.AParserWrapper;
+import org.apache.sysml.parser.DMLProgram;
+import org.apache.sysml.parser.ForStatement;
+import org.apache.sysml.parser.ForStatementBlock;
+import org.apache.sysml.parser.FunctionStatementBlock;
+import org.apache.sysml.parser.IfStatement;
+import org.apache.sysml.parser.IfStatementBlock;
+import org.apache.sysml.parser.ImportStatement;
+import org.apache.sysml.parser.LanguageException;
+import org.apache.sysml.parser.ParForStatement;
+import org.apache.sysml.parser.ParForStatementBlock;
+import org.apache.sysml.parser.ParseException;
+import org.apache.sysml.parser.Statement;
+import org.apache.sysml.parser.StatementBlock;
+import org.apache.sysml.parser.WhileStatement;
+import org.apache.sysml.parser.WhileStatementBlock;
+import org.apache.sysml.parser.dml.DMLParserWrapper;
+import org.apache.sysml.parser.pydml.PydmlParser.FunctionStatementContext;
+import org.apache.sysml.parser.pydml.PydmlParser.PmlprogramContext;
+import org.apache.sysml.parser.pydml.PydmlParser.StatementContext;
+import org.apache.sysml.parser.pydml.PydmlSyntacticErrorListener.CustomDmlErrorListener;
+
+/**
+ * Logic of this wrapper is similar to DMLParserWrapper.
+ *
+ * Note: ExpressionInfo and StatementInfo are simply wrapper objects and are reused in both DML and PyDML parsers.
+ *
+ */
+public class PyDMLParserWrapper extends AParserWrapper
+{
+ private static final Log LOG = LogFactory.getLog(DMLScript.class.getName());
+
+ /**
+ * Wraps a single statement in the statement block type that matches it. Used by createDMLProgram; per the original note it is also called from PydmlSyntacticValidator for for, parfor, while, ...
+ * @param current a statement
+ * @return a new ParForStatementBlock, ForStatementBlock, IfStatementBlock or WhileStatementBlock for the matching statement type, otherwise a plain StatementBlock; the statement has already been added to it
+ */
+ public static StatementBlock getStatementBlock(org.apache.sysml.parser.Statement current) {
+ StatementBlock blk = null;
+ if(current instanceof ParForStatement) { // tested before ForStatement; presumably ParForStatement is a ForStatement subtype - TODO confirm
+ blk = new ParForStatementBlock();
+ blk.addStatement(current);
+ }
+ else if(current instanceof ForStatement) {
+ blk = new ForStatementBlock();
+ blk.addStatement(current);
+ }
+ else if(current instanceof IfStatement) {
+ blk = new IfStatementBlock();
+ blk.addStatement(current);
+ }
+ else if(current instanceof WhileStatement) {
+ blk = new WhileStatementBlock();
+ blk.addStatement(current);
+ }
+ else {
+ // Everything else, including ImportStatement, goes into a plain StatementBlock
+ blk = new StatementBlock();
+ blk.addStatement(current);
+ }
+ return blk;
+ }
+
+ /**
+ * Parses a PyDML script into a DMLProgram. The script text is mandatory here: a null or blank dmlScript is rejected even if a file name is given (doParse is the entry point that can read the file itself).
+ * @param fileName either full path or null --> only used for better error handling
+ * @param dmlScript required
+ * @param argVals handed unchanged to doParse
+ * @return the parsed program; never null, because a null result from doParse is turned into a ParseException
+ * @throws ParseException if dmlScript is null or blank, if doParse throws, or if doParse returns null
+ */
+ @Override
+ public DMLProgram parse(String fileName, String dmlScript, HashMap<String,String> argVals) throws ParseException {
+ DMLProgram prog = null;
+
+ if(dmlScript == null || dmlScript.trim().isEmpty()) {
+ throw new ParseException("Incorrect usage of parse. Please pass dmlScript not just filename");
+ }
+
+ // Delegate to a fresh wrapper instance; doParse sets up the pipeline required for ANTLR parsing
+ PyDMLParserWrapper parser = new PyDMLParserWrapper();
+ prog = parser.doParse(fileName, dmlScript, argVals);
+
+ if(prog == null) {
+ throw new ParseException("One or more errors found during parsing. (could not construct AST for file: " + fileName + "). Cannot proceed ahead.");
+ }
+ return prog;
+ }
+
+ /**
+ * Runs the ANTLR lexer/parser and the syntactic validator over a PyDML script and builds the DMLProgram. Besides parse(), this is supposed to be called directly only from PydmlSyntacticValidator when it encounters 'import'.
+ * @param fileName script path used in error messages; the script text is read from it via DMLParserWrapper.readDMLScript when dmlScript is null (argVals is handed to the validator)
+ * @return the program, or null if at least one error was reported or createDMLProgram gave up; read, parse and translation failures are rethrown as ParseException
+ */
+ public DMLProgram doParse(String fileName, String dmlScript, HashMap<String,String> argVals) throws ParseException {
+ DMLProgram dmlPgm = null;
+
+ ANTLRInputStream in;
+ try {
+ if(dmlScript == null) {
+ dmlScript = DMLParserWrapper.readDMLScript(fileName);
+ }
+
+ InputStream stream = new ByteArrayInputStream(dmlScript.getBytes());
+ in = new org.antlr.v4.runtime.ANTLRInputStream(stream);
+ }
+ catch (FileNotFoundException e) {
+ throw new ParseException("ERROR: Cannot find file:" + fileName, e);
+ }
+ catch (IOException e) {
+ throw new ParseException("ERROR: Cannot open file:" + fileName, e);
+ }
+ catch (LanguageException e) {
+ throw new ParseException("ERROR: " + e.getMessage(), e);
+ }
+
+ PmlprogramContext ast = null;
+ CustomDmlErrorListener errorListener = new CustomDmlErrorListener();
+
+ try {
+ PydmlLexer lexer = new PydmlLexer(in);
+ CommonTokenStream tokens = new CommonTokenStream(lexer);
+ PydmlParser antlr4Parser = new PydmlParser(tokens);
+
+ boolean tryOptimizedParsing = false; // For now no optimization, since it is not able to parse integer value.
+
+ if(tryOptimizedParsing) {
+ // Try faster and simpler SLL
+ antlr4Parser.getInterpreter().setPredictionMode(PredictionMode.SLL);
+ antlr4Parser.removeErrorListeners();
+ antlr4Parser.setErrorHandler(new BailErrorStrategy());
+ try{
+ ast = antlr4Parser.pmlprogram();
+ // If successful, no need to try out full LL(*) ... SLL was enough. NOTE(review): no file name is pushed on this path, so the popFileName() call further down would hit an empty stack if this branch is ever enabled.
+ }
+ catch(ParseCancellationException ex) {
+ // Error occurred, so now try full LL(*) for better error messages
+ tokens.reset();
+ antlr4Parser.reset();
+ if(fileName != null) {
+ errorListener.pushCurrentFileName(fileName);
+ }
+ else {
+ errorListener.pushCurrentFileName("MAIN_SCRIPT");
+ }
+ // Set our custom error listener
+ antlr4Parser.addErrorListener(errorListener);
+ antlr4Parser.setErrorHandler(new DefaultErrorStrategy());
+ antlr4Parser.getInterpreter().setPredictionMode(PredictionMode.LL);
+ ast = antlr4Parser.pmlprogram();
+ }
+ }
+ else {
+ // Set our custom error listener
+ antlr4Parser.removeErrorListeners();
+ antlr4Parser.addErrorListener(errorListener);
+ errorListener.pushCurrentFileName(fileName);
+
+ // Now do the parsing
+ ast = antlr4Parser.pmlprogram();
+ }
+ }
+ catch(Exception e) {
+ throw new ParseException("ERROR: Cannot parse the program:" + fileName, e);
+ }
+
+
+ try {
+ // Now convert the parse tree into DMLProgram
+ // Do syntactic validation while converting
+ ParseTree tree = ast;
+ // And also do syntactic validation
+ ParseTreeWalker walker = new ParseTreeWalker();
+ PydmlSyntacticValidatorHelper helper = new PydmlSyntacticValidatorHelper(errorListener);
+ PydmlSyntacticValidator validator = new PydmlSyntacticValidator(helper, fileName, argVals);
+ walker.walk(validator, tree);
+ errorListener.popFileName();
+ if(errorListener.isAtleastOneError()) {
+ return null;
+ }
+ dmlPgm = createDMLProgram(ast);
+ }
+ catch(Exception e) {
+ throw new ParseException("ERROR: Cannot translate the parse tree into DMLProgram: " + e.getMessage(), e);
+ }
+
+ return dmlPgm;
+ }
+
+
+ private DMLProgram createDMLProgram(PmlprogramContext ast) {
+ // Builds the DMLProgram from the walked parse tree; logs and returns null if a function, statement or import cannot be processed.
+ DMLProgram dmlPgm = new DMLProgram();
+
+ // First add all the functions
+ for(FunctionStatementContext fn : ast.functionBlocks) {
+ FunctionStatementBlock functionStmtBlk = new FunctionStatementBlock();
+ functionStmtBlk.addStatement(fn.info.stmt);
+ try {
+ // TODO: currently the logic of nested namespace is not clear, so every function is registered under the default namespace.
+ String namespace = DMLProgram.DEFAULT_NAMESPACE;
+ dmlPgm.addFunctionStatementBlock(namespace, fn.info.functionName, functionStmtBlk);
+ } catch (LanguageException e) {
+ LOG.error("line: " + fn.start.getLine() + ":" + fn.start.getCharPositionInLine() + " cannot process the function " + fn.info.functionName);
+ return null;
+ }
+ }
+
+ // Then add all the statements
+ for(StatementContext stmtCtx : ast.blocks) {
+ Statement current = stmtCtx.info.stmt;
+ if(current == null) {
+ LOG.error("line: " + stmtCtx.start.getLine() + ":" + stmtCtx.start.getCharPositionInLine() + " cannot process the statement");
+ return null;
+ }
+
+ // Skip statements flagged as empty-newline placeholders
+ if(current.isEmptyNewLineStatement()) {
+ continue;
+ }
+
+ if(current instanceof ImportStatement) {
+ // Import statements additionally contribute their namespace entries (and are still wrapped in a block below)
+ if(stmtCtx.info.namespaces != null) {
+ // Add the DMLProgram entries into current program
+ for(Map.Entry<String, DMLProgram> entry : stmtCtx.info.namespaces.entrySet()) {
+ dmlPgm.getNamespaces().put(entry.getKey(), entry.getValue());
+ }
+ }
+ else {
+ LOG.error("line: " + stmtCtx.start.getLine() + ":" + stmtCtx.start.getCharPositionInLine() + " cannot process the import statement");
+ return null;
+ }
+ }
+
+ // Now wrap statement into individual statement block
+ // merge statement will take care of merging these blocks
+ dmlPgm.addStatementBlock(getStatementBlock(current));
+ }
+
+ dmlPgm.mergeStatementBlocks();
+ return dmlPgm;
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/c04fc99f/src/main/java/org/apache/sysml/parser/pydml/Pydml.g4
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/parser/pydml/Pydml.g4 b/src/main/java/org/apache/sysml/parser/pydml/Pydml.g4
new file mode 100644
index 0000000..425859c
--- /dev/null
+++ b/src/main/java/org/apache/sysml/parser/pydml/Pydml.g4
@@ -0,0 +1,384 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+grammar Pydml;
+
+@header
+{
+ // package org.apache.sysml.parser.pydml;
+ //import org.apache.sysml.parser.dml.StatementInfo;
+ //import org.apache.sysml.parser.dml.ExpressionInfo;
+}
+
+// This antlr grammar is based on Python 3.3 language reference: https://docs.python.org/3.3/reference/grammar.html
+
+tokens { INDENT, DEDENT }
+
+@lexer::members {
+ private boolean debugIndentRules = false;
+
+ // Indentation level stack
+ private java.util.Stack<Integer> indents = new java.util.Stack<Integer>();
+
+ // Extra tokens queue (see the NEWLINE rule).
+ private java.util.Queue<Token> tokens = new java.util.LinkedList<Token>();
+
+ // Number of opened braces, brackets and parenthesis.
+ private int opened = 0;
+
+ // This is only used to set the line number for dedent
+ private Token lastToken = null;
+
+
+ @Override
+ public void emit(Token t) {
+ if(debugIndentRules)
+ System.out.println("Emitted token:" + t);
+ // Set the token on the lexer and also queue it; nextToken() returns queued tokens before the freshly lexed one.
+ super.setToken(t);
+ tokens.offer(t);
+ }
+
+
+ @Override
+ public Token nextToken() {
+ if (_input.LA(1) == EOF && !this.indents.isEmpty()) {
+ if(debugIndentRules)
+ System.out.println("EOF reached and expecting some DEDENTS, so emitting them");
+ // At end of input with open indentation levels: queue a NEWLINE, one DEDENT per open level, then EOF.
+ tokens.poll(); // drops the current head of the queue (if any) - presumably superseded by the synthetic NEWLINE below; TODO confirm
+ this.emit(commonToken(PydmlParser.NEWLINE, "\n"));
+
+ // Now emit as many DEDENT tokens as needed.
+ while (!indents.isEmpty()) {
+ if(debugIndentRules)
+ System.out.println("Emitting (inserted) DEDENTS");
+
+ this.emit(createDedent());
+ indents.pop();
+ }
+ // Put the EOF back on the token stream.
+ this.emit(commonToken(PydmlParser.EOF, "<EOF>"));
+ }
+ Token next = super.nextToken();
+ if (next.getChannel() == Token.DEFAULT_CHANNEL) {
+ // Keep track of the last token on the default channel.
+ this.lastToken = next;
+ }
+ Token retVal = tokens.isEmpty() ? next : tokens.poll();
+ // Queued tokens take precedence; `next` itself is only returned when the queue is empty.
+ if(debugIndentRules)
+ System.out.println("Returning nextToken: [" + retVal + "]<<" + tokens.isEmpty());
+
+ return retVal;
+ }
+
+ private Token createDedent() { // builds an empty-text DEDENT token positioned on the line of the last default-channel token
+ CommonToken dedent = commonToken(PydmlParser.DEDENT, "");
+ dedent.setLine(this.lastToken.getLine()); // lastToken stays null until nextToken() has returned from super.nextToken() with a default-channel token
+ return dedent;
+ }
+
+ private CommonToken commonToken(int type, String text) {
+ // Main change: the position-based construction below emitted 3 additional characters, so it is commented out for now.
+ // int start = this.getCharIndex();
+ // int stop = start + text.length();
+ // return new CommonToken(this._tokenFactorySourcePair, type, DEFAULT_TOKEN_CHANNEL, start, stop);
+ return new CommonToken(type, text); // Main change: the token carries only its type and text
+ }
+
+ // Calculates the indentation level from the spaces:
+ // "Tabs are replaced (from left to right) by one to eight spaces
+ // such that the total number of characters up to and including
+ // the replacement is a multiple of eight [...]"
+ // https://docs.python.org/3.1/reference/lexical_analysis.html#indentation
+ static int getIndentationCount(String spaces) {
+ int count = 0;
+ for (char ch : spaces.toCharArray()) {
+ switch (ch) {
+ case '\t':
+ count += 8 - (count % 8); // advance to the next multiple of eight
+ break;
+ default:
+ // Any other character (normally a space) counts as one column.
+ count++;
+ }
+ }
+ return count;
+ }
+}
+
+
+// 2. Modify this g4 by comparing it with Java:
+// - https://pythonconquerstheuniverse.wordpress.com/2009/10/03/python-java-a-side-by-side-comparison/
+// - http://www.cs.gordon.edu/courses/cps122/handouts-2014/From%20Python%20to%20Java%20Lecture/A%20Comparison%20of%20the%20Syntax%20of%20Python%20and%20Java.pdf
+// - http://cs.joensuu.fi/~pviktor/python/slides/cheatsheet.pdf
+// - http://www.interfaceware.com/manual/chameleon/scripts/quickreference.pdf
+
+// A PyDML program is a list of statements and function definitions
+// For now, we only allow global function definitions (not nested or inside a while block)
+pmlprogram: (blocks+=statement | functionBlocks+=functionStatement)* NEWLINE* EOF;
+
+
+
+statement returns [ StatementInfo info ]
+@init {
+ // This action runs regardless of which alternative of this rule is matched
+ $info = new StatementInfo();
+} :
+ // ------------------------------------------
+ // ImportStatement
+ 'source' OPEN_PAREN filePath = STRING CLOSE_PAREN 'as' namespace=ID NEWLINE # ImportStatement
+ | 'setwd' OPEN_PAREN pathValue = STRING CLOSE_PAREN NEWLINE # PathStatement
+ // ------------------------------------------
+ // AssignmentStatement
+ | targetList+=dataIdentifier '=' 'ifdef' OPEN_PAREN commandLineParam=dataIdentifier ',' source=expression CLOSE_PAREN NEWLINE # IfdefAssignmentStatement
+ // ------------------------------------------
+ // Treat function call as AssignmentStatement or MultiAssignmentStatement
+ // For backward compatibility and also since the behavior of foo() * A + foo() ... where foo returns A
+ // Convert FunctionCallIdentifier(paramExprs, ..) -> source
+ | // TODO: Throw an informative error if the user does not provide the optional assignment
+ ( targetList+=dataIdentifier '=' )? name=ID OPEN_PAREN (paramExprs+=parameterizedExpression (',' paramExprs+=parameterizedExpression)* )? CLOSE_PAREN NEWLINE # FunctionCallAssignmentStatement
+ | OPEN_BRACK targetList+=dataIdentifier (',' targetList+=dataIdentifier)* CLOSE_BRACK '=' name=ID OPEN_PAREN (paramExprs+=parameterizedExpression (',' paramExprs+=parameterizedExpression)* )? CLOSE_PAREN NEWLINE # FunctionCallMultiAssignmentStatement
+ // {notifyErrorListeners("Too many parentheses");}
+ // We don't support block statement
+ // | '{' body+=expression ';'* ( body+=expression ';'* )* '}' # BlockStatement
+ // ------------------------------------------
+ | targetList+=dataIdentifier '=' source=expression NEWLINE # AssignmentStatement
+ // IfStatement
+ // | 'if' OPEN_PAREN predicate=expression CLOSE_PAREN (ifBody+=statement ';'* | NEWLINE INDENT (ifBody+=statement)+ DEDENT ) ('else' (elseBody+=statement ';'* | '{' (elseBody+=statement ';'*)* '}'))? # IfStatement
+ | 'if' (OPEN_PAREN predicate=expression CLOSE_PAREN | predicate=expression) ':' NEWLINE INDENT (ifBody+=statement)+ DEDENT ('else' ':' NEWLINE INDENT (elseBody+=statement)+ DEDENT )? # IfStatement
+ // ------------------------------------------
+ // ForStatement & ParForStatement
+ | 'for' (OPEN_PAREN iterVar=ID 'in' iterPred=iterablePredicate (',' parForParams+=strictParameterizedExpression)* CLOSE_PAREN | iterVar=ID 'in' iterPred=iterablePredicate (',' parForParams+=strictParameterizedExpression)* ) ':' NEWLINE INDENT (body+=statement)+ DEDENT # ForStatement
+ // Convert strictParameterizedExpression to HashMap<String, String> for parForParams
+ | 'parfor' (OPEN_PAREN iterVar=ID 'in' iterPred=iterablePredicate (',' parForParams+=strictParameterizedExpression)* CLOSE_PAREN | iterVar=ID 'in' iterPred=iterablePredicate (',' parForParams+=strictParameterizedExpression)* ) ':' NEWLINE INDENT (body+=statement)+ DEDENT # ParForStatement
+ | 'while' ( OPEN_PAREN predicate=expression CLOSE_PAREN | predicate=expression ) ':' NEWLINE INDENT (body+=statement)+ DEDENT # WhileStatement
+ // ------------------------------------------
+ | NEWLINE #IgnoreNewLine
+;
+
+iterablePredicate returns [ ExpressionInfo info ]
+ @init {
+ // This action runs regardless of which alternative of this rule is matched
+ $info = new ExpressionInfo();
+ } :
+ from=expression ':' to=expression #IterablePredicateColonExpression
+ | ID OPEN_PAREN from=expression ',' to=expression ',' increment=expression CLOSE_PAREN #IterablePredicateSeqExpression
+ ;
+
+functionStatement returns [ StatementInfo info ]
+@init {
+ // This action runs regardless of which alternative of this rule is matched
+ $info = new StatementInfo();
+} :
+ // ------------------------------------------
+ // FunctionStatement & ExternalFunctionStatement
+ // small change: only allow typed arguments here ... instead of data identifier
+ 'def' name=ID OPEN_PAREN ( inputParams+=typedArgNoAssign (',' inputParams+=typedArgNoAssign)* )? CLOSE_PAREN ( '->' OPEN_PAREN ( outputParams+=typedArgNoAssign (',' outputParams+=typedArgNoAssign)* )? CLOSE_PAREN )? ':' NEWLINE INDENT (body+=statement)+ DEDENT # InternalFunctionDefExpression
+ | 'defExternal' name=ID OPEN_PAREN ( inputParams+=typedArgNoAssign (',' inputParams+=typedArgNoAssign)* )? CLOSE_PAREN ( '->' OPEN_PAREN ( outputParams+=typedArgNoAssign (',' outputParams+=typedArgNoAssign)* )? CLOSE_PAREN )? 'implemented' 'in' OPEN_PAREN ( otherParams+=strictParameterizedKeyValueString (',' otherParams+=strictParameterizedKeyValueString)* )? CLOSE_PAREN NEWLINE # ExternalFunctionDefExpression
+ // ------------------------------------------
+;
+
+
+// Other data identifiers are typedArgNoAssign, parameterizedExpression and strictParameterizedExpression
+dataIdentifier returns [ ExpressionInfo dataInfo ]
+@init {
+ // This action runs regardless of which alternative of this rule is matched
+ $dataInfo = new ExpressionInfo();
+ // $dataInfo.expr = new org.apache.sysml.parser.DataIdentifier();
+} :
+ // ------------------------------------------
+ // IndexedIdentifier
+ name=ID OPEN_BRACK (rowLower=expression (':' rowUpper=expression)?)? ',' (colLower=expression (':' colUpper=expression)?)? CLOSE_BRACK # IndexedExpression
+ // ------------------------------------------
+ | ID # SimpleDataIdentifierExpression
+ | COMMANDLINE_NAMED_ID # CommandlineParamExpression
+ | COMMANDLINE_POSITION_ID # CommandlinePositionExpression
+;
+expression returns [ ExpressionInfo info ]
+@init {
+ // This action runs regardless of which alternative of this rule is matched
+ $info = new ExpressionInfo();
+ // $info.expr = new org.apache.sysml.parser.BinaryExpression(org.apache.sysml.parser.Expression.BinaryOp.INVALID);
+} :
+ // ------------------------------------------
+ // BinaryExpression
+ // power
+ <assoc=right> left=expression op='**' right=expression # PowerExpression
+ // unary plus and minus
+ | op=('-'|'+') left=expression # UnaryExpression
+ // sequence - since we are only using this into for loop => Array not supported
+ //| left=expression op=':' right=expression # SequenceExpression
+ // matrix multiply
+ // | left=expression op='*' right=expression # MatrixMulExpression
+ // modulus and integer division
+ | left=expression op=('//' | '%' ) right=expression # ModIntDivExpression
+ // arithmetic multiply and divide
+ | left=expression op=('*'|'/') right=expression # MultDivExpression
+ // arithmetic addition and subtraction
+ | left=expression op=('+'|'-') right=expression # AddSubExpression
+ // ------------------------------------------
+ // RelationalExpression
+ | left=expression op=('>'|'>='|'<'|'<='|'=='|'!=') right=expression # RelationalExpression
+ // ------------------------------------------
+ // BooleanExpression
+ // boolean not
+ | op='!' left=expression # BooleanNotExpression
+ // boolean and
+ | left=expression op=('&'|'and') right=expression # BooleanAndExpression
+ // boolean or
+ | left=expression op=('|'|'or') right=expression # BooleanOrExpression
+
+ // ---------------------------------
+ // only applicable for builtin function expressions
+ // Add following additional functions and check number of parameters:
+ // power, full, matrix, reshape, dot
+ // Also take care of calls such as y.transpose(), where the prefix before '.' is sometimes a matrix object and sometimes a namespace
+ | name=ID OPEN_PAREN (paramExprs+=parameterizedExpression (',' paramExprs+=parameterizedExpression)* )? CLOSE_PAREN ';'* # BuiltinFunctionExpression
+
+ // 4. Atomic
+ | OPEN_PAREN left=expression CLOSE_PAREN # AtomicExpression
+
+ // Should you allow indexed expression here ?
+ // | OPEN_BRACK targetList+=expression (',' targetList+=expression)* CLOSE_BRACK # MultiIdExpression
+
+ // | BOOLEAN # ConstBooleanIdExpression
+ | 'True' # ConstTrueExpression
+ | 'False' # ConstFalseExpression
+ | INT # ConstIntIdExpression
+ | DOUBLE # ConstDoubleIdExpression
+ | STRING # ConstStringIdExpression
+ | dataIdentifier # DataIdExpression
+ // Special
+ // | 'NULL' | 'NA' | 'Inf' | 'NaN'
+;
+
+typedArgNoAssign : paramName=ID ':' paramType=ml_type ;
+parameterizedExpression : (paramName=ID '=')? paramVal=expression;
+strictParameterizedExpression : paramName=ID '=' paramVal=expression ;
+strictParameterizedKeyValueString : paramName=ID '=' paramVal=STRING ;
+// The prefix before '.' is sometimes a matrix object and sometimes a namespace
+ID : (ALPHABET (ALPHABET|DIGIT|'_')* '.')? ALPHABET (ALPHABET|DIGIT|'_')*
+ // Special ID cases:
+ // | 'matrix' // --> This is a special case which causes lot of headache
+ // | 'scalar' | 'float' | 'int' | 'bool' // corresponds to as.scalar, as.double, as.integer and as.logical
+ | 'index.return'
+;
+// Unfortunately, we have datatype name clashing with builtin function name: matrix :(
+// Therefore, ugly work around for checking datatype
+ml_type : valueType | dataType OPEN_BRACK valueType CLOSE_BRACK;
+// Note: to reduce the number of keywords, these are case-sensitive.
+// To allow case-insensitive matching, 'int' would become: ('i' | 'I') ('n' | 'N') ('t' | 'T')
+valueType:
+ ID # ValueDataTypeCheck
+ // 'int' | 'str' | 'bool' | 'float'
+;
+dataType:
+ // 'scalar' # ScalarDataTypeDummyCheck
+ // |
+ ID # MatrixDataTypeCheck //{ if($ID.text.compareTo("matrix") != 0) { notifyErrorListeners("incorrect datatype"); } }
+ //| 'matrix' //---> See ID, this causes lot of headache
+ ;
+INT : DIGIT+ [Ll]?;
+// BOOLEAN : 'TRUE' | 'FALSE';
+DOUBLE: DIGIT+ '.' DIGIT* EXP? [Ll]?
+| DIGIT+ EXP? [Ll]?
+| '.' DIGIT+ EXP? [Ll]?
+;
+DIGIT: '0'..'9';
+ALPHABET : [a-zA-Z] ;
+fragment EXP : ('E' | 'e') ('+' | '-')? INT ;
+COMMANDLINE_NAMED_ID: '$' ALPHABET (ALPHABET|DIGIT|'_')*;
+COMMANDLINE_POSITION_ID: '$' DIGIT+;
+
+// supports single and double quoted string with escape characters
+STRING: '"' ( ESC | ~[\\"] )*? '"' | '\'' ( ESC | ~[\\'] )*? '\'';
+fragment ESC : '\\' [abtnfrv"'\\] ;
+// Comments, whitespaces and new line
+// LINE_COMMENT : '#' .*? '\r'? '\n' -> skip ;
+// MULTILINE_BLOCK_COMMENT : '/*' .*? '*/' -> skip ;
+// WHITESPACE : (' ' | '\r' | '\n')+ -> skip ;
+
+OPEN_BRACK : '[' {opened++;};
+CLOSE_BRACK : ']' {opened--;};
+OPEN_PAREN : '(' {opened++;};
+CLOSE_PAREN : ')' {opened--;};
+// OPEN_BRACE : '{' {opened++;};
+// CLOSE_BRACE : '}' {opened--;};
+
+fragment SPACES : [ \t]+ ;
+fragment COMMENT : '#' ~[\r\n]* ;
+fragment LINE_JOINING : '\\' SPACES? ( '\r'? '\n' | '\r' ) ;
+
+NEWLINE : ( '\r'? '\n' | '\r' ) SPACES?
+{
+ String newLine = getText().replaceAll("[^\r\n]+", ""); // only the line-break characters of the match
+ String spaces = getText().replaceAll("[\r\n]+", ""); // only the indentation that follows the line break
+ int next = _input.LA(1); // first character after the matched text
+ if (opened > 0 || next == '\r' || next == '\n' || next == '#') {
+ // If we're inside brackets/parentheses, on a blank line, or in front of a comment,
+ // ignore all indents, dedents and line breaks.
+ skip();
+ if(debugIndentRules) {
+ if(next == '\r' || next == '\n') {
+ System.out.println("4.1 Skipping (blank lines)");
+ }
+ else if(next == '#') {
+ System.out.println("4.2 Skipping (comment)");
+ }
+ else {
+ System.out.println("4.2 Skipping something else");
+ }
+ }
+ }
+ else {
+ emit(commonToken(NEWLINE, newLine));
+
+ int indent = getIndentationCount(spaces);
+ int previous = indents.isEmpty() ? 0 : indents.peek(); // an empty stack means indentation level 0
+ if (indent == previous) {
+ if(debugIndentRules)
+ System.out.println("3. Skipping identation as of same size:" + next);
+
+ // skip indents of the same size as the present indent-size
+ skip();
+ }
+ else if (indent > previous) {
+ if(debugIndentRules)
+ System.out.println("1. Indent:" + next);
+
+ indents.push(indent);
+ emit(commonToken(PydmlParser.INDENT, spaces));
+ }
+ else {
+ // Possibly emit more than 1 DEDENT token: one for every stacked level deeper than the new indent.
+ while(!indents.isEmpty() && indents.peek() > indent) {
+ if(debugIndentRules)
+ System.out.println("2. Dedent:" + next);
+
+ this.emit(createDedent());
+ indents.pop();
+ }
+ }
+ }
+}
+;
+
+SKIP : ( SPACES | COMMENT | LINE_JOINING ) -> skip ;
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/c04fc99f/src/main/java/org/apache/sysml/parser/pydml/PydmlSyntacticErrorListener.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/parser/pydml/PydmlSyntacticErrorListener.java b/src/main/java/org/apache/sysml/parser/pydml/PydmlSyntacticErrorListener.java
new file mode 100644
index 0000000..8b2cc34
--- /dev/null
+++ b/src/main/java/org/apache/sysml/parser/pydml/PydmlSyntacticErrorListener.java
@@ -0,0 +1,114 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.parser.pydml;
+
+import java.util.Stack;
+
+import org.antlr.v4.runtime.BaseErrorListener;
+import org.antlr.v4.runtime.RecognitionException;
+import org.antlr.v4.runtime.Recognizer;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+import org.apache.sysml.api.DMLScript;
+
+public class PydmlSyntacticErrorListener
+{
+ private static final Log LOG = LogFactory.getLog(DMLScript.class.getName()); // logs under DMLScript's logger name
+ // Holder for CustomDmlErrorListener, the ANTLR error listener that logs PyDML syntax and validation problems.
+ public static class CustomDmlErrorListener extends BaseErrorListener {
+ // atleastOneError is set by every reported error; currentFileName is a stack whose top is used as the message prefix.
+ private boolean atleastOneError = false;
+ private Stack<String> currentFileName = new Stack<String>();
+ /** Pushes the file name that subsequent messages should be prefixed with. */
+ public void pushCurrentFileName(String currentFilePath) {
+ currentFileName.push(currentFilePath);
+ }
+ /** Returns the file name on top of the stack without removing it; Stack.peek() throws EmptyStackException on an empty stack. */
+ public String peekFileName() {
+ return currentFileName.peek();
+ }
+ /** Removes and returns the file name on top of the stack; Stack.pop() throws EmptyStackException on an empty stack. */
+ public String popFileName() {
+ return currentFileName.pop();
+ }
+ /** Sets the error flag and logs the message at error level, prefixed with the current file name when the stack is not empty. */
+ public void validationError(int line, int charPositionInLine, String msg) {
+ try {
+ setAtleastOneError(true);
+ // Print error messages with file name
+ if(currentFileName == null || currentFileName.empty()) {
+ LOG.error("line "+line+":"+charPositionInLine+" "+msg);
+ }
+ else {
+ String fileName = currentFileName.peek();
+ LOG.error(fileName + " line "+line+":"+charPositionInLine+" "+msg);
+ }
+ }
+ catch(Exception e1) {
+ LOG.error("ERROR: while customizing error message:" + e1);
+ }
+ }
+ /** Logs the message at warn level, prefixed with the current file name when the stack is not empty; the error flag is left untouched. */
+ public void validationWarning(int line, int charPositionInLine, String msg) {
+ try {
+ //atleastOneError = true; ---> not an error, just warning
+ // Print warning messages with file name
+ if(currentFileName == null || currentFileName.empty())
+ LOG.warn("line "+line+":"+charPositionInLine+" "+msg);
+ else {
+ String fileName = currentFileName.peek();
+ LOG.warn(fileName + " line "+line+":"+charPositionInLine+" "+msg);
+ }
+ }
+ catch(Exception e1) {
+ LOG.warn("ERROR: while customizing error message:" + e1);
+ }
+ }
+ /** BaseErrorListener callback for syntax errors: sets the error flag and logs line, column and message at error level; recognizer, offendingSymbol and e are not used. */
+ @Override
+ public void syntaxError(Recognizer<?, ?> recognizer, Object offendingSymbol,
+ int line, int charPositionInLine,
+ String msg, RecognitionException e)
+ {
+ try {
+ setAtleastOneError(true);
+ // Print error messages with file name
+ if(currentFileName == null || currentFileName.empty())
+ LOG.error("line "+line+":"+charPositionInLine+" "+msg);
+ else {
+ String fileName = currentFileName.peek();
+ LOG.error(fileName + " line "+line+":"+charPositionInLine+" "+msg);
+ }
+ }
+ catch(Exception e1) {
+ LOG.error("ERROR: while customizing error message:" + e1);
+ }
+ }
+ /** Returns the error flag, which validationError and syntaxError set to true. */
+ public boolean isAtleastOneError() {
+ return atleastOneError;
+ }
+ /** Overwrites the error flag with the given value. */
+ public void setAtleastOneError(boolean atleastOneError) {
+ this.atleastOneError = atleastOneError;
+ }
+ }
+}