You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by rd...@apache.org on 2011/03/26 01:07:42 UTC
svn commit: r1085612 [1/4] - in /pig/trunk: ./ src/org/apache/pig/
src/org/apache/pig/parser/ src/org/apache/pig/scripting/
src/org/apache/pig/tools/parameters/
src/org/apache/pig/tools/pigscript/parser/
src/org/apache/pig/tools/pigstats/ test/org/apac...
Author: rding
Date: Sat Mar 26 00:07:41 2011
New Revision: 1085612
URL: http://svn.apache.org/viewvc?rev=1085612&view=rev
Log:
PIG-1931: Integrate Macro Expansion with New Parser
Added:
pig/trunk/src/org/apache/pig/parser/AstPrinter.g
pig/trunk/src/org/apache/pig/parser/PigParserNode.java
pig/trunk/src/org/apache/pig/parser/PigParserNodeAdaptor.java
pig/trunk/test/org/apache/pig/test/data/ExpectedResult6.pig
pig/trunk/test/org/apache/pig/test/data/input6.pig
Removed:
pig/trunk/src/org/apache/pig/parser/MacroExpansion.g
pig/trunk/src/org/apache/pig/parser/MacroImport.g
pig/trunk/src/org/apache/pig/parser/MacroRecursion.g
pig/trunk/src/org/apache/pig/parser/ParserUtil.java
Modified:
pig/trunk/CHANGES.txt
pig/trunk/build.xml
pig/trunk/src/org/apache/pig/Main.java
pig/trunk/src/org/apache/pig/parser/AliasMasker.g
pig/trunk/src/org/apache/pig/parser/AstValidator.g
pig/trunk/src/org/apache/pig/parser/LogicalPlanGenerator.g
pig/trunk/src/org/apache/pig/parser/PigMacro.java
pig/trunk/src/org/apache/pig/parser/QueryLexer.g
pig/trunk/src/org/apache/pig/parser/QueryParser.g
pig/trunk/src/org/apache/pig/parser/QueryParserDriver.java
pig/trunk/src/org/apache/pig/parser/QueryParserUtils.java
pig/trunk/src/org/apache/pig/scripting/Pig.java
pig/trunk/src/org/apache/pig/tools/parameters/PigFileParser.jj
pig/trunk/src/org/apache/pig/tools/pigscript/parser/PigScriptParser.jj
pig/trunk/src/org/apache/pig/tools/pigstats/ScriptState.java
pig/trunk/test/org/apache/pig/parser/ParserTestingUtils.java
pig/trunk/test/org/apache/pig/parser/TestAstValidator.java
pig/trunk/test/org/apache/pig/parser/TestQueryParser.java
pig/trunk/test/org/apache/pig/test/TestMacroExpansion.java
pig/trunk/test/org/apache/pig/test/TestParamSubPreproc.java
pig/trunk/test/org/apache/pig/test/data/output1.pig
Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1085612&r1=1085611&r2=1085612&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Sat Mar 26 00:07:41 2011
@@ -118,6 +118,8 @@ PIG-1696: Performance: Use System.arrayc
BUG FIXES
+PIG-1931: Integrate Macro Expansion with New Parser (rding)
+
PIG-1933: Hints such as 'collected' and 'skewed' for "group by" or "join by"
should not be treated as tokens. (xuefuz via thejas)
Modified: pig/trunk/build.xml
URL: http://svn.apache.org/viewvc/pig/trunk/build.xml?rev=1085612&r1=1085611&r2=1085612&view=diff
==============================================================================
--- pig/trunk/build.xml (original)
+++ pig/trunk/build.xml Sat Mar 26 00:07:41 2011
@@ -314,7 +314,7 @@
description="generates token parser class from an ANTLR grammar">
<java classname="org.antlr.Tool"
classpathref="classpath" fork="true">
- <arg line="-o ${src.gen.dir}/${grammar.package.dir} ${src.dir}/${grammar.package.dir}/MacroExpansion.g ${src.dir}/${grammar.package.dir}/MacroRecursion.g ${src.dir}/${grammar.package.dir}/MacroImport.g ${src.dir}/${grammar.package.dir}/${grammar.name}Parser.g"/>
+ <arg line="-o ${src.gen.dir}/${grammar.package.dir} ${src.dir}/${grammar.package.dir}/${grammar.name}Parser.g"/>
</java>
</target>
@@ -323,7 +323,7 @@
description="generates tree parser class from an ANTLR grammar">
<java classname="org.antlr.Tool"
classpathref="classpath" fork="true">
- <arg line="-o ${src.gen.dir}/${grammar.package.dir} ${src.dir}/${grammar.package.dir}/AliasMasker.g ${src.dir}/${grammar.package.dir}/AstValidator.g ${src.dir}/${grammar.package.dir}/LogicalPlanGenerator.g"/>
+ <arg line="-o ${src.gen.dir}/${grammar.package.dir} ${src.dir}/${grammar.package.dir}/AstPrinter.g ${src.dir}/${grammar.package.dir}/AliasMasker.g ${src.dir}/${grammar.package.dir}/AstValidator.g ${src.dir}/${grammar.package.dir}/LogicalPlanGenerator.g"/>
</java>
</target>
Modified: pig/trunk/src/org/apache/pig/Main.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/Main.java?rev=1085612&r1=1085611&r2=1085612&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/Main.java (original)
+++ pig/trunk/src/org/apache/pig/Main.java Sat Mar 26 00:07:41 2011
@@ -35,6 +35,7 @@ import java.util.AbstractList;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
+import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
@@ -66,7 +67,6 @@ import org.apache.pig.impl.util.ObjectSe
import org.apache.pig.impl.util.PropertiesUtil;
import org.apache.pig.impl.util.UDFContext;
import org.apache.pig.parser.QueryParserDriver;
-import org.apache.pig.parser.ParserUtil;
import org.apache.pig.scripting.ScriptEngine;
import org.apache.pig.scripting.ScriptEngine.SupportedScriptLang;
import org.apache.pig.tools.cmdline.CmdLineParser;
@@ -370,6 +370,8 @@ static int run(String args[], PigProgres
properties.setProperty("pig.jars.relative.to.dfs", "true");
}
+ scriptState.setFileName(file);
+
if (embedded) {
return runEmbeddedScript(pigContext, localFileRet.file.getPath(), engine);
} else {
@@ -379,18 +381,28 @@ static int run(String args[], PigProgres
.getPath(), type.name().toLowerCase());
}
}
-
- // run macro expansion
- FileReader fr = new FileReader(localFileRet.file);
- in = ParserUtil.getExpandedMacroAsBufferedReader(fr);
+ in = new BufferedReader(new FileReader(localFileRet.file));
+
// run parameter substitution preprocessor first
substFile = file + ".substituted";
- pin = runParamPreprocessor(properties, in, params, paramFiles, substFile, debug || dryrun || checkScriptOnly);
+ pin = runParamPreprocessor(properties, in, params, paramFiles,
+ substFile, debug || dryrun || checkScriptOnly);
if (dryrun) {
- log.info("Dry run completed. Substituted pig script is at " + substFile);
+ QueryParserDriver driver = new QueryParserDriver(
+ pigContext, "0", new HashMap<String, String>());
+ if (driver.dryrun(substFile)) {
+ log.info("Dry run completed. Substituted pig script is at "
+ + substFile
+ + ". Expanded pig script is at "
+ + file + ".expanded");
+ } else {
+ log.info("Dry run completed. Substituted pig script is at "
+ + substFile);
+ }
return ReturnCode.SUCCESS;
}
+
logFileName = validateLogFile(logFileName, file);
pigContext.getProperties().setProperty("pig.logfile", logFileName);
@@ -439,9 +451,7 @@ static int run(String args[], PigProgres
scriptState.setScript(sb.toString());
- // run macro expansion
- StringReader sr = new StringReader(sb.toString());
- in = ParserUtil.getExpandedMacroAsBufferedReader(sr);
+ in = new BufferedReader(new StringReader(sb.toString()));
grunt = new Grunt(in, pigContext);
gruntCalled = true;
@@ -490,6 +500,8 @@ static int run(String args[], PigProgres
properties.setProperty("pig.jars.relative.to.dfs", "true");
}
+ scriptState.setFileName(remainders[0]);
+
if (embedded) {
return runEmbeddedScript(pigContext, localFileRet.file.getPath(), engine);
} else {
@@ -499,16 +511,24 @@ static int run(String args[], PigProgres
.getPath(), type.name().toLowerCase());
}
}
-
- // run macro expansion
- FileReader fr = new FileReader(localFileRet.file);
- in = ParserUtil.getExpandedMacroAsBufferedReader(fr);
+
+ in = new BufferedReader(new FileReader(localFileRet.file));
// run parameter substitution preprocessor first
substFile = remainders[0] + ".substituted";
pin = runParamPreprocessor(properties, in, params, paramFiles, substFile, debug || dryrun || checkScriptOnly);
- if (dryrun){
- log.info("Dry run completed. Substituted pig script is at " + substFile);
+ if (dryrun) {
+ QueryParserDriver driver = new QueryParserDriver(
+ pigContext, "0", new HashMap<String, String>());
+ if (driver.dryrun(substFile)) {
+ log.info("Dry run completed. Substituted pig script is at "
+ + substFile
+ + ". Expanded pig script is at "
+ + remainders[0] + ".expanded");
+ } else {
+ log.info("Dry run completed. Substituted pig script is at "
+ + substFile);
+ }
return ReturnCode.SUCCESS;
}
Modified: pig/trunk/src/org/apache/pig/parser/AliasMasker.g
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/parser/AliasMasker.g?rev=1085612&r1=1085611&r2=1085612&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/parser/AliasMasker.g (original)
+++ pig/trunk/src/org/apache/pig/parser/AliasMasker.g Sat Mar 26 00:07:41 2011
@@ -37,36 +37,16 @@ package org.apache.pig.parser;
import java.util.HashSet;
import java.util.Set;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
}
@members {
-private static Log log = LogFactory.getLog( AliasMasker.class );
-
-public String getErrorMessage(RecognitionException e, String[] tokenNames) {
- String msg = e.getMessage();
- if ( e instanceof DuplicatedSchemaAliasException ) {
- DuplicatedSchemaAliasException dae = (DuplicatedSchemaAliasException)e;
- msg = "Duplicated schema alias name '"+ dae.getAlias() + "' in the schema definition";
- } else if( e instanceof UndefinedAliasException ) {
- UndefinedAliasException dae = (UndefinedAliasException)e;
- msg = "Alias '"+ dae.getAlias() + "' is not defined";
- }
-
- return msg;
-}
-
public void setParams(Set ps, String macro, long idx) {
params = ps;
macroName = macro;
index = idx;
}
-public String getResult() { return sb.toString(); }
-
private String getMask(String alias) {
return params.contains( alias )
? alias
@@ -81,8 +61,6 @@ private String macroName = "";
private long index = 0;
-private StringBuilder sb = new StringBuilder();
-
} // End of @members
@rulecatch {
@@ -95,7 +73,7 @@ query : ^( QUERY statement* )
;
statement : general_statement
- | split_statement { sb.append(";\n"); }
+ | split_statement
;
split_statement : split_clause
@@ -103,16 +81,20 @@ split_statement : split_clause
// For foreach statement that with complex inner plan.
general_statement
- : ^( STATEMENT ( alias { sb.append(" = "); } )?
- op_clause parallel_clause? ) { sb.append(";\n"); }
+ : ^( STATEMENT ( alias )?
+ op_clause parallel_clause? )
;
parallel_clause
- : ^( PARALLEL INTEGER ) { sb.append(" ").append($PARALLEL.text).append(" ").append($INTEGER.text); }
+ : ^( PARALLEL INTEGER )
;
alias
- : IDENTIFIER { sb.append(getMask($IDENTIFIER.text)); aliasSeen.add($IDENTIFIER.text); }
+ : IDENTIFIER
+ {
+ aliasSeen.add($IDENTIFIER.text);
+ $IDENTIFIER.getToken().setText(getMask($IDENTIFIER.text));
+ }
;
op_clause : define_clause
@@ -134,81 +116,69 @@ op_clause : define_clause
;
define_clause
- : ^( DEFINE IDENTIFIER { sb.append($DEFINE.text).append(" ").append($IDENTIFIER.text).append(" "); }
- ( cmd | func_clause ) )
+ : ^( DEFINE alias ( cmd | func_clause ) )
;
cmd
- : ^( EXECCOMMAND { sb.append($EXECCOMMAND.text); }
+ : ^( EXECCOMMAND
( ship_clause | cache_caluse | input_clause | output_clause | error_clause )* )
;
ship_clause
- : ^( SHIP { sb.append(" ").append($SHIP.text).append(" ("); } path_list? { sb.append(")"); } )
+ : ^( SHIP path_list? )
;
path_list
- : a=QUOTEDSTRING { sb.append(" ").append($a.text); }
- (b=QUOTEDSTRING { sb.append(", ").append($b.text); } )*
+ : QUOTEDSTRING+
;
cache_caluse
- : ^( CACHE { sb.append(" ").append($CACHE.text).append(" ("); } path_list { sb.append(")"); } )
+ : ^( CACHE path_list )
;
input_clause
- : ^( INPUT { sb.append(" ").append($INPUT.text).append("("); }
- stream_cmd ( { sb.append(", "); } stream_cmd)* { sb.append(")"); } )
+ : ^( INPUT stream_cmd+ )
;
stream_cmd
- : ^( STDIN { sb.append($STDIN.text).append(" USING "); } func_clause? )
- | ^( STDOUT { sb.append($STDOUT.text).append(" USING "); } func_clause? )
- | ^( QUOTEDSTRING { sb.append($QUOTEDSTRING.text).append(" USING "); } func_clause? )
+ : ^( STDIN func_clause? )
+ | ^( STDOUT func_clause? )
+ | ^( QUOTEDSTRING func_clause? )
;
output_clause
- : ^( OUTPUT { sb.append(" ").append($OUTPUT.text).append(" ("); }
- stream_cmd ( { sb.append(","); } stream_cmd)* { sb.append(")"); } )
+ : ^( OUTPUT stream_cmd+ )
;
error_clause
- : ^( STDERROR { sb.append(" ").append($STDERROR.text).append(" ("); }
- ( QUOTEDSTRING { sb.append($QUOTEDSTRING.text); } (INTEGER { sb.append(" LIMIT ").append($INTEGER); } )? )? { sb.append(")"); } )
+ : ^( STDERROR ( QUOTEDSTRING INTEGER? )? )
;
load_clause
- : ^( LOAD { sb.append($LOAD.text).append(" "); } filename
- ( { sb.append(" USING "); } func_clause)? as_clause? )
+ : ^( LOAD filename func_clause? as_clause? )
;
filename
- : QUOTEDSTRING { sb.append($QUOTEDSTRING.text); }
+ : QUOTEDSTRING
;
as_clause
- : ^( AS { sb.append(" ").append($AS.text).append(" "); } field_def_list )
+ : ^( AS field_def_list )
;
field_def
- : ^( FIELD_DEF IDENTIFIER { sb.append($IDENTIFIER.text); } ( {sb.append(":"); } type)? )
+ : ^( FIELD_DEF IDENTIFIER type? )
;
field_def_list
- : { sb.append("("); } field_def ( { sb.append(", "); } field_def )+ { sb.append(")"); }
- | field_def
+ : field_def+
;
type : simple_type | tuple_type | bag_type | map_type
;
simple_type
- : INT { sb.append($INT.text); }
- | LONG { sb.append($LONG.text); }
- | FLOAT { sb.append($FLOAT.text); }
- | DOUBLE { sb.append($DOUBLE.text); }
- | CHARARRAY { sb.append($CHARARRAY.text); }
- | BYTEARRAY { sb.append($BYTEARRAY.text); }
+ : INT | LONG | FLOAT | DOUBLE | CHARARRAY | BYTEARRAY
;
tuple_type
@@ -216,94 +186,85 @@ tuple_type
;
bag_type
- : ^( BAG_TYPE { sb.append("bag{"); } ( { sb.append("T:"); } tuple_type )? ) { sb.append("}"); }
+ : ^( BAG_TYPE tuple_type? )
;
-map_type : ^( MAP_TYPE { sb.append("map["); } type? ) { sb.append("]"); }
+map_type : ^( MAP_TYPE type? )
;
func_clause
: ^( FUNC_REF func_name )
- | ^( FUNC func_name { sb.append("("); } func_args? { sb.append(")"); } )
+ | ^( FUNC func_name func_args? )
;
func_name
- : eid ( ( PERIOD { sb.append($PERIOD.text); } | DOLLAR { sb.append($DOLLAR.text); } ) eid )*
+ : eid ( ( PERIOD | DOLLAR ) eid )*
;
func_args
- : a=QUOTEDSTRING { sb.append($a.text); }
- (b=QUOTEDSTRING { sb.append(", ").append($b.text); } )*
+ : QUOTEDSTRING+
;
group_clause
- : ^( ( GROUP { sb.append($GROUP.text).append(" "); } | COGROUP { sb.append($COGROUP.text).append(" "); } )
- group_item ( { sb.append(", "); } group_item )*
- ( { sb.append(" USING "); } group_type )?
- partition_clause?
- )
+ : ^( ( GROUP | COGROUP ) group_item+ group_type? partition_clause? )
;
-group_type : QUOTEDSTRING { sb.append( $QUOTEDSTRING.text ); }
+group_type : QUOTEDSTRING
;
group_item
- : rel ( join_group_by_clause
- | ALL { sb.append(" ").append($ALL.text); } | ANY { sb.append(" ").append($ANY.text); } )
- ( INNER { sb.append(" ").append($INNER.text); } | OUTER { sb.append(" ").append($OUTER.text); } )?
+ : rel ( join_group_by_clause | ALL | ANY ) ( INNER | OUTER )?
;
rel
- : alias
- | { sb.append(" ("); } op_clause { sb.append(") "); }
+ : alias | op_clause
;
flatten_generated_item
- : ( flatten_clause | expr | STAR { sb.append(" ").append($STAR.text); } ) ( { sb.append(" AS "); } field_def_list)?
+ : ( flatten_clause | expr | STAR ) field_def_list?
;
flatten_clause
- : ^( FLATTEN { sb.append($FLATTEN.text).append("("); } expr { sb.append(") "); } )
+ : ^( FLATTEN expr )
;
store_clause
- : ^( STORE { sb.append($STORE.text).append(" "); } rel { sb.append(" INTO "); } filename ( { sb.append(" USING "); } func_clause)? )
+ : ^( STORE alias filename func_clause? )
;
filter_clause
- : ^( FILTER { sb.append($FILTER.text).append(" "); } rel { sb.append(" BY ("); } cond { sb.append(")"); } )
+ : ^( FILTER rel cond )
;
cond
- : ^( OR { sb.append("("); } cond { sb.append(") ").append($OR.text).append(" ("); } cond { sb.append(")"); } )
- | ^( AND { sb.append("("); } cond { sb.append(") ").append($AND.text).append(" ("); } cond { sb.append(")"); } )
- | ^( NOT { sb.append(" ").append($NOT.text).append(" ("); } cond { sb.append(")"); } )
- | ^( NULL expr { sb.append(" IS "); } (NOT { sb.append($NOT.text).append(" "); } )? { sb.append($NULL.text); } )
- | ^( rel_op expr { sb.append(" ").append($rel_op.result).append(" "); } expr )
+ : ^( OR cond cond )
+ | ^( AND cond cond )
+ | ^( NOT cond )
+ | ^( NULL expr NOT? )
+ | ^( rel_op expr expr )
| func_eval
;
func_eval
- : ^( FUNC_EVAL func_name { sb.append("("); } real_arg ( { sb.append(", "); } real_arg)* { sb.append(")"); } )
- | ^( FUNC_EVAL func_name { sb.append("()"); } )
+ : ^( FUNC_EVAL func_name real_arg* )
;
real_arg
- : expr | STAR { sb.append($STAR.text); }
+ : expr | STAR
;
expr
- : ^( PLUS expr { sb.append(" ").append($PLUS.text).append(" "); } expr )
- | ^( MINUS expr { sb.append(" ").append($MINUS.text).append(" "); } expr )
- | ^( STAR expr { sb.append(" ").append($STAR.text).append(" "); } expr )
- | ^( DIV expr { sb.append(" ").append($DIV.text).append(" "); } expr )
- | ^( PERCENT expr { sb.append(" ").append($PERCENT.text).append(" "); } expr )
- | ^( CAST_EXPR { sb.append("("); } type { sb.append(")"); } expr )
+ : ^( PLUS expr expr )
+ | ^( MINUS expr expr )
+ | ^( STAR expr expr )
+ | ^( DIV expr expr )
+ | ^( PERCENT expr expr )
+ | ^( CAST_EXPR type expr )
| const_expr
| var_expr
- | ^( NEG { sb.append($NEG.text); } expr )
- | ^( CAST_EXPR { sb.append("("); } type_cast { sb.append(")"); } expr )
- | ^( EXPR_IN_PAREN { sb.append("("); } expr { sb.append(")"); } )
+ | ^( NEG expr )
+ | ^( CAST_EXPR type_cast expr )
+ | ^( EXPR_IN_PAREN expr )
;
type_cast
@@ -311,12 +272,11 @@ type_cast
;
tuple_type_cast
- : ^( TUPLE_TYPE_CAST { sb.append("tuple("); } type_cast ( {sb.append(", "); } type_cast)* {sb.append(")"); } )
- | ^( TUPLE_TYPE_CAST { sb.append("tuple("); } type_cast? {sb.append(")"); } )
+ : ^( TUPLE_TYPE_CAST type_cast* )
;
bag_type_cast
- : ^( BAG_TYPE_CAST { sb.append("bag{"); } tuple_type_cast? {sb.append("}"); } )
+ : ^( BAG_TYPE_CAST tuple_type_cast? )
;
var_expr
@@ -328,95 +288,89 @@ projectable_expr
;
dot_proj
- : ^( PERIOD { sb.append(".("); } col_alias_or_index ( { sb.append(", "); } col_alias_or_index)* { sb.append(")"); } )
+ : ^( PERIOD col_alias_or_index+ )
;
col_alias_or_index : col_alias | col_index
;
col_alias
- : GROUP { sb.append($GROUP.text); }
+ : GROUP
| scoped_col_alias
;
scoped_col_alias
- : ^( SCOPED_ALIAS a=IDENTIFIER {
+ : ^( SCOPED_ALIAS (a=IDENTIFIER {
if (aliasSeen.contains($a.text)) {
- sb.append(getMask($a.text));
+ $a.getToken().setText(getMask($a.text));
} else {
- sb.append($a.text);
+ $a.getToken().setText($a.text);
}
- }
- (b=IDENTIFIER { sb.append("::").append($b.text); })* )
+ })+ )
;
col_index
- : DOLLARVAR { sb.append($DOLLARVAR.text); }
+ : DOLLARVAR
;
pound_proj
- : ^( POUND { sb.append($POUND.text); }
- ( QUOTEDSTRING { sb.append($QUOTEDSTRING.text); } | NULL { sb.append($NULL.text); } ) )
+ : ^( POUND ( QUOTEDSTRING | NULL ) )
;
bin_expr
- : ^( BIN_EXPR { sb.append(" ("); } cond { sb.append(" ? "); } expr { sb.append(" : "); } expr { sb.append(") "); } )
+ : ^( BIN_EXPR cond expr expr )
;
limit_clause
- : ^( LIMIT { sb.append($LIMIT.text).append(" "); } rel
- ( INTEGER { sb.append(" ").append($INTEGER.text); } | LONGINTEGER { sb.append(" ").append($LONGINTEGER.text); } ) )
+ : ^( LIMIT rel ( INTEGER | LONGINTEGER ) )
;
sample_clause
- : ^( SAMPLE { sb.append($SAMPLE.text).append(" "); } rel DOUBLENUMBER { sb.append(" ").append($DOUBLENUMBER.text); } )
+ : ^( SAMPLE rel DOUBLENUMBER )
;
order_clause
- : ^( ORDER { sb.append($ORDER.text).append(" "); } rel
- { sb.append(" BY "); } order_by_clause
- ( { sb.append(" USING "); } func_clause )? )
+ : ^( ORDER rel order_by_clause func_clause? )
;
order_by_clause
- : STAR { sb.append($STAR.text); } ( ASC { sb.append(" ").append($ASC.text); } | DESC { sb.append(" ").append($DESC.text); } )?
- | order_col ( { sb.append(", "); } order_col)*
+ : STAR ( ASC | DESC )?
+ | order_col+
;
order_col
- : col_ref ( ASC { sb.append(" ").append($ASC.text); } | DESC { sb.append(" ").append($DESC.text); } )?
+ : col_ref ( ASC | DESC )?
;
distinct_clause
- : ^( DISTINCT { sb.append($DISTINCT.text).append(" "); } rel partition_clause? )
+ : ^( DISTINCT rel partition_clause? )
;
partition_clause
- : ^( PARTITION { sb.append(" ").append($PARTITION.text).append(" BY "); } func_name )
+ : ^( PARTITION func_name )
;
cross_clause
- : ^( CROSS { sb.append($CROSS.text).append(" "); } rel_list partition_clause? )
+ : ^( CROSS rel_list partition_clause? )
;
rel_list
- : rel ( { sb.append(", "); } rel)*
+ : rel+
;
join_clause
- : ^( JOIN { sb.append($JOIN.text).append(" "); } join_sub_clause ( { sb.append(" USING "); } join_type )?
- ( partition_clause )? )
+ : ^( JOIN join_sub_clause join_type? partition_clause? )
;
-join_type : QUOTEDSTRING { sb.append( $QUOTEDSTRING.text ); }
+join_type : QUOTEDSTRING
;
join_sub_clause
- : join_item ( LEFT { sb.append(" ").append($LEFT.text); }
- | RIGHT { sb.append(" ").append($RIGHT.text); }
- | FULL { sb.append(" ").append($FULL.text); }
- ) (OUTER { sb.append(" ").append($OUTER.text); } )? { sb.append(", "); } join_item
- | join_item ( { sb.append(", "); } join_item )*
+ : join_item ( LEFT
+ | RIGHT
+ | FULL
+ ) OUTER? join_item
+ | join_item+
;
join_item
@@ -424,20 +378,19 @@ join_item
;
join_group_by_clause
- : ^( BY { sb.append(" ").append($BY.text).append(" ("); }
- join_group_by_expr ( { sb.append(", "); } join_group_by_expr )* { sb.append(")"); } )
+ : ^( BY join_group_by_expr+ )
;
join_group_by_expr
- : expr | STAR { sb.append($STAR.text); }
+ : expr | STAR
;
union_clause
- : ^( UNION { sb.append($UNION.text).append(" "); } (ONSCHEMA { sb.append($ONSCHEMA.text).append(" "); } )? rel_list )
+ : ^( UNION ONSCHEMA? rel_list )
;
foreach_clause
- : ^( FOREACH { sb.append($FOREACH.text).append(" "); } rel foreach_plan )
+ : ^( FOREACH rel foreach_plan )
;
foreach_plan
@@ -446,17 +399,16 @@ foreach_plan
;
nested_blk
- : { sb.append(" { "); } (nested_command { sb.append("; "); } )* generate_clause { sb.append("; } "); }
+ : nested_command* generate_clause
;
generate_clause
- : ^( GENERATE { sb.append(" ").append($GENERATE.text).append(" "); }
- flatten_generated_item ( { sb.append(", "); } flatten_generated_item)* )
+ : ^( GENERATE flatten_generated_item+ )
;
nested_command
- : ^( NESTED_CMD IDENTIFIER { sb.append($IDENTIFIER.text).append(" = "); } nested_op )
- | ^( NESTED_CMD_ASSI IDENTIFIER { sb.append($IDENTIFIER.text).append(" = "); } expr )
+ : ^( NESTED_CMD IDENTIFIER nested_op )
+ | ^( NESTED_CMD_ASSI IDENTIFIER expr )
;
nested_op : nested_proj
@@ -467,71 +419,62 @@ nested_op : nested_proj
;
nested_proj
- : ^( NESTED_PROJ col_ref { sb.append(".("); } col_ref ( { sb.append(", "); } col_ref)* { sb.append(")"); } )
+ : ^( NESTED_PROJ col_ref col_ref+ )
;
nested_filter
- : ^( FILTER { sb.append($FILTER.text).append(" "); } nested_op_input { sb.append(" BY "); } cond )
+ : ^( FILTER nested_op_input cond )
;
nested_sort
- : ^( ORDER { sb.append($ORDER.text).append(" "); } nested_op_input
- { sb.append(" BY "); } order_by_clause ( { sb.append(" USING "); } func_clause)? )
+ : ^( ORDER nested_op_input order_by_clause func_clause? )
;
nested_distinct
- : ^( DISTINCT { sb.append($DISTINCT.text).append(" "); } nested_op_input )
+ : ^( DISTINCT nested_op_input )
;
nested_limit
- : ^( LIMIT { sb.append($LIMIT.text).append(" "); } nested_op_input INTEGER { sb.append(" ").append($INTEGER.text); } )
+ : ^( LIMIT nested_op_input INTEGER )
;
nested_op_input : col_ref | nested_proj
;
stream_clause
- : ^( STREAM { sb.append($STREAM.text).append(" "); } rel { sb.append(" THROUGH "); }
- ( EXECCOMMAND { sb.append($EXECCOMMAND.text); }
- | IDENTIFIER { sb.append($IDENTIFIER.text); } ) as_clause? )
+ : ^( STREAM rel ( EXECCOMMAND | alias ) as_clause? )
;
mr_clause
- : ^( MAPREDUCE QUOTEDSTRING { sb.append($MAPREDUCE.text).append(" ").append($QUOTEDSTRING.text).append(" "); }
- ({ sb.append(" ("); } path_list { sb.append(") "); } )? store_clause { sb.append(" "); } load_clause
- (EXECCOMMAND { sb.append(" ").append($EXECCOMMAND.text); } )? )
+ : ^( MAPREDUCE QUOTEDSTRING path_list? store_clause load_clause EXECCOMMAND? )
;
split_clause
- : ^( SPLIT { sb.append($SPLIT.text).append(" "); }
- rel { sb.append(" INTO "); } split_branch ( { sb.append(", "); } split_branch)+ )
+ : ^( SPLIT rel split_branch split_branch+ )
;
split_branch
- : ^( SPLIT_BRANCH IDENTIFIER { sb.append($IDENTIFIER.text).append(" IF "); } cond )
+ : ^( SPLIT_BRANCH alias cond )
;
col_ref : alias_col_ref | dollar_col_ref
;
alias_col_ref
- : GROUP { sb.append($GROUP.text); }
+ : GROUP
| scoped_alias_col_ref
;
scoped_alias_col_ref
- : ^( SCOPED_ALIAS name=IDENTIFIER {
+ : ^( SCOPED_ALIAS (name=IDENTIFIER {
if (aliasSeen.contains($name.text)) {
- sb.append(getMask($name.text));
- } else {
- sb.append($name.text);
- } }
- (name1=IDENTIFIER { sb.append("::").append($name1.text); }
- )* )
+ $name.getToken().setText(getMask($name.text));
+ }
+ } )+ )
;
dollar_col_ref
- : DOLLARVAR { sb.append($DOLLARVAR.text); }
+ : DOLLARVAR
;
const_expr : literal
@@ -540,152 +483,153 @@ const_expr : literal
literal : scalar | map | bag | tuple
;
-scalar : num_scalar
- | QUOTEDSTRING { sb.append($QUOTEDSTRING.text); }
- | NULL { sb.append($NULL.text); }
-;
-
-num_scalar : ( MINUS { sb.append( "-" ); } )?
- ( INTEGER { sb.append($INTEGER.text); }
- | LONGINEGER { sb.append($LONGINEGER.text); }
- | FLOATNUMBER { sb.append($FLOATNUMBER.text); }
- | DOUBLENUMBER { sb.append($DOUBLENUMBER.text); }
- )
+scalar
+ : INTEGER
+ | LONGINEGER
+ | FLOATNUMBER
+ | DOUBLENUMBER
+ | QUOTEDSTRING
+ | NULL
;
map
- : ^( MAP_VAL { sb.append("["); } keyvalue ( { sb.append(", "); } keyvalue)* { sb.append("]"); } )
- | ^( MAP_VAL { sb.append("[]"); } )
+ : ^( MAP_VAL keyvalue* )
;
keyvalue
- : ^( KEY_VAL_PAIR map_key { sb.append("#"); } const_expr )
+ : ^( KEY_VAL_PAIR map_key const_expr )
;
-map_key : QUOTEDSTRING { sb.append($QUOTEDSTRING.text); }
+map_key : QUOTEDSTRING
;
bag
- : ^( BAG_VAL { sb.append("{"); } tuple ( { sb.append(", "); } tuple)* { sb.append("}"); } )
- | ^( BAG_VAL { sb.append("{}"); } )
+ : ^( BAG_VAL tuple* )
;
tuple
- : ^( TUPLE_VAL { sb.append("("); } literal ( { sb.append(", "); } literal)* { sb.append(")"); } )
- | ^( TUPLE_VAL { sb.append("()"); } )
+ : ^( TUPLE_VAL literal* )
;
// extended identifier, handling the keyword and identifier conflicts. Ugly but there is no other choice.
eid : rel_str_op
- | DEFINE { sb.append($DEFINE.text); }
- | LOAD { sb.append($LOAD.text); }
- | FILTER { sb.append($FILTER.text); }
- | FOREACH { sb.append($FOREACH.text); }
- | MATCHES { sb.append($MATCHES.text); }
- | ORDER { sb.append($ORDER.text); }
- | DISTINCT { sb.append($DISTINCT.text); }
- | COGROUP { sb.append($COGROUP.text); }
- | JOIN { sb.append($JOIN.text); }
- | CROSS { sb.append($CROSS.text); }
- | UNION { sb.append($UNION.text); }
- | SPLIT { sb.append($SPLIT.text); }
- | INTO { sb.append($INTO.text); }
- | IF { sb.append($IF.text); }
- | ALL { sb.append($ALL.text); }
- | AS { sb.append($AS.text); }
- | BY { sb.append($BY.text); }
- | USING { sb.append($USING.text); }
- | INNER { sb.append($INNER.text); }
- | OUTER { sb.append($OUTER.text); }
- | PARALLEL { sb.append($PARALLEL.text); }
- | PARTITION { sb.append($PARTITION.text); }
- | GROUP { sb.append($GROUP.text); }
- | AND { sb.append($AND.text); }
- | OR { sb.append($OR.text); }
- | NOT { sb.append($NOT.text); }
- | GENERATE { sb.append($GENERATE.text); }
- | FLATTEN { sb.append($FLATTEN.text); }
- | EVAL { sb.append($EVAL.text); }
- | ASC { sb.append($ASC.text); }
- | DESC { sb.append($DESC.text); }
- | INT { sb.append($INT.text); }
- | LONG { sb.append($LONG.text); }
- | FLOAT { sb.append($FLOAT.text); }
- | DOUBLE { sb.append($DOUBLE.text); }
- | CHARARRAY { sb.append($CHARARRAY.text); }
- | BYTEARRAY { sb.append($BYTEARRAY.text); }
- | BAG { sb.append($BAG.text); }
- | TUPLE { sb.append($TUPLE.text); }
- | MAP { sb.append($MAP.text); }
- | IS { sb.append($IS.text); }
- | NULL { sb.append($NULL.text); }
- | STREAM { sb.append($STREAM.text); }
- | THROUGH { sb.append($THROUGH.text); }
- | STORE { sb.append($STORE.text); }
- | MAPREDUCE { sb.append($MAPREDUCE.text); }
- | SHIP { sb.append($SHIP.text); }
- | CACHE { sb.append($CACHE.text); }
- | INPUT { sb.append($INPUT.text); }
- | OUTPUT { sb.append($OUTPUT.text); }
- | ERROR { sb.append($ERROR.text); }
- | STDIN { sb.append($STDIN.text); }
- | STDOUT { sb.append($STDOUT.text); }
- | LIMIT { sb.append($LIMIT.text); }
- | SAMPLE { sb.append($SAMPLE.text); }
- | LEFT { sb.append($LEFT.text); }
- | RIGHT { sb.append($RIGHT.text); }
- | FULL { sb.append($FULL.text); }
- | IDENTIFIER { sb.append($IDENTIFIER.text); }
+ | IMPORT
+ | RETURNS
+ | DEFINE
+ | LOAD
+ | FILTER
+ | FOREACH
+ | MATCHES
+ | ORDER
+ | DISTINCT
+ | COGROUP
+ | JOIN
+ | CROSS
+ | UNION
+ | SPLIT
+ | INTO
+ | IF
+ | ALL
+ | AS
+ | BY
+ | USING
+ | INNER
+ | OUTER
+ | PARALLEL
+ | PARTITION
+ | GROUP
+ | AND
+ | OR
+ | NOT
+ | GENERATE
+ | FLATTEN
+ | EVAL
+ | ASC
+ | DESC
+ | INT
+ | LONG
+ | FLOAT
+ | DOUBLE
+ | CHARARRAY
+ | BYTEARRAY
+ | BAG
+ | TUPLE
+ | MAP
+ | IS
+ | NULL
+ | STREAM
+ | THROUGH
+ | STORE
+ | MAPREDUCE
+ | SHIP
+ | CACHE
+ | INPUT
+ | OUTPUT
+ | ERROR
+ | STDIN
+ | STDOUT
+ | LIMIT
+ | SAMPLE
+ | LEFT
+ | RIGHT
+ | FULL
+ | a = IDENTIFIER {
+ if (aliasSeen.contains($a.text)) {
+ $a.getToken().setText(getMask($a.text));
+ } else {
+ $a.getToken().setText($a.text);
+ }
+ }
;
// relational operator
-rel_op returns[String result]
- : rel_op_eq { $result = $rel_op_eq.result; }
- | rel_op_ne { $result = $rel_op_ne.result; }
- | rel_op_gt { $result = $rel_op_gt.result; }
- | rel_op_gte { $result = $rel_op_gte.result; }
- | rel_op_lt { $result = $rel_op_lt.result; }
- | rel_op_lte { $result = $rel_op_lte.result; }
- | STR_OP_MATCHES { $result = $STR_OP_MATCHES.text; }
+rel_op
+ : rel_op_eq
+ | rel_op_ne
+ | rel_op_gt
+ | rel_op_gte
+ | rel_op_lt
+ | rel_op_lte
+ | STR_OP_MATCHES
;
-rel_op_eq returns[String result]
- : STR_OP_EQ { $result = $STR_OP_EQ.text; }
- | NUM_OP_EQ { $result = $NUM_OP_EQ.text; }
+rel_op_eq
+ : STR_OP_EQ
+ | NUM_OP_EQ
;
-rel_op_ne returns[String result]
- : STR_OP_NE { $result = $STR_OP_NE.text; }
- | NUM_OP_NE { $result = $NUM_OP_NE.text; }
+rel_op_ne
+ : STR_OP_NE
+ | NUM_OP_NE
;
-rel_op_gt returns[String result]
- : STR_OP_GT { $result = $STR_OP_GT.text; }
- | NUM_OP_GT { $result = $NUM_OP_GT.text; }
+rel_op_gt
+ : STR_OP_GT
+ | NUM_OP_GT
;
-rel_op_gte returns[String result]
- : STR_OP_GTE { $result = $STR_OP_GTE.text; }
- | NUM_OP_GTE { $result = $NUM_OP_GTE.text; }
+rel_op_gte
+ : STR_OP_GTE
+ | NUM_OP_GTE
;
-rel_op_lt returns[String result]
- : STR_OP_LT { $result = $STR_OP_LT.text; }
- | NUM_OP_LT { $result = $NUM_OP_LT.text; }
+rel_op_lt
+ : STR_OP_LT
+ | NUM_OP_LT
;
-rel_op_lte returns[String result]
- : STR_OP_LTE { $result = $STR_OP_LTE.text; }
- | NUM_OP_LTE { $result = $NUM_OP_LTE.text; }
+rel_op_lte
+ : STR_OP_LTE
+ | NUM_OP_LTE
;
rel_str_op
- : STR_OP_EQ { sb.append($STR_OP_EQ.text); }
- | STR_OP_NE { sb.append($STR_OP_NE.text); }
- | STR_OP_GT { sb.append($STR_OP_GT.text); }
- | STR_OP_LT { sb.append($STR_OP_LT.text); }
- | STR_OP_GTE { sb.append($STR_OP_GTE.text); }
- | STR_OP_LTE { sb.append($STR_OP_LTE.text); }
- | STR_OP_MATCHES { sb.append($STR_OP_MATCHES.text); }
+ : STR_OP_EQ
+ | STR_OP_NE
+ | STR_OP_GT
+ | STR_OP_LT
+ | STR_OP_GTE
+ | STR_OP_LTE
+ | STR_OP_MATCHES
;
Added: pig/trunk/src/org/apache/pig/parser/AstPrinter.g
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/parser/AstPrinter.g?rev=1085612&view=auto
==============================================================================
--- pig/trunk/src/org/apache/pig/parser/AstPrinter.g (added)
+++ pig/trunk/src/org/apache/pig/parser/AstPrinter.g Sat Mar 26 00:07:41 2011
@@ -0,0 +1,645 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Grammar file for Pig tree parser (visitor for printing Pig script from Ast).
+ *
+ * NOTE: THIS FILE IS BASED ON QueryParser.g, SO IF YOU CHANGE THAT FILE, YOU WILL
+ * PROBABLY NEED TO MAKE CORRESPONDING CHANGES TO THIS FILE AS WELL.
+ */
+
+tree grammar AstPrinter;
+
+options {
+ tokenVocab=QueryParser;
+ ASTLabelType=CommonTree;
+ output=AST;
+ backtrack=true;
+}
+
+@header {
+package org.apache.pig.parser;
+}
+
+@members {
+
+public String getResult() { return sb.toString(); }
+
+private StringBuilder sb = new StringBuilder();
+
+} // End of @members
+
+@rulecatch {
+catch(RecognitionException re) {
+ throw re;
+}
+}
+
+query : ^( QUERY statement* )
+;
+
+statement : general_statement
+ | split_statement { sb.append(";\n"); }
+;
+
+split_statement : split_clause
+;
+
+// Prints one statement: optional "alias = ", the operator clause, an optional PARALLEL clause, then ";\n".
+general_statement
+ : ^( STATEMENT ( alias { sb.append(" = "); } )?
+ op_clause parallel_clause? ) { sb.append(";\n"); }
+;
+
+parallel_clause
+ : ^( PARALLEL INTEGER ) { sb.append(" ").append($PARALLEL.text).append(" ").append($INTEGER.text); }
+;
+
+alias
+ : IDENTIFIER { sb.append($IDENTIFIER.text); }
+;
+
+op_clause : define_clause
+ | load_clause
+ | group_clause
+ | store_clause
+ | filter_clause
+ | distinct_clause
+ | limit_clause
+ | sample_clause
+ | order_clause
+ | cross_clause
+ | join_clause
+ | union_clause
+ | stream_clause
+ | mr_clause
+ | split_clause
+ | foreach_clause
+;
+
+define_clause
+ : ^( DEFINE IDENTIFIER { sb.append($DEFINE.text).append(" ").append($IDENTIFIER.text).append(" "); }
+ ( cmd | func_clause ) )
+;
+
+cmd
+ : ^( EXECCOMMAND { sb.append($EXECCOMMAND.text); }
+ ( ship_clause | cache_caluse | input_clause | output_clause | error_clause )* )
+;
+
+ship_clause
+ : ^( SHIP { sb.append(" ").append($SHIP.text).append(" ("); } path_list? { sb.append(")"); } )
+;
+
+path_list
+ : a=QUOTEDSTRING { sb.append(" ").append($a.text); }
+ (b=QUOTEDSTRING { sb.append(", ").append($b.text); } )*
+;
+
+cache_caluse
+ : ^( CACHE { sb.append(" ").append($CACHE.text).append(" ("); } path_list { sb.append(")"); } )
+;
+
+input_clause
+ : ^( INPUT { sb.append(" ").append($INPUT.text).append("("); }
+ stream_cmd ( { sb.append(", "); } stream_cmd)* { sb.append(")"); } )
+;
+
+stream_cmd // prints the stream source, then " USING <func>" only when a func_clause child is present
+ : ^( STDIN { sb.append($STDIN.text); } ( { sb.append(" USING "); } func_clause )? )
+ | ^( STDOUT { sb.append($STDOUT.text); } ( { sb.append(" USING "); } func_clause )? )
+ | ^( QUOTEDSTRING { sb.append($QUOTEDSTRING.text); } ( { sb.append(" USING "); } func_clause )? )
+;
+
+output_clause
+ : ^( OUTPUT { sb.append(" ").append($OUTPUT.text).append(" ("); }
+ stream_cmd ( { sb.append(","); } stream_cmd)* { sb.append(")"); } )
+;
+
+error_clause // prints " <stderr token> (" then an optional path with optional " LIMIT <n>", then ")"
+ : ^( STDERROR { sb.append(" ").append($STDERROR.text).append(" ("); }
+ ( QUOTEDSTRING { sb.append($QUOTEDSTRING.text); } (INTEGER { sb.append(" LIMIT ").append($INTEGER.text); } )? )? { sb.append(")"); } )
+;
+
+load_clause
+ : ^( LOAD { sb.append($LOAD.text).append(" "); } filename
+ ( { sb.append(" USING "); } func_clause)? as_clause? )
+;
+
+filename
+ : QUOTEDSTRING { sb.append($QUOTEDSTRING.text); }
+;
+
+as_clause
+ : ^( AS { sb.append(" ").append($AS.text).append(" "); } field_def_list )
+;
+
+field_def
+ : ^( FIELD_DEF IDENTIFIER { sb.append($IDENTIFIER.text); } ( {sb.append(":"); } type)? )
+;
+
+field_def_list
+ : { sb.append("("); } field_def ( { sb.append(", "); } field_def )+ { sb.append(")"); }
+ | field_def
+;
+
+type : simple_type | tuple_type | bag_type | map_type
+;
+
+simple_type
+ : INT { sb.append($INT.text); }
+ | LONG { sb.append($LONG.text); }
+ | FLOAT { sb.append($FLOAT.text); }
+ | DOUBLE { sb.append($DOUBLE.text); }
+ | CHARARRAY { sb.append($CHARARRAY.text); }
+ | BYTEARRAY { sb.append($BYTEARRAY.text); }
+;
+
+tuple_type
+ : ^( TUPLE_TYPE field_def_list? )
+;
+
+bag_type
+ : ^( BAG_TYPE { sb.append("bag{"); } ( { sb.append("T:"); } tuple_type )? ) { sb.append("}"); }
+;
+
+map_type : ^( MAP_TYPE { sb.append("map["); } type? ) { sb.append("]"); }
+;
+
+func_clause
+ : ^( FUNC_REF func_name )
+ | ^( FUNC func_name { sb.append("("); } func_args? { sb.append(")"); } )
+;
+
+func_name
+ : eid ( ( PERIOD { sb.append($PERIOD.text); } | DOLLAR { sb.append($DOLLAR.text); } ) eid )*
+;
+
+func_args
+ : a=QUOTEDSTRING { sb.append($a.text); }
+ (b=QUOTEDSTRING { sb.append(", ").append($b.text); } )*
+;
+
+group_clause
+ : ^( ( GROUP { sb.append($GROUP.text).append(" "); } | COGROUP { sb.append($COGROUP.text).append(" "); } )
+ group_item ( { sb.append(", "); } group_item )*
+ ( { sb.append(" USING "); } group_type )?
+ partition_clause?
+ )
+;
+
+group_type : QUOTEDSTRING { sb.append($QUOTEDSTRING.text); }
+;
+
+group_item
+ : rel ( join_group_by_clause
+ | ALL { sb.append(" ").append($ALL.text); } | ANY { sb.append(" ").append($ANY.text); } )
+ ( INNER { sb.append(" ").append($INNER.text); } | OUTER { sb.append(" ").append($OUTER.text); } )?
+;
+
+rel
+ : alias
+ | { sb.append(" ("); } op_clause { sb.append(") "); }
+;
+
+flatten_generated_item
+ : ( flatten_clause | expr | STAR { sb.append(" ").append($STAR.text); } ) ( { sb.append(" AS "); } field_def_list)?
+;
+
+flatten_clause
+ : ^( FLATTEN { sb.append($FLATTEN.text).append("("); } expr { sb.append(") "); } )
+;
+
+store_clause
+ : ^( STORE { sb.append($STORE.text).append(" "); } rel { sb.append(" INTO "); } filename ( { sb.append(" USING "); } func_clause)? )
+;
+
+filter_clause
+ : ^( FILTER { sb.append($FILTER.text).append(" "); } rel { sb.append(" BY ("); } cond { sb.append(")"); } )
+;
+
+cond
+ : ^( OR { sb.append("("); } cond { sb.append(") ").append($OR.text).append(" ("); } cond { sb.append(")"); } )
+ | ^( AND { sb.append("("); } cond { sb.append(") ").append($AND.text).append(" ("); } cond { sb.append(")"); } )
+ | ^( NOT { sb.append(" ").append($NOT.text).append(" ("); } cond { sb.append(")"); } )
+ | ^( NULL expr { sb.append(" IS "); } (NOT { sb.append($NOT.text).append(" "); } )? { sb.append($NULL.text); } )
+ | ^( rel_op expr { sb.append(" ").append($rel_op.result).append(" "); } expr )
+ | func_eval
+;
+
+func_eval
+ : ^( FUNC_EVAL func_name { sb.append("("); } real_arg ( { sb.append(", "); } real_arg)* { sb.append(")"); } )
+ | ^( FUNC_EVAL func_name { sb.append("()"); } )
+;
+
+real_arg
+ : expr | STAR { sb.append($STAR.text); }
+;
+
+expr
+ : ^( PLUS expr { sb.append(" ").append($PLUS.text).append(" "); } expr )
+ | ^( MINUS expr { sb.append(" ").append($MINUS.text).append(" "); } expr )
+ | ^( STAR expr { sb.append(" ").append($STAR.text).append(" "); } expr )
+ | ^( DIV expr { sb.append(" ").append($DIV.text).append(" "); } expr )
+ | ^( PERCENT expr { sb.append(" ").append($PERCENT.text).append(" "); } expr )
+ | ^( CAST_EXPR { sb.append("("); } type { sb.append(")"); } expr )
+ | const_expr
+ | var_expr
+ | ^( NEG { sb.append($NEG.text); } expr )
+ | ^( CAST_EXPR { sb.append("("); } type_cast { sb.append(")"); } expr )
+ | ^( EXPR_IN_PAREN { sb.append("("); } expr { sb.append(")"); } )
+;
+
+type_cast
+ : simple_type | map_type | tuple_type_cast | bag_type_cast
+;
+
+tuple_type_cast
+ : ^( TUPLE_TYPE_CAST { sb.append("tuple("); } type_cast ( {sb.append(", "); } type_cast)* {sb.append(")"); } )
+ | ^( TUPLE_TYPE_CAST { sb.append("tuple("); } type_cast? {sb.append(")"); } )
+;
+
+bag_type_cast
+ : ^( BAG_TYPE_CAST { sb.append("bag{"); } tuple_type_cast? {sb.append("}"); } )
+;
+
+var_expr
+ : projectable_expr ( dot_proj | pound_proj )*
+;
+
+projectable_expr
+ : func_eval | col_ref | bin_expr
+;
+
+dot_proj
+ : ^( PERIOD { sb.append(".("); } col_alias_or_index ( { sb.append(", "); } col_alias_or_index)* { sb.append(")"); } )
+;
+
+col_alias_or_index : col_alias | col_index
+;
+
+col_alias
+ : GROUP { sb.append($GROUP.text); }
+ | scoped_col_alias
+;
+
+scoped_col_alias
+ : ^( SCOPED_ALIAS a=IDENTIFIER {
+ sb.append($a.text);
+ }
+ (b=IDENTIFIER { sb.append("::").append($b.text); })* )
+;
+
+col_index
+ : DOLLARVAR { sb.append($DOLLARVAR.text); }
+;
+
+pound_proj
+ : ^( POUND { sb.append($POUND.text); }
+ ( QUOTEDSTRING { sb.append($QUOTEDSTRING.text); } | NULL { sb.append($NULL.text); } ) )
+;
+
+bin_expr
+ : ^( BIN_EXPR { sb.append(" ("); } cond { sb.append(" ? "); } expr { sb.append(" : "); } expr { sb.append(") "); } )
+;
+
+limit_clause
+ : ^( LIMIT { sb.append($LIMIT.text).append(" "); } rel
+ ( INTEGER { sb.append(" ").append($INTEGER.text); } | LONGINTEGER { sb.append(" ").append($LONGINTEGER.text); } ) )
+;
+
+sample_clause
+ : ^( SAMPLE { sb.append($SAMPLE.text).append(" "); } rel DOUBLENUMBER { sb.append(" ").append($DOUBLENUMBER.text); } )
+;
+
+order_clause
+ : ^( ORDER { sb.append($ORDER.text).append(" "); } rel
+ { sb.append(" BY "); } order_by_clause
+ ( { sb.append(" USING "); } func_clause )? )
+;
+
+order_by_clause
+ : STAR { sb.append($STAR.text); } ( ASC { sb.append(" ").append($ASC.text); } | DESC { sb.append(" ").append($DESC.text); } )?
+ | order_col ( { sb.append(", "); } order_col)*
+;
+
+order_col
+ : col_ref ( ASC { sb.append(" ").append($ASC.text); } | DESC { sb.append(" ").append($DESC.text); } )?
+;
+
+distinct_clause
+ : ^( DISTINCT { sb.append($DISTINCT.text).append(" "); } rel partition_clause? )
+;
+
+partition_clause
+ : ^( PARTITION { sb.append(" ").append($PARTITION.text).append(" BY "); } func_name )
+;
+
+cross_clause
+ : ^( CROSS { sb.append($CROSS.text).append(" "); } rel_list partition_clause? )
+;
+
+rel_list
+ : rel ( { sb.append(", "); } rel)*
+;
+
+join_clause
+ : ^( JOIN { sb.append($JOIN.text).append(" "); } join_sub_clause ( { sb.append(" USING "); } join_type )?
+ ( partition_clause )? )
+;
+
+join_type : QUOTEDSTRING { sb.append($QUOTEDSTRING.text); }
+;
+
+join_sub_clause
+ : join_item ( LEFT { sb.append(" ").append($LEFT.text); }
+ | RIGHT { sb.append(" ").append($RIGHT.text); }
+ | FULL { sb.append(" ").append($FULL.text); }
+ ) (OUTER { sb.append(" ").append($OUTER.text); } )? { sb.append(", "); } join_item
+ | join_item ( { sb.append(", "); } join_item )*
+;
+
+join_item
+ : ^( JOIN_ITEM rel join_group_by_clause )
+;
+
+join_group_by_clause
+ : ^( BY { sb.append(" ").append($BY.text).append(" ("); }
+ join_group_by_expr ( { sb.append(", "); } join_group_by_expr )* { sb.append(")"); } )
+;
+
+join_group_by_expr
+ : expr | STAR { sb.append($STAR.text); }
+;
+
+union_clause
+ : ^( UNION { sb.append($UNION.text).append(" "); } (ONSCHEMA { sb.append($ONSCHEMA.text).append(" "); } )? rel_list )
+;
+
+foreach_clause
+ : ^( FOREACH { sb.append($FOREACH.text).append(" "); } rel foreach_plan )
+;
+
+foreach_plan
+ : ^( FOREACH_PLAN_SIMPLE generate_clause )
+ | ^( FOREACH_PLAN_COMPLEX nested_blk )
+;
+
+nested_blk
+ : { sb.append(" { "); } (nested_command { sb.append("; "); } )* generate_clause { sb.append("; } "); }
+;
+
+generate_clause
+ : ^( GENERATE { sb.append(" ").append($GENERATE.text).append(" "); }
+ flatten_generated_item ( { sb.append(", "); } flatten_generated_item)* )
+;
+
+nested_command
+ : ^( NESTED_CMD IDENTIFIER { sb.append($IDENTIFIER.text).append(" = "); } nested_op )
+ | ^( NESTED_CMD_ASSI IDENTIFIER { sb.append($IDENTIFIER.text).append(" = "); } expr )
+;
+
+nested_op : nested_proj
+ | nested_filter
+ | nested_sort
+ | nested_distinct
+ | nested_limit
+;
+
+nested_proj
+ : ^( NESTED_PROJ col_ref { sb.append(".("); } col_ref ( { sb.append(", "); } col_ref)* { sb.append(")"); } )
+;
+
+nested_filter
+ : ^( FILTER { sb.append($FILTER.text).append(" "); } nested_op_input { sb.append(" BY "); } cond )
+;
+
+nested_sort
+ : ^( ORDER { sb.append($ORDER.text).append(" "); } nested_op_input
+ { sb.append(" BY "); } order_by_clause ( { sb.append(" USING "); } func_clause)? )
+;
+
+nested_distinct
+ : ^( DISTINCT { sb.append($DISTINCT.text).append(" "); } nested_op_input )
+;
+
+nested_limit
+ : ^( LIMIT { sb.append($LIMIT.text).append(" "); } nested_op_input INTEGER { sb.append(" ").append($INTEGER.text); } )
+;
+
+nested_op_input : col_ref | nested_proj
+;
+
+stream_clause
+ : ^( STREAM { sb.append($STREAM.text).append(" "); } rel { sb.append(" THROUGH "); }
+ ( EXECCOMMAND { sb.append($EXECCOMMAND.text); }
+ | IDENTIFIER { sb.append($IDENTIFIER.text); } ) as_clause? )
+;
+
+mr_clause
+ : ^( MAPREDUCE QUOTEDSTRING { sb.append($MAPREDUCE.text).append(" ").append($QUOTEDSTRING.text).append(" "); }
+ ({ sb.append(" ("); } path_list { sb.append(") "); } )? store_clause { sb.append(" "); } load_clause
+ (EXECCOMMAND { sb.append(" ").append($EXECCOMMAND.text); } )? )
+;
+
+split_clause
+ : ^( SPLIT { sb.append($SPLIT.text).append(" "); }
+ rel { sb.append(" INTO "); } split_branch ( { sb.append(", "); } split_branch)+ )
+;
+
+split_branch
+ : ^( SPLIT_BRANCH alias { sb.append(" IF "); } cond )
+;
+
+col_ref : alias_col_ref | dollar_col_ref
+;
+
+alias_col_ref
+ : GROUP { sb.append($GROUP.text); }
+ | scoped_alias_col_ref
+;
+
+scoped_alias_col_ref
+ : ^( SCOPED_ALIAS name=IDENTIFIER {
+ sb.append($name.text);
+ }
+ (name1=IDENTIFIER { sb.append("::").append($name1.text); }
+ )* )
+;
+
+dollar_col_ref
+ : DOLLARVAR { sb.append($DOLLARVAR.text); }
+;
+
+const_expr : literal
+;
+
+literal : scalar | map | bag | tuple
+;
+
+scalar : num_scalar
+ | QUOTEDSTRING { sb.append($QUOTEDSTRING.text); }
+ | NULL { sb.append($NULL.text); }
+;
+
+num_scalar : ( MINUS { sb.append( "-" ); } )? // optional sign, then the literal's text
+ ( INTEGER { sb.append($INTEGER.text); }
+ | LONGINTEGER { sb.append($LONGINTEGER.text); }
+ | FLOATNUMBER { sb.append($FLOATNUMBER.text); }
+ | DOUBLENUMBER { sb.append($DOUBLENUMBER.text); }
+ )
+;
+
+map
+ : ^( MAP_VAL { sb.append("["); } keyvalue ( { sb.append(", "); } keyvalue)* { sb.append("]"); } )
+ | ^( MAP_VAL { sb.append("[]"); } )
+;
+
+keyvalue
+ : ^( KEY_VAL_PAIR map_key { sb.append("#"); } const_expr )
+;
+
+map_key : QUOTEDSTRING { sb.append($QUOTEDSTRING.text); }
+;
+
+bag
+ : ^( BAG_VAL { sb.append("{"); } tuple ( { sb.append(", "); } tuple)* { sb.append("}"); } )
+ | ^( BAG_VAL { sb.append("{}"); } )
+;
+
+tuple
+ : ^( TUPLE_VAL { sb.append("("); } literal ( { sb.append(", "); } literal)* { sb.append(")"); } )
+ | ^( TUPLE_VAL { sb.append("()"); } )
+;
+
+// extended identifier, handling the keyword and identifier conflicts. Ugly but there is no other choice.
+eid : rel_str_op
+ | IMPORT { sb.append($IMPORT.text); }
+ | RETURNS { sb.append($RETURNS.text); }
+ | DEFINE { sb.append($DEFINE.text); }
+ | LOAD { sb.append($LOAD.text); }
+ | FILTER { sb.append($FILTER.text); }
+ | FOREACH { sb.append($FOREACH.text); }
+ | MATCHES { sb.append($MATCHES.text); }
+ | ORDER { sb.append($ORDER.text); }
+ | DISTINCT { sb.append($DISTINCT.text); }
+ | COGROUP { sb.append($COGROUP.text); }
+ | JOIN { sb.append($JOIN.text); }
+ | CROSS { sb.append($CROSS.text); }
+ | UNION { sb.append($UNION.text); }
+ | SPLIT { sb.append($SPLIT.text); }
+ | INTO { sb.append($INTO.text); }
+ | IF { sb.append($IF.text); }
+ | ALL { sb.append($ALL.text); }
+ | AS { sb.append($AS.text); }
+ | BY { sb.append($BY.text); }
+ | USING { sb.append($USING.text); }
+ | INNER { sb.append($INNER.text); }
+ | OUTER { sb.append($OUTER.text); }
+ | PARALLEL { sb.append($PARALLEL.text); }
+ | PARTITION { sb.append($PARTITION.text); }
+ | GROUP { sb.append($GROUP.text); }
+ | AND { sb.append($AND.text); }
+ | OR { sb.append($OR.text); }
+ | NOT { sb.append($NOT.text); }
+ | GENERATE { sb.append($GENERATE.text); }
+ | FLATTEN { sb.append($FLATTEN.text); }
+ | EVAL { sb.append($EVAL.text); }
+ | ASC { sb.append($ASC.text); }
+ | DESC { sb.append($DESC.text); }
+ | INT { sb.append($INT.text); }
+ | LONG { sb.append($LONG.text); }
+ | FLOAT { sb.append($FLOAT.text); }
+ | DOUBLE { sb.append($DOUBLE.text); }
+ | CHARARRAY { sb.append($CHARARRAY.text); }
+ | BYTEARRAY { sb.append($BYTEARRAY.text); }
+ | BAG { sb.append($BAG.text); }
+ | TUPLE { sb.append($TUPLE.text); }
+ | MAP { sb.append($MAP.text); }
+ | IS { sb.append($IS.text); }
+ | NULL { sb.append($NULL.text); }
+ | STREAM { sb.append($STREAM.text); }
+ | THROUGH { sb.append($THROUGH.text); }
+ | STORE { sb.append($STORE.text); }
+ | MAPREDUCE { sb.append($MAPREDUCE.text); }
+ | SHIP { sb.append($SHIP.text); }
+ | CACHE { sb.append($CACHE.text); }
+ | INPUT { sb.append($INPUT.text); }
+ | OUTPUT { sb.append($OUTPUT.text); }
+ | ERROR { sb.append($ERROR.text); }
+ | STDIN { sb.append($STDIN.text); }
+ | STDOUT { sb.append($STDOUT.text); }
+ | LIMIT { sb.append($LIMIT.text); }
+ | SAMPLE { sb.append($SAMPLE.text); }
+ | LEFT { sb.append($LEFT.text); }
+ | RIGHT { sb.append($RIGHT.text); }
+ | FULL { sb.append($FULL.text); }
+ | IDENTIFIER { sb.append($IDENTIFIER.text); }
+;
+
+// relational operator
+rel_op returns[String result]
+ : rel_op_eq { $result = $rel_op_eq.result; }
+ | rel_op_ne { $result = $rel_op_ne.result; }
+ | rel_op_gt { $result = $rel_op_gt.result; }
+ | rel_op_gte { $result = $rel_op_gte.result; }
+ | rel_op_lt { $result = $rel_op_lt.result; }
+ | rel_op_lte { $result = $rel_op_lte.result; }
+ | STR_OP_MATCHES { $result = $STR_OP_MATCHES.text; }
+;
+
+rel_op_eq returns[String result]
+ : STR_OP_EQ { $result = $STR_OP_EQ.text; }
+ | NUM_OP_EQ { $result = $NUM_OP_EQ.text; }
+;
+
+rel_op_ne returns[String result]
+ : STR_OP_NE { $result = $STR_OP_NE.text; }
+ | NUM_OP_NE { $result = $NUM_OP_NE.text; }
+;
+
+rel_op_gt returns[String result]
+ : STR_OP_GT { $result = $STR_OP_GT.text; }
+ | NUM_OP_GT { $result = $NUM_OP_GT.text; }
+;
+
+rel_op_gte returns[String result]
+ : STR_OP_GTE { $result = $STR_OP_GTE.text; }
+ | NUM_OP_GTE { $result = $NUM_OP_GTE.text; }
+;
+
+rel_op_lt returns[String result]
+ : STR_OP_LT { $result = $STR_OP_LT.text; }
+ | NUM_OP_LT { $result = $NUM_OP_LT.text; }
+;
+
+rel_op_lte returns[String result]
+ : STR_OP_LTE { $result = $STR_OP_LTE.text; }
+ | NUM_OP_LTE { $result = $NUM_OP_LTE.text; }
+;
+
+rel_str_op
+ : STR_OP_EQ { sb.append($STR_OP_EQ.text); }
+ | STR_OP_NE { sb.append($STR_OP_NE.text); }
+ | STR_OP_GT { sb.append($STR_OP_GT.text); }
+ | STR_OP_LT { sb.append($STR_OP_LT.text); }
+ | STR_OP_GTE { sb.append($STR_OP_GTE.text); }
+ | STR_OP_LTE { sb.append($STR_OP_LTE.text); }
+ | STR_OP_MATCHES { sb.append($STR_OP_MATCHES.text); }
+;
+
Modified: pig/trunk/src/org/apache/pig/parser/AstValidator.g
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/parser/AstValidator.g?rev=1085612&r1=1085611&r2=1085612&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/parser/AstValidator.g (original)
+++ pig/trunk/src/org/apache/pig/parser/AstValidator.g Sat Mar 26 00:07:41 2011
@@ -463,9 +463,9 @@ split_clause : ^( SPLIT rel split_branch
;
split_branch
- : ^( SPLIT_BRANCH IDENTIFIER cond )
+ : ^( SPLIT_BRANCH alias cond )
{
- aliases.add( $IDENTIFIER.text );
+ aliases.add( $alias.name );
}
;
@@ -510,6 +510,8 @@ tuple : ^( TUPLE_VAL literal* )
// extended identifier, handling the keyword and identifier conflicts. Ugly but there is no other choice.
eid : rel_str_op
+ | IMPORT
+ | RETURNS
| DEFINE
| LOAD
| FILTER
Modified: pig/trunk/src/org/apache/pig/parser/LogicalPlanGenerator.g
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/parser/LogicalPlanGenerator.g?rev=1085612&r1=1085611&r2=1085612&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/parser/LogicalPlanGenerator.g (original)
+++ pig/trunk/src/org/apache/pig/parser/LogicalPlanGenerator.g Sat Mar 26 00:07:41 2011
@@ -1243,9 +1243,9 @@ scope GScope;
LogicalExpressionPlan splitPlan = new LogicalExpressionPlan();
$GScope::currentOp = builder.createSplitOutputOp();
}
- : ^( SPLIT_BRANCH IDENTIFIER cond[splitPlan] )
+ : ^( SPLIT_BRANCH alias cond[splitPlan] )
{
- builder.buildSplitOutputOp( (LOSplitOutput)$GScope::currentOp, $IDENTIFIER.text,
+ builder.buildSplitOutputOp( (LOSplitOutput)$GScope::currentOp, $alias.name,
$statement::inputAlias, splitPlan );
}
;
@@ -1424,6 +1424,8 @@ tuple returns[Tuple value]
// extended identifier, handling the keyword and identifier conflicts. Ugly but there is no other choice.
eid returns[String id] : rel_str_op { $id = $rel_str_op.id; }
+ | IMPORT { $id = $IMPORT.text; }
+ | RETURNS { $id = $RETURNS.text; }
| DEFINE { $id = $DEFINE.text; }
| LOAD { $id = $LOAD.text; }
| FILTER { $id = $FILTER.text; }
Modified: pig/trunk/src/org/apache/pig/parser/PigMacro.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/parser/PigMacro.java?rev=1085612&r1=1085611&r2=1085612&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/parser/PigMacro.java (original)
+++ pig/trunk/src/org/apache/pig/parser/PigMacro.java Sat Mar 26 00:07:41 2011
@@ -28,21 +28,21 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
-import org.antlr.runtime.ANTLRReaderStream;
import org.antlr.runtime.CharStream;
import org.antlr.runtime.CommonTokenStream;
-import org.antlr.runtime.Token;
+import org.antlr.runtime.RecognitionException;
+import org.antlr.runtime.tree.CommonTree;
import org.antlr.runtime.tree.CommonTreeNodeStream;
import org.antlr.runtime.tree.Tree;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.pig.tools.parameters.ParameterSubstitutionPreprocessor;
-import org.apache.pig.tools.parameters.ParseException;
public class PigMacro {
private static final Log LOG = LogFactory.getLog(PigMacro.class);
+ private String fileName;
private String name;
private String body;
private List<String> params;
@@ -54,13 +54,16 @@ public class PigMacro {
this.name = name;
this.params = new ArrayList<String>();
this.rets = new ArrayList<String>();
- LOG.info("Macro '" + name + "' is defined");
+ LOG.debug("Macro '" + name + "' is defined");
}
+ public void setFile(String file) {
+ this.fileName = file;
+ }
+
public void setBody(String body, Map<String, PigMacro> seen) {
this.body = body;
this.seen = new HashMap<String, PigMacro>(seen);
- expandBody();
}
public void addParam(String param) {
@@ -73,39 +76,42 @@ public class PigMacro {
public String getName() { return name; }
- public String getBody() { return body; }
-
- public List<String> getParams() { return params; }
-
- public List<String> getReturns() { return rets; }
-
- public String inline(String[] inputs, String[] outputs) {
- String in = substituteParams(inputs, outputs);
+ public CommonTree inline(String[] inputs, String[] outputs, int lineNumber,
+ String file) throws ParserException {
+ String in = substituteParams(inputs, outputs, lineNumber, file);
+
Set<String> masks = new HashSet<String>();
if (inputs != null) {
for (String s : inputs) {
masks.add(s);
}
}
+
for (String s : outputs) {
masks.add(s);
}
- return maskAlias(in, masks);
+
+ return maskAlias(in, masks, lineNumber, file);
}
- public String substituteParams(String[] inputs, String[] outputs) {
+ private String substituteParams(String[] inputs, String[] outputs,
+ int line, String file) throws ParserException {
if ((inputs == null && !params.isEmpty())
|| (inputs != null && inputs.length != params.size())) {
- throw new RuntimeException("Failed to expand macro '" + name
- + "': expected number of parameters: " + params.size()
- + " actual number of inputs: "
- + ((inputs == null) ? 0 : inputs.length));
+ String msg = getErrorMessage(file, line,
+ "Failed to expand macro '" + name + "'",
+ "Expected number of parameters: " + params.size()
+ + " actual number of inputs: "
+ + ((inputs == null) ? 0 : inputs.length));
+ throw new ParserException(msg); // same checked type as the return-alias count check below
}
if (outputs == null || outputs.length != rets.size()) {
- throw new RuntimeException("Failed to expand macro '" + name
- + "': expected number of return aliases: " + rets.size()
- + " actual number of return values: "
- + ((outputs == null) ? 0 : outputs.length));
+ String msg = getErrorMessage(file, line, "Failed to expand macro '"
+ + name + "'",
+ "Expected number of return aliases: " + rets.size()
+ + " actual number of return values: "
+ + ((outputs == null) ? 0 : outputs.length));
+ throw new ParserException(msg);
}
String[] args = new String[params.size() + rets.size()];
@@ -125,66 +131,172 @@ public class PigMacro {
ParameterSubstitutionPreprocessor psp = new ParameterSubstitutionPreprocessor(
50);
psp.genSubstitutedFile(in, writer, args, null);
- } catch (ParseException e) {
- throw new RuntimeException(
- "Parameter substitution failed for macro " + name, e);
- }
+ } catch (Exception e) {
+ // catch both ParseException (from the parameter substitution) and RuntimeException
+ String msg = getErrorMessage(file, line,
+ "Macro inline failed for macro '" + name + "'",
+ e.getMessage() + "\n Macro content: " + body);
+ throw new ParserException(msg); // NOTE(review): only e.getMessage() is kept; the cause is not chained
+ }
LOG.debug("--- after substition:\n" + writer.toString());
return writer.toString();
}
-
- public String maskAlias(String in, Set<String> masks) {
- String resultString = "";
+
+ private CommonTree maskAlias(String in, Set<String> masks, int line,
+ String file) throws ParserException {
+ CharStream input = null;
try {
- CharStream input = new QueryParserStringStream(in);
- QueryLexer lex = new QueryLexer(input);
- CommonTokenStream tokens = new CommonTokenStream(lex);
-
- QueryParser parser = new QueryParser(tokens);
- QueryParser.query_return result = parser.query();
-
- Tree ast = (Tree)result.getTree();
+ // parse macro body into ast
+ input = new QueryParserStringStream(in);
+ } catch (IOException e) {
+ String msg = getErrorMessage(file, line, "Failed to inline macro '"
+ + name + "'", e.getMessage() + "\nmacro content: " + in);
+ throw new ParserException(msg);
+ }
+
+ QueryLexer lex = new QueryLexer(input);
+ CommonTokenStream tokens = new CommonTokenStream(lex);
- LOG.debug(ast.toStringTree());
-
- CommonTreeNodeStream nodes = new CommonTreeNodeStream(ast);
- AliasMasker walker = new AliasMasker(nodes);
- walker.setParams(masks, name, idx++);
-
- walker.query();
+ QueryParser.query_return result = null;
+ QueryParser parser = QueryParserUtils.createParser(tokens);
+
+ try {
+ result = parser.query();
+ } catch (RecognitionException e) {
+ String msg = (fileName == null) ? parser.getErrorHeader(e)
+ : QueryParserUtils.generateErrorHeader(e, fileName);
+ msg += " " + parser.getErrorMessage(e, parser.getTokenNames());
+ String msg2 = getErrorMessage(file, line, "Failed to parse macro '"
+ + name + "'", msg + "\nmacro content: " + in);
+ throw new ParserException(msg2);
+ }
- LOG.debug("--- walk: \n" + walker.getResult());
+ CommonTree ast = (CommonTree)result.getTree();
+
+ LOG.debug("AST for macro '" + name + "':\n" + ast.toStringTree());
+
+ List<CommonTree> macroDefNodes = new ArrayList<CommonTree>();
+ traverseMacro(ast, macroDefNodes, "MACRO_DEF");
+ if (!macroDefNodes.isEmpty()) { // a macro body must not itself define macros
+ String fname = ((PigParserNode)ast).getFileName();
+ String msg = getErrorMessage(fname, ast.getLine(),
+ "Invalid macro definition", "macro '" + name
+ + "' contains macro definition.\nmacro content: "
+ + body);
+ throw new ParserException(msg);
+ }
+
+ // recursively expand the inline macros
+ List<CommonTree> inlineNodes = new ArrayList<CommonTree>();
+ traverseMacro(ast, inlineNodes, "MACRO_INLINE");
+
+ for (CommonTree t : inlineNodes) {
+ CommonTree newTree = macroInline(t,
+ new ArrayList<PigMacro>(seen.values()));
+ QueryParserUtils.replaceNodeWithNodeList(t, newTree, null);
+ }
+
+ // mask the aliases in the inlined macro
+ CommonTreeNodeStream nodes = new CommonTreeNodeStream(ast);
+ AliasMasker walker = new AliasMasker(nodes);
+ walker.setParams(masks, name, idx++);
- resultString = walker.getResult();
- } catch (Exception e) {
- throw new RuntimeException(
- "Query parsing failed for macro " + name, e);
+ AliasMasker.query_return result2 = null;
+ CommonTree commonTree = null;
+
+ try {
+ result2 = walker.query();
+ } catch (RecognitionException e) {
+ String msg = walker.getErrorHeader(e) + " "
+ + walker.getErrorMessage(e, walker.getTokenNames());
+ String msg2 = getErrorMessage(file, line, "Failed to mask macro '"
+ + name + "'", msg + "\nmacro content: " + in);
+ throw new ParserException(msg2);
}
+
+ commonTree = result2.tree;
- return resultString;
+ LOG.debug("AST for masked macro '" + name + "':\n"
+ + commonTree.toStringTree());
+
+ return commonTree;
}
- private void expandBody() {
- // expand macros
- boolean done = false;
-
- while (!done) {
- StringReader srd = new StringReader(body);
- ANTLRReaderStream input;
- try {
- input = new ANTLRReaderStream(srd);
- } catch (IOException e) {
- throw new RuntimeException("Failed to read ", e);
+ private static void traverseMacro(Tree t, List<CommonTree> nodes,
+ String nodeType) { // appends to nodes, in pre-order, every node whose text equals nodeType
+ if (nodeType.equals(t.getText())) { // constant-first: getText() is null for a token-less (nil) node
+ nodes.add((CommonTree) t);
+ }
+ int n = t.getChildCount();
+ for (int i = 0; i < n; i++) {
+ Tree t0 = t.getChild(i);
+ traverseMacro(t0, nodes, nodeType);
+ }
+ }
+
+ /*
+ * Macro inline nodes have the following form:
+ *
+ * (MACRO_INLINE <name> (RETURN_VAL <values>) (PARAMS <values>))
+ *
+ * Child nodes:
+ * 0: macro name
+ * 1: list of return values
+ * 2: list of parameters
+ */
+ static CommonTree macroInline(CommonTree t, List<PigMacro> macroDefs)
+ throws ParserException {
+ // get name
+ String mn = t.getChild(0).getText();
+
+ // get macroDef
+ PigMacro macro = null;
+ for (PigMacro pm : macroDefs) {
+ if (pm.getName().equals(mn)) {
+ macro = pm;
+ break;
}
- MacroRecursion expander = new MacroRecursion(input);
- expander.setMacros(seen);
- Token token = Token.EOF_TOKEN;
- while ((token = expander.nextToken()) != Token.EOF_TOKEN);
+ }
+
+ String file = ((PigParserNode)t).getFileName();
- body = expander.getResultString();
- done = !expander.isExpanded();
+ if (macro == null) {
+ String msg = getErrorMessage(file, t.getLine(),
+ "Cannot expand macro '" + mn + "'",
+ "Macro must be defined before expansion.");
+ throw new ParserException(msg);
+ }
+
+ // get return values
+ int n = t.getChild(1).getChildCount();
+ String[] rets = new String[n];
+ for (int i = 0; i < n; i++) {
+ rets[i] = t.getChild(1).getChild(i).getText();
+ }
+
+ // get parameters
+ int m = t.getChild(2).getChildCount();
+ String[] params = new String[m];
+ for (int i = 0; i < m; i++) {
+ params[i] = t.getChild(2).getChild(i).getText();
+ }
+
+ return macro.inline(params, rets, t.getLine(), file);
+ }
+
+ private static String getErrorMessage(String file, int line, String header,
+ String reason) {
+ StringBuilder sb = new StringBuilder();
+ sb.append("<");
+ if (file != null) {
+ sb.append("at ").append(file).append(", ");
+ }
+ sb.append("line ").append(line).append("> ").append(header);
+ if (reason != null) {
+ sb.append(". Reason: ").append(reason);
}
+ return sb.toString();
}
}
Added: pig/trunk/src/org/apache/pig/parser/PigParserNode.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/parser/PigParserNode.java?rev=1085612&view=auto
==============================================================================
--- pig/trunk/src/org/apache/pig/parser/PigParserNode.java (added)
+++ pig/trunk/src/org/apache/pig/parser/PigParserNode.java Sat Mar 26 00:07:41 2011
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pig.parser;
+
+import org.antlr.runtime.Token;
+import org.antlr.runtime.tree.CommonTree;
+import org.antlr.runtime.tree.Tree;
+
+/**
+ * A {@link CommonTree} node that additionally records the name of the
+ * script file the node belongs to.
+ */
+public class PigParserNode extends CommonTree {
+
+    // the script file this node belongs to; stays null until a name is set
+    private String fileName = null;
+
+    /**
+     * Creates a node for the given token. The file name is initially null.
+     *
+     * @param t the token this node wraps
+     */
+    public PigParserNode(Token t) {
+        super(t);
+    }
+
+    /**
+     * Copy constructor: duplicates the given node and carries over its
+     * file name.
+     *
+     * @param node the node to copy
+     */
+    public PigParserNode(PigParserNode node) {
+        super(node);
+        // Assign the field directly: calling the overridable setter from a
+        // constructor would run subclass code on a partially built object.
+        this.fileName = node.getFileName();
+    }
+
+    /**
+     * Returns a copy of this node (including its file name) so that tree
+     * duplication yields PigParserNode instances rather than plain
+     * CommonTree nodes.
+     */
+    @Override
+    public Tree dupNode() {
+        return new PigParserNode(this);
+    }
+
+    /**
+     * @param fileName name of the script file this node belongs to; may be null
+     */
+    public void setFileName(String fileName) {
+        this.fileName = fileName;
+    }
+
+    /**
+     * @return name of the script file this node belongs to, or null if none was set
+     */
+    public String getFileName() {
+        return fileName;
+    }
+
+}
Added: pig/trunk/src/org/apache/pig/parser/PigParserNodeAdaptor.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/parser/PigParserNodeAdaptor.java?rev=1085612&view=auto
==============================================================================
--- pig/trunk/src/org/apache/pig/parser/PigParserNodeAdaptor.java (added)
+++ pig/trunk/src/org/apache/pig/parser/PigParserNodeAdaptor.java Sat Mar 26 00:07:41 2011
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pig.parser;
+
+import org.antlr.runtime.Token;
+import org.antlr.runtime.tree.CommonTreeAdaptor;
+
+/**
+ * A {@link CommonTreeAdaptor} whose {@link #create(Token)} produces
+ * {@link PigParserNode} instances.
+ */
+public class PigParserNodeAdaptor extends CommonTreeAdaptor {
+
+    /**
+     * Wraps the given token in a new {@link PigParserNode}.
+     *
+     * @param t the token to wrap
+     * @return the newly created node
+     */
+    @Override
+    public Object create(Token t) {
+        final PigParserNode node = new PigParserNode(t);
+        return node;
+    }
+
+}
Modified: pig/trunk/src/org/apache/pig/parser/QueryLexer.g
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/parser/QueryLexer.g?rev=1085612&r1=1085611&r2=1085612&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/parser/QueryLexer.g (original)
+++ pig/trunk/src/org/apache/pig/parser/QueryLexer.g Sat Mar 26 00:07:41 2011
@@ -57,6 +57,12 @@ public String getErrorHeader(Recognition
} // End of members.
+IMPORT : 'IMPORT' // keyword token; referenced by import_clause (and eid) in QueryParser.g
+;
+
+RETURNS : 'RETURNS' // keyword token; referenced by macro_return_clause (and eid) in QueryParser.g
+;
+
DEFINE : 'DEFINE'
;
@@ -389,4 +395,3 @@ MINUS : '-'
QMARK : '?'
;
-
\ No newline at end of file
Modified: pig/trunk/src/org/apache/pig/parser/QueryParser.g
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/parser/QueryParser.g?rev=1085612&r1=1085611&r2=1085612&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/parser/QueryParser.g (original)
+++ pig/trunk/src/org/apache/pig/parser/QueryParser.g Sat Mar 26 00:07:41 2011
@@ -60,18 +60,29 @@ tokens {
SCOPED_ALIAS;
TUPLE_TYPE_CAST;
BAG_TYPE_CAST;
+ PARAMS;
+ RETURN_VAL;
+ MACRO_DEF;
+ MACRO_BODY;
+ MACRO_INLINE;
}
@header {
package org.apache.pig.parser;
+import java.util.Set;
+import java.util.HashSet;
+import java.util.Collections;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.pig.parser.PigMacro;
}
@members {
private static Log log = LogFactory.getLog( QueryParser.class );
+private Set<String> memory = new HashSet<String>();
+
@Override
protected Object recoverFromMismatchedToken(IntStream input, int ttype, BitSet follow)
throws RecognitionException {
@@ -129,7 +140,15 @@ query : statement*
 statement : SEMI_COLON!
 | general_statement
 | foreach_statement
- | split_statement
+ | split_statement
+ | inline_statement // macro invocation, see inline_clause
+ | import_statement // IMPORT of another script, see import_clause
+;
+
+import_statement : import_clause SEMI_COLON! // the '!' suffix keeps the terminating ';' out of the AST
+;
+
+inline_statement : inline_clause SEMI_COLON! // the '!' suffix keeps the terminating ';' out of the AST
+;
split_statement : split_clause SEMI_COLON!
@@ -155,6 +174,16 @@ foreach_statement : ( alias EQUAL )? for
alias : IDENTIFIER
;
+parameter // one argument of a macro invocation (used by inline_param_clause)
+ : IDENTIFIER
+ | INTEGER
+ | DOUBLENUMBER
+ | QUOTEDSTRING
+;
+
+content : LEFT_CURLY ( content | ~(LEFT_CURLY | RIGHT_CURLY) )* RIGHT_CURLY // brace-balanced run of tokens; a nested '{...}' recurses into content
+;
+
op_clause : define_clause
| load_clause
| group_clause
@@ -171,7 +200,38 @@ op_clause : define_clause
| mr_clause
;
-define_clause : DEFINE^ alias ( cmd | func_clause )
+macro_param_clause : LEFT_PAREN ( alias (COMMA alias)* )? RIGHT_PAREN // formal parameter list of a macro definition; may be empty
+ -> ^(PARAMS alias*) // only the aliases are kept, under a PARAMS root
+;
+
+macro_return_clause : RETURNS alias (COMMA alias)* // one or more return aliases of a macro definition
+ -> ^(RETURN_VAL alias+) // only the aliases are kept, under a RETURN_VAL root
+;
+
+macro_body_clause : content // the brace-delimited body of a macro definition
+ -> ^(MACRO_BODY { new PigParserNode(new CommonToken(1, $content.text)) } ) // the matched text of content becomes the text of one child node; NOTE(review): token type 1 looks like a placeholder — confirm
+;
+
+macro_clause : macro_param_clause macro_return_clause macro_body_clause // macro definition as accepted by define_clause after DEFINE alias
+ -> ^(MACRO_DEF macro_param_clause macro_return_clause macro_body_clause) // same three parts, in source order, under a MACRO_DEF root
+;
+
+inline_return_clause : alias (COMMA alias)* // one or more aliases on the left of '=' in a macro invocation
+ -> ^(RETURN_VAL alias+) // only the aliases are kept, under a RETURN_VAL root
+;
+
+inline_param_clause : LEFT_PAREN ( parameter (COMMA parameter)* )? RIGHT_PAREN // argument list of a macro invocation; may be empty
+ -> ^(PARAMS parameter*) // only the arguments are kept, under a PARAMS root
+;
+
+inline_clause : inline_return_clause EQUAL alias inline_param_clause // source order: returns = name ( arguments )
+ -> ^(MACRO_INLINE alias inline_return_clause inline_param_clause) // AST order differs: name first, then RETURN_VAL, then PARAMS
+;
+
+import_clause : IMPORT^ QUOTEDSTRING // the '^' suffix makes IMPORT the subtree root, with the quoted string as its child
+;
+
+define_clause : DEFINE^ alias ( cmd | func_clause | macro_clause) // the macro_clause alternative lets DEFINE introduce a macro definition
+;
cmd : EXECCOMMAND^ ( ship_clause | cache_caluse | input_clause | output_clause | error_clause )*
@@ -516,7 +576,7 @@ nested_limit : LIMIT^ nested_op_input IN
nested_op_input : col_ref | nested_proj
;
-stream_clause : STREAM^ rel THROUGH! ( EXECCOMMAND | IDENTIFIER ) as_clause?
+stream_clause : STREAM^ rel THROUGH! ( EXECCOMMAND | alias ) as_clause?
;
mr_clause : MAPREDUCE^ QUOTEDSTRING ( LEFT_PAREN! path_list RIGHT_PAREN! )? store_clause load_clause EXECCOMMAND?
@@ -526,8 +586,8 @@ split_clause : SPLIT rel INTO split_bran
-> ^( SPLIT rel split_branch+ )
;
-split_branch : IDENTIFIER IF cond
- -> ^( SPLIT_BRANCH IDENTIFIER cond )
+split_branch : alias IF cond
+ -> ^( SPLIT_BRANCH alias cond )
;
col_ref : alias_col_ref | dollar_col_ref
@@ -583,6 +643,8 @@ tuple : LEFT_PAREN ( literal ( COMMA lit
// extended identifier, handling the keyword and identifier conflicts. Ugly but there is no other choice.
eid : rel_str_op
+ | IMPORT
+ | RETURNS
| DEFINE
| LOAD
| FILTER