You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by th...@apache.org on 2010/11/18 17:07:02 UTC
svn commit: r1036509 - in /pig/trunk:
src/org/apache/pig/parser/QueryParser.g test/findbugsExcludeFile.xml
test/org/apache/pig/parser/TestAST.pig
test/org/apache/pig/parser/TestQueryParser.java
Author: thejas
Date: Thu Nov 18 16:07:01 2010
New Revision: 1036509
URL: http://svn.apache.org/viewvc?rev=1036509&view=rev
Log:
PIG-1618: Switch to new parser generator technology
Added:
pig/trunk/test/org/apache/pig/parser/TestAST.pig
Modified:
pig/trunk/src/org/apache/pig/parser/QueryParser.g
pig/trunk/test/findbugsExcludeFile.xml
pig/trunk/test/org/apache/pig/parser/TestQueryParser.java
Modified: pig/trunk/src/org/apache/pig/parser/QueryParser.g
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/parser/QueryParser.g?rev=1036509&r1=1036508&r2=1036509&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/parser/QueryParser.g (original)
+++ pig/trunk/src/org/apache/pig/parser/QueryParser.g Thu Nov 18 16:07:01 2010
@@ -31,9 +31,7 @@ options {
tokens {
QUERY;
STATEMENT;
- SCHEMA;
FUNC;
- COND;
CAST_EXPR;
BIN_EXPR;
TUPLE_VAL;
@@ -73,20 +71,20 @@ throws RecognitionException {
*/
public String getErrorMessage(RecognitionException e, String[] tokenNames ) {
- List stack = getRuleInvocationStack( e, this.getClass().getName() );
- String msg = null;
- if( e instanceof NoViableAltException ) {
- NoViableAltException nvae = (NoViableAltException)e;
- msg = " no viable alt; token = " + e.token + " (decision=" + nvae.decisionNumber + " state " + nvae.stateNumber + ")" +
- " decision=<<" + nvae.grammarDecisionDescription + ">>";
- } else {
- msg = super.getErrorMessage( e, tokenNames );
- }
- return stack + " " + msg;
+ List stack = getRuleInvocationStack( e, this.getClass().getName() );
+ String msg = null;
+ if( e instanceof NoViableAltException ) {
+ NoViableAltException nvae = (NoViableAltException)e;
+ msg = " no viable alt; token = " + e.token + " (decision=" + nvae.decisionNumber + " state " + nvae.stateNumber + ")" +
+ " decision=<<" + nvae.grammarDecisionDescription + ">>";
+ } else {
+ msg = super.getErrorMessage( e, tokenNames );
+ }
+ return stack + " " + msg;
}
public String getTokenErrorDisplay(Token t) {
- return t.toString();
+ return t.toString();
}
} // End of @members
@@ -100,17 +98,20 @@ catch(RecognitionException e) {
*/
query : statement*
+ -> ^( QUERY statement* )
;
statement : general_statement | foreach_statement
;
general_statement : ( alias EQUAL )? op_clause ( PARALLEL INTEGER )? SEMI_COLON
+ -> ^( STATEMENT alias? op_clause INTEGER? )
;
// We need to handle foreach specifically because of the ending ';', which is not required
// if there is a nested block. This is ugly, but it gets the job done.
foreach_statement : ( alias EQUAL )? foreach_clause
+ -> ^( STATEMENT alias? foreach_clause )
;
alias : IDENTIFIER
@@ -134,58 +135,63 @@ op_clause : define_clause
| split_clause
;
-define_clause : DEFINE alias ( cmd | func_clause )
+define_clause : DEFINE^ alias ( cmd | func_clause )
;
-cmd : EXECCOMMAND ( ship_clause | cache_caluse | input_clause | output_clause | error_clause )*
+cmd : EXECCOMMAND^ ( ship_clause | cache_caluse | input_clause | output_clause | error_clause )*
;
-ship_clause : SHIP LEFT_PAREN path_list? RIGHT_PAREN
+ship_clause : SHIP^ LEFT_PAREN! path_list? RIGHT_PAREN!
;
path_list : QUOTEDSTRING ( COMMA QUOTEDSTRING )*
+ -> QUOTEDSTRING+
;
-cache_caluse : CACHE LEFT_PAREN path_list RIGHT_PAREN
+cache_caluse : CACHE^ LEFT_PAREN! path_list RIGHT_PAREN!
;
-input_clause : INPUT LEFT_PAREN stream_cmd_list RIGHT_PAREN
+input_clause : INPUT^ LEFT_PAREN! stream_cmd_list RIGHT_PAREN!
;
stream_cmd_list : stream_cmd ( COMMA stream_cmd )*
+ -> stream_cmd+
;
-stream_cmd : ( STDIN | STDOUT | QUOTEDSTRING ) ( USING ( func_clause ) )?
+stream_cmd : ( STDIN | STDOUT | QUOTEDSTRING )^ ( USING! ( func_clause ) )?
;
-output_clause : OUTPUT LEFT_PAREN stream_cmd_list RIGHT_PAREN
+output_clause : OUTPUT^ LEFT_PAREN! stream_cmd_list RIGHT_PAREN!
;
-error_clause : ERROR LEFT_PAREN error_cmd? RIGHT_PAREN
+error_clause : ERROR^ LEFT_PAREN! error_cmd? RIGHT_PAREN!
;
-error_cmd : QUOTEDSTRING ( LIMIT INTEGER )?
+error_cmd : QUOTEDSTRING^ ( LIMIT! INTEGER )?
;
-load_clause : LOAD filename ( USING func_clause )? as_clause?
+load_clause : LOAD^ filename ( USING! func_clause )? as_clause?
;
filename : QUOTEDSTRING
;
-as_clause: AS tuple_def
+as_clause: AS! tuple_def
;
tuple_def : tuple_def_full | tuple_def_simple
;
tuple_def_full : LEFT_PAREN field ( COMMA field )* RIGHT_PAREN
+ -> ^( TUPLE_DEF field+ )
;
tuple_def_simple : field
+ -> ^( TUPLE_DEF field )
;
field : IDENTIFIER ( COLON type )?
+ -> ^( FIELD IDENTIFIER type? )
;
type : simple_type | tuple_type | bag_type | map_type
@@ -195,67 +201,74 @@ simple_type : INT | LONG | FLOAT | DOUBL
;
tuple_type : TUPLE? tuple_def_full
+ -> tuple_def_full
;
-bag_type : BAG? LEFT_CURLY tuple_def? RIGHT_CURLY
+bag_type : BAG? LEFT_CURLY! tuple_def? RIGHT_CURLY!
;
-map_type : MAP LEFT_BRACKET RIGHT_BRACKET
+map_type : MAP! LEFT_BRACKET! RIGHT_BRACKET!
;
func_clause : func_name LEFT_PAREN func_args? RIGHT_PAREN
+ -> ^( FUNC func_name func_args? )
| func_alias
+ -> ^( FUNC func_alias )
;
func_name : eid ( PERIOD eid )*
+ -> eid+
;
func_alias : IDENTIFIER
;
func_args : QUOTEDSTRING ( COMMA QUOTEDSTRING )*
+ -> QUOTEDSTRING+
;
-group_clause : ( GROUP | COGROUP ) group_item_list ( USING QUOTEDSTRING )?
+group_clause : ( GROUP | COGROUP )^ group_item_list ( USING! QUOTEDSTRING )?
;
group_item_list : group_item ( COMMA group_item )*
+ -> group_item+
;
-group_item : rel ( ( BY flatten_generated_item_list ) | ALL | ANY ) ( INNER | OUTER )?
+group_item : rel ( ( BY! flatten_generated_item_list ) | ALL | ANY ) ( INNER | OUTER )?
;
-rel : alias | LEFT_PAREN op_clause RIGHT_PAREN
+rel : alias | LEFT_PAREN! op_clause RIGHT_PAREN!
;
flatten_generated_item_list : LEFT_PAREN flatten_generated_item ( COMMA flatten_generated_item )* RIGHT_PAREN
+ -> flatten_generated_item+
| flatten_generated_item
;
flatten_generated_item : ( flatten_clause | expr | STAR ) as_clause?
;
-flatten_clause : FLATTEN LEFT_PAREN expr RIGHT_PAREN
+flatten_clause : FLATTEN^ LEFT_PAREN! expr RIGHT_PAREN!
;
-store_clause : STORE alias INTO filename ( USING func_clause )?
+store_clause : STORE^ alias INTO! filename ( USING! func_clause )?
;
-filter_clause : FILTER rel BY cond
+filter_clause : FILTER^ rel BY! cond
;
cond : or_cond
;
-or_cond : and_cond ( OR and_cond )*
+or_cond : and_cond ( OR^ and_cond )*
;
-and_cond : unary_cond ( AND unary_cond )*
+and_cond : unary_cond ( AND^ unary_cond )*
;
-unary_cond : LEFT_PAREN cond RIGHT_PAREN
+unary_cond : LEFT_PAREN! cond RIGHT_PAREN!
|
- expr FILTEROP expr
+ expr FILTEROP^ expr
|
func_eval
|
@@ -264,35 +277,37 @@ unary_cond : LEFT_PAREN cond RIGHT_PAREN
not_cond
;
-not_cond : NOT unary_cond
+not_cond : NOT^ unary_cond
;
-func_eval: func_name LEFT_PAREN real_arg_list? RIGHT_PAREN
+func_eval: func_name LEFT_PAREN! real_arg_list? RIGHT_PAREN!
;
real_arg_list : real_arg ( COMMA real_arg )*
+ -> real_arg+
;
real_arg : expr | STAR
;
-null_check_cond : expr IS NOT? NULL
+null_check_cond : expr IS! NOT? NULL^
;
expr : add_expr
;
-add_expr : multi_expr ( ( PLUS | MINUS ) multi_expr )*
+add_expr : multi_expr ( ( PLUS | MINUS )^ multi_expr )*
;
-multi_expr : cast_expr ( ( STAR | DIV | PERCENT ) cast_expr )*
+multi_expr : cast_expr ( ( STAR | DIV | PERCENT )^ cast_expr )*
;
cast_expr : ( LEFT_PAREN type RIGHT_PAREN ) unary_expr
+ -> ^( CAST_EXPR type unary_expr )
| unary_expr
;
-unary_expr : expr_eval | ( LEFT_PAREN expr RIGHT_PAREN ) | neg_expr
+unary_expr : expr_eval | ( LEFT_PAREN! expr RIGHT_PAREN! ) | neg_expr
;
expr_eval : const_expr | var_expr
@@ -305,24 +320,26 @@ projectable_expr: func_eval | col_ref |
;
dot_proj : PERIOD ( col_ref | ( LEFT_PAREN col_ref ( COMMA col_ref )* RIGHT_PAREN ) )
+ -> ^( PERIOD col_ref+ )
;
-pound_proj : POUND ( QUOTEDSTRING | NULL )
+pound_proj : POUND^ ( QUOTEDSTRING | NULL )
;
bin_expr : LEFT_PAREN cond QMARK exp1 = expr COLON exp2 = expr RIGHT_PAREN
+ -> ^( BIN_EXPR cond $exp1 $exp2 )
;
-neg_expr : MINUS cast_expr
+neg_expr : MINUS^ cast_expr
;
-limit_clause : LIMIT rel ( INTEGER | LONGINTEGER )
+limit_clause : LIMIT^ rel ( INTEGER | LONGINTEGER )
;
-sample_clause : SAMPLE rel DOUBLENUMBER
+sample_clause : SAMPLE^ rel DOUBLENUMBER
;
-order_clause : ORDER rel BY order_by_clause ( USING func_clause )?
+order_clause : ORDER^ rel BY! order_by_clause ( USING! func_clause )?
;
order_by_clause : STAR ( ASC | DESC )?
@@ -330,59 +347,67 @@ order_by_clause : STAR ( ASC | DESC )?
;
order_col_list : order_col ( COMMA order_col )*
+ -> order_col+
;
order_col : col_ref ( ASC | DESC )?
- | LEFT_PAREN col_ref ( ASC | DESC )? RIGHT_PAREN
+ | LEFT_PAREN! col_ref ( ASC | DESC )? RIGHT_PAREN!
;
-distinct_clause : DISTINCT rel partition_clause?
+distinct_clause : DISTINCT^ rel partition_clause?
;
-partition_clause : PARTITION BY func_name
+partition_clause : PARTITION^ BY! func_name
;
-cross_clause : CROSS rel_list partition_clause?
+cross_clause : CROSS^ rel_list partition_clause?
;
rel_list : rel ( COMMA rel )*
+ -> rel+
;
-joint_clause : JOIN join_sub_clause ( USING QUOTEDSTRING )? partition_clause?
+joint_clause : JOIN^ join_sub_clause ( USING! QUOTEDSTRING )? partition_clause?
;
join_sub_clause : join_item ( LEFT | RIGHT | FULL ) OUTER? join_item
| join_item_list
;
-join_item_list : join_item ( COMMA join_item )+
+join_item_list : join_item ( COMMA! join_item )+
;
-join_item : rel BY flatten_generated_item_list
+join_item : rel BY! flatten_generated_item_list
;
-union_clause : UNION ONSCHEMA? rel_list
+union_clause : UNION^ ONSCHEMA? rel_list
;
-foreach_clause : FOREACH rel nested_plan
+foreach_clause : FOREACH^ rel nested_plan
;
nested_plan : foreach_blk SEMI_COLON?
+ -> ^( NESTED_PLAN foreach_blk )
| ( generate_clause SEMI_COLON )
+ -> ^( NESTED_PLAN generate_clause )
;
-foreach_blk : LEFT_CURLY nested_command_list ( generate_clause SEMI_COLON ) RIGHT_CURLY
+foreach_blk : LEFT_CURLY! nested_command_list ( generate_clause SEMI_COLON! ) RIGHT_CURLY!
;
generate_clause : GENERATE flatten_generated_item ( COMMA flatten_generated_item )*
+ -> ^( GENERATE flatten_generated_item+ )
;
nested_command_list : ( nested_command SEMI_COLON )*
+ -> nested_command*
|
;
nested_command : IDENTIFIER EQUAL expr
+ -> ^( NESTED_CMD IDENTIFIER expr )
| IDENTIFIER EQUAL nested_op
+ -> ^( NESTED_CMD IDENTIFIER nested_op )
;
nested_op : nested_proj
@@ -393,33 +418,37 @@ nested_op : nested_proj
;
nested_proj : col_ref PERIOD col_ref_list
+ -> ^( NESTED_PROJ col_ref col_ref_list )
;
col_ref_list : ( col_ref | ( LEFT_PAREN col_ref ( COMMA col_ref )* RIGHT_PAREN ) )
+ -> col_ref+
;
-nested_filter : FILTER ( IDENTIFIER | nested_proj | expr_eval ) BY cond
+nested_filter : FILTER^ ( IDENTIFIER | nested_proj | expr_eval ) BY! cond
;
-nested_sort : ORDER ( IDENTIFIER | nested_proj | expr_eval ) BY order_by_clause ( USING func_clause )?
+nested_sort : ORDER^ ( IDENTIFIER | nested_proj | expr_eval ) BY! order_by_clause ( USING! func_clause )?
;
-nested_distinct : DISTINCT ( IDENTIFIER | nested_proj | expr_eval )
+nested_distinct : DISTINCT^ ( IDENTIFIER | nested_proj | expr_eval )
;
-nested_limit : LIMIT ( IDENTIFIER | nested_proj | expr_eval ) INTEGER
+nested_limit : LIMIT^ ( IDENTIFIER | nested_proj | expr_eval ) INTEGER
;
-stream_clause : STREAM rel THROUGH ( EXECCOMMAND | IDENTIFIER ) as_clause?
+stream_clause : STREAM^ rel THROUGH! ( EXECCOMMAND | IDENTIFIER ) as_clause?
;
-mr_clause : MAPREDUCE QUOTEDSTRING ( LEFT_PAREN path_list RIGHT_PAREN )? store_clause load_clause EXECCOMMAND?
+mr_clause : MAPREDUCE^ QUOTEDSTRING ( LEFT_PAREN! path_list RIGHT_PAREN! )? store_clause load_clause EXECCOMMAND?
;
split_clause : SPLIT rel INTO split_branch ( COMMA split_branch )+
+ -> ^( SPLIT rel split_branch+ )
;
split_branch : IDENTIFIER IF cond
+ -> ^( SPLIT_BRANCH IDENTIFIER cond )
;
col_ref : alias_col_ref | dollar_col_ref
@@ -428,7 +457,7 @@ col_ref : alias_col_ref | dollar_col_ref
alias_col_ref : GROUP | IDENTIFIER
;
-dollar_col_ref : DOLLAR INTEGER
+dollar_col_ref : DOLLAR^ INTEGER
;
const_expr : scalar | map | bag | tuple
@@ -438,18 +467,22 @@ scalar : INTEGER | LONGINEGER | FLOATNUM
;
map : LEFT_BRACKET ( keyvalue ( COMMA keyvalue )* )? RIGHT_BRACKET
+ -> ^( MAP_VAL keyvalue+ )
;
keyvalue : string_val POUND const_expr
+ -> ^( KEY_VAL_PAIR string_val const_expr )
;
string_val : QUOTEDSTRING | NULL
;
bag : LEFT_CURLY ( tuple ( COMMA tuple )* )? RIGHT_CURLY
+ -> ^( BAG_VAL tuple+ )
;
tuple : LEFT_PAREN ( const_expr ( COMMA const_expr )* )? RIGHT_PAREN
+ -> ^( TUPLE_VAL const_expr+ )
;
// extended identifier, handling the keyword and identifier conflicts. Ugly but there is no other choice.
Modified: pig/trunk/test/findbugsExcludeFile.xml
URL: http://svn.apache.org/viewvc/pig/trunk/test/findbugsExcludeFile.xml?rev=1036509&r1=1036508&r2=1036509&view=diff
==============================================================================
--- pig/trunk/test/findbugsExcludeFile.xml (original)
+++ pig/trunk/test/findbugsExcludeFile.xml Thu Nov 18 16:07:01 2010
@@ -4,6 +4,9 @@
<Package name="org.apache.pig.data.parser.*" />
</Match>
<Match>
+ <Package name="org.apache.pig.parser.*" />
+ </Match>
+ <Match>
<Package name="org.apache.pig.impl.logicalLayer.parser.*" />
</Match>
<Match>
Added: pig/trunk/test/org/apache/pig/parser/TestAST.pig
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/parser/TestAST.pig?rev=1036509&view=auto
==============================================================================
--- pig/trunk/test/org/apache/pig/parser/TestAST.pig (added)
+++ pig/trunk/test/org/apache/pig/parser/TestAST.pig Thu Nov 18 16:07:01 2010
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ A = LOAD 'source.txt' USING org.apache.pig.TextLoader() AS (a:int, b:long, c:chararray);
+ B = FOREACH A generate $0 * 5, b, $2;
+ C = FILTER B by a > 100;
+ D = LIMIT C 400;
+ STORE D into 'output.txt';
\ No newline at end of file
Modified: pig/trunk/test/org/apache/pig/parser/TestQueryParser.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/parser/TestQueryParser.java?rev=1036509&r1=1036508&r2=1036509&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/parser/TestQueryParser.java (original)
+++ pig/trunk/test/org/apache/pig/parser/TestQueryParser.java Thu Nov 18 16:07:01 2010
@@ -83,7 +83,57 @@ public class TestQueryParser {
int errorCount = parse( "A = load '/Users/gates/test/data/studenttab10'; B = foreach A generate $0, 3.0e10.1;" );
Assert.assertTrue( errorCount > 0 );
}
+
+ @Test
+ public void test2() throws IOException, RecognitionException {
+ int errorCount = parse( "A = load '/Users/gates/test/data/studenttab10'; B = foreach A generate ( $0 == 0 ? 1 : 0 );" );
+ Assert.assertTrue( errorCount == 0 );
+ }
+
+ @Test
+ public void testAST() throws IOException, RecognitionException {
+ CharStream input = new QueryParserFileStream( "test/org/apache/pig/parser/TestAST.pig" );
+ QueryLexer lexer = new QueryLexer(input);
+ CommonTokenStream tokens = new CommonTokenStream(lexer);
+ QueryParser parser = new QueryParser(tokens);
+ QueryParser.query_return result = parser.query();
+
+ Tree ast = (Tree)result.getTree();
+
+ System.out.println( ast.toStringTree() );
+ printTree( (CommonTree)ast, 0 );
+ Assert.assertEquals( 0, lexer.getNumberOfSyntaxErrors() );
+ Assert.assertEquals( 0, parser.getNumberOfSyntaxErrors() );
+
+ Assert.assertEquals( "QUERY", ast.getText() );
+ Assert.assertEquals( 5, ast.getChildCount() );
+
+ for( int i = 0; i < ast.getChildCount(); i++ ) {
+ Tree c = ast.getChild( i );
+ Assert.assertEquals( "STATEMENT", c.getText() );
+ }
+
+ Tree stmt = ast.getChild( 0 );
+ Assert.assertEquals( "A", stmt.getChild( 0 ).getText() ); // alias
+ Assert.assertTrue( "LOAD".equalsIgnoreCase( stmt.getChild( 1 ).getText() ) );
+
+ stmt = ast.getChild( 1 );
+ Assert.assertEquals( "B", stmt.getChild( 0 ).getText() ); // alias
+ Assert.assertTrue( "FOREACH".equalsIgnoreCase( stmt.getChild( 1 ).getText() ) );
+
+ stmt = ast.getChild( 2 );
+ Assert.assertEquals( "C", stmt.getChild( 0 ).getText() ); // alias
+ Assert.assertTrue( "FILTER".equalsIgnoreCase( stmt.getChild( 1 ).getText() ) );
+
+ stmt = ast.getChild( 3 );
+ Assert.assertEquals( "D", stmt.getChild( 0 ).getText() ); // alias
+ Assert.assertTrue( "LIMIT".equalsIgnoreCase( stmt.getChild( 1 ).getText() ) );
+
+ stmt = ast.getChild( 4 );
+ Assert.assertTrue( "STORE".equalsIgnoreCase( stmt.getChild( 0 ).getText() ) );
+ }
+
private int parse(String query) throws IOException, RecognitionException {
CharStream input = new QueryParserStringStream( query );
QueryLexer lexer = new QueryLexer(input);