You are viewing a plain text version of this content; the canonical (linked) version is available in the mailing list archive.
Posted to commits@pig.apache.org by dv...@apache.org on 2013/04/04 22:38:53 UTC
svn commit: r1464734 - in /pig/branches/branch-0.11: ./
src/org/apache/pig/parser/ test/e2e/pig/tests/
test/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/
test/org/apache/pig/parser/ test/org/apache/pig/test/
Author: dvryaboy
Date: Thu Apr 4 20:38:52 2013
New Revision: 1464734
URL: http://svn.apache.org/r1464734
Log:
PIG-2769: a simple logic causes very long compiling time on pig 0.10.0
Modified:
pig/branches/branch-0.11/CHANGES.txt
pig/branches/branch-0.11/src/org/apache/pig/parser/QueryLexer.g
pig/branches/branch-0.11/src/org/apache/pig/parser/QueryParser.g
pig/branches/branch-0.11/test/e2e/pig/tests/macro.conf
pig/branches/branch-0.11/test/e2e/pig/tests/negative.conf
pig/branches/branch-0.11/test/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/TestInputSizeReducerEstimator.java
pig/branches/branch-0.11/test/org/apache/pig/parser/TestLogicalPlanGenerator.java
pig/branches/branch-0.11/test/org/apache/pig/parser/TestQueryParser.java
pig/branches/branch-0.11/test/org/apache/pig/test/TestLogicalPlanBuilder.java
Modified: pig/branches/branch-0.11/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.11/CHANGES.txt?rev=1464734&r1=1464733&r2=1464734&view=diff
==============================================================================
--- pig/branches/branch-0.11/CHANGES.txt (original)
+++ pig/branches/branch-0.11/CHANGES.txt Thu Apr 4 20:38:52 2013
@@ -26,6 +26,8 @@ IMPROVEMENTS
OPTIMIZATIONS
+PIG-2769: a simple logic causes very long compiling time on pig 0.10.0 (njw45 via dvryaboy) (prev. applied to 0.12)
+
BUG FIXES
PIG-3264: mvn signanddeploy target broken for pigunit, pigsmoke and piggybank (billgraham)
Modified: pig/branches/branch-0.11/src/org/apache/pig/parser/QueryLexer.g
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.11/src/org/apache/pig/parser/QueryLexer.g?rev=1464734&r1=1464733&r2=1464734&view=diff
==============================================================================
--- pig/branches/branch-0.11/src/org/apache/pig/parser/QueryLexer.g (original)
+++ pig/branches/branch-0.11/src/org/apache/pig/parser/QueryLexer.g Thu Apr 4 20:38:52 2013
@@ -60,6 +60,9 @@ public String getErrorHeader(Recognition
VOID : 'VOID'
;
+NULL : 'NULL'
+;
+
IMPORT : 'IMPORT'
;
@@ -315,8 +318,12 @@ fragment ID: LETTER ( DIGIT | LETTER | S
DCOLON : '::'
;
-IDENTIFIER_L : ( ID DCOLON ) => ( ID DCOLON IDENTIFIER_L )
- | ID
+IDENTIFIER
+ @after {
+ if("null".equalsIgnoreCase(getText())){
+ state.type = NULL;
+ }
+ } : ( ID DCOLON ) => ( ID DCOLON IDENTIFIER ) | ID
;
fragment FLOATINGPOINT : INTEGER ( PERIOD INTEGER )? | PERIOD INTEGER
@@ -325,7 +332,7 @@ fragment FLOATINGPOINT : INTEGER ( PERIO
INTEGER: ( DIGIT )+
;
-LONGINTEGER: INTEGER ( 'L' )?
+LONGINTEGER: INTEGER 'L'
;
DOLLARVAR : DOLLAR INTEGER
@@ -334,7 +341,7 @@ DOLLARVAR : DOLLAR INTEGER
DOUBLENUMBER : FLOATINGPOINT ( 'E' ( MINUS | PLUS )? INTEGER )?
;
-FLOATNUMBER : DOUBLENUMBER ( 'F' )?
+FLOATNUMBER : DOUBLENUMBER 'F'
;
QUOTEDSTRING : '\'' ( ( ~ ( '\'' | '\\' | '\n' | '\r' ) )
Modified: pig/branches/branch-0.11/src/org/apache/pig/parser/QueryParser.g
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.11/src/org/apache/pig/parser/QueryParser.g?rev=1464734&r1=1464733&r2=1464734&view=diff
==============================================================================
--- pig/branches/branch-0.11/src/org/apache/pig/parser/QueryParser.g (original)
+++ pig/branches/branch-0.11/src/org/apache/pig/parser/QueryParser.g Thu Apr 4 20:38:52 2013
@@ -30,7 +30,7 @@ parser grammar QueryParser;
options {
tokenVocab=QueryLexer;
output=AST;
- backtrack=true;
+ backtrack=false; // greatly slows down parsing!
}
tokens {
@@ -86,6 +86,9 @@ import java.util.Collections;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.pig.parser.PigMacro;
+
+import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.ImmutableMap;
}
@members {
@@ -137,6 +140,45 @@ public String getErrorHeader(Recognition
return QueryParserUtils.generateErrorHeader( ex, this.getSourceName() );
}
+private static final Map<Integer, Integer> FUNC_TO_LITERAL = ImmutableMap.of(
+ TOBAG, BAG_VAL,
+ TOMAP, MAP_VAL,
+ TOTUPLE, TUPLE_VAL);
+
+private static final Set<Integer> BOOLEAN_TOKENS = ImmutableSet.of(
+ STR_OP_EQ,
+ STR_OP_NE,
+ STR_OP_GT,
+ STR_OP_LT,
+ STR_OP_GTE,
+ STR_OP_LTE,
+ STR_OP_MATCHES,
+ AND,
+ OR,
+ NOT,
+ NULL,
+ NUM_OP_EQ,
+ NUM_OP_NE,
+ NUM_OP_GT,
+ NUM_OP_GTE,
+ NUM_OP_LT,
+ NUM_OP_LTE);
+
+private static final Set<Integer> LITERAL_TOKENS = ImmutableSet.of(
+ INTEGER,
+ LONGINTEGER,
+ FLOATNUMBER,
+ DOUBLENUMBER,
+ QUOTEDSTRING,
+ NULL,
+ TRUE,
+ FALSE,
+ MAP_VAL,
+ BAG_VAL,
+ TUPLE_VAL,
+ PERIOD,
+ POUND);
+
} // End of @members
@rulecatch {
@@ -145,194 +187,188 @@ catch(RecognitionException re) {
}
}
-query : statement* EOF
- -> ^( QUERY statement* )
+query : statement* EOF -> ^( QUERY statement* )
;
+// STATEMENTS
+
statement : SEMI_COLON!
- | general_statement
+ | general_statement SEMI_COLON!
+ | split_clause SEMI_COLON!
+ | inline_clause SEMI_COLON!
+ | import_clause SEMI_COLON!
+ | realias_clause SEMI_COLON!
+ // semicolons after foreach_complex_statement are optional for backwards compatibility, but to keep
+ // the grammar unambiguous if there is one then we'll parse it as a single, standalone semicolon
+ // (which matches the first statement rule)
| foreach_statement
- | split_statement
- | inline_statement
- | import_statement
- | realias_statement
;
-import_statement : import_clause SEMI_COLON!
+nested_op_clause : LEFT_PAREN! op_clause parallel_clause? RIGHT_PAREN!
+ | LEFT_PAREN FOREACH rel ( foreach_plan_complex | ( foreach_plan_simple parallel_clause? ) ) RIGHT_PAREN
+ -> ^( FOREACH rel foreach_plan_complex? foreach_plan_simple? ) parallel_clause?
;
-inline_statement : inline_clause SEMI_COLON!
+general_statement : ( IDENTIFIER EQUAL )? ( ( op_clause parallel_clause? ) | nested_op_clause ) -> ^( STATEMENT IDENTIFIER? op_clause? parallel_clause? nested_op_clause? )
;
-split_statement : split_clause SEMI_COLON!
+// Statement represented by a foreach operator with a nested block. Simple foreach statement
+// is covered by general_statement.
+// We need to handle foreach specifically because of the ending ';', which is not required
+// if there is a nested block. This is ugly, but it gets the job done.
+foreach_statement : ( IDENTIFIER EQUAL )? FOREACH rel ( foreach_plan_complex | ( foreach_plan_simple parallel_clause? SEMI_COLON ) )
+ -> ^( STATEMENT IDENTIFIER? ^( FOREACH rel foreach_plan_complex? foreach_plan_simple? ) parallel_clause? )
;
-general_statement : ( alias EQUAL )? (op_clause parallel_clause? | LEFT_PAREN op_clause parallel_clause? RIGHT_PAREN) SEMI_COLON
- -> ^( STATEMENT alias? op_clause parallel_clause? )
+foreach_plan_complex : LEFT_CURLY nested_blk RIGHT_CURLY -> ^( FOREACH_PLAN_COMPLEX nested_blk )
;
-realias_statement : realias_clause SEMI_COLON!
+foreach_plan_simple : GENERATE flatten_generated_item ( COMMA flatten_generated_item )* -> ^( FOREACH_PLAN_SIMPLE ^( GENERATE flatten_generated_item+ ) )
;
-realias_clause : alias EQUAL identifier
- -> ^(REALIAS alias identifier)
+// MACRO grammar
+
+macro_content : LEFT_CURLY ( macro_content | ~(LEFT_CURLY | RIGHT_CURLY) )* RIGHT_CURLY
;
-parallel_clause : PARALLEL^ INTEGER
+macro_param_clause : LEFT_PAREN ( IDENTIFIER (COMMA IDENTIFIER)* )? RIGHT_PAREN
+ -> ^(PARAMS IDENTIFIER*)
;
-// Statement represented by a foreach operator with a nested block. Simple foreach statement
-// is covered by general_statement.
-// We need to handle foreach specifically because of the ending ';', which is not required
-// if there is a nested block. This is ugly, but it gets the job done.
-foreach_statement : ( ( alias EQUAL )? FOREACH rel LEFT_CURLY ) => foreach_complex_statement
- | foreach_simple_statement
+macro_return_clause
+ : RETURNS ((IDENTIFIER (COMMA IDENTIFIER)*) | VOID)
+ -> ^(RETURN_VAL IDENTIFIER*)
;
-foreach_complex_statement : ( alias EQUAL )? foreach_clause_complex SEMI_COLON?
- -> ^( STATEMENT alias? foreach_clause_complex )
+macro_body_clause : macro_content -> ^(MACRO_BODY { new PigParserNode(new CommonToken(1, $macro_content.text), this.getSourceName(), $macro_content.start) } )
;
-foreach_simple_statement : ( alias EQUAL )? (foreach_clause_simple parallel_clause?
- | LEFT_PAREN foreach_clause_simple parallel_clause? RIGHT_PAREN) SEMI_COLON
- -> ^( STATEMENT alias? foreach_clause_simple parallel_clause? )
+macro_clause : macro_param_clause macro_return_clause macro_body_clause
+ -> ^(MACRO_DEF macro_param_clause macro_return_clause macro_body_clause)
;
-alias : identifier
+inline_return_clause
+ : IDENTIFIER EQUAL -> ^(RETURN_VAL IDENTIFIER)
+ | IDENTIFIER (COMMA IDENTIFIER)+ EQUAL -> ^(RETURN_VAL IDENTIFIER+)
+ | -> ^(RETURN_VAL)
;
parameter
- : identifier
+ : IDENTIFIER
| INTEGER
| DOUBLENUMBER
| QUOTEDSTRING
| DOLLARVAR
;
-content : LEFT_CURLY ( content | ~(LEFT_CURLY | RIGHT_CURLY) )* RIGHT_CURLY
+inline_param_clause : LEFT_PAREN ( parameter (COMMA parameter)* )? RIGHT_PAREN
+ -> ^(PARAMS parameter*)
;
-op_clause : define_clause
- | load_clause
- | group_clause
- | cube_clause
- | store_clause
- | filter_clause
- | distinct_clause
- | limit_clause
- | sample_clause
- | order_clause
- | rank_clause
- | cross_clause
- | join_clause
- | union_clause
- | stream_clause
- | mr_clause
+inline_clause : inline_return_clause IDENTIFIER inline_param_clause
+ -> ^(MACRO_INLINE IDENTIFIER inline_return_clause inline_param_clause)
;
-macro_param_clause : LEFT_PAREN ( alias (COMMA alias)* )? RIGHT_PAREN
- -> ^(PARAMS alias*)
-;
+// TYPES
-macro_return_clause
- : RETURNS ((alias (COMMA alias)*) | VOID)
- -> ^(RETURN_VAL alias*)
+simple_type : BOOLEAN | INT | LONG | FLOAT | DOUBLE | DATETIME | CHARARRAY | BYTEARRAY
;
-macro_body_clause : content
- -> ^(MACRO_BODY { new PigParserNode(new CommonToken(1, $content.text), this.getSourceName(), $content.start) } )
+implicit_tuple_type : LEFT_PAREN field_def_list? RIGHT_PAREN -> ^( TUPLE_TYPE field_def_list? )
;
-macro_clause : macro_param_clause macro_return_clause macro_body_clause
- -> ^(MACRO_DEF macro_param_clause macro_return_clause macro_body_clause)
+explicit_tuple_type : TUPLE! implicit_tuple_type
;
-inline_return_clause
- : alias EQUAL -> ^(RETURN_VAL alias)
- | alias (COMMA alias)+ EQUAL -> ^(RETURN_VAL alias+)
- | -> ^(RETURN_VAL)
+explicit_tuple_type_cast : TUPLE LEFT_PAREN ( explicit_type_cast ( COMMA explicit_type_cast )* )? RIGHT_PAREN
+ -> ^( TUPLE_TYPE_CAST explicit_type_cast* )
;
-inline_param_clause : LEFT_PAREN ( parameter (COMMA parameter)* )? RIGHT_PAREN
- -> ^(PARAMS parameter*)
+tuple_type : implicit_tuple_type | explicit_tuple_type
;
-inline_clause : inline_return_clause alias inline_param_clause
- -> ^(MACRO_INLINE alias inline_return_clause inline_param_clause)
+implicit_bag_type : LEFT_CURLY NULL COLON tuple_type? RIGHT_CURLY -> ^( BAG_TYPE tuple_type? )
+ | LEFT_CURLY ( ( IDENTIFIER COLON )? tuple_type )? RIGHT_CURLY -> ^( BAG_TYPE IDENTIFIER? tuple_type? )
;
-import_clause : IMPORT^ QUOTEDSTRING
+explicit_bag_type : BAG! implicit_bag_type
;
-define_clause : DEFINE^ alias ( cmd | func_clause | macro_clause)
+explicit_bag_type_cast : BAG LEFT_CURLY explicit_tuple_type_cast? RIGHT_CURLY -> ^( BAG_TYPE_CAST explicit_tuple_type_cast? )
;
-cmd : EXECCOMMAND^ ( ship_clause | cache_clause | input_clause | output_clause | error_clause )*
+implicit_map_type : LEFT_BRACKET type? RIGHT_BRACKET -> ^( MAP_TYPE type? )
;
-ship_clause : SHIP^ LEFT_PAREN! path_list? RIGHT_PAREN!
+explicit_map_type : MAP! implicit_map_type
;
-path_list : QUOTEDSTRING ( COMMA QUOTEDSTRING )*
- -> QUOTEDSTRING+
+map_type : implicit_map_type | explicit_map_type
;
-cache_clause : CACHE^ LEFT_PAREN! path_list RIGHT_PAREN!
+explicit_type : simple_type | explicit_tuple_type | explicit_bag_type | explicit_map_type
;
-input_clause : INPUT^ LEFT_PAREN! stream_cmd_list RIGHT_PAREN!
+implicit_type : implicit_tuple_type | implicit_bag_type | implicit_map_type
;
-stream_cmd_list : stream_cmd ( COMMA stream_cmd )*
- -> stream_cmd+
+type : explicit_type | implicit_type
;
-stream_cmd : ( STDIN | STDOUT | QUOTEDSTRING )^ ( USING! ( func_clause ) )?
+explicit_type_cast : simple_type | explicit_map_type | explicit_tuple_type_cast | explicit_bag_type_cast
;
-output_clause : OUTPUT^ LEFT_PAREN! stream_cmd_list RIGHT_PAREN!
+// CLAUSES
+
+import_clause : IMPORT^ QUOTEDSTRING
;
-error_clause : STDERROR^ LEFT_PAREN! ( QUOTEDSTRING ( LIMIT! INTEGER )? )? RIGHT_PAREN!
+define_clause : DEFINE^ IDENTIFIER ( cmd | func_clause | macro_clause)
;
-load_clause : LOAD^ filename ( USING! func_clause )? as_clause?
+realias_clause : IDENTIFIER EQUAL IDENTIFIER -> ^(REALIAS IDENTIFIER IDENTIFIER)
;
-filename : QUOTEDSTRING
+parallel_clause : PARALLEL^ INTEGER
;
-as_clause: AS^ ( ( LEFT_PAREN! field_def_list RIGHT_PAREN! ) | field_def )
+op_clause : define_clause
+ | load_clause
+ | group_clause
+ | cube_clause
+ | store_clause
+ | filter_clause
+ | distinct_clause
+ | limit_clause
+ | sample_clause
+ | order_clause
+ | rank_clause
+ | cross_clause
+ | join_clause
+ | union_clause
+ | stream_clause
+ | mr_clause
;
-field_def : identifier ( COLON type )?
- -> ^( FIELD_DEF identifier type? )
- | type
- -> ^( FIELD_DEF_WITHOUT_IDENTIFIER type )
+ship_clause : SHIP^ LEFT_PAREN! path_list? RIGHT_PAREN!
;
-field_def_list : field_def ( COMMA field_def )*
- -> field_def+
+path_list : QUOTEDSTRING ( COMMA QUOTEDSTRING )* -> QUOTEDSTRING+
;
-type : simple_type | tuple_type | bag_type | map_type
+cache_clause : CACHE^ LEFT_PAREN! path_list RIGHT_PAREN!
;
-simple_type : BOOLEAN | INT | LONG | FLOAT | DOUBLE | DATETIME | CHARARRAY | BYTEARRAY
+input_clause : INPUT^ LEFT_PAREN! stream_cmd_list RIGHT_PAREN!
;
-tuple_type : TUPLE? LEFT_PAREN field_def_list? RIGHT_PAREN
- -> ^( TUPLE_TYPE field_def_list? )
+output_clause : OUTPUT^ LEFT_PAREN! stream_cmd_list RIGHT_PAREN!
;
-bag_type : BAG? LEFT_CURLY ( null_keyword COLON tuple_type? ) RIGHT_CURLY
- -> ^( BAG_TYPE tuple_type? )
- | BAG? LEFT_CURLY ( ( identifier COLON )? tuple_type )? RIGHT_CURLY
- -> ^( BAG_TYPE identifier? tuple_type? )
+error_clause : STDERROR^ LEFT_PAREN! ( QUOTEDSTRING ( LIMIT! INTEGER )? )? RIGHT_PAREN!
;
-map_type : MAP? LEFT_BRACKET type? RIGHT_BRACKET
- -> ^( MAP_TYPE type? )
+load_clause : LOAD^ QUOTEDSTRING ( USING! func_clause )? as_clause?
;
func_clause : func_name
@@ -341,6 +377,10 @@ func_clause : func_name
-> ^( FUNC func_name func_args? )
;
+// needed for disambiguation when parsing expressions...see below
+func_name_without_columns : eid_without_columns ( ( PERIOD | DOLLAR ) eid )*
+;
+
func_name : eid ( ( PERIOD | DOLLAR ) eid )*
;
@@ -351,10 +391,7 @@ func_args : func_args_string ( COMMA fun
-> func_args_string+
;
-group_clause : ( GROUP | COGROUP )^ group_item_list ( USING! group_type )? partition_clause?
-;
-
-group_type : QUOTEDSTRING
+group_clause : ( GROUP | COGROUP )^ group_item_list ( USING! QUOTEDSTRING )? partition_clause?
;
group_item_list : group_item ( COMMA group_item )*
@@ -364,281 +401,424 @@ group_item_list : group_item ( COMMA gro
group_item : rel ( join_group_by_clause | ALL | ANY ) ( INNER | OUTER )?
;
-rel : alias
- | LEFT_PAREN! ( foreach_clause_complex | ( ( op_clause | foreach_clause_simple ) parallel_clause? ) ) RIGHT_PAREN!
-;
+// "AS" CLAUSES
-flatten_generated_item : flatten_clause ( AS! ( ( LEFT_PAREN! field_def_list RIGHT_PAREN! ) | field_def ) )?
- | col_range ( AS! ( ( LEFT_PAREN! field_def_list RIGHT_PAREN! ) | field_def ) )?
- | expr ( AS! field_def )?
- | STAR ( AS! ( ( LEFT_PAREN! field_def_list RIGHT_PAREN! ) | field_def ) )?
-;
-
-flatten_clause : FLATTEN^ LEFT_PAREN! expr RIGHT_PAREN!
+explicit_field_def : IDENTIFIER ( COLON type )? -> ^( FIELD_DEF IDENTIFIER type? )
+ | explicit_type -> ^( FIELD_DEF_WITHOUT_IDENTIFIER explicit_type )
;
-store_clause : STORE^ rel INTO! filename ( USING! func_clause )?
+field_def : explicit_field_def
+ | implicit_type -> ^( FIELD_DEF_WITHOUT_IDENTIFIER implicit_type )
;
-filter_clause : FILTER^ rel BY! cond
+field_def_list : field_def ( COMMA! field_def )*
;
-cond : or_cond
+// we have two tuple types as implicit_tuple_types can be confused with parentheses around
+// a field_def - so to remove this ambiguity we'll decide brackets around a single field_def
+// type is *not* a tuple
+as_clause : AS^ ( explicit_field_def | ( LEFT_PAREN! field_def_list? RIGHT_PAREN! ) )
;
-or_cond : and_cond ( OR^ and_cond )*
+// OTHERS
+
+stream_cmd_list : stream_cmd ( COMMA stream_cmd )* -> stream_cmd+
;
-and_cond : unary_cond ( AND^ unary_cond )*
+stream_cmd : ( STDIN | STDOUT | QUOTEDSTRING )^ ( USING! func_clause )?
;
-unary_cond : expr rel_op^ expr
- | LEFT_PAREN! cond RIGHT_PAREN!
- | not_cond
- | func_eval
- | null_check_cond
- | bool_cond
+cmd : EXECCOMMAND^ ( ship_clause | cache_clause | input_clause | output_clause | error_clause )*
;
-bool_cond : expr -> ^(BOOL_COND expr)
+rel : IDENTIFIER | nested_op_clause
;
-not_cond : NOT^ unary_cond
+store_clause : STORE^ rel INTO! QUOTEDSTRING ( USING! func_clause )?
;
-func_eval : func_name LEFT_PAREN real_arg_list? RIGHT_PAREN
- -> ^( FUNC_EVAL func_name real_arg_list? )
+filter_clause : FILTER^ rel BY! cond
;
-real_arg_list : real_arg ( COMMA real_arg )*
- -> real_arg+
+stream_clause : STREAM^ rel THROUGH! ( EXECCOMMAND | IDENTIFIER ) as_clause?
;
-real_arg : expr | STAR | col_range
+mr_clause : MAPREDUCE^ QUOTEDSTRING ( LEFT_PAREN! path_list RIGHT_PAREN! )? store_clause load_clause EXECCOMMAND?
;
-null_check_cond : expr IS! NOT? null_keyword^
+split_clause : SPLIT^ rel INTO! split_branch split_branches
;
-expr : add_expr
+split_branch : IDENTIFIER IF cond -> ^( SPLIT_BRANCH IDENTIFIER cond )
;
-add_expr : multi_expr ( ( PLUS | MINUS )^ multi_expr )*
+split_otherwise : IDENTIFIER OTHERWISE^
;
-multi_expr : cast_expr ( ( STAR | DIV | PERCENT )^ cast_expr )*
+split_branches : COMMA! split_branch split_branches?
+ | COMMA! split_otherwise
;
-cast_expr : LEFT_PAREN type_cast RIGHT_PAREN unary_expr
- -> ^( CAST_EXPR type_cast unary_expr )
- | unary_expr
+limit_clause : LIMIT^ rel expr
;
-type_cast : simple_type | map_type | tuple_type_cast | bag_type_cast
+sample_clause : SAMPLE^ rel expr
;
-tuple_type_cast : TUPLE LEFT_PAREN ( type_cast ( COMMA type_cast )* )? RIGHT_PAREN
- -> ^( TUPLE_TYPE_CAST type_cast* )
+rank_clause : RANK^ rel ( rank_by_statement )?
;
-bag_type_cast : BAG LEFT_CURLY tuple_type_cast? RIGHT_CURLY
- -> ^( BAG_TYPE_CAST tuple_type_cast? )
+rank_by_statement : BY^ rank_by_clause DENSE?
;
-unary_expr : expr_eval
- | LEFT_PAREN expr RIGHT_PAREN
- -> ^( EXPR_IN_PAREN expr )
- | neg_expr
+rank_by_clause : STAR ( ASC | DESC )?
+ | rank_list
;
-expr_eval : const_expr | var_expr
+rank_list : rank_col ( COMMA rank_col )*
+ -> rank_col+
;
-var_expr : projectable_expr ( dot_proj | pound_proj )*
+rank_col : col_range ( ASC | DESC )?
+ | col_ref ( ASC | DESC )?
;
-projectable_expr: func_eval | col_ref | bin_expr | type_conversion
+order_clause : ORDER^ rel BY! order_by_clause ( USING! func_clause )?
;
-type_conversion : LEFT_CURLY real_arg_list RIGHT_CURLY
- -> ^( FUNC_EVAL TOBAG real_arg_list )
- | LEFT_BRACKET real_arg_list RIGHT_BRACKET
- -> ^( FUNC_EVAL TOMAP real_arg_list )
- | LEFT_PAREN real_arg ( COMMA real_arg )+ RIGHT_PAREN // to disable convertion on 1 element tuples
- -> ^( FUNC_EVAL TOTUPLE real_arg+ )
+order_by_clause : STAR ( ASC | DESC )?
+ | order_col_list
;
-dot_proj : PERIOD ( col_alias_or_index
- | ( LEFT_PAREN col_alias_or_index ( COMMA col_alias_or_index )* RIGHT_PAREN ) )
- -> ^( PERIOD col_alias_or_index+ )
+order_col_list : order_col ( COMMA order_col )*
+ -> order_col+
;
-col_alias_or_index : col_alias | col_index
+order_col : col_range (ASC | DESC)?
+ | col_ref ( ASC | DESC )?
+ | LEFT_PAREN! col_ref ( ASC | DESC )? RIGHT_PAREN!
;
-col_alias : GROUP | CUBE | identifier
+distinct_clause : DISTINCT^ rel partition_clause?
;
-col_index : DOLLARVAR
+partition_clause : PARTITION^ BY! func_name
;
-col_range : c1 = col_ref DOUBLE_PERIOD c2 = col_ref?
- -> ^(COL_RANGE $c1 DOUBLE_PERIOD $c2?)
- | DOUBLE_PERIOD col_ref
- -> ^(COL_RANGE DOUBLE_PERIOD col_ref)
+rel_list : rel ( COMMA rel )* -> rel+
;
-pound_proj : POUND^ ( QUOTEDSTRING | null_keyword )
+cross_clause : CROSS^ rel_list partition_clause?
;
-bin_expr : LEFT_PAREN cond QMARK exp1 = expr COLON exp2 = expr RIGHT_PAREN
- -> ^( BIN_EXPR cond $exp1 $exp2 )
-;
-neg_expr : MINUS cast_expr
- -> ^( NEG cast_expr )
+join_clause : JOIN^ join_sub_clause ( USING! join_type )? partition_clause?
;
-limit_clause : LIMIT^ rel ( (INTEGER SEMI_COLON) => INTEGER | (LONGINTEGER SEMI_COLON) => LONGINTEGER | expr )
+join_type : QUOTEDSTRING
;
-sample_clause : SAMPLE^ rel ( (DOUBLENUMBER SEMI_COLON) => DOUBLENUMBER | expr )
+join_sub_clause : join_item ( ( ( LEFT | RIGHT | FULL ) OUTER? COMMA! join_item ) | ( ( COMMA! join_item )+ ) )
;
-rank_clause : RANK^ rel ( rank_by_statement )?
+join_item : rel join_group_by_clause -> ^( JOIN_ITEM rel join_group_by_clause )
;
-rank_by_statement : BY^ rank_by_clause ( DENSE )?
+// this can either be a single arg or something like (a,b) - which is
+// indistinguishable from a tuple. We'll therefore parse a single argument
+// (which can be a tuple of several real_args) and expand it:
+join_group_by_clause
+ @after
+ {
+ Tree by = (Tree) retval.getTree();
+ Tree realArg = by.getChild(0);
+ if(realArg.getType() == TUPLE_VAL
+ || (realArg.getType() == FUNC_EVAL && realArg.getChild(0).getType() == TOTUPLE)) {
+ retval.tree = adaptor.create(by.getType(), by.getText());
+ for(int i = 0; i < realArg.getChildCount(); ++i) {
+ if(realArg.getChild(i).getType()!=TOTUPLE)
+ ((Tree)retval.tree).addChild(realArg.getChild(i));
+ }
+ adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop);
+ }
+ }
+ : BY^ real_arg
;
-rank_by_clause : STAR ( ASC | DESC )?
- | rank_list
+union_clause : UNION^ ONSCHEMA? rel_list
;
-rank_list : rank_col ( COMMA rank_col )*
- -> rank_col+
+cube_clause : CUBE rel BY cube_rollup_list ( COMMA cube_rollup_list )* -> ^( CUBE rel ^( BY cube_rollup_list+ ) )
;
-rank_col : col_range ( ASC | DESC )?
- | col_ref ( ASC | DESC )?
+cube_rollup_list : ( CUBE | ROLLUP )^ LEFT_PAREN! real_arg ( COMMA! real_arg )* RIGHT_PAREN!
;
-order_clause : ORDER^ rel BY! order_by_clause ( USING! func_clause )?
+flatten_clause : FLATTEN^ LEFT_PAREN! expr RIGHT_PAREN!
;
-order_by_clause : STAR ( ASC | DESC )?
- | order_col_list
+// unlike loading and streaming, we want the as_clause (if present) in a different format (i.e.
+// we drop the AS token itself).
+generate_as_clause : AS! ( ( LEFT_PAREN! field_def_list RIGHT_PAREN! ) | explicit_field_def )
;
-order_col_list : order_col ( COMMA order_col )*
- -> order_col+
+flatten_generated_item : flatten_clause generate_as_clause?
+ | real_arg generate_as_clause?
;
-order_col : col_range (ASC | DESC)?
- | col_ref ( ASC | DESC )?
- | LEFT_PAREN! col_ref ( ASC | DESC )? RIGHT_PAREN!
-;
+// EXPRESSIONS
-distinct_clause : DISTINCT^ rel partition_clause?
-;
+// conditional precedence is OR weakest, then AND, then NOT, then IS NOT NULL and the comparison operators equally
+// by design the boolean operator hierarchy is entirely below the expression hierarchy
-partition_clause : PARTITION^ BY! func_name
+real_arg : expr
+ | STAR
+ | col_range
;
-cross_clause : CROSS^ rel_list partition_clause?
+cond : and_cond ( OR^ and_cond )*
;
-rel_list : rel ( COMMA rel )*
- -> rel+
+and_cond : not_cond ( AND^ not_cond )*
;
-join_clause : JOIN^ join_sub_clause ( USING! join_type )? partition_clause?
+not_cond : NOT^? unary_cond
;
-join_type : QUOTEDSTRING
+unary_cond
+ @after
+ {
+ // Expressions in parentheses are a little tricky to match as
+ // they could contain either "cond" rules or "expr" rules. If
+ // they are "expr" rules then they're put under a BOOL_COND node
+ // in the tree, but "cond" rules put no extra tokens in the tree.
+ // As we're matching non-recursively we'll parse whatever's in the
+ // brackets, and if the AST has a boolean expression at its root
+ // then we'll assume we've just got a "cond" expression in
+ // brackets, and otherwise we'll assume its an "expr" (and so
+ // we'll have to strip off the BOOL_COND token the "cast_expr"
+ // rule added)
+ Tree tree = (Tree)retval.getTree();
+ if(tree.getType() == BOOL_COND
+ && tree.getChild(0).getType() == EXPR_IN_PAREN
+ && BOOLEAN_TOKENS.contains(tree.getChild(0).getChild(0).getType())) {
+ retval.tree = tree.getChild(0).getChild(0);
+ adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop);
+ }
+ }
+ : exp1 = expr
+ ( ( IS NOT? NULL -> ^( NULL $exp1 NOT? ) )
+ | ( rel_op exp2 = expr -> ^( rel_op $exp1 $exp2 ) )
+ | ( -> ^(BOOL_COND expr) ) )
;
-join_sub_clause : join_item ( LEFT | RIGHT | FULL ) OUTER? COMMA! join_item
- | join_item_list
+expr : multi_expr ( ( PLUS | MINUS )^ multi_expr )*
;
-join_item_list : join_item ( COMMA! join_item )+
+multi_expr : cast_expr ( ( STAR | DIV | PERCENT )^ cast_expr )*
;
-join_item : rel join_group_by_clause
- -> ^( JOIN_ITEM rel join_group_by_clause )
+func_name_suffix : ( ( DOLLAR | PERIOD ) eid )+
;
-join_group_by_clause : BY^ join_group_by_expr_list
-;
+cast_expr
+ @after
+ {
+ BaseTree tree = (BaseTree) retval.getTree();
+
+ // the parser does an initial optimisation step: it removes TOTUPLE / TOMAP / TOBAG
+ // function calls if it knows they'll just return the input (i.e. because the function's
+ // argument is a literal). We'll do this here by post-processing the result:
+ if(tree.getType() == FUNC_EVAL) {
+ Integer func = FUNC_TO_LITERAL.get(tree.getChild(0).getType());
+ if(func != null) {
+ boolean canBeOptimised = true;
+ for(int arg = 1; arg < tree.getChildCount() && canBeOptimised; ++arg) {
+ canBeOptimised &= LITERAL_TOKENS.contains(tree.getChild(arg).getType());
+ }
+ if(canBeOptimised) {
+ retval.tree = adaptor.create(func, func.toString());
+ ((BaseTree)retval.tree).addChildren(tree.getChildren());
+ ((BaseTree)retval.tree).deleteChild(0); // the (e.g.) TOBAG token
+ adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop);
+ }
+ }
+ }
+
+ // a minor correction to the token text for formatting -
+ // we want NEG's text to be the same as MINUSes
+ if(tree.getType() == NEG) {
+ ((CommonTree)tree).token.setText("-");
+ }
+
+ // As noted below, brackets around a single literal mean a tuple
+ // of that literal, not a nested expression which evaluates to
+ // that literal. Remember that a NULL with children is a boolean
+ // expression, not a literal!
+ if(tree.getType() == EXPR_IN_PAREN
+ && LITERAL_TOKENS.contains(tree.getChild(0).getType())
+ && (tree.getChild(0).getType() != NULL || tree.getChild(0).getChildCount() == 0)) {
+ ((CommonTree)tree).token.setType(TUPLE_VAL);
+ }
+ }
+ : scalar
+ | MINUS cast_expr -> ^( NEG cast_expr )
+ // single columns and functions (both of which can start with an identifier). Note that we have to be
+ // careful with periods straight after the identifier, as we want those to be projections, not function
+ // calls
+ | col_ref_without_identifier projection*
+ | IDENTIFIER projection*
+ | IDENTIFIER func_name_suffix? LEFT_PAREN ( real_arg ( COMMA real_arg )* )? RIGHT_PAREN projection* -> ^( FUNC_EVAL IDENTIFIER func_name_suffix? real_arg* ) projection*
+ | func_name_without_columns LEFT_PAREN ( real_arg ( COMMA real_arg )* )? RIGHT_PAREN projection* -> ^( FUNC_EVAL func_name_without_columns real_arg* ) projection*
+ | paren_expr
+ | curly_expr
+ | bracket_expr
+;
+
+// now we have to deal with parentheses: in an expr, '(' can be the
+// start of a cast, the start of a nested expression or the start of
+// a tuple. We'll ensure parsing is unambiguous by assuming a single
+// expression in parentheses is a nested expression, whereas two or
+// more nested expressions are a tuple (unless that single expression
+// is a literal, in which case we assume tuple with a single element
+// - that literal).
+paren_expr
+ @after
+ {
+ BaseTree tree = (BaseTree)retval.getTree();
+
+ // the other side of the @after block in unary_cond: if we've
+ // matched an EXPR_IN_PAREN we expect the nested expression to
+ // be an "expr", not a "cond", so we should strip off the
+ // BOOL_COND token.
+ if(tree.getType() == EXPR_IN_PAREN
+ && tree.getChild(0).getType() == BOOL_COND) {
+ int type = tree.getChild(0).getChild(0).getType();
+ // NULL is a special case - if it has children it's a boolean
+ // expression, and if not it's a literal NULL. Note that we
+ // replace *all* children
+ if(!BOOLEAN_TOKENS.contains(type)
+ || (type == NULL && tree.getChild(0).getChild(0).getChildCount() == 0)) {
+ Tree addChildrenOf = tree.getChild(0);
+ for(int i = 0; i < tree.getChildCount(); ++i)
+ tree.deleteChild(i);
+ for(int i = 0; i < addChildrenOf.getChildCount(); ++i)
+ tree.addChild(addChildrenOf.getChild(i));
+ }
+ }
-join_group_by_expr_list : LEFT_PAREN join_group_by_expr ( COMMA join_group_by_expr )* RIGHT_PAREN
- -> join_group_by_expr+
- | join_group_by_expr
+ // A function call to TOTUPLE is inserted into the AST for
+ // some tuple literals - but as we assume the first expression
+ // after an open bracket is a "cond" rule, and as "cond" rules
+ // nest "expr" rules under a BOOL_COND token we get an invalid
+ // AST. We'll remove this BOOL_COND here:
+ if(tree.getType() == FUNC_EVAL
+ && tree.getChild(0).getType() == TOTUPLE
+ && tree.getChildCount() > 1
+ && tree.getChild(1).getType() == BOOL_COND) {
+ Tree insertChildrenOf = tree.getChild(1);
+ tree.deleteChild(1);
+ for(int i = insertChildrenOf.getChildCount() - 1; i >= 0; --i)
+ tree.insertChild(1, insertChildrenOf.getChild(i));
+ }
+ }
+ : LEFT_PAREN! try_implicit_map_cast
;
-join_group_by_expr : col_range | expr | STAR
+try_implicit_map_cast
+ // we'll also allow implicit map casts (for backwards compatibility only -
+ // bag and tuple casts have to be explicit and it makes the grammar more
+ // simple). Unfortunately we'll have to turn on back-tracking for this rule,
+ // as LEFT_PAREN LEFT_BRACKET could be a literal map in a EXPR_IN_PAREN.
+ // It'd be much better if we could remove this from the Pig language (and
+ // just rely on explicit map casts) - then we'd have no backtracking at all!
+ : ( implicit_map_type RIGHT_PAREN cast_expr) => implicit_map_type RIGHT_PAREN cast_expr -> ^( CAST_EXPR implicit_map_type cast_expr )
+ | after_left_paren
;
-union_clause : UNION^ ONSCHEMA? rel_list
+after_left_paren : explicit_type_cast RIGHT_PAREN cast_expr -> ^( CAST_EXPR explicit_type_cast cast_expr )
+ // tuples
+ | RIGHT_PAREN projection* -> ^( TUPLE_VAL ) projection*
+ | STAR ( COMMA real_arg )* RIGHT_PAREN projection* -> ^( FUNC_EVAL TOTUPLE STAR real_arg* ) projection*
+ | col_range ( COMMA real_arg )* RIGHT_PAREN projection* -> ^( FUNC_EVAL TOTUPLE col_range real_arg* ) projection*
+ // Tuples begin with '(' expr, but shorthand-booleans begin with '(' cond. As cond
+ // and expr are indistinguishable, we'll parse as a cond (i.e. the most lenient) and
+ // for exprs, strip off the BOOL_COND trees. You can have both nested conds and nested
+ // exprs, so we'll just assume cond.
+ | cond
+ ( ( ( COMMA real_arg )+ RIGHT_PAREN projection* -> ^( FUNC_EVAL TOTUPLE cond real_arg+ ) projection* )
+ | ( RIGHT_PAREN -> ^( EXPR_IN_PAREN cond ) )
+ | ( QMARK exp1 = expr COLON exp2 = expr RIGHT_PAREN -> ^( BIN_EXPR cond $exp1 $exp2 ) ) )
;
-foreach_clause_simple : FOREACH^ rel foreach_plan_simple
+curly_expr : LEFT_CURLY real_arg ( COMMA real_arg )* RIGHT_CURLY projection* -> ^( FUNC_EVAL TOBAG real_arg+ ) projection*
+ | LEFT_CURLY RIGHT_CURLY projection* -> ^( BAG_VAL ) projection*
;
-foreach_plan_simple : generate_clause
- -> ^( FOREACH_PLAN_SIMPLE generate_clause )
+bracket_expr : LEFT_BRACKET real_arg ( COMMA real_arg )* RIGHT_BRACKET projection* -> ^( FUNC_EVAL TOMAP real_arg+ ) projection*
+ | LEFT_BRACKET keyvalue ( COMMA keyvalue )* RIGHT_BRACKET projection* -> ^( MAP_VAL keyvalue+ ) projection*
+ | LEFT_BRACKET RIGHT_BRACKET projection* -> ^( MAP_VAL ) projection*
;
-foreach_clause_complex : FOREACH^ rel foreach_plan_complex
+projection : PERIOD ( col_ref | LEFT_PAREN col_ref ( COMMA col_ref )* RIGHT_PAREN ) -> ^( PERIOD col_ref+ )
+ | POUND^ ( QUOTEDSTRING | NULL )
;
-foreach_plan_complex : nested_blk
- -> ^( FOREACH_PLAN_COMPLEX nested_blk )
-;
+// ATOMS
-cube_clause : CUBE^ cube_item
+// for disambiguation with func_names
+col_ref_without_identifier : GROUP | CUBE | DOLLARVAR
;
-cube_item : rel ( cube_by_clause )
+col_ref : col_ref_without_identifier | IDENTIFIER
;
-cube_by_clause : BY^ cube_or_rollup
+col_range : c1 = col_ref DOUBLE_PERIOD c2 = col_ref? -> ^(COL_RANGE $c1 DOUBLE_PERIOD $c2?)
+ | DOUBLE_PERIOD col_ref -> ^(COL_RANGE DOUBLE_PERIOD col_ref)
;
-cube_or_rollup : cube_rollup_list ( COMMA cube_rollup_list )*
- -> cube_rollup_list+
+scalar : INTEGER
+ | LONGINTEGER
+ | FLOATNUMBER
+ | DOUBLENUMBER
+ | QUOTEDSTRING
+ | NULL
+ | TRUE
+ | FALSE
;
-cube_rollup_list : ( CUBE | ROLLUP )^ cube_by_expr_list
+keyvalue : QUOTEDSTRING POUND literal -> ^( KEY_VAL_PAIR QUOTEDSTRING literal )
;
-cube_by_expr_list : LEFT_PAREN cube_by_expr ( COMMA cube_by_expr )* RIGHT_PAREN
- -> cube_by_expr+
+literal_map : LEFT_BRACKET keyvalue ( COMMA keyvalue )* RIGHT_BRACKET -> ^( MAP_VAL keyvalue+ )
+ | LEFT_BRACKET RIGHT_BRACKET -> ^( MAP_VAL )
;
-cube_by_expr : col_range | expr | STAR
+
+literal_bag : LEFT_CURLY literal_tuple ( COMMA literal_tuple )* RIGHT_CURLY -> ^( BAG_VAL literal_tuple+ )
+ | LEFT_CURLY RIGHT_CURLY -> ^( BAG_VAL )
;
-nested_blk : LEFT_CURLY! nested_command_list ( generate_clause SEMI_COLON! ) RIGHT_CURLY!
+literal_tuple : LEFT_PAREN literal ( COMMA literal )* RIGHT_PAREN -> ^( TUPLE_VAL literal+ )
+ | LEFT_PAREN RIGHT_PAREN -> ^( TUPLE_VAL )
;
-generate_clause : GENERATE flatten_generated_item ( COMMA flatten_generated_item )*
- -> ^( GENERATE flatten_generated_item+ )
+literal : scalar | literal_map | literal_bag | literal_tuple
;
-nested_command_list : ( nested_command SEMI_COLON )*
- -> nested_command*
- |
+// NESTING
+
+nested_blk : ( nested_command SEMI_COLON )* GENERATE flatten_generated_item ( COMMA flatten_generated_item )* SEMI_COLON
+ -> nested_command* ^( GENERATE flatten_generated_item+ )
;
-nested_command : ( identifier EQUAL col_ref PERIOD col_ref_list { input.LA( 1 ) == SEMI_COLON }? ) => ( identifier EQUAL nested_proj )
- -> ^( NESTED_CMD identifier nested_proj )
- | identifier EQUAL expr
- -> ^( NESTED_CMD_ASSI identifier expr )
- | identifier EQUAL nested_op
- -> ^( NESTED_CMD identifier nested_op )
+nested_command : ( IDENTIFIER EQUAL col_ref PERIOD col_ref_list { input.LA( 1 ) == SEMI_COLON }? ) => ( IDENTIFIER EQUAL nested_proj )
+ -> ^( NESTED_CMD IDENTIFIER nested_proj )
+ | IDENTIFIER EQUAL expr
+ -> ^( NESTED_CMD_ASSI IDENTIFIER expr )
+ | IDENTIFIER EQUAL nested_op
+ -> ^( NESTED_CMD IDENTIFIER nested_op )
;
nested_op : nested_filter
@@ -672,7 +852,8 @@ nested_limit : LIMIT^ nested_op_input (
nested_cross : CROSS^ nested_op_input_list
;
-nested_foreach: FOREACH^ nested_op_input generate_clause
+nested_foreach: FOREACH nested_op_input GENERATE flatten_generated_item ( COMMA flatten_generated_item )*
+ -> ^( FOREACH nested_op_input ^( GENERATE flatten_generated_item+ ) )
;
nested_op_input : col_ref | nested_proj
@@ -682,80 +863,16 @@ nested_op_input_list : nested_op_input (
-> nested_op_input+
;
-stream_clause : STREAM^ rel THROUGH! ( EXECCOMMAND | alias ) as_clause?
-;
-
-mr_clause : MAPREDUCE^ QUOTEDSTRING ( LEFT_PAREN! path_list RIGHT_PAREN! )? store_clause load_clause EXECCOMMAND?
-;
-
-split_clause : SPLIT rel INTO split_branch ( ( COMMA split_branch )+ | ( ( COMMA split_branch )* COMMA split_otherwise ) )
- -> ^( SPLIT rel split_branch+ split_otherwise?)
-;
-
-split_branch : alias IF cond
- -> ^( SPLIT_BRANCH alias cond )
-;
-
-split_otherwise : alias OTHERWISE
- -> ^( OTHERWISE alias )
-;
-
-col_ref : alias_col_ref | dollar_col_ref
-;
-
-alias_col_ref : GROUP | CUBE | identifier
-;
-
-dollar_col_ref : DOLLARVAR
-;
-
-const_expr : literal
-;
-
-literal : scalar | map | bag | tuple
-;
-
-
-scalar : num_scalar | QUOTEDSTRING | null_keyword | TRUE | FALSE
-;
-
-num_scalar : MINUS? ( INTEGER | LONGINTEGER | FLOATNUMBER | DOUBLENUMBER )
-;
-
-map : LEFT_BRACKET keyvalue ( COMMA keyvalue )* RIGHT_BRACKET
- -> ^( MAP_VAL keyvalue+ )
- | LEFT_BRACKET RIGHT_BRACKET
- -> ^( MAP_VAL )
-;
-
-keyvalue : map_key POUND const_expr
- -> ^( KEY_VAL_PAIR map_key const_expr )
-;
-
-map_key : QUOTEDSTRING
-;
-
-bag : LEFT_CURLY tuple ( COMMA tuple )* RIGHT_CURLY
- -> ^( BAG_VAL tuple+ )
- | LEFT_CURLY RIGHT_CURLY
- -> ^( BAG_VAL )
-;
-
-tuple : LEFT_PAREN literal ( COMMA literal )* RIGHT_PAREN
- -> ^( TUPLE_VAL literal+ )
- | LEFT_PAREN RIGHT_PAREN
- -> ^( TUPLE_VAL )
-;
+// IDENTIFIERS
// extended identifier, handling the keyword and identifier conflicts. Ugly but there is no other choice.
-eid : rel_str_op
+eid_without_columns : rel_str_op
| IMPORT
| RETURNS
| DEFINE
| LOAD
| FILTER
| FOREACH
- | CUBE
| ROLLUP
| ORDER
| DISTINCT
@@ -774,25 +891,15 @@ eid : rel_str_op
| OUTER
| PARALLEL
| PARTITION
- | GROUP
| AND
| OR
- | NOT
| GENERATE
- | FLATTEN
| ASC
| DESC
| BOOL
- | INT
- | LONG
- | FLOAT
- | DOUBLE
| DATETIME
| CHARARRAY
| BYTEARRAY
- | BAG
- | TUPLE
- | MAP
| IS
| STREAM
| THROUGH
@@ -810,40 +917,36 @@ eid : rel_str_op
| LEFT
| RIGHT
| FULL
- | identifier
- | null_keyword
- | TRUE
- | FALSE
| REALIAS
| BOOL_COND
;
-// relational operator
-rel_op : rel_op_eq
- | rel_op_ne
- | rel_op_gt
- | rel_op_gte
- | rel_op_lt
- | rel_op_lte
- | STR_OP_MATCHES
-;
-
-rel_op_eq : STR_OP_EQ | NUM_OP_EQ
-;
-
-rel_op_ne : STR_OP_NE | NUM_OP_NE
-;
-
-rel_op_gt : STR_OP_GT | NUM_OP_GT
-;
-
-rel_op_gte : STR_OP_GTE | NUM_OP_GTE
-;
-
-rel_op_lt : STR_OP_LT | NUM_OP_LT
+eid : eid_without_columns
+ | IDENTIFIER
+ | GROUP
+ | CUBE
+ | TRUE
+ | FALSE
+ | INT
+ | LONG
+ | FLOAT
+ | DOUBLE
+ | NULL
+ | NOT
+ | FLATTEN
+ | BAG
+ | TUPLE
+ | MAP
;
-rel_op_lte : STR_OP_LTE | NUM_OP_LTE
+// relational operator
+rel_op : rel_str_op
+ | NUM_OP_EQ
+ | NUM_OP_NE
+ | NUM_OP_GT
+ | NUM_OP_GTE
+ | NUM_OP_LT
+ | NUM_OP_LTE
;
rel_str_op : STR_OP_EQ
@@ -854,11 +957,3 @@ rel_str_op : STR_OP_EQ
| STR_OP_LTE
| STR_OP_MATCHES
;
-
-null_keyword : {input.LT(1).getText().equalsIgnoreCase("NULL")}? IDENTIFIER_L
- -> NULL[$IDENTIFIER_L]
-;
-
-identifier : {!input.LT(1).getText().equalsIgnoreCase("NULL")}? IDENTIFIER_L
- -> IDENTIFIER[$IDENTIFIER_L]
-;
Modified: pig/branches/branch-0.11/test/e2e/pig/tests/macro.conf
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.11/test/e2e/pig/tests/macro.conf?rev=1464734&r1=1464733&r2=1464734&view=diff
==============================================================================
--- pig/branches/branch-0.11/test/e2e/pig/tests/macro.conf (original)
+++ pig/branches/branch-0.11/test/e2e/pig/tests/macro.conf Thu Apr 4 20:38:52 2013
@@ -466,7 +466,7 @@ $cfg = {
a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
x = simple_macro(a, '3.0', '40');
store x into ':OUTPATH:';#,
- 'expected_err_regex' => "mismatched input 'foreach' expecting IDENTIFIER_L"
+ 'expected_err_regex' => "mismatched input 'foreach' expecting IDENTIFIER"
},
{
# UDF as macro name
Modified: pig/branches/branch-0.11/test/e2e/pig/tests/negative.conf
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.11/test/e2e/pig/tests/negative.conf?rev=1464734&r1=1464733&r2=1464734&view=diff
==============================================================================
--- pig/branches/branch-0.11/test/e2e/pig/tests/negative.conf (original)
+++ pig/branches/branch-0.11/test/e2e/pig/tests/negative.conf Thu Apr 4 20:38:52 2013
@@ -142,7 +142,7 @@ store a into ':INPATH:/singlefile/fileex
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = group a by name;
c = foreach b { split a into ba if age < '25', bb if age > '40'; generate group, COUNT(ba), COUNT(bb);}\,
- 'expected_err_regex' => "Syntax error, unexpected symbol at or near 'split'",
+ 'expected_err_regex' => "mismatched input 'split' expecting GENERATE",
},
{
'num' => 8,
@@ -160,7 +160,7 @@ store a into ':INPATH:/singlefile/fileex
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = group a by name;
c = foreach b { store a into ':OUTPATH:'; generate *;}\,
- 'expected_err_regex' => "Syntax error, unexpected symbol at or near 'store'",
+ 'expected_err_regex' => "mismatched input 'store' expecting GENERATE",
},
]
@@ -288,7 +288,7 @@ define `perl PigStreaming.pl foo -`;
A = load ':INPATH:/singlefile/studenttab10k';
B = stream A through CMD;
dump B;#,
- 'expected_err_regex' => "mismatched input '`perl PigStreaming.pl foo -`' expecting IDENTIFIER_L",
+ 'expected_err_regex' => "mismatched input '`perl PigStreaming.pl foo -`' expecting IDENTIFIER",
},
{
# quotes missing from name of the file in ship script
Modified: pig/branches/branch-0.11/test/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/TestInputSizeReducerEstimator.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.11/test/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/TestInputSizeReducerEstimator.java?rev=1464734&r1=1464733&r2=1464734&view=diff
==============================================================================
--- pig/branches/branch-0.11/test/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/TestInputSizeReducerEstimator.java (original)
+++ pig/branches/branch-0.11/test/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/TestInputSizeReducerEstimator.java Thu Apr 4 20:38:52 2013
@@ -29,7 +29,7 @@ import org.junit.Test;
public class TestInputSizeReducerEstimator {
- private static final Configuration CONF = new Configuration();
+ private static final Configuration CONF = new Configuration(false);
@Test
public void testGetInputSizeFromFs() throws Exception {
Modified: pig/branches/branch-0.11/test/org/apache/pig/parser/TestLogicalPlanGenerator.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.11/test/org/apache/pig/parser/TestLogicalPlanGenerator.java?rev=1464734&r1=1464733&r2=1464734&view=diff
==============================================================================
--- pig/branches/branch-0.11/test/org/apache/pig/parser/TestLogicalPlanGenerator.java (original)
+++ pig/branches/branch-0.11/test/org/apache/pig/parser/TestLogicalPlanGenerator.java Thu Apr 4 20:38:52 2013
@@ -35,6 +35,7 @@ import org.apache.pig.builtin.mock.Stora
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.apache.pig.impl.util.Utils;
+import org.apache.pig.test.TestEvalPipeline.MapUDF;
import org.apache.pig.test.Util;
import org.junit.Before;
import org.junit.BeforeClass;
@@ -345,14 +346,33 @@ public class TestLogicalPlanGenerator {
}
@Test
- public void testFilter() throws Exception {
+ public void testFilter1() throws Exception {
String query = "A = load 'x' as ( u:int, v:long, w:bytearray); " +
"B = filter A by 2 > 1;\n" +
"store B into 'y';";
generateLogicalPlan( query );
}
+
+ @Test
+ public void testFilter2() throws Exception {
+ generateLogicalPlan(
+ "A = load 'x' as ( u:int, v:long, w:bytearray); " +
+ "B = filter A by u is null;\n");
+ }
+
+ @Test
+ public void testFilter3() throws Exception {
+ generateLogicalPlan(
+ "A = load 'x' as ( u:int, v:long, w:bytearray); " +
+ "B = filter A by u is not null;\n");
+ }
@Test
+ public void testFilter4() throws Exception {
+ generateLogicalPlan("b = filter (load 'd.txt' as (id:int, v1, v2)) by (id > 3) AND (v1 is null);");
+ }
+
+ @Test
public void testScopedAlias() throws Exception {
String query = "A = load 'x' as ( u:int, v:long, w:bytearray);" +
"B = load 'y' as ( u:int, x:int, y:chararray);" +
@@ -474,7 +494,130 @@ public class TestLogicalPlanGenerator {
+ "C = rank A;";
generateLogicalPlan(query);
}
+
+ @Test
+ public void testCast1() throws Exception {
+ String query = "data = LOAD 'data.txt' AS (num:CHARARRAY);" +
+ "numbers = FOREACH data GENERATE (INT) num;";
+ generateLogicalPlan(query);
+ }
+
+ @Test
+ public void testCast2() throws Exception {
+ generateLogicalPlan(
+ "sds = LOAD '/my/data/location' AS (simpleFields:map[], mapFields:map[], listMapFields:map[]); " +
+ "queries_rand = FOREACH sds GENERATE (CHARARRAY) (mapFields#'page_params'#'query') AS query_string;");
+ }
+
+ @Test
+ public void testBoolean1() throws Exception {
+ generateLogicalPlan(
+ "A = load 'INPUT_FILE' as (id:int, fruit);" +
+ "B = group A by id;" +
+ "C = foreach B generate group, " +
+ "((org.apache.pig.test.utils.AccumulatorBagCount(A)>1 and " +
+ "org.apache.pig.test.utils.AccumulatorBagCount(A)<3)?0:1);");
+ }
+
+ @Test
+ public void testBoolean2() throws Exception {
+ generateLogicalPlan(
+ "A = load 'INPUT_FILE' as (id:int, fruit);" +
+ "B = group A by id;" +
+ "C = foreach B generate group, " +
+ "((org.apache.pig.test.utils.AccumulatorBagCount(A)>3 or " +
+ "org.apache.pig.test.utils.AccumulatorBagCount(A)<2)?0:1);");
+ }
+
+ @Test
+ public void testBoolean3() throws Exception {
+ generateLogicalPlan(
+ "A = load 'INPUT_FILE' as (id:int, fruit);" +
+ "B = filter A by id < 5 and ( fruit neq 'cabbage' or id == 17 );");
+ }
+
+ @Test
+ public void testBoolean4() throws Exception {
+ generateLogicalPlan(
+ "a = load '1.txt' as (a0, a1);" +
+ "b = foreach a generate (a0 is not null ? 0 : 1);");
+ }
+
+ @Test
+ public void testBoolean5() throws Exception {
+ generateLogicalPlan(
+ "a = load '1.txt' as (a0, a1);" +
+ "b = foreach a generate (a0 is null ? 0 : 2);");
+ }
+
+ @Test
+ public void testAccumWithRegexp() throws Exception {
+ generateLogicalPlan(
+ "A = load 'AccumulatorInput.txt' as (id:int, fruit);" +
+ "B = group A by id;" +
+ "C = foreach B generate group, (((chararray)org.apache.pig.test.utils.AccumulatorBagCount(A)) matches '1*' ?0:1);");
+ }
+
+ @Test
+ public void testMapsideGroupByMultipleColumns() throws Exception {
+ generateLogicalPlan(
+ "A = LOAD 'MapSideGroupInput.txt' using org.apache.pig.test.TestCollectedGroup$DummyCollectableLoader() as (id, name, grade);" +
+ "B = group A by (id, name) using 'collected';");
+ }
+
+ @Test
+ public void testMapUDF() throws Exception {
+ generateLogicalPlan(
+ "A = LOAD 'someData';" +
+ "B = foreach A generate " + MapUDF.class.getName() + "($0) as mymap;" +
+ "C = foreach B {" +
+ "generate (double)mymap#'double' as d, " +
+ "(long)mymap#'long' + (float)mymap#'float' as float_sum, " +
+ "CONCAT((chararray) mymap#'string', ' World!'), " +
+ "mymap#'int' * 10, " +
+ "(bag{tuple()}) mymap#'bag' as mybag, " +
+ "(tuple()) mymap#'tuple' as mytuple, " +
+ "(map[])mymap#'map' as mapInMap, " +
+ "mymap#'dba' as dba;" +
+ "};");
+ }
+ @Test
+ public void testSimpleMapCast() throws Exception {
+ generateLogicalPlan(
+ "a = load 'testSimpleMapCast' as (m);" +
+ "b = foreach a generate ([int])m;");
+ }
+
+ @Test
+ public void testComplexCast() throws Exception {
+ generateLogicalPlan(
+ "a = load 'testComplexCast' as (m);" +
+ "b = foreach a generate ([{(i:int,j:int)}])m;");
+ }
+
+ @Test
+ public void testNullConstant() throws Exception {
+ generateLogicalPlan(
+ "a = load 'foo' as (x:int, y:double, str:chararray);" +
+ "b = foreach a generate {(null)}, ['2'#null];");
+ }
+
+ @Test
+ public void testEmptyTupConst() throws Exception {
+ generateLogicalPlan( "a = foreach (load 'b') generate ({});");
+ }
+
+ @Test
+ public void testJoin1() throws Exception {
+ generateLogicalPlan(
+ "A = load 'hat' as (m:map[]);" +
+ "B = filter A by m#'cond'==1;" +
+ "C = filter B by m#'key1'==1;" +
+ "D = filter B by m#'key2'==2;" +
+ "E = join C by m#'key1', D by m#'key1';");
+ }
+
// See: PIG-2937
@Test
public void testRelationAliasInNestedForeachWhereUnspecified() throws Exception {
Modified: pig/branches/branch-0.11/test/org/apache/pig/parser/TestQueryParser.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.11/test/org/apache/pig/parser/TestQueryParser.java?rev=1464734&r1=1464733&r2=1464734&view=diff
==============================================================================
--- pig/branches/branch-0.11/test/org/apache/pig/parser/TestQueryParser.java (original)
+++ pig/branches/branch-0.11/test/org/apache/pig/parser/TestQueryParser.java Thu Apr 4 20:38:52 2013
@@ -22,20 +22,17 @@ import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
-import junit.framework.Assert;
-
import org.antlr.runtime.CharStream;
import org.antlr.runtime.CommonTokenStream;
-import org.antlr.runtime.MismatchedTokenException;
import org.antlr.runtime.RecognitionException;
import org.antlr.runtime.tree.CommonTree;
import org.antlr.runtime.tree.Tree;
import org.apache.pig.ExecType;
import org.apache.pig.PigRunner;
import org.apache.pig.PigServer;
-import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.test.Util;
import org.apache.pig.tools.pigstats.PigStats;
+import org.junit.Assert;
import org.junit.Test;
public class TestQueryParser {
@@ -303,7 +300,7 @@ public class TestQueryParser {
@Test //PIG-2083
public void testNullInBinCondNoSpace() throws IOException{
String query = "a = load '1.txt' as (a0, a1);" +
- "b = foreach a generate (a0==0?null:2);"; //no space around the null keyword
+ "b = foreach a generate (a0==0?null:2);"; //no space around the null keyword, so the lexer doesn't emit a NULL token
PigServer pig = new PigServer(ExecType.LOCAL);
Util.registerMultiLineQuery(pig, query);
pig.explain("b", System.out);
@@ -473,4 +470,181 @@ public class TestQueryParser {
query += "B = rank A by * DENSE;";
shouldPass(query);
}
+
+ @Test // PIG-2769
+ public void testSlowQuery() throws Exception {
+ String query = "A = load 'A.txt' using PigStorage() AS (m: int);";
+ query += "B = FOREACH A { days_str = (chararray) (m == 1 ? 31: (m == 2 ? 28: (m == 3 ? 31: (m == 4 ? 30: (m == 5 ? 31: (m == 6 ? 30: (m == 7 ? 31: (m == 8 ? 31: (m == 9 ? 30: (m == 10 ? 31: (m == 11 ? 30:31))))))))))); GENERATE days_str as days_str; }";
+ query += "store B into 'B';";
+ shouldPass(query);
+ }
+
+ @Test
+ public void testFunction1() throws Exception {
+ shouldPass("B = foreach A generate org.apache.pig.builtin.CONCAT(b, c);");
+ }
+
+ @Test
+ public void testFunction2() throws Exception {
+ shouldPass("B = foreach A generate flatten(myudfs.Swap(name, age)), gpa;");
+ }
+
+ @Test
+ public void testFilter1() throws Exception {
+ shouldPass("E = FILTER D BY not IsEmpty(B);");
+ }
+
+ @Test
+ public void testFilter2() throws Exception {
+ shouldPass("C = filter B by 2 > 1;");
+ }
+
+ @Test
+ public void testFilter3() throws Exception {
+ shouldPass("C = filter B by a is null;");
+ }
+
+ @Test
+ public void testFilter4() throws Exception {
+ shouldPass("C = filter B by a is not null;");
+ }
+
+ @Test
+ public void testGroup1() throws Exception {
+ shouldPass("B = group A by ( a, $2 );");
+ }
+
+ @Test
+ public void testCast1() throws Exception {
+ shouldPass("B = FOREACH A GENERATE (int)$0 + 1;");
+ }
+
+ @Test
+ public void testCast2() throws Exception {
+ shouldPass("B = FOREACH A GENERATE (tuple(int,int,float))fld;");
+ }
+
+ @Test
+ public void testCast3() throws Exception {
+ shouldPass("B = FOREACH A GENERATE (bag{tuple(long)})fld; ");
+ }
+
+ @Test
+ public void testCast4() throws Exception {
+ shouldPass("B = FOREACH A GENERATE (map[])fld;");
+ }
+
+ @Test
+ public void testCast5() throws Exception {
+ shouldPass("E = foreach D generate userid, clicks/(double)C.total, cnt;");
+ }
+
+ @Test
+ public void testCast6() throws Exception {
+ shouldPass("X = FOREACH B GENERATE group, (chararray)COUNT(A) AS total;");
+ }
+
+ @Test
+ public void testCast7() throws Exception {
+ shouldPass("B = FOREACH A GENERATE a + (int)null;");
+ }
+
+ @Test
+ public void testCast8() throws Exception {
+ shouldPass("a = load '1.txt' as(map[int]); --Map value is int");
+ }
+
+ @Test
+ public void testCast9() throws Exception {
+ shouldPass("b = foreach a generate (map[(i:int)])a0; -- Map value is tuple");
+ }
+
+ @Test
+ public void testCast10() throws Exception {
+ shouldPass("b = stream a through `cat` as (m:map[{(i:int,j:chararray)}]); -- Map value is bag");
+ }
+
+ @Test
+ public void testNull1() throws Exception {
+ shouldPass("B = FOREACH A GENERATE a, null;");
+ }
+
+ @Test
+ public void testNull2() throws Exception {
+ shouldPass("D = FOREACH C GENERATE FLATTEN((IsEmpty(A) ? null : A)), FLATTEN((IsEmpty(B) ? null : B));");
+ }
+
+ @Test
+ public void testNull3() throws Exception {
+ shouldPass("B = FOREACH A GENERATE a + null;");
+ }
+
+ @Test
+ public void testStar1() throws Exception {
+ shouldPass("B = FOREACH A GENERATE *, MyUDF(name, age);");
+ }
+
+ @Test
+ public void testStar2() throws Exception {
+ shouldPass("C = FOREACH A GENERATE name, age, MyUDF(*);");
+ }
+
+ @Test
+ public void testProjectRange1() throws Exception {
+ shouldPass("F = foreach IN generate (int)col0, col1 .. col3; ");
+ }
+
+ @Test
+ public void testProjectRange2() throws Exception {
+ shouldPass("SORT = order IN by col2 .. col3, col0, col4 ..; ");
+ }
+
+ @Test
+ public void testProjectRange3() throws Exception {
+ shouldPass("J = join IN1 by $0 .. $3, IN2 by $0 .. $3; ");
+ }
+
+ @Test
+ public void testProjectRange4() throws Exception {
+ shouldPass("g = group l1 by b .. c; ");
+ }
+
+ @Test
+ public void testProjection1() throws Exception {
+ shouldPass("b = foreach a generate flatten(group), SUM($1.$2);");
+ }
+
+ @Test
+ public void testProjection2() throws Exception {
+ shouldPass("a = group (load '/var/folders/bs/cy3sndf95ng5ljgy5nxs1j080000gn/T/test6322762304144938425txt') by ($0,$1);");
+ }
+
+ @Test
+ public void testPartition() throws Exception {
+ shouldPass("B = group A by $0 PARTITION BY org.apache.pig.test.utils.SimpleCustomPartitioner parallel 2;");
+ }
+
+ @Test
+ public void testBoolean1() throws Exception {
+ shouldPass("C = foreach B generate group, " +
+ "((org.apache.pig.test.utils.AccumulatorBagCount(A)>1 and " +
+ "org.apache.pig.test.utils.AccumulatorBagCount(A)<3)?0:1);");
+ }
+
+ @Test
+ public void testBoolean2() throws Exception {
+ shouldPass("C = foreach B generate group, " +
+ "((org.apache.pig.test.utils.AccumulatorBagCount(A)>3 or " +
+ "org.apache.pig.test.utils.AccumulatorBagCount(A)<2)?0:1);");
+ }
+
+ @Test
+ public void testSplit1() throws Exception {
+ shouldPass("split a into b if id > 3, c if id < 3, d otherwise;");
+ }
+
+ @Test
+ public void testSplit2() throws Exception {
+ shouldPass("SPLIT logs INTO logins IF command == 'login', all_quits IF command == 'quit';");
+ }
}
Modified: pig/branches/branch-0.11/test/org/apache/pig/test/TestLogicalPlanBuilder.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.11/test/org/apache/pig/test/TestLogicalPlanBuilder.java?rev=1464734&r1=1464733&r2=1464734&view=diff
==============================================================================
--- pig/branches/branch-0.11/test/org/apache/pig/test/TestLogicalPlanBuilder.java (original)
+++ pig/branches/branch-0.11/test/org/apache/pig/test/TestLogicalPlanBuilder.java Thu Apr 4 20:38:52 2013
@@ -25,9 +25,6 @@ import java.util.List;
import java.util.Map;
import java.util.Properties;
-import junit.framework.Assert;
-import junit.framework.AssertionFailedError;
-
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.Job;
@@ -65,6 +62,9 @@ import org.apache.pig.test.utils.Identit
import org.junit.Before;
import org.junit.Test;
+import junit.framework.Assert;
+import junit.framework.AssertionFailedError;
+
public class TestLogicalPlanBuilder {
PigContext pigContext = new PigContext(ExecType.LOCAL, new Properties());
private PigServer pigServer = null;
@@ -790,7 +790,12 @@ public class TestLogicalPlanBuilder {
public void testQueryFail67() throws Exception {
String q = " a = load 'input1' as (name, age, gpa);" +
" b = foreach a generate age, age * 10L, gpa/0.2f, {16, 4.0e-2, 'hello'};";
- buildPlan(q);
+ try {
+ buildPlan(q);
+ } catch (AssertionFailedError e) {
+ return;
+ }
+ Assert.fail( "query should fail" );
}
@Test