You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ch...@apache.org on 2013/04/16 23:57:13 UTC
svn commit: r1468623 - in /pig/trunk: CHANGES.txt
src/org/apache/pig/parser/QueryParser.g
test/org/apache/pig/test/TestParser.java
Author: cheolsoo
Date: Tue Apr 16 21:57:13 2013
New Revision: 1468623
URL: http://svn.apache.org/r1468623
Log:
PIG-3122: Operators should not implicitly become reserved keywords (jcoveney via cheolsoo)
Modified:
pig/trunk/CHANGES.txt
pig/trunk/src/org/apache/pig/parser/QueryParser.g
pig/trunk/test/org/apache/pig/test/TestParser.java
Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1468623&r1=1468622&r2=1468623&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Tue Apr 16 21:57:13 2013
@@ -156,6 +156,8 @@ PIG-3013: BinInterSedes improve chararra
BUG FIXES
+PIG-3122: Operators should not implicitly become reserved keywords (jcoveney via cheolsoo)
+
PIG-3193: Fix "ant docs" warnings (cheolsoo)
PIG-3186: tar/deb/pkg ant targets should depend on piggybank (lbendig via gates)
Modified: pig/trunk/src/org/apache/pig/parser/QueryParser.g
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/parser/QueryParser.g?rev=1468623&r1=1468622&r2=1468623&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/parser/QueryParser.g (original)
+++ pig/trunk/src/org/apache/pig/parser/QueryParser.g Tue Apr 16 21:57:13 2013
@@ -211,7 +211,7 @@ nested_op_clause : LEFT_PAREN! op_clause
;
general_statement : FAT_ARROW ( ( op_clause parallel_clause? ) | nested_op_clause ) -> ^( STATEMENT IDENTIFIER["____RESERVED____"] op_clause? parallel_clause? nested_op_clause? )
- | ( IDENTIFIER EQUAL )? ( ( op_clause parallel_clause? ) | nested_op_clause ) -> ^( STATEMENT IDENTIFIER? op_clause? parallel_clause? nested_op_clause? )
+ | ( identifier_plus EQUAL )? ( ( op_clause parallel_clause? ) | nested_op_clause ) -> ^( STATEMENT identifier_plus? op_clause? parallel_clause? nested_op_clause? )
;
// Statement represented by a foreach operator with a nested block. Simple foreach statement
@@ -220,8 +220,8 @@ general_statement : FAT_ARROW ( ( op_cla
// if there is a nested block. This is ugly, but it gets the job done.
foreach_statement : FAT_ARROW FOREACH rel ( foreach_plan_complex | ( foreach_plan_simple parallel_clause? SEMI_COLON ) )
-> ^( STATEMENT IDENTIFIER["____RESERVED____"] ^( FOREACH rel foreach_plan_complex? foreach_plan_simple? ) parallel_clause? )
- | ( IDENTIFIER EQUAL )? FOREACH rel ( foreach_plan_complex | ( foreach_plan_simple parallel_clause? SEMI_COLON ) )
- -> ^( STATEMENT IDENTIFIER? ^( FOREACH rel foreach_plan_complex? foreach_plan_simple? ) parallel_clause? )
+ | ( identifier_plus EQUAL )? FOREACH rel ( foreach_plan_complex | ( foreach_plan_simple parallel_clause? SEMI_COLON ) )
+ -> ^( STATEMENT identifier_plus? ^( FOREACH rel foreach_plan_complex? foreach_plan_simple? ) parallel_clause? )
;
foreach_plan_complex : LEFT_CURLY nested_blk RIGHT_CURLY -> ^( FOREACH_PLAN_COMPLEX nested_blk )
@@ -235,13 +235,13 @@ foreach_plan_simple : GENERATE flatten_g
macro_content : LEFT_CURLY ( macro_content | ~(LEFT_CURLY | RIGHT_CURLY) )* RIGHT_CURLY
;
-macro_param_clause : LEFT_PAREN ( IDENTIFIER (COMMA IDENTIFIER)* )? RIGHT_PAREN
- -> ^(PARAMS IDENTIFIER*)
+macro_param_clause : LEFT_PAREN ( identifier_plus (COMMA identifier_plus)* )? RIGHT_PAREN
+ -> ^(PARAMS identifier_plus*)
;
macro_return_clause
- : RETURNS ((IDENTIFIER (COMMA IDENTIFIER)*) | VOID)
- -> ^(RETURN_VAL IDENTIFIER*)
+ : RETURNS ((identifier_plus (COMMA identifier_plus)*) | VOID)
+ -> ^(RETURN_VAL identifier_plus*)
;
macro_body_clause : macro_content -> ^(MACRO_BODY { new PigParserNode(new CommonToken(1, $macro_content.text), this.getSourceName(), $macro_content.start) } )
@@ -252,8 +252,8 @@ macro_clause : macro_param_clause macro_
;
inline_return_clause
- : IDENTIFIER EQUAL -> ^(RETURN_VAL IDENTIFIER)
- | IDENTIFIER (COMMA IDENTIFIER)+ EQUAL -> ^(RETURN_VAL IDENTIFIER+)
+ : identifier_plus EQUAL -> ^(RETURN_VAL identifier_plus)
+ | identifier_plus (COMMA identifier_plus)+ EQUAL -> ^(RETURN_VAL identifier_plus+)
| -> ^(RETURN_VAL)
;
@@ -271,8 +271,8 @@ inline_param_clause : LEFT_PAREN ( param
-> ^(PARAMS parameter*)
;
-inline_clause : inline_return_clause IDENTIFIER inline_param_clause
- -> ^(MACRO_INLINE IDENTIFIER inline_return_clause inline_param_clause)
+inline_clause : inline_return_clause identifier_plus inline_param_clause
+ -> ^(MACRO_INLINE identifier_plus inline_return_clause inline_param_clause)
;
// TYPES
@@ -294,7 +294,7 @@ tuple_type : implicit_tuple_type | expli
;
implicit_bag_type : LEFT_CURLY NULL COLON tuple_type? RIGHT_CURLY -> ^( BAG_TYPE tuple_type? )
- | LEFT_CURLY ( ( IDENTIFIER COLON )? tuple_type )? RIGHT_CURLY -> ^( BAG_TYPE IDENTIFIER? tuple_type? )
+ | LEFT_CURLY ( ( identifier_plus COLON )? tuple_type )? RIGHT_CURLY -> ^( BAG_TYPE identifier_plus? tuple_type? )
;
explicit_bag_type : BAG! implicit_bag_type
@@ -332,7 +332,7 @@ import_clause : IMPORT^ QUOTEDSTRING
define_clause : DEFINE^ IDENTIFIER ( cmd | func_clause | macro_clause)
;
-realias_clause : IDENTIFIER EQUAL IDENTIFIER -> ^(REALIAS IDENTIFIER IDENTIFIER)
+realias_clause : identifier_plus EQUAL identifier_plus -> ^(REALIAS identifier_plus identifier_plus)
;
parallel_clause : PARALLEL^ INTEGER
@@ -409,7 +409,10 @@ group_item : rel ( join_group_by_clause
// "AS" CLAUSES
-explicit_field_def : IDENTIFIER ( COLON type )? -> ^( FIELD_DEF IDENTIFIER type? )
+identifier_plus : IDENTIFIER | reserved_identifier_whitelist -> IDENTIFIER[$reserved_identifier_whitelist.text]
+;
+
+explicit_field_def : identifier_plus ( COLON type )? -> ^( FIELD_DEF identifier_plus type? )
| explicit_type -> ^( FIELD_DEF_WITHOUT_IDENTIFIER explicit_type )
;
@@ -437,7 +440,7 @@ stream_cmd : ( STDIN | STDOUT | QUOTEDST
cmd : EXECCOMMAND^ ( ship_clause | cache_clause | input_clause | output_clause | error_clause )*
;
-rel : IDENTIFIER | previous_rel | nested_op_clause
+rel : identifier_plus | previous_rel | nested_op_clause
;
previous_rel : ARROBA
@@ -449,7 +452,7 @@ store_clause : STORE^ rel INTO! QUOTEDST
filter_clause : FILTER^ rel BY! cond
;
-stream_clause : STREAM^ rel THROUGH! ( EXECCOMMAND | IDENTIFIER ) as_clause?
+stream_clause : STREAM^ rel THROUGH! ( EXECCOMMAND | identifier_plus ) as_clause?
;
mr_clause : MAPREDUCE^ QUOTEDSTRING ( LEFT_PAREN! path_list RIGHT_PAREN! )? store_clause load_clause EXECCOMMAND?
@@ -458,10 +461,10 @@ mr_clause : MAPREDUCE^ QUOTEDSTRING ( LE
split_clause : SPLIT^ rel INTO! split_branch split_branches
;
-split_branch : IDENTIFIER IF cond -> ^( SPLIT_BRANCH IDENTIFIER cond )
+split_branch : identifier_plus IF cond -> ^( SPLIT_BRANCH identifier_plus cond )
;
-split_otherwise : IDENTIFIER OTHERWISE^
+split_otherwise : identifier_plus OTHERWISE^
;
split_branches : COMMA! split_branch split_branches?
@@ -676,8 +679,8 @@ cast_expr
// careful with periods straight after the identifier, as we want those to be projections, not function
// calls
| col_ref_without_identifier projection*
- | IDENTIFIER projection*
- | IDENTIFIER func_name_suffix? LEFT_PAREN ( real_arg ( COMMA real_arg )* )? RIGHT_PAREN projection* -> ^( FUNC_EVAL IDENTIFIER func_name_suffix? real_arg* ) projection*
+ | identifier_plus projection*
+ | identifier_plus func_name_suffix? LEFT_PAREN ( real_arg ( COMMA real_arg )* )? RIGHT_PAREN projection* -> ^( FUNC_EVAL identifier_plus func_name_suffix? real_arg* ) projection*
| func_name_without_columns LEFT_PAREN ( real_arg ( COMMA real_arg )* )? RIGHT_PAREN projection* -> ^( FUNC_EVAL func_name_without_columns real_arg* ) projection*
| paren_expr
| curly_expr
@@ -776,10 +779,10 @@ projection : PERIOD ( col_ref | LEFT_PAR
// ATOMS
// for disambiguation with func_names
-col_ref_without_identifier : GROUP | CUBE | DOLLARVAR
+col_ref_without_identifier : GROUP | DOLLARVAR
;
-
-col_ref : col_ref_without_identifier | IDENTIFIER
+
+col_ref : col_ref_without_identifier | identifier_plus
;
col_range : c1 = col_ref DOUBLE_PERIOD c2 = col_ref? -> ^(COL_RANGE $c1 DOUBLE_PERIOD $c2?)
@@ -821,12 +824,12 @@ nested_blk : ( nested_command SEMI_COLON
-> nested_command* ^( GENERATE flatten_generated_item+ )
;
-nested_command : ( IDENTIFIER EQUAL col_ref PERIOD col_ref_list { input.LA( 1 ) == SEMI_COLON }? ) => ( IDENTIFIER EQUAL nested_proj )
- -> ^( NESTED_CMD IDENTIFIER nested_proj )
- | IDENTIFIER EQUAL expr
- -> ^( NESTED_CMD_ASSI IDENTIFIER expr )
- | IDENTIFIER EQUAL nested_op
- -> ^( NESTED_CMD IDENTIFIER nested_op )
+nested_command : ( identifier_plus EQUAL col_ref PERIOD col_ref_list { input.LA( 1 ) == SEMI_COLON }? ) => ( identifier_plus EQUAL nested_proj )
+ -> ^( NESTED_CMD identifier_plus nested_proj )
+ | identifier_plus EQUAL expr
+ -> ^( NESTED_CMD_ASSI identifier_plus expr )
+ | identifier_plus EQUAL nested_op
+ -> ^( NESTED_CMD identifier_plus nested_op )
;
nested_op : nested_filter
@@ -967,3 +970,8 @@ rel_str_op : STR_OP_EQ
| STR_OP_LTE
| STR_OP_MATCHES
;
+
+reserved_identifier_whitelist : RANK
+ | CUBE
+;
+
Modified: pig/trunk/test/org/apache/pig/test/TestParser.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestParser.java?rev=1468623&r1=1468622&r2=1468623&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/TestParser.java (original)
+++ pig/trunk/test/org/apache/pig/test/TestParser.java Tue Apr 16 21:57:13 2013
@@ -18,10 +18,15 @@ package org.apache.pig.test;
import static org.apache.pig.ExecType.LOCAL;
import static org.apache.pig.ExecType.MAPREDUCE;
+import static org.apache.pig.builtin.mock.Storage.resetData;
+import static org.apache.pig.builtin.mock.Storage.tuple;
+import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import java.io.IOException;
+import java.util.List;
import java.util.Properties;
+import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -32,11 +37,14 @@ import org.apache.pig.backend.executione
import org.apache.pig.backend.hadoop.datastorage.ConfigurationUtil;
import org.apache.pig.builtin.mock.Storage;
import org.apache.pig.builtin.mock.Storage.Data;
+import org.apache.pig.data.Tuple;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.Test;
+import com.google.common.collect.Sets;
+
public class TestParser {
protected final Log log = LogFactory.getLog(getClass());
@@ -166,5 +174,35 @@ public class TestParser {
conf.get("mapreduce.job.hdfs-servers").contains("hdfs://d.com:8020"));
}
+
+ @Test
+ public void testRestrictedColumnNamesWhitelist() throws Exception {
+ pigServer = new PigServer(LOCAL);
+
+ Data data = resetData(pigServer);
+
+ Set<Tuple> tuples = Sets.newHashSet(tuple(1),tuple(2),tuple(3));
+ data.set("foo",
+ "x:int",
+ tuples
+ );
+
+ pigServer.registerQuery("a = load 'foo' using mock.Storage();");
+ pigServer.registerQuery("a = foreach a generate x as rank;");
+ pigServer.registerQuery("a = foreach a generate rank as cube;");
+ pigServer.registerQuery("a = foreach a generate cube as y;");
+ pigServer.registerQuery("rank = a;");
+ pigServer.registerQuery("cube = rank;");
+ pigServer.registerQuery("rank = cube;");
+ pigServer.registerQuery("cube = foreach rank generate y as cube;");
+ pigServer.registerQuery("store cube into 'baz' using mock.Storage();");
+ List<Tuple> tuples2 = data.get("baz");
+ assertEquals(tuples.size(), tuples2.size());
+ for (Tuple t : tuples2) {
+ tuples.remove(t);
+ }
+ assertTrue(tuples.isEmpty());
+
+ }
}