You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ch...@apache.org on 2013/04/16 23:57:13 UTC

svn commit: r1468623 - in /pig/trunk: CHANGES.txt src/org/apache/pig/parser/QueryParser.g test/org/apache/pig/test/TestParser.java

Author: cheolsoo
Date: Tue Apr 16 21:57:13 2013
New Revision: 1468623

URL: http://svn.apache.org/r1468623
Log:
PIG-3122: Operators should not implicitly become reserved keywords (jcoveney via cheolsoo)

Modified:
    pig/trunk/CHANGES.txt
    pig/trunk/src/org/apache/pig/parser/QueryParser.g
    pig/trunk/test/org/apache/pig/test/TestParser.java

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1468623&r1=1468622&r2=1468623&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Tue Apr 16 21:57:13 2013
@@ -156,6 +156,8 @@ PIG-3013: BinInterSedes improve chararra
 
 BUG FIXES
 
+PIG-3122: Operators should not implicitly become reserved keywords (jcoveney via cheolsoo)
+
 PIG-3193: Fix "ant docs" warnings (cheolsoo)
 
 PIG-3186: tar/deb/pkg ant targets should depend on piggybank (lbendig via gates)

Modified: pig/trunk/src/org/apache/pig/parser/QueryParser.g
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/parser/QueryParser.g?rev=1468623&r1=1468622&r2=1468623&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/parser/QueryParser.g (original)
+++ pig/trunk/src/org/apache/pig/parser/QueryParser.g Tue Apr 16 21:57:13 2013
@@ -211,7 +211,7 @@ nested_op_clause : LEFT_PAREN! op_clause
 ;
 
 general_statement : FAT_ARROW ( ( op_clause parallel_clause? ) | nested_op_clause ) -> ^( STATEMENT IDENTIFIER["____RESERVED____"] op_clause? parallel_clause? nested_op_clause? )
-                  | ( IDENTIFIER EQUAL )? ( ( op_clause parallel_clause? ) | nested_op_clause ) -> ^( STATEMENT IDENTIFIER? op_clause? parallel_clause? nested_op_clause? )
+                  | ( identifier_plus EQUAL )? ( ( op_clause parallel_clause? ) | nested_op_clause ) -> ^( STATEMENT identifier_plus? op_clause? parallel_clause? nested_op_clause? )
 ;
 
 // Statement represented by a foreach operator with a nested block. Simple foreach statement
@@ -220,8 +220,8 @@ general_statement : FAT_ARROW ( ( op_cla
 // if there is a nested block. This is ugly, but it gets the job done.
 foreach_statement : FAT_ARROW FOREACH rel ( foreach_plan_complex | ( foreach_plan_simple parallel_clause? SEMI_COLON ) )
     -> ^( STATEMENT IDENTIFIER["____RESERVED____"] ^( FOREACH rel foreach_plan_complex? foreach_plan_simple? ) parallel_clause? )
-                  | ( IDENTIFIER EQUAL )? FOREACH rel ( foreach_plan_complex | ( foreach_plan_simple parallel_clause? SEMI_COLON ) )
-    -> ^( STATEMENT IDENTIFIER? ^( FOREACH rel foreach_plan_complex? foreach_plan_simple? ) parallel_clause? )
+                  | ( identifier_plus EQUAL )? FOREACH rel ( foreach_plan_complex | ( foreach_plan_simple parallel_clause? SEMI_COLON ) )
+    -> ^( STATEMENT identifier_plus? ^( FOREACH rel foreach_plan_complex? foreach_plan_simple? ) parallel_clause? )
 ;
 
 foreach_plan_complex : LEFT_CURLY nested_blk RIGHT_CURLY -> ^( FOREACH_PLAN_COMPLEX nested_blk )
@@ -235,13 +235,13 @@ foreach_plan_simple : GENERATE flatten_g
 macro_content : LEFT_CURLY ( macro_content | ~(LEFT_CURLY | RIGHT_CURLY) )* RIGHT_CURLY
 ;
 
-macro_param_clause : LEFT_PAREN ( IDENTIFIER (COMMA IDENTIFIER)* )? RIGHT_PAREN
-    -> ^(PARAMS IDENTIFIER*)
+macro_param_clause : LEFT_PAREN ( identifier_plus (COMMA identifier_plus)* )? RIGHT_PAREN
+    -> ^(PARAMS identifier_plus*)
 ;
 
 macro_return_clause
-    : RETURNS ((IDENTIFIER (COMMA IDENTIFIER)*) | VOID)
-        -> ^(RETURN_VAL IDENTIFIER*)
+    : RETURNS ((identifier_plus (COMMA identifier_plus)*) | VOID)
+        -> ^(RETURN_VAL identifier_plus*)
 ;
 
 macro_body_clause : macro_content -> ^(MACRO_BODY { new PigParserNode(new CommonToken(1, $macro_content.text), this.getSourceName(), $macro_content.start) } )
@@ -252,8 +252,8 @@ macro_clause : macro_param_clause macro_
 ;
 
 inline_return_clause
-    : IDENTIFIER EQUAL -> ^(RETURN_VAL IDENTIFIER)
-	| IDENTIFIER (COMMA IDENTIFIER)+ EQUAL -> ^(RETURN_VAL IDENTIFIER+)
+    : identifier_plus EQUAL -> ^(RETURN_VAL identifier_plus)
+	| identifier_plus (COMMA identifier_plus)+ EQUAL -> ^(RETURN_VAL identifier_plus+)
 	| -> ^(RETURN_VAL)
 ;
 
@@ -271,8 +271,8 @@ inline_param_clause : LEFT_PAREN ( param
     -> ^(PARAMS parameter*)
 ;
 
-inline_clause : inline_return_clause IDENTIFIER inline_param_clause
-    -> ^(MACRO_INLINE IDENTIFIER inline_return_clause inline_param_clause)
+inline_clause : inline_return_clause identifier_plus inline_param_clause
+    -> ^(MACRO_INLINE identifier_plus inline_return_clause inline_param_clause)
 ;
 
 // TYPES
@@ -294,7 +294,7 @@ tuple_type : implicit_tuple_type | expli
 ;
 
 implicit_bag_type : LEFT_CURLY NULL COLON tuple_type? RIGHT_CURLY -> ^( BAG_TYPE tuple_type? )
-                  | LEFT_CURLY ( ( IDENTIFIER COLON )? tuple_type )? RIGHT_CURLY -> ^( BAG_TYPE IDENTIFIER? tuple_type? )
+                  | LEFT_CURLY ( ( identifier_plus COLON )? tuple_type )? RIGHT_CURLY -> ^( BAG_TYPE identifier_plus? tuple_type? )
 ;
 
 explicit_bag_type : BAG! implicit_bag_type
@@ -332,7 +332,7 @@ import_clause : IMPORT^ QUOTEDSTRING
 define_clause : DEFINE^ IDENTIFIER ( cmd | func_clause | macro_clause)
 ;
 
-realias_clause : IDENTIFIER EQUAL IDENTIFIER -> ^(REALIAS IDENTIFIER IDENTIFIER)
+realias_clause : identifier_plus EQUAL identifier_plus -> ^(REALIAS identifier_plus identifier_plus)
 ;
 
 parallel_clause : PARALLEL^ INTEGER
@@ -409,7 +409,10 @@ group_item : rel ( join_group_by_clause 
 
 // "AS" CLAUSES
 
-explicit_field_def : IDENTIFIER ( COLON type )? -> ^( FIELD_DEF IDENTIFIER type? )
+identifier_plus : IDENTIFIER | reserved_identifier_whitelist -> IDENTIFIER[$reserved_identifier_whitelist.text]
+;
+
+explicit_field_def : identifier_plus ( COLON type )? -> ^( FIELD_DEF identifier_plus type? )
                    | explicit_type -> ^( FIELD_DEF_WITHOUT_IDENTIFIER explicit_type )
 ;
 
@@ -437,7 +440,7 @@ stream_cmd : ( STDIN | STDOUT | QUOTEDST
 cmd : EXECCOMMAND^ ( ship_clause | cache_clause | input_clause | output_clause | error_clause )*
 ;
 
-rel : IDENTIFIER | previous_rel | nested_op_clause
+rel : identifier_plus | previous_rel | nested_op_clause
 ;
 
 previous_rel : ARROBA
@@ -449,7 +452,7 @@ store_clause : STORE^ rel INTO! QUOTEDST
 filter_clause : FILTER^ rel BY! cond
 ;
 
-stream_clause : STREAM^ rel THROUGH! ( EXECCOMMAND | IDENTIFIER ) as_clause?
+stream_clause : STREAM^ rel THROUGH! ( EXECCOMMAND | identifier_plus ) as_clause?
 ;
 
 mr_clause : MAPREDUCE^ QUOTEDSTRING ( LEFT_PAREN! path_list RIGHT_PAREN! )? store_clause load_clause EXECCOMMAND?
@@ -458,10 +461,10 @@ mr_clause : MAPREDUCE^ QUOTEDSTRING ( LE
 split_clause : SPLIT^ rel INTO! split_branch split_branches
 ;
 
-split_branch : IDENTIFIER IF cond -> ^( SPLIT_BRANCH IDENTIFIER cond )
+split_branch : identifier_plus IF cond -> ^( SPLIT_BRANCH identifier_plus cond )
 ;
 
-split_otherwise : IDENTIFIER OTHERWISE^
+split_otherwise : identifier_plus OTHERWISE^
 ;
 
 split_branches : COMMA! split_branch split_branches?
@@ -676,8 +679,8 @@ cast_expr
           // careful with periods straight after the identifier, as we want those to be projections, not function
           // calls
           | col_ref_without_identifier projection*
-          | IDENTIFIER projection*
-          | IDENTIFIER func_name_suffix? LEFT_PAREN ( real_arg ( COMMA real_arg )* )? RIGHT_PAREN projection* -> ^( FUNC_EVAL IDENTIFIER func_name_suffix? real_arg* ) projection*
+          | identifier_plus projection*
+          | identifier_plus func_name_suffix? LEFT_PAREN ( real_arg ( COMMA real_arg )* )? RIGHT_PAREN projection* -> ^( FUNC_EVAL identifier_plus func_name_suffix? real_arg* ) projection*
           | func_name_without_columns LEFT_PAREN ( real_arg ( COMMA real_arg )* )? RIGHT_PAREN projection* -> ^( FUNC_EVAL func_name_without_columns real_arg* ) projection*
           | paren_expr
           | curly_expr
@@ -776,10 +779,10 @@ projection : PERIOD ( col_ref | LEFT_PAR
 // ATOMS
 
 // for disambiguation with func_names
-col_ref_without_identifier : GROUP | CUBE | DOLLARVAR
+col_ref_without_identifier : GROUP | DOLLARVAR
 ;
-
-col_ref : col_ref_without_identifier | IDENTIFIER
+ 
+col_ref : col_ref_without_identifier | identifier_plus
 ;
 
 col_range : c1 = col_ref DOUBLE_PERIOD c2 = col_ref? -> ^(COL_RANGE $c1 DOUBLE_PERIOD $c2?)
@@ -821,12 +824,12 @@ nested_blk : ( nested_command SEMI_COLON
     -> nested_command* ^( GENERATE flatten_generated_item+ )
 ;
 
-nested_command : ( IDENTIFIER EQUAL col_ref PERIOD col_ref_list { input.LA( 1 ) == SEMI_COLON }? ) => ( IDENTIFIER EQUAL nested_proj )
-              -> ^( NESTED_CMD IDENTIFIER nested_proj )
-               | IDENTIFIER EQUAL expr
-              -> ^( NESTED_CMD_ASSI IDENTIFIER expr )
-               | IDENTIFIER EQUAL nested_op
-              -> ^( NESTED_CMD IDENTIFIER nested_op )
+nested_command : ( identifier_plus EQUAL col_ref PERIOD col_ref_list { input.LA( 1 ) == SEMI_COLON }? ) => ( identifier_plus EQUAL nested_proj )
+              -> ^( NESTED_CMD identifier_plus nested_proj )
+               | identifier_plus EQUAL expr
+              -> ^( NESTED_CMD_ASSI identifier_plus expr )
+               | identifier_plus EQUAL nested_op
+              -> ^( NESTED_CMD identifier_plus nested_op )
 ;
 
 nested_op : nested_filter
@@ -967,3 +970,8 @@ rel_str_op : STR_OP_EQ
            | STR_OP_LTE
            | STR_OP_MATCHES
 ;
+
+reserved_identifier_whitelist : RANK
+                              | CUBE
+;
+

Modified: pig/trunk/test/org/apache/pig/test/TestParser.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestParser.java?rev=1468623&r1=1468622&r2=1468623&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/TestParser.java (original)
+++ pig/trunk/test/org/apache/pig/test/TestParser.java Tue Apr 16 21:57:13 2013
@@ -18,10 +18,15 @@ package org.apache.pig.test;
 
 import static org.apache.pig.ExecType.LOCAL;
 import static org.apache.pig.ExecType.MAPREDUCE;
+import static org.apache.pig.builtin.mock.Storage.resetData;
+import static org.apache.pig.builtin.mock.Storage.tuple;
+import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 
 import java.io.IOException;
+import java.util.List;
 import java.util.Properties;
+import java.util.Set;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -32,11 +37,14 @@ import org.apache.pig.backend.executione
 import org.apache.pig.backend.hadoop.datastorage.ConfigurationUtil;
 import org.apache.pig.builtin.mock.Storage;
 import org.apache.pig.builtin.mock.Storage.Data;
+import org.apache.pig.data.Tuple;
 import org.junit.After;
 import org.junit.AfterClass;
 import org.junit.Before;
 import org.junit.Test;
 
+import com.google.common.collect.Sets;
+
 public class TestParser {
 
     protected final Log log = LogFactory.getLog(getClass());
@@ -166,5 +174,35 @@ public class TestParser {
                 conf.get("mapreduce.job.hdfs-servers").contains("hdfs://d.com:8020"));
 
     }
+
+    @Test
+    public void testRestrictedColumnNamesWhitelist() throws Exception {
+        pigServer = new PigServer(LOCAL);
+
+        Data data = resetData(pigServer);
+
+        Set<Tuple> tuples = Sets.newHashSet(tuple(1),tuple(2),tuple(3));
+        data.set("foo",
+            "x:int",
+            tuples
+            );
+
+        pigServer.registerQuery("a = load 'foo' using mock.Storage();");
+        pigServer.registerQuery("a = foreach a generate x as rank;");
+        pigServer.registerQuery("a = foreach a generate rank as cube;");
+        pigServer.registerQuery("a = foreach a generate cube as y;");
+        pigServer.registerQuery("rank = a;");
+        pigServer.registerQuery("cube = rank;");
+        pigServer.registerQuery("rank = cube;");
+        pigServer.registerQuery("cube = foreach rank generate y as cube;");
+        pigServer.registerQuery("store cube into 'baz' using mock.Storage();");
+        List<Tuple> tuples2 = data.get("baz");
+        assertEquals(tuples.size(), tuples2.size());
+        for (Tuple t : tuples2) {
+            tuples.remove(t);
+        }
+        assertTrue(tuples.isEmpty());
+
+    }
 }