You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by th...@apache.org on 2010/12/02 02:30:36 UTC
svn commit: r1041246 - in /pig/trunk:
src/org/apache/pig/parser/DefaultDataTypeInserter.g
test/org/apache/pig/parser/TestDefaultDataTypeInserter.java
test/org/apache/pig/parser/TestDefaultDataTypeInserter.pig
test/org/apache/pig/parser/TreePrinter.java
Author: thejas
Date: Thu Dec 2 01:30:36 2010
New Revision: 1041246
URL: http://svn.apache.org/viewvc?rev=1041246&view=rev
Log:
PIG-1618: Switch to new parser generator technology - NewParser-5.patch - adding missing files
Added:
pig/trunk/src/org/apache/pig/parser/DefaultDataTypeInserter.g
pig/trunk/test/org/apache/pig/parser/TestDefaultDataTypeInserter.java
pig/trunk/test/org/apache/pig/parser/TestDefaultDataTypeInserter.pig
pig/trunk/test/org/apache/pig/parser/TreePrinter.java
Added: pig/trunk/src/org/apache/pig/parser/DefaultDataTypeInserter.g
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/parser/DefaultDataTypeInserter.g?rev=1041246&view=auto
==============================================================================
--- pig/trunk/src/org/apache/pig/parser/DefaultDataTypeInserter.g (added)
+++ pig/trunk/src/org/apache/pig/parser/DefaultDataTypeInserter.g Thu Dec 2 01:30:36 2010
@@ -0,0 +1,448 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Grammar file for Pig tree parser (visitor for default data type insertion).
+ *
+ * NOTE: THIS FILE IS BASED ON QueryParser.g, SO IF YOU CHANGE THAT FILE, YOU WILL
+ * PROBABLY NEED TO MAKE CORRESPONDING CHANGES TO THIS FILE AS WELL.
+ */
+
+tree grammar DefaultDataTypeInserter; // tree grammar: the input is an AST, not a token stream
+// Generator options for this tree walker.
+options {
+ tokenVocab=QueryParser; // reuse the token types defined by the QueryParser grammar
+ ASTLabelType=CommonTree; // tree nodes are typed as ANTLR CommonTree
+ output=AST; // the walker builds a (possibly rewritten) tree as its result
+ backtrack=true; // allow ANTLR to backtrack when it cannot predict an alternative otherwise
+}
+
+@header {
+package org.apache.pig.parser;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+}
+
+@members {
+private static final Log log = LogFactory.getLog( DefaultDataTypeInserter.class ); // one logger for the generated walker class; final so it cannot be reassigned
+} // End of @members
+
+query : ^( QUERY statement* ) // entry rule: a QUERY root with zero or more statements
+;
+// A statement is either a general operator statement or a foreach statement.
+statement : general_statement | foreach_statement
+;
+// STATEMENT node: optional alias, one operator clause, optional INTEGER (NOTE(review): presumably the parallelism hint - confirm against QueryParser.g).
+general_statement : ^( STATEMENT alias? op_clause INTEGER? )
+;
+
+// We need to handle foreach specifically because of the ending ';', which is not required
+// if there is a nested block. This is ugly, but it gets the job done.
+foreach_statement : ^( STATEMENT alias? foreach_clause )
+;
+// An alias is a plain identifier.
+alias : IDENTIFIER
+;
+
+op_clause : define_clause // dispatch over every operator clause a general statement may contain
+ | load_clause
+ | group_clause
+ | store_clause
+ | filter_clause
+ | distinct_clause
+ | limit_clause
+ | sample_clause
+ | order_clause
+ | partition_clause
+ | cross_clause
+ | joint_clause // the rule that matches JOIN nodes
+ | union_clause
+ | stream_clause
+ | mr_clause
+ | split_clause
+;
+
+define_clause : ^( DEFINE alias ( cmd | func_clause ) ) // DEFINE: an alias followed by either a command or a function clause
+;
+// EXECCOMMAND node with any number of ship/cache/input/output/error sub-clauses, in any order.
+cmd : ^( EXECCOMMAND ( ship_clause | cache_caluse | input_clause | output_clause | error_clause )* )
+;
+// SHIP with an optional path list.
+ship_clause : ^( SHIP path_list? )
+;
+// One or more quoted strings.
+path_list : QUOTEDSTRING+
+;
+// CACHE with a mandatory path list. NOTE(review): "cache_caluse" looks like a misspelling of cache_clause; a rename must also update the reference in cmd.
+cache_caluse : ^( CACHE path_list )
+;
+// INPUT with a list of stream commands.
+input_clause : ^( INPUT stream_cmd_list )
+;
+// One or more stream commands.
+stream_cmd_list : stream_cmd+
+;
+// A STDIN, STDOUT or quoted-string node, each with an optional function clause as its child.
+stream_cmd : ^( STDIN func_clause? )
+ | ^( STDOUT func_clause? )
+ | ^( QUOTEDSTRING func_clause? )
+;
+// OUTPUT with a list of stream commands.
+output_clause : ^( OUTPUT stream_cmd_list )
+;
+// ERROR with an optional error command.
+error_clause : ^( ERROR error_cmd? )
+;
+// Quoted-string node with an optional INTEGER child.
+error_cmd : ^( QUOTEDSTRING INTEGER? )
+;
+
+load_clause : ^( LOAD filename func_clause? as_clause? ) // LOAD: file name, optional function clause, optional AS schema
+;
+// A file name is a quoted string.
+filename : QUOTEDSTRING
+;
+// AS node wrapping a tuple definition.
+as_clause: ^( AS tuple_def )
+;
+// Either form of TUPLE_DEF; the full form is listed first.
+tuple_def : tuple_def_full | tuple_def_simple
+;
+// TUPLE_DEF with one or more fields.
+tuple_def_full : ^( TUPLE_DEF field+ )
+;
+// TUPLE_DEF with exactly one field (a shape tuple_def_full accepts as well).
+tuple_def_simple : ^( TUPLE_DEF field )
+;
+
+// Add default types for schema field.
+field : ^( FIELD IDENTIFIER ) // field without a type child ...
+ -> ^( FIELD IDENTIFIER BYTEARRAY ) // ... is rewritten with BYTEARRAY appended as its type
+ | ^( FIELD IDENTIFIER type ) // field that already has a type: no rewrite at this level
+;
+// Any type: simple, tuple, bag or map.
+type : simple_type | tuple_type | bag_type | map_type
+;
+// The simple type tokens.
+simple_type : INT | LONG | FLOAT | DOUBLE | CHARARRAY | BYTEARRAY
+;
+// TUPLE_TYPE wrapping a full tuple definition, so nested fields pass through the field rule too.
+tuple_type : ^( TUPLE_TYPE tuple_def_full )
+;
+// BAG_TYPE with an optional tuple definition.
+bag_type : ^( BAG_TYPE tuple_def? )
+;
+// MAP_TYPE is matched as a single token with no children.
+map_type : MAP_TYPE
+;
+
+func_clause : ^( FUNC func_name func_args? ) // FUNC given by name with optional arguments ...
+ | ^( FUNC func_alias ) // ... or given by alias
+;
+// A function name is one or more extended identifiers.
+func_name : eid+
+;
+// A function alias is a plain identifier.
+func_alias : IDENTIFIER
+;
+// Function arguments are one or more quoted strings.
+func_args : QUOTEDSTRING+
+;
+
+group_clause : ^( GROUP group_item_list QUOTEDSTRING? ) // GROUP and COGROUP nodes share the same child layout
+ | ^( COGROUP group_item_list QUOTEDSTRING? )
+;
+// One or more group items.
+group_item_list : group_item+
+;
+// A relation, then an item list or ALL or ANY, then an optional INNER/OUTER marker.
+group_item : rel ( ( flatten_generated_item_list ) | ALL | ANY ) ( INNER | OUTER )?
+;
+// A relation is given by alias or inline as an operator clause.
+rel : alias | op_clause
+;
+// One or more generated items.
+flatten_generated_item_list : flatten_generated_item+
+;
+// A flatten clause, an expression or STAR, optionally followed by an AS clause.
+flatten_generated_item : ( flatten_clause | expr | STAR ) as_clause?
+;
+// FLATTEN node with a single expression child.
+flatten_clause : ^( FLATTEN expr )
+;
+
+store_clause : ^( STORE alias filename func_clause? ) // STORE: alias, file name, optional function clause
+;
+// FILTER: a relation followed by a condition.
+filter_clause : ^( FILTER rel cond )
+;
+// Condition tree: OR/AND/NOT nodes, NULL tests, FILTEROP comparisons and function evaluations.
+cond : ^( OR cond cond )
+ | ^( AND cond cond )
+ | ^( NOT cond )
+ | ^( NULL expr NOT ) // NOTE(review): this shape is also accepted by the ^( NULL expr NOT? ) alternative below
+ | ^( FILTEROP expr expr )
+ | func_eval
+ | ^( NULL expr NOT? )
+;
+// FUNC_EVAL: function name with an optional argument list.
+func_eval: ^( FUNC_EVAL func_name real_arg_list? )
+;
+// One or more arguments.
+real_arg_list : real_arg+
+;
+// An argument is an expression or STAR.
+real_arg : expr | STAR
+;
+
+expr : ^( PLUS expr expr ) // the five operator nodes below each take two operand expressions
+ | ^( MINUS expr expr )
+ | ^( STAR expr expr )
+ | ^( DIV expr expr )
+ | ^( PERCENT expr expr )
+ | ^( CAST_EXPR type expr ) // cast node: a type followed by an expression
+ | const_expr
+ | var_expr
+ | neg_expr // MINUS node with a single child
+;
+// NOTE(review): within this file cast_expr is referenced only by neg_expr.
+cast_expr : ^( CAST_EXPR type unary_expr )
+ | unary_expr
+;
+// Referenced only from cast_expr in this file.
+unary_expr : expr_eval | expr | neg_expr
+;
+// A constant or a variable expression.
+expr_eval : const_expr | var_expr
+;
+// A projectable expression followed by any number of dot or pound projections.
+var_expr : projectable_expr ( dot_proj | pound_proj )*
+;
+// Function evaluation, column reference or BIN_EXPR.
+projectable_expr: func_eval | col_ref | bin_expr
+;
+// PERIOD node with one or more column references.
+dot_proj : ^( PERIOD col_ref+ )
+;
+// POUND node with a quoted string or NULL child.
+pound_proj : ^( POUND ( QUOTEDSTRING | NULL ) )
+;
+// BIN_EXPR node: a condition followed by two expressions.
+bin_expr : ^( BIN_EXPR cond expr expr )
+;
+// MINUS node with a single cast_expr child.
+neg_expr : ^( MINUS cast_expr )
+;
+
+limit_clause : ^( LIMIT rel ( INTEGER | LONGINTEGER ) ) // LIMIT: a relation followed by an INTEGER or LONGINTEGER
+;
+// SAMPLE: a relation followed by a DOUBLENUMBER.
+sample_clause : ^( SAMPLE rel DOUBLENUMBER )
+;
+// ORDER: a relation, an order-by clause and an optional function clause.
+order_clause : ^( ORDER rel order_by_clause func_clause? )
+;
+// Either STAR or a list of columns; STAR may be followed by ASC or DESC.
+order_by_clause : STAR ( ASC | DESC )?
+ | order_col_list
+;
+// One or more order columns.
+order_col_list : order_col+
+;
+// A column reference optionally followed by ASC or DESC.
+order_col : col_ref ( ASC | DESC )?
+;
+// DISTINCT: a relation with an optional partition clause.
+distinct_clause : ^( DISTINCT rel partition_clause? )
+;
+// PARTITION node with a function name child.
+partition_clause : ^( PARTITION func_name )
+;
+// CROSS: a list of relations with an optional partition clause.
+cross_clause : ^( CROSS rel_list partition_clause? )
+;
+// One or more relations.
+rel_list : rel+
+;
+// JOIN: the join items, an optional quoted string and an optional partition clause.
+joint_clause : ^( JOIN join_sub_clause QUOTEDSTRING? partition_clause? )
+;
+// Two items separated by LEFT/RIGHT/FULL (optionally followed by OUTER), or a plain item list.
+join_sub_clause : join_item ( LEFT | RIGHT | FULL ) OUTER? join_item
+ | join_item_list
+;
+// At least two join items.
+join_item_list : join_item ( join_item )+
+;
+// A relation followed by a list of generated items.
+join_item : rel flatten_generated_item_list
+;
+// UNION: optional ONSCHEMA marker followed by a list of relations.
+union_clause : ^( UNION ONSCHEMA? rel_list )
+;
+
+foreach_clause : ^( FOREACH rel nested_plan ) // FOREACH: a relation followed by its nested plan
+;
+// NESTED_PLAN holding either a block (commands + generate) or just a generate clause.
+nested_plan : ^( NESTED_PLAN foreach_blk )
+ | ^( NESTED_PLAN generate_clause )
+;
+// Nested commands followed by the generate clause. nested_command_list may be empty, so this also matches a bare generate clause.
+foreach_blk : nested_command_list generate_clause
+;
+// GENERATE node with one or more generated items.
+generate_clause : ^( GENERATE flatten_generated_item+ )
+;
+// Zero or more nested commands.
+nested_command_list : nested_command*
+;
+// NESTED_CMD: an identifier followed by an expression or a nested operator.
+nested_command : ^( NESTED_CMD IDENTIFIER expr )
+ | ^( NESTED_CMD IDENTIFIER nested_op )
+;
+// The operators that may appear inside a nested command.
+nested_op : nested_proj
+ | nested_filter
+ | nested_sort
+ | nested_distinct
+ | nested_limit
+;
+// NESTED_PROJ: one column reference followed by a list of column references.
+nested_proj : ^( NESTED_PROJ col_ref col_ref_list )
+;
+// One or more column references.
+col_ref_list : col_ref+
+;
+// Nested FILTER: an input (identifier, nested projection or expr_eval) and a condition.
+nested_filter : ^( FILTER ( IDENTIFIER | nested_proj | expr_eval ) cond )
+;
+// Nested ORDER: an input, an order-by clause and an optional function clause.
+nested_sort : ^( ORDER ( IDENTIFIER | nested_proj | expr_eval ) order_by_clause func_clause? )
+;
+// Nested DISTINCT: just an input.
+nested_distinct : ^( DISTINCT ( IDENTIFIER | nested_proj | expr_eval ) )
+;
+// Nested LIMIT: an input followed by an INTEGER (no LONGINTEGER alternative here, unlike limit_clause).
+nested_limit : ^( LIMIT ( IDENTIFIER | nested_proj | expr_eval ) INTEGER )
+;
+
+stream_clause : ^( STREAM rel ( EXECCOMMAND | IDENTIFIER ) as_clause? ) // STREAM: a relation, an EXECCOMMAND or identifier, optional AS schema
+;
+// MAPREDUCE: quoted string, optional path list, a store clause, a load clause, optional EXECCOMMAND.
+mr_clause : ^( MAPREDUCE QUOTEDSTRING path_list? store_clause load_clause EXECCOMMAND? )
+;
+// SPLIT: a relation followed by one or more branches.
+split_clause : ^( SPLIT rel split_branch+ )
+;
+// SPLIT_BRANCH: an identifier followed by a condition.
+split_branch : ^( SPLIT_BRANCH IDENTIFIER cond )
+;
+// A column is referenced by alias or by dollar position.
+col_ref : alias_col_ref | dollar_col_ref
+;
+// The GROUP token is accepted as a column alias alongside IDENTIFIER.
+alias_col_ref : GROUP | IDENTIFIER
+;
+// DOLLAR node with an INTEGER child.
+dollar_col_ref : ^( DOLLAR INTEGER )
+;
+// A constant is a scalar, map, bag or tuple literal.
+const_expr : scalar | map | bag | tuple
+;
+
+scalar : INTEGER | LONGINTEGER | FLOATNUMBER | DOUBLENUMBER | QUOTEDSTRING | NULL // scalar literal tokens; LONGINTEGER is the same token limit_clause matches (was misspelled LONGINEGER)
+;
+
+map : ^( MAP_VAL keyvalue+ ) // map literal: one or more key/value pairs
+;
+// KEY_VAL_PAIR: a string key followed by a constant value.
+keyvalue : ^( KEY_VAL_PAIR string_val const_expr )
+;
+// A key is a quoted string or NULL.
+string_val : QUOTEDSTRING | NULL
+;
+// Bag literal: one or more tuples.
+bag : ^( BAG_VAL tuple+ )
+;
+// Tuple literal: one or more constants.
+tuple : ^( TUPLE_VAL const_expr+ )
+;
+
+// Extended identifier: accepts the keyword tokens listed below as well as IDENTIFIER (used by func_name), to handle keyword/identifier conflicts. Ugly but there is no other choice.
+eid : FILTEROP
+ | DEFINE
+ | LOAD
+ | FILTER
+ | FOREACH
+ | MATCHES
+ | ORDER
+ | DISTINCT
+ | COGROUP
+ | JOIN
+ | CROSS
+ | UNION
+ | SPLIT
+ | INTO
+ | IF
+ | ALL
+ | AS
+ | BY
+ | USING
+ | INNER
+ | OUTER
+ | PARALLEL
+ | PARTITION
+ | GROUP
+ | AND
+ | OR
+ | NOT
+ | GENERATE
+ | FLATTEN
+ | EVAL
+ | ASC
+ | DESC
+ | INT
+ | LONG
+ | FLOAT
+ | DOUBLE
+ | CHARARRAY
+ | BYTEARRAY
+ | BAG
+ | TUPLE
+ | MAP
+ | IS
+ | NULL
+ | STREAM
+ | THROUGH
+ | STORE
+ | MAPREDUCE
+ | SHIP
+ | CACHE
+ | INPUT
+ | OUTPUT
+ | ERROR
+ | STDIN
+ | STDOUT
+ | LIMIT
+ | SAMPLE
+ | LEFT
+ | RIGHT
+ | FULL
+ | IDENTIFIER // the ordinary, non-keyword case
+;
Added: pig/trunk/test/org/apache/pig/parser/TestDefaultDataTypeInserter.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/parser/TestDefaultDataTypeInserter.java?rev=1041246&view=auto
==============================================================================
--- pig/trunk/test/org/apache/pig/parser/TestDefaultDataTypeInserter.java (added)
+++ pig/trunk/test/org/apache/pig/parser/TestDefaultDataTypeInserter.java Thu Dec 2 01:30:36 2010
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pig.parser;
+
+import java.io.IOException;
+
+import junit.framework.Assert;
+
+import org.antlr.runtime.CharStream;
+import org.antlr.runtime.CommonTokenStream;
+import org.antlr.runtime.RecognitionException;
+import org.antlr.runtime.tree.CommonTree;
+import org.antlr.runtime.tree.CommonTreeNodeStream;
+import org.antlr.runtime.tree.Tree;
+import org.junit.Test;
+
+public class TestDefaultDataTypeInserter {
+
+ @Test
+ public void test() throws IOException, RecognitionException {
+ CharStream input = new QueryParserFileStream( "test/org/apache/pig/parser/TestDefaultDataTypeInserter.pig" );
+ QueryLexer lex = new QueryLexer(input);
+ CommonTokenStream tokens = new CommonTokenStream(lex);
+
+ QueryParser parser = new QueryParser(tokens);
+ QueryParser.query_return result = parser.query();
+
+ Tree ast = (Tree)result.getTree();
+
+ System.out.println( ast.toStringTree() );
+ TreePrinter.printTree( (CommonTree)ast, 0 );
+
+ CommonTreeNodeStream nodes = new CommonTreeNodeStream( ast );
+ DefaultDataTypeInserter walker = new DefaultDataTypeInserter( nodes );
+ DefaultDataTypeInserter.query_return newResult = walker.query();
+
+ Assert.assertEquals( 0, walker.getNumberOfSyntaxErrors() );
+
+ ast = (Tree)newResult.getTree();
+ validateDataTypePresent( (CommonTree)ast );
+
+ TreePrinter.printTree( (CommonTree)ast, 0 );
+ }
+
+ private void validateDataTypePresent(CommonTree tree) {
+ if( tree != null ) {
+
+ if( tree.getText().equals( "TUPLE_DEF" ) ) {
+ for ( int i = 0; i < tree.getChildCount(); i++ ) {
+ CommonTree child = (CommonTree)tree.getChild( i ); // FIELD node
+ Assert.assertTrue( "FIELD".equals( child.getText() ) );
+ CommonTree datatype = (CommonTree)child.getChild( 1 );
+ Assert.assertTrue( datatype != null );
+ String typeName = datatype.getText();
+ Assert.assertTrue( !typeName.isEmpty() );
+ validateDataTypePresent( child );
+ }
+ } else {
+ for ( int i = 0; i < tree.getChildCount(); i++ ) {
+ validateDataTypePresent( (CommonTree)tree.getChild( i ) );
+ }
+ }
+ }
+ }
+
+}
Added: pig/trunk/test/org/apache/pig/parser/TestDefaultDataTypeInserter.pig
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/parser/TestDefaultDataTypeInserter.pig?rev=1041246&view=auto
==============================================================================
--- pig/trunk/test/org/apache/pig/parser/TestDefaultDataTypeInserter.pig (added)
+++ pig/trunk/test/org/apache/pig/parser/TestDefaultDataTypeInserter.pig Thu Dec 2 01:30:36 2010
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+A = load 'xx' as ( uname, vname, w:tuple(z, zz) ); /* uname, vname, z and zz carry no type; w is declared as a tuple */
+store A into 'yy';
+/* Schema given as a single field without parentheses. */
+B = load 'z' as ass;
\ No newline at end of file
Added: pig/trunk/test/org/apache/pig/parser/TreePrinter.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/parser/TreePrinter.java?rev=1041246&view=auto
==============================================================================
--- pig/trunk/test/org/apache/pig/parser/TreePrinter.java (added)
+++ pig/trunk/test/org/apache/pig/parser/TreePrinter.java Thu Dec 2 01:30:36 2010
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pig.parser;
+
+import org.antlr.runtime.tree.CommonTree;
+
+public class TreePrinter {
+
+ public static void printTree(CommonTree tree, int indent) {
+ if( tree != null ) {
+ StringBuilder sb = new StringBuilder();
+ for ( int i = 0; i < indent; i++ )
+ sb = sb.append( " " );
+
+ System.out.println( sb + tree.getText() );
+
+ for ( int i = 0; i < tree.getChildCount(); i++ ) {
+ printTree( (CommonTree)tree.getChild( i ), indent + 1 );
+ }
+ }
+ }
+
+}