You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by th...@apache.org on 2011/01/27 01:31:21 UTC
svn commit: r1063930 - in /pig/trunk:
src/org/apache/pig/newplan/logical/expression/
src/org/apache/pig/newplan/logical/visitor/ src/org/apache/pig/parser/
test/org/apache/pig/parser/
Author: thejas
Date: Thu Jan 27 00:31:21 2011
New Revision: 1063930
URL: http://svn.apache.org/viewvc?rev=1063930&view=rev
Log:
PIG-1618: Switch to new parser generator technology - NewParser-14.patch - (xuefuz via thejas)
Added:
pig/trunk/src/org/apache/pig/newplan/logical/visitor/ColumnAliasConversionVisitor.java
pig/trunk/test/org/apache/pig/parser/TestColumnAliasConversion.java
Modified:
pig/trunk/src/org/apache/pig/newplan/logical/expression/DereferenceExpression.java
pig/trunk/src/org/apache/pig/newplan/logical/expression/ProjectExpression.java
pig/trunk/src/org/apache/pig/parser/AstValidator.g
pig/trunk/src/org/apache/pig/parser/LogicalPlanBuilder.java
pig/trunk/src/org/apache/pig/parser/LogicalPlanGenerator.g
Modified: pig/trunk/src/org/apache/pig/newplan/logical/expression/DereferenceExpression.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/newplan/logical/expression/DereferenceExpression.java?rev=1063930&r1=1063929&r2=1063930&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/newplan/logical/expression/DereferenceExpression.java (original)
+++ pig/trunk/src/org/apache/pig/newplan/logical/expression/DereferenceExpression.java Thu Jan 27 00:31:21 2011
@@ -88,6 +88,7 @@ public class DereferenceExpression exten
public void setBagColumns(List<Integer> columns) {
this.columns = columns;
+ this.rawColumns.clear(); // Resolved positions are stored above, so the raw (unresolved) column references are dropped.
}
@Override
@@ -178,4 +179,8 @@ public class DereferenceExpression exten
return copy;
}
+ public List<Object> getRawColumns() { // Accessor for the raw column references held by this expression.
+ return this.rawColumns; // Hands out the internal list itself, not a copy.
+ }
+
}
Modified: pig/trunk/src/org/apache/pig/newplan/logical/expression/ProjectExpression.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/newplan/logical/expression/ProjectExpression.java?rev=1063930&r1=1063929&r2=1063930&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/newplan/logical/expression/ProjectExpression.java (original)
+++ pig/trunk/src/org/apache/pig/newplan/logical/expression/ProjectExpression.java Thu Jan 27 00:31:21 2011
@@ -123,6 +123,7 @@ public class ProjectExpression extends C
*/
public void setColNum(int colNum) {
col = colNum;
+ alias = null; // Once the column number is set, alias is no longer needed.
}
public boolean isProjectStar() {
Added: pig/trunk/src/org/apache/pig/newplan/logical/visitor/ColumnAliasConversionVisitor.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/newplan/logical/visitor/ColumnAliasConversionVisitor.java?rev=1063930&view=auto
==============================================================================
--- pig/trunk/src/org/apache/pig/newplan/logical/visitor/ColumnAliasConversionVisitor.java (added)
+++ pig/trunk/src/org/apache/pig/newplan/logical/visitor/ColumnAliasConversionVisitor.java Thu Jan 27 00:31:21 2011
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pig.newplan.logical.visitor;
+
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.pig.impl.logicalLayer.FrontendException;
+import org.apache.pig.newplan.DependencyOrderWalker;
+import org.apache.pig.newplan.Operator;
+import org.apache.pig.newplan.OperatorPlan;
+import org.apache.pig.newplan.logical.expression.DereferenceExpression;
+import org.apache.pig.newplan.logical.expression.LogicalExpression;
+import org.apache.pig.newplan.logical.expression.LogicalExpressionPlan;
+import org.apache.pig.newplan.logical.expression.LogicalExpressionVisitor;
+import org.apache.pig.newplan.logical.expression.ProjectExpression;
+import org.apache.pig.newplan.logical.optimizer.AllExpressionVisitor;
+import org.apache.pig.newplan.logical.relational.LogicalPlan;
+import org.apache.pig.newplan.logical.relational.LogicalRelationalOperator;
+import org.apache.pig.newplan.logical.relational.LogicalSchema;
+
+/**
+ * Logical plan visitor which will convert all column alias references to column
+ * indexes, using the underlying anonymous expression plan visitor.
+ *
+ * For a {@link ProjectExpression} that carries an alias, the alias is looked up in
+ * the schema of the input operator the projection refers to and replaced by its
+ * position. A projection without an alias is only checked against the size of that
+ * schema. For a {@link DereferenceExpression}, the raw column references (Integer
+ * positions or String names) are turned into a list of positions, which is then
+ * stored through setBagColumns.
+ *
+ * An alias that the schema does not contain, or a projected position at or beyond
+ * the schema size, is reported as a {@link FrontendException}. Integer positions in
+ * a dereference are copied without a range check.
+ */
+public class ColumnAliasConversionVisitor extends AllExpressionVisitor {
+ public ColumnAliasConversionVisitor(OperatorPlan plan) throws FrontendException {
+ super( plan, new DependencyOrderWalker( plan ) );
+ }
+
+ @Override
+ protected LogicalExpressionVisitor getVisitor(LogicalExpressionPlan exprPlan)
+ throws FrontendException {
+ return new LogicalExpressionVisitor( exprPlan, new DependencyOrderWalker( exprPlan ) ) { // one anonymous visitor per expression plan
+ @Override
+ public void visit(ProjectExpression expr) throws FrontendException {
+ LogicalRelationalOperator op = expr.getAttachedRelationalOp();
+ LogicalPlan lp = (LogicalPlan)op.getPlan();
+ List<Operator> inputs = lp.getPredecessors( op );
+ LogicalRelationalOperator input = (LogicalRelationalOperator)inputs.get( expr.getInputNum() ); // the predecessor this projection reads from
+ LogicalSchema inputSchema = input.getSchema(); // NOTE(review): used below without a null check; confirm getSchema() cannot return null here
+ String alias = expr.getColAlias();
+ if( alias != null ) {
+ int colNum = inputSchema.getFieldPosition( alias );
+ if( colNum == -1 ) { // -1 is treated as "alias not present in the schema"
+ throw new FrontendException( "Invalid field projection: " + alias );
+ }
+ expr.setColNum( colNum );
+ } else {
+ int col = expr.getColNum();
+ if( col >= inputSchema.size() ) { // only the upper bound is checked; negative values such as -1 pass through
+ throw new FrontendException( "Out of bound access. Trying to access non-existent column: " +
+ col + ". Schema " + inputSchema + " has " + inputSchema.size() + " column(s)." );
+ }
+ }
+ }
+
+ public void visit(DereferenceExpression expr) throws FrontendException { // NOTE(review): no @Override here, unlike visit(ProjectExpression) above
+ List<Object> rawCols = expr.getRawColumns();
+ if( rawCols.isEmpty() ) { // nothing to resolve
+ return;
+ }
+
+ List<Integer> cols = new ArrayList<Integer>( rawCols.size() );
+ LogicalExpressionPlan plan = (LogicalExpressionPlan)expr.getPlan();
+ LogicalExpression pred = (LogicalExpression)plan.getPredecessors( expr ).get(0); // presumably the expression being dereferenced; TODO confirm predecessor 0 is always that
+ LogicalSchema schema = pred.getFieldSchema().schema; // NOTE(review): neither getFieldSchema() nor .schema is null-checked
+
+ for( Object rc : rawCols ) {
+ if( rc instanceof Integer ) {
+ cols.add( (Integer)rc ); // already a position; copied as is
+ } else {
+ int col = schema.getFieldPosition( (String)rc ); // every non-Integer entry is cast to String and looked up as a field name
+ if( col == -1 ) {
+ throw new FrontendException( "Invalid field projection: " + rc );
+ }
+ cols.add( col );
+ }
+ }
+ expr.setBagColumns( cols );
+ }
+ };
+ }
+}
Modified: pig/trunk/src/org/apache/pig/parser/AstValidator.g
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/parser/AstValidator.g?rev=1063930&r1=1063929&r2=1063930&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/parser/AstValidator.g (original)
+++ pig/trunk/src/org/apache/pig/parser/AstValidator.g Thu Jan 27 00:31:21 2011
@@ -219,7 +219,7 @@ rel : alias { validateAliasRef( aliases
| op_clause
;
-flatten_generated_item : ( flatten_clause | expr | START ) field_def_list?
+flatten_generated_item : ( flatten_clause | expr | STAR ) field_def_list?
;
flatten_clause : ^( FLATTEN expr )
Modified: pig/trunk/src/org/apache/pig/parser/LogicalPlanBuilder.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/parser/LogicalPlanBuilder.java?rev=1063930&r1=1063929&r2=1063930&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/parser/LogicalPlanBuilder.java (original)
+++ pig/trunk/src/org/apache/pig/parser/LogicalPlanBuilder.java Thu Jan 27 00:31:21 2011
@@ -286,13 +286,14 @@ public class LogicalPlanBuilder {
* @param operators All logical operators in lp;
* @param inputs inputs of the LOGenerate
*/
- static void processExpressionPlan(LOForEach foreach,
+ private static void processExpressionPlan(LOForEach foreach,
LogicalPlan lp,
LogicalExpressionPlan plan,
Map<String, Operator> operators,
ArrayList<Operator> inputs ) {
- List<Operator> sinks = plan.getSinks();
- for( Operator sink : sinks ) {
+ Iterator<Operator> it = plan.getOperators();
+ while( it.hasNext() ) {
+ Operator sink = it.next();
//check all ProjectExpression
if( sink instanceof ProjectExpression ) {
ProjectExpression projExpr = (ProjectExpression)sink;
@@ -312,12 +313,14 @@ public class LogicalPlanBuilder {
inputs.add( op );
}
projExpr.setInputNum( index );
+ projExpr.setColNum( -1 );
} else {
// this means the project expression refers to a column
// in the input of foreach. Add a LOInnerLoad and use that
// as input
projExpr.setInputNum( inputs.size() );
LOInnerLoad innerLoad = new LOInnerLoad( lp, foreach, colAlias );
+ projExpr.setColNum( -1 ); // Projection Expression on InnerLoad is always (*).
lp.add( innerLoad );
inputs.add( innerLoad );
}
@@ -326,6 +329,7 @@ public class LogicalPlanBuilder {
// using position (eg $1)
projExpr.setInputNum( inputs.size() );
LOInnerLoad innerLoad = new LOInnerLoad( lp, foreach, projExpr.getColNum() );
+ projExpr.setColNum( -1 ); // Projection Expression on InnerLoad is always (*).
lp.add( innerLoad );
inputs.add( innerLoad );
}
@@ -598,30 +602,50 @@ public class LogicalPlanBuilder {
Map<String, Operator> operators,
String alias, ProjectExpression projExpr, List<LogicalExpressionPlan> exprPlans) {
Operator input = null;
- boolean foreachNeeded = !exprPlans.isEmpty();
String colAlias = projExpr.getColAlias();
if( colAlias != null ) {
+ // ProjExpr refers to a name, which can be an alias for another operator or col name.
Operator op = operators.get( colAlias );
if( op != null ) {
+ // ProjExpr refers to an operator alias.
input = op ;
- if( !foreachNeeded )
- return op;
} else {
+ // Assuming that ProjExpr refers to a column by name. Create an LOInnerLoad
input = new LOInnerLoad( innerPlan, foreach, colAlias );
- if( !foreachNeeded && alias != null ) {
- operators.put( alias , input );
- }
}
} else {
+ // ProjExpr refers to a column by number.
input = new LOInnerLoad( innerPlan, foreach, projExpr.getColNum() );
}
- LogicalPlan lp = new LogicalPlan();
- boolean[] flatten = new boolean[exprPlans.size()];
- LOGenerate gen = new LOGenerate( lp, exprPlans, flatten );
- lp.add( gen );
+ LogicalPlan lp = new LogicalPlan(); // f's inner plan
LOForEach f = new LOForEach( innerPlan );
f.setInnerPlan( lp );
+ LOGenerate gen = new LOGenerate( lp );
+ boolean[] flatten = new boolean[exprPlans.size()];
+
+ List<Operator> innerLoads = new ArrayList<Operator>( exprPlans.size() );
+ for( LogicalExpressionPlan plan : exprPlans ) {
+ ProjectExpression pe = (ProjectExpression)plan.getSinks().get( 0 );
+ String al = pe.getColAlias();
+ LOInnerLoad iload = ( al == null ) ?
+ new LOInnerLoad( lp, f, pe.getColNum() ) : new LOInnerLoad( lp, f, al );
+ pe.setColNum( -1 );
+ pe.setInputNum( innerLoads.size() );
+ pe.setAttachedRelationalOp( gen );
+ innerLoads.add( iload );
+ }
+
+ gen.setOutputPlans( exprPlans );
+ gen.setFlattenFlags( flatten );
+ lp.add( gen );
+
+ for( Operator il : innerLoads ) {
+ lp.add( il );
+ lp.connect( il, gen );
+ }
+
+ // The inner load operators have been connected to gen by the loop above.
setAlias( f, alias );
innerPlan.add( input );
innerPlan.add( f );
Modified: pig/trunk/src/org/apache/pig/parser/LogicalPlanGenerator.g
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/parser/LogicalPlanGenerator.g?rev=1063930&r1=1063929&r2=1063930&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/parser/LogicalPlanGenerator.g (original)
+++ pig/trunk/src/org/apache/pig/parser/LogicalPlanGenerator.g Thu Jan 27 00:31:21 2011
@@ -447,7 +447,7 @@ flatten_generated_item returns[LogicalEx
}
: ( flatten_clause[$plan] { $flattenFlag = true; }
| expr[$plan]
- | START
+ | STAR
{
builder.buildProjectExpr( $plan, currentOp, $statement::inputIndex, null, -1 );
}
Added: pig/trunk/test/org/apache/pig/parser/TestColumnAliasConversion.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/parser/TestColumnAliasConversion.java?rev=1063930&view=auto
==============================================================================
--- pig/trunk/test/org/apache/pig/parser/TestColumnAliasConversion.java (added)
+++ pig/trunk/test/org/apache/pig/parser/TestColumnAliasConversion.java Thu Jan 27 00:31:21 2011
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pig.parser;
+
+import java.io.IOException;
+
+import junit.framework.Assert;
+
+import org.antlr.runtime.RecognitionException;
+import org.apache.pig.impl.logicalLayer.FrontendException;
+import org.apache.pig.newplan.DependencyOrderWalker;
+import org.apache.pig.newplan.logical.expression.DereferenceExpression;
+import org.apache.pig.newplan.logical.expression.LogicalExpressionPlan;
+import org.apache.pig.newplan.logical.expression.LogicalExpressionVisitor;
+import org.apache.pig.newplan.logical.expression.ProjectExpression;
+import org.apache.pig.newplan.logical.optimizer.AllExpressionVisitor;
+import org.apache.pig.newplan.logical.relational.LogicalPlan;
+import org.apache.pig.newplan.logical.visitor.ColumnAliasConversionVisitor;
+
+import org.junit.Test;
+
+public class TestColumnAliasConversion { // Runs ColumnAliasConversionVisitor over parsed queries and checks what is left on the expressions.
+ @Test
+ public void test1() throws RecognitionException, ParsingFailureException, IOException { // projections by alias (u, w) and by position ($1)
+ String query = "A = load 'x' as ( u:int, v:long, w:bytearray); " +
+ "B = foreach A generate u, $1, w; " +
+ "C = store B into 'output';";
+ verify( query );
+ }
+
+ @Test
+ public void test2() throws RecognitionException, ParsingFailureException, IOException { // dereference of bag fields by name: u.(x, y)
+ String query = "A = load 'x' as ( u:bag{tuple(x, y)}, v:long, w:bytearray); " +
+ "B = foreach A generate u.(x, y), v, w; " +
+ "C = store B into 'output';";
+ verify( query );
+ }
+
+ @Test
+ public void test3() throws RecognitionException, ParsingFailureException, IOException { // nested foreach block with a dereference, a limit and an arithmetic expression
+ String query = "A = load 'x' as ( a : bag{ T:tuple(u, v) }, c : int, d : long );" +
+ "B = foreach A { R = a; S = R.u; T = limit S 100; generate S, T, c + d/5; };" +
+ "store B into 'y';";
+ verify( query );
+ }
+
+ private void verify(String query) throws RecognitionException, ParsingFailureException, IOException { // builds the plan, applies the visitor, then inspects every expression
+ LogicalPlan plan = ParserTestingUtils.generateLogicalPlan( query );
+ ColumnAliasConversionVisitor visitor = new ColumnAliasConversionVisitor( plan );
+ visitor.visit();
+ System.out.println( "Plan after setter: " + plan.toString() );
+ new AllExpressionVisitor( plan, new DependencyOrderWalker( plan ) ) { // second pass that only asserts
+ @Override
+ protected LogicalExpressionVisitor getVisitor(LogicalExpressionPlan exprPlan) throws FrontendException {
+ return new LogicalExpressionVisitor( exprPlan, new DependencyOrderWalker( exprPlan ) ) {
+ @Override
+ public void visit(ProjectExpression expr) throws FrontendException {
+ Assert.assertTrue( null == expr.getColAlias() ); // no alias may remain on a projection
+ Assert.assertTrue( expr.getColNum() >= -1 ); // -1 is accepted in addition to non-negative positions
+ }
+
+ public void visit(DereferenceExpression expr) throws FrontendException { // NOTE(review): no @Override here, unlike visit(ProjectExpression) above
+ Assert.assertTrue( expr.getRawColumns().isEmpty() ); // raw references must have been consumed
+ Assert.assertTrue( !expr.getBagColumns().isEmpty() ); // and resolved positions must be present
+ }
+ };
+ }
+
+ }.visit();
+ }
+
+}