You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by th...@apache.org on 2011/01/27 01:31:21 UTC

svn commit: r1063930 - in /pig/trunk: src/org/apache/pig/newplan/logical/expression/ src/org/apache/pig/newplan/logical/visitor/ src/org/apache/pig/parser/ test/org/apache/pig/parser/

Author: thejas
Date: Thu Jan 27 00:31:21 2011
New Revision: 1063930

URL: http://svn.apache.org/viewvc?rev=1063930&view=rev
Log:
PIG-1618: Switch to new parser generator technology - NewParser-14.patch - (xuefuz via thejas)

Added:
    pig/trunk/src/org/apache/pig/newplan/logical/visitor/ColumnAliasConversionVisitor.java
    pig/trunk/test/org/apache/pig/parser/TestColumnAliasConversion.java
Modified:
    pig/trunk/src/org/apache/pig/newplan/logical/expression/DereferenceExpression.java
    pig/trunk/src/org/apache/pig/newplan/logical/expression/ProjectExpression.java
    pig/trunk/src/org/apache/pig/parser/AstValidator.g
    pig/trunk/src/org/apache/pig/parser/LogicalPlanBuilder.java
    pig/trunk/src/org/apache/pig/parser/LogicalPlanGenerator.g

Modified: pig/trunk/src/org/apache/pig/newplan/logical/expression/DereferenceExpression.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/newplan/logical/expression/DereferenceExpression.java?rev=1063930&r1=1063929&r2=1063930&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/newplan/logical/expression/DereferenceExpression.java (original)
+++ pig/trunk/src/org/apache/pig/newplan/logical/expression/DereferenceExpression.java Thu Jan 27 00:31:21 2011
@@ -88,6 +88,7 @@ public class DereferenceExpression exten
     
     public void setBagColumns(List<Integer> columns) {
         this.columns = columns;
+        this.rawColumns.clear(); // We don't need this any more.
     }
     
     @Override
@@ -178,4 +179,8 @@ public class DereferenceExpression exten
         return copy;
     }
 
+    public List<Object> getRawColumns() {
+        return this.rawColumns;
+    }
+
 }

Modified: pig/trunk/src/org/apache/pig/newplan/logical/expression/ProjectExpression.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/newplan/logical/expression/ProjectExpression.java?rev=1063930&r1=1063929&r2=1063930&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/newplan/logical/expression/ProjectExpression.java (original)
+++ pig/trunk/src/org/apache/pig/newplan/logical/expression/ProjectExpression.java Thu Jan 27 00:31:21 2011
@@ -123,6 +123,7 @@ public class ProjectExpression extends C
      */
     public void setColNum(int colNum) {
         col = colNum;
+        alias = null; // Once the column number is set, alias is no longer needed.
     }
     
     public boolean isProjectStar() {

Added: pig/trunk/src/org/apache/pig/newplan/logical/visitor/ColumnAliasConversionVisitor.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/newplan/logical/visitor/ColumnAliasConversionVisitor.java?rev=1063930&view=auto
==============================================================================
--- pig/trunk/src/org/apache/pig/newplan/logical/visitor/ColumnAliasConversionVisitor.java (added)
+++ pig/trunk/src/org/apache/pig/newplan/logical/visitor/ColumnAliasConversionVisitor.java Thu Jan 27 00:31:21 2011
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pig.newplan.logical.visitor;
+
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.pig.impl.logicalLayer.FrontendException;
+import org.apache.pig.newplan.DependencyOrderWalker;
+import org.apache.pig.newplan.Operator;
+import org.apache.pig.newplan.OperatorPlan;
+import org.apache.pig.newplan.logical.expression.DereferenceExpression;
+import org.apache.pig.newplan.logical.expression.LogicalExpression;
+import org.apache.pig.newplan.logical.expression.LogicalExpressionPlan;
+import org.apache.pig.newplan.logical.expression.LogicalExpressionVisitor;
+import org.apache.pig.newplan.logical.expression.ProjectExpression;
+import org.apache.pig.newplan.logical.optimizer.AllExpressionVisitor;
+import org.apache.pig.newplan.logical.relational.LogicalPlan;
+import org.apache.pig.newplan.logical.relational.LogicalRelationalOperator;
+import org.apache.pig.newplan.logical.relational.LogicalSchema;
+
+/**
+ * Logical plan visitor which will convert all column alias references to column
+ * indexes, using the underlying anonymous expression plan visitor.
+ */
+public class ColumnAliasConversionVisitor extends AllExpressionVisitor {
+    public ColumnAliasConversionVisitor(OperatorPlan plan) throws FrontendException {
+        super( plan, new DependencyOrderWalker( plan ) );
+    }
+
+    @Override
+    protected LogicalExpressionVisitor getVisitor(LogicalExpressionPlan exprPlan)
+    throws FrontendException {
+        return new LogicalExpressionVisitor( exprPlan, new DependencyOrderWalker( exprPlan ) ) {
+            @Override
+            public void visit(ProjectExpression expr) throws FrontendException {
+                LogicalRelationalOperator op = expr.getAttachedRelationalOp();
+                LogicalPlan lp = (LogicalPlan)op.getPlan();
+                List<Operator> inputs = lp.getPredecessors( op );
+                LogicalRelationalOperator input = (LogicalRelationalOperator)inputs.get( expr.getInputNum() );
+                LogicalSchema inputSchema = input.getSchema();
+                String alias = expr.getColAlias();
+                if( alias != null ) {
+                    int colNum = inputSchema.getFieldPosition( alias );
+                    if( colNum == -1 ) {
+                        throw new FrontendException( "Invalid field projection: " + alias );
+                    }
+                    expr.setColNum( colNum );
+                } else {
+                    int col = expr.getColNum();
+                    if( col >= inputSchema.size() ) {
+                        throw new FrontendException( "Out of bound access. Trying to access non-existent column: " + 
+                                                      col + ". Schema " + inputSchema + " has " + inputSchema.size() + " column(s)." );
+                    }
+                }
+            }
+
+            public void visit(DereferenceExpression expr) throws FrontendException {
+                List<Object> rawCols = expr.getRawColumns();
+                if( rawCols.isEmpty() ) {
+                    return;
+                }
+                
+                List<Integer> cols = new ArrayList<Integer>( rawCols.size() );
+                LogicalExpressionPlan plan = (LogicalExpressionPlan)expr.getPlan();
+                LogicalExpression pred = (LogicalExpression)plan.getPredecessors( expr ).get(0);
+                LogicalSchema schema = pred.getFieldSchema().schema;
+                
+                for( Object rc : rawCols ) {
+                    if( rc instanceof Integer ) {
+                        cols.add( (Integer)rc );
+                    } else {
+                        int col = schema.getFieldPosition( (String)rc );
+                        if( col == -1 ) {
+                            throw new FrontendException( "Invalid field projection: " + rc );
+                        }
+                        cols.add( col );
+                    }
+                }
+                expr.setBagColumns( cols );
+            }
+        };
+    }
+}

Modified: pig/trunk/src/org/apache/pig/parser/AstValidator.g
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/parser/AstValidator.g?rev=1063930&r1=1063929&r2=1063930&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/parser/AstValidator.g (original)
+++ pig/trunk/src/org/apache/pig/parser/AstValidator.g Thu Jan 27 00:31:21 2011
@@ -219,7 +219,7 @@ rel : alias {  validateAliasRef( aliases
     | op_clause
 ;
 
-flatten_generated_item : ( flatten_clause | expr | START ) field_def_list?
+flatten_generated_item : ( flatten_clause | expr | STAR ) field_def_list?
 ;
 
 flatten_clause : ^( FLATTEN expr )

Modified: pig/trunk/src/org/apache/pig/parser/LogicalPlanBuilder.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/parser/LogicalPlanBuilder.java?rev=1063930&r1=1063929&r2=1063930&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/parser/LogicalPlanBuilder.java (original)
+++ pig/trunk/src/org/apache/pig/parser/LogicalPlanBuilder.java Thu Jan 27 00:31:21 2011
@@ -286,13 +286,14 @@ public class LogicalPlanBuilder {
      * @param operators All logical operators in lp;
      * @param inputs  inputs of the LOGenerate
      */
-    static void processExpressionPlan(LOForEach foreach,
+    private static void processExpressionPlan(LOForEach foreach,
                                       LogicalPlan lp,  
                                       LogicalExpressionPlan plan,  
                                       Map<String, Operator> operators,  
                                       ArrayList<Operator> inputs ) {
-        List<Operator> sinks = plan.getSinks();
-        for( Operator sink : sinks ) {
+        Iterator<Operator> it = plan.getOperators();
+        while( it.hasNext() ) {
+            Operator sink = it.next();
             //check all ProjectExpression
             if( sink instanceof ProjectExpression ) {
                 ProjectExpression projExpr = (ProjectExpression)sink;
@@ -312,12 +313,14 @@ public class LogicalPlanBuilder {
                             inputs.add( op );
                         }
                         projExpr.setInputNum( index );
+                        projExpr.setColNum( -1 );
                     } else {
                         // this means the project expression refers to a column
                         // in the input of foreach. Add a LOInnerLoad and use that
                         // as input
                         projExpr.setInputNum( inputs.size() );
                         LOInnerLoad innerLoad = new LOInnerLoad( lp, foreach, colAlias );
+                        projExpr.setColNum( -1 ); // Projection Expression on InnerLoad is always (*).
                         lp.add( innerLoad );
                         inputs.add( innerLoad );
                     }
@@ -326,6 +329,7 @@ public class LogicalPlanBuilder {
                     // using position (eg $1)
                     projExpr.setInputNum( inputs.size() );
                     LOInnerLoad innerLoad = new LOInnerLoad( lp, foreach, projExpr.getColNum() );
+                    projExpr.setColNum( -1 ); // Projection Expression on InnerLoad is always (*).
                     lp.add( innerLoad );
                     inputs.add( innerLoad );
                 }
@@ -598,30 +602,50 @@ public class LogicalPlanBuilder {
             Map<String, Operator> operators,
             String alias, ProjectExpression projExpr, List<LogicalExpressionPlan> exprPlans) {
         Operator input = null;
-        boolean foreachNeeded = !exprPlans.isEmpty();
         String colAlias = projExpr.getColAlias();
         if( colAlias != null ) {
+            // ProjExpr refers to a name, which can be an alias for another operator or col name.
             Operator op = operators.get( colAlias );
             if( op != null ) {
+                // ProjExpr refers to an operator alias.
                 input = op ;
-                if( !foreachNeeded )
-                    return op;
             } else {
+                // Assuming that ProjExpr refers to a column by name. Create an LOInnerLoad
                 input = new LOInnerLoad( innerPlan, foreach, colAlias );
-                if( !foreachNeeded && alias != null ) {
-                    operators.put( alias , input );
-                }
             }
         } else {
+            // ProjExpr refers to a column by number.
             input = new LOInnerLoad( innerPlan, foreach, projExpr.getColNum() );
         }
         
-        LogicalPlan lp = new LogicalPlan();
-        boolean[] flatten = new boolean[exprPlans.size()];
-        LOGenerate gen = new LOGenerate( lp, exprPlans, flatten );
-        lp.add( gen );
+        LogicalPlan lp = new LogicalPlan(); // f's inner plan
         LOForEach f = new LOForEach( innerPlan );
         f.setInnerPlan( lp );
+        LOGenerate gen = new LOGenerate( lp );
+        boolean[] flatten = new boolean[exprPlans.size()];
+        
+        List<Operator> innerLoads = new ArrayList<Operator>( exprPlans.size() );
+        for( LogicalExpressionPlan plan : exprPlans ) {
+            ProjectExpression pe = (ProjectExpression)plan.getSinks().get( 0 );
+            String al = pe.getColAlias();
+            LOInnerLoad iload = ( al == null ) ?  
+                    new LOInnerLoad( lp, f, pe.getColNum() ) : new LOInnerLoad( lp, f, al );
+            pe.setColNum( -1 );
+            pe.setInputNum( innerLoads.size() );
+            pe.setAttachedRelationalOp( gen );
+            innerLoads.add( iload );
+        }
+        
+        gen.setOutputPlans( exprPlans );
+        gen.setFlattenFlags( flatten );
+        lp.add( gen );
+
+        for( Operator il : innerLoads ) {
+            lp.add( il );
+            lp.connect( il, gen );
+        }
+        
+        // Connect the inner load operators to gen
         setAlias( f, alias );
         innerPlan.add( input );
         innerPlan.add( f );

Modified: pig/trunk/src/org/apache/pig/parser/LogicalPlanGenerator.g
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/parser/LogicalPlanGenerator.g?rev=1063930&r1=1063929&r2=1063930&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/parser/LogicalPlanGenerator.g (original)
+++ pig/trunk/src/org/apache/pig/parser/LogicalPlanGenerator.g Thu Jan 27 00:31:21 2011
@@ -447,7 +447,7 @@ flatten_generated_item returns[LogicalEx
 }
  : ( flatten_clause[$plan] { $flattenFlag = true; }
    | expr[$plan]
-   | START
+   | STAR
      {
          builder.buildProjectExpr( $plan, currentOp, $statement::inputIndex, null, -1 );
      }

Added: pig/trunk/test/org/apache/pig/parser/TestColumnAliasConversion.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/parser/TestColumnAliasConversion.java?rev=1063930&view=auto
==============================================================================
--- pig/trunk/test/org/apache/pig/parser/TestColumnAliasConversion.java (added)
+++ pig/trunk/test/org/apache/pig/parser/TestColumnAliasConversion.java Thu Jan 27 00:31:21 2011
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pig.parser;
+
+import java.io.IOException;
+
+import junit.framework.Assert;
+
+import org.antlr.runtime.RecognitionException;
+import org.apache.pig.impl.logicalLayer.FrontendException;
+import org.apache.pig.newplan.DependencyOrderWalker;
+import org.apache.pig.newplan.logical.expression.DereferenceExpression;
+import org.apache.pig.newplan.logical.expression.LogicalExpressionPlan;
+import org.apache.pig.newplan.logical.expression.LogicalExpressionVisitor;
+import org.apache.pig.newplan.logical.expression.ProjectExpression;
+import org.apache.pig.newplan.logical.optimizer.AllExpressionVisitor;
+import org.apache.pig.newplan.logical.relational.LogicalPlan;
+import org.apache.pig.newplan.logical.visitor.ColumnAliasConversionVisitor;
+
+import org.junit.Test;
+
+public class TestColumnAliasConversion {
+    @Test
+    public void test1() throws RecognitionException, ParsingFailureException, IOException {
+        String query = "A = load 'x' as ( u:int, v:long, w:bytearray); " + 
+                       "B = foreach A generate u, $1, w; " +
+                       "C = store B into 'output';";
+        verify( query );
+    }
+
+    @Test
+    public void test2() throws RecognitionException, ParsingFailureException, IOException {
+        String query = "A = load 'x' as ( u:bag{tuple(x, y)}, v:long, w:bytearray); " + 
+                       "B = foreach A generate u.(x, y), v, w; " +
+                       "C = store B into 'output';";
+        verify( query );
+    }
+    
+    @Test
+    public void test3() throws RecognitionException, ParsingFailureException, IOException {
+        String query = "A = load 'x' as ( a : bag{ T:tuple(u, v) }, c : int, d : long );" +
+                       "B = foreach A { R = a; S = R.u; T = limit S 100; generate S, T, c + d/5; };" +
+                       "store B into 'y';";
+        verify( query );
+    }
+    
+    private void verify(String query) throws RecognitionException, ParsingFailureException, IOException {
+        LogicalPlan plan = ParserTestingUtils.generateLogicalPlan( query );
+        ColumnAliasConversionVisitor visitor = new ColumnAliasConversionVisitor( plan );
+        visitor.visit();
+        System.out.println( "Plan after setter: " + plan.toString() );
+        new AllExpressionVisitor( plan, new DependencyOrderWalker( plan ) ) {
+            @Override
+            protected LogicalExpressionVisitor getVisitor(LogicalExpressionPlan exprPlan) throws FrontendException {
+                return new LogicalExpressionVisitor( exprPlan, new DependencyOrderWalker( exprPlan ) ) {
+                    @Override
+                    public void visit(ProjectExpression expr) throws FrontendException {
+                        Assert.assertTrue( null == expr.getColAlias() );
+                        Assert.assertTrue( expr.getColNum() >= -1 );
+                    }
+
+                    public void visit(DereferenceExpression expr) throws FrontendException {
+                        Assert.assertTrue( expr.getRawColumns().isEmpty() );
+                        Assert.assertTrue( !expr.getBagColumns().isEmpty() );
+                    }
+                };
+            }
+            
+        }.visit();
+    }
+
+}