You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by da...@apache.org on 2012/03/18 09:23:04 UTC

svn commit: r1302088 - in /pig/branches/branch-0.10: ./ src/org/apache/pig/newplan/logical/expression/ src/org/apache/pig/newplan/logical/relational/ src/org/apache/pig/newplan/logical/rules/ test/org/apache/pig/test/

Author: daijy
Date: Sun Mar 18 08:23:03 2012
New Revision: 1302088

URL: http://svn.apache.org/viewvc?rev=1302088&view=rev
Log:
PIG-2563: IndexOutOfBoundsException: while projecting fields from a bag

Modified:
    pig/branches/branch-0.10/CHANGES.txt
    pig/branches/branch-0.10/src/org/apache/pig/newplan/logical/expression/ProjectExpression.java
    pig/branches/branch-0.10/src/org/apache/pig/newplan/logical/relational/LOForEach.java
    pig/branches/branch-0.10/src/org/apache/pig/newplan/logical/rules/ColumnPruneHelper.java
    pig/branches/branch-0.10/test/org/apache/pig/test/TestNestedForeach.java

Modified: pig/branches/branch-0.10/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.10/CHANGES.txt?rev=1302088&r1=1302087&r2=1302088&view=diff
==============================================================================
--- pig/branches/branch-0.10/CHANGES.txt (original)
+++ pig/branches/branch-0.10/CHANGES.txt Sun Mar 18 08:23:03 2012
@@ -176,6 +176,8 @@ PIG-2228: support partial aggregation in
 
 BUG FIXES
 
+PIG-2563: IndexOutOfBoundsException: while projecting fields from a bag (daijy)
+
 PIG-2411: AvroStorage UDF in PiggyBank fails to STORE a bag of single-field tuples as Avro arrays (russell.jurney via daijy)
 
 PIG-2588: e2e harness: use pig command for cluster deploy (thw via daijy)

Modified: pig/branches/branch-0.10/src/org/apache/pig/newplan/logical/expression/ProjectExpression.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.10/src/org/apache/pig/newplan/logical/expression/ProjectExpression.java?rev=1302088&r1=1302087&r2=1302088&view=diff
==============================================================================
--- pig/branches/branch-0.10/src/org/apache/pig/newplan/logical/expression/ProjectExpression.java (original)
+++ pig/branches/branch-0.10/src/org/apache/pig/newplan/logical/expression/ProjectExpression.java Sun Mar 18 08:23:03 2012
@@ -24,6 +24,7 @@ import org.apache.pig.PigException;
 import org.apache.pig.data.DataType;
 import org.apache.pig.impl.logicalLayer.FrontendException;
 import org.apache.pig.impl.plan.PlanValidationException;
+import org.apache.pig.impl.util.Pair;
 import org.apache.pig.newplan.Operator;
 import org.apache.pig.newplan.OperatorPlan;
 import org.apache.pig.newplan.PlanVisitor;
@@ -273,7 +274,10 @@ public class ProjectExpression extends C
             if (!(findReferent() instanceof LOInnerLoad)||
                     ((LOInnerLoad)findReferent()).sourceIsBag()) {
                 String alias = findReferent().getAlias();
-                List<LOInnerLoad> innerLoads = LOForEach.findReacheableInnerLoadFromBoundaryProject(this);
+
+                Pair<List<LOInnerLoad>, Boolean> innerLoadsPair = LOForEach.findReacheableInnerLoadFromBoundaryProject(this);
+                List<LOInnerLoad> innerLoads = innerLoadsPair.first;
+                boolean needNewUid = innerLoadsPair.second;
                 
                 // pull tuple information from innerload
                 if (innerLoads.get(0).getProjection().getFieldSchema().schema!=null &&
@@ -281,7 +285,12 @@ public class ProjectExpression extends C
                     LogicalFieldSchema originalTupleFieldSchema = innerLoads.get(0).getProjection().getFieldSchema().schema.getField(0);
                     LogicalFieldSchema newTupleFieldSchema = new LogicalFieldSchema(originalTupleFieldSchema.alias,
                             schema, DataType.TUPLE);
-                    newTupleFieldSchema.uid = originalTupleFieldSchema.uid;
+                    if (needNewUid) {
+                        newTupleFieldSchema.uid = LogicalExpression.getNextUid();
+                    }
+                    else {
+                        newTupleFieldSchema.uid = originalTupleFieldSchema.uid;
+                    }
                     LogicalSchema newTupleSchema = new LogicalSchema();
                     newTupleSchema.addField(newTupleFieldSchema);
                     fieldSchema = new LogicalSchema.LogicalFieldSchema(alias, newTupleSchema, DataType.BAG);
@@ -289,7 +298,10 @@ public class ProjectExpression extends C
                 else {
                     fieldSchema = new LogicalSchema.LogicalFieldSchema(alias, schema, DataType.BAG);
                 }
-                fieldSchema.uid = innerLoads.get(0).getProjection().getFieldSchema().uid;
+                if (needNewUid)
+                    fieldSchema.uid = LogicalExpression.getNextUid();
+                else
+                    fieldSchema.uid = innerLoads.get(0).getProjection().getFieldSchema().uid;
             }
             else {
                 // InnerLoad and source is not bag

Modified: pig/branches/branch-0.10/src/org/apache/pig/newplan/logical/relational/LOForEach.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.10/src/org/apache/pig/newplan/logical/relational/LOForEach.java?rev=1302088&r1=1302087&r2=1302088&view=diff
==============================================================================
--- pig/branches/branch-0.10/src/org/apache/pig/newplan/logical/relational/LOForEach.java (original)
+++ pig/branches/branch-0.10/src/org/apache/pig/newplan/logical/relational/LOForEach.java Sun Mar 18 08:23:03 2012
@@ -22,6 +22,7 @@ import java.util.List;
 import java.util.Stack;
 
 import org.apache.pig.impl.logicalLayer.FrontendException;
+import org.apache.pig.impl.util.Pair;
 import org.apache.pig.newplan.Operator;
 import org.apache.pig.newplan.OperatorPlan;
 import org.apache.pig.newplan.PlanVisitor;
@@ -74,8 +75,14 @@ public class LOForEach extends LogicalRe
         ((LogicalRelationalNodesVisitor)v).visit(this);
     }
     
-    public static List<LOInnerLoad> findReacheableInnerLoadFromBoundaryProject(ProjectExpression project) throws FrontendException {
+    // Find the LOInnerLoad of the inner plan corresponding to the project, and 
+    // also find whether there is a LOForEach in inner plan along the way
+    public static Pair<List<LOInnerLoad>, Boolean> findReacheableInnerLoadFromBoundaryProject(ProjectExpression project) throws FrontendException {
+        boolean needNewUid = false;
         LogicalRelationalOperator referred = project.findReferent();
+        // If it is nested foreach, generate new uid
+        if (referred instanceof LOForEach)
+            needNewUid = true;
         List<Operator> srcs = referred.getPlan().getSources();
         List<LOInnerLoad> innerLoads = new ArrayList<LOInnerLoad>();
         for (Operator src:srcs) {
@@ -85,7 +92,7 @@ public class LOForEach extends LogicalRe
             		continue;
             	}
             	
-                Stack<Operator> stack = new Stack<Operator>();
+            	Stack<Operator> stack = new Stack<Operator>();
                 List<Operator> succs = referred.getPlan().getSuccessors( src );
                 if( succs != null ) {
                 	for( Operator succ : succs ) {
@@ -110,7 +117,7 @@ public class LOForEach extends LogicalRe
                 }
             }
         }
-        return innerLoads;
+        return new Pair<List<LOInnerLoad>, Boolean>(innerLoads, needNewUid);
     }
     
     public LogicalSchema dumpNestedSchema(String alias, String nestedAlias) throws FrontendException {

Modified: pig/branches/branch-0.10/src/org/apache/pig/newplan/logical/rules/ColumnPruneHelper.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.10/src/org/apache/pig/newplan/logical/rules/ColumnPruneHelper.java?rev=1302088&r1=1302087&r2=1302088&view=diff
==============================================================================
--- pig/branches/branch-0.10/src/org/apache/pig/newplan/logical/rules/ColumnPruneHelper.java (original)
+++ pig/branches/branch-0.10/src/org/apache/pig/newplan/logical/rules/ColumnPruneHelper.java Sun Mar 18 08:23:03 2012
@@ -478,7 +478,7 @@ public class ColumnPruneHelper {
                          List<Operator> srcs = exp.getSinks();
                          for (Operator src : srcs) {
                              if (src instanceof ProjectExpression) {
-                                 List<LOInnerLoad> innerLoads = LOForEach.findReacheableInnerLoadFromBoundaryProject((ProjectExpression)src);
+                                 List<LOInnerLoad> innerLoads = LOForEach.findReacheableInnerLoadFromBoundaryProject((ProjectExpression)src).first;
                                  for (LOInnerLoad innerLoad : innerLoads) {
                                      ProjectExpression prj = innerLoad.getProjection();
                                      if (prj.isProjectStar()) {
@@ -514,7 +514,7 @@ public class ColumnPruneHelper {
                  for (Operator src : srcs) {
                      if (!(src instanceof ProjectExpression))
                          continue;
-                     List<LOInnerLoad> innerLoads = LOForEach.findReacheableInnerLoadFromBoundaryProject((ProjectExpression)src);
+                     List<LOInnerLoad> innerLoads = LOForEach.findReacheableInnerLoadFromBoundaryProject((ProjectExpression)src).first;
                      for (LOInnerLoad innerLoad : innerLoads) {
                          ProjectExpression prj = innerLoad.getProjection();
                          if (prj.isProjectStar()) {

Modified: pig/branches/branch-0.10/test/org/apache/pig/test/TestNestedForeach.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.10/test/org/apache/pig/test/TestNestedForeach.java?rev=1302088&r1=1302087&r2=1302088&view=diff
==============================================================================
--- pig/branches/branch-0.10/test/org/apache/pig/test/TestNestedForeach.java (original)
+++ pig/branches/branch-0.10/test/org/apache/pig/test/TestNestedForeach.java Sun Mar 18 08:23:03 2012
@@ -181,4 +181,26 @@ public class TestNestedForeach {
 		t = iter.next();
 		Assert.assertTrue(t.toString().equals("({(7)})"));
 	}
+	
+	// See PIG-2563
+	@Test
+    public void testNestedForeach() throws Exception {
+        String[] input = {
+                "1\t2\t3",
+                "2\t5\t2"
+        };
+
+        Util.createInputFile(cluster, "table_nf_project", input);
+
+        pig.registerQuery("A = load 'table_nf_project' as (a,b,c:chararray);");
+        pig.registerQuery("B = GROUP A BY a;");
+        pig.registerQuery("C = foreach B {tmp = A.a;generate A, tmp; };");
+        pig.registerQuery("D = foreach C generate A.(a,b) as v;");
+        Iterator<Tuple> iter = pig.openIterator("D");
+        Tuple t = iter.next();
+        Assert.assertTrue(t.toString().equals("({(1,2)})"));
+
+        t = iter.next();
+        Assert.assertTrue(t.toString().equals("({(2,5)})"));
+    }
 }