You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ch...@apache.org on 2013/06/10 00:09:47 UTC

svn commit: r1491307 - in /pig/branches/branch-0.11: ./ src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/ test/e2e/pig/tests/ test/e2e/pig/tools/generate/

Author: cheolsoo
Date: Sun Jun  9 22:09:47 2013
New Revision: 1491307

URL: http://svn.apache.org/r1491307
Log:
PIG-3329: RANK operator failed when working with SPLIT (xalan via cheolsoo)

Modified:
    pig/branches/branch-0.11/CHANGES.txt
    pig/branches/branch-0.11/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java
    pig/branches/branch-0.11/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MapReduceOper.java
    pig/branches/branch-0.11/test/e2e/pig/tests/nightly.conf
    pig/branches/branch-0.11/test/e2e/pig/tools/generate/generate_data.pl

Modified: pig/branches/branch-0.11/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.11/CHANGES.txt?rev=1491307&r1=1491306&r2=1491307&view=diff
==============================================================================
--- pig/branches/branch-0.11/CHANGES.txt (original)
+++ pig/branches/branch-0.11/CHANGES.txt Sun Jun  9 22:09:47 2013
@@ -30,6 +30,8 @@ PIG-2769: a simple logic causes very lon
 
 BUG FIXES
 
+PIG-3329: RANK operator failed when working with SPLIT (xalan via cheolsoo)
+
 PIG-3345: Handle null in DateTime functions (rohini)
 
 PIG-3315: Automaton dependency missing from Pig 11.1-h2 POM. (stevel@apache.org via daijy)

Modified: pig/branches/branch-0.11/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.11/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java?rev=1491307&r1=1491306&r2=1491307&view=diff
==============================================================================
--- pig/branches/branch-0.11/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java (original)
+++ pig/branches/branch-0.11/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java Sun Jun  9 22:09:47 2013
@@ -2001,24 +2001,15 @@ public class MRCompiler extends PhyPlanV
                 PhysicalOperator leaf = mpLeaves.get(0);
                 if ( !curMROp.isMapDone() && !curMROp.isRankOperation() )
                 {
-                    curMROp.setIsCounterOperation(true);
-                    curMROp.setIsRowNumber(true);
-                    curMROp.setOperationID(op.getOperationID());
                     curMROp.mapPlan.addAsLeaf(op);
                 } else {
                     FileSpec fSpec = getTempFileSpec();
                     MapReduceOper prevMROper = endSingleInputPlanWithStr(fSpec);
                     MapReduceOper mrCounter = startNew(fSpec, prevMROper);
                     mrCounter.mapPlan.addAsLeaf(op);
-                    mrCounter.setIsCounterOperation(true);
-                    mrCounter.setIsRowNumber(true);
-                    mrCounter.setOperationID(op.getOperationID());
                     curMROp = mrCounter;
                 }
             } else {
-                curMROp.setIsCounterOperation(true);
-                curMROp.setIsRowNumber(false);
-                curMROp.setOperationID(op.getOperationID());
                 curMROp.reducePlan.addAsLeaf(op);
             }
 
@@ -2041,7 +2032,6 @@ public class MRCompiler extends PhyPlanV
             MapReduceOper prevMROper = endSingleInputPlanWithStr(fSpec);
 
             curMROp = startNew(fSpec, prevMROper);
-            curMROp.setOperationID(op.getOperationID());
             curMROp.mapPlan.addAsLeaf(op);
 
             phyToMROpMap.put(op, curMROp);

Modified: pig/branches/branch-0.11/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MapReduceOper.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.11/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MapReduceOper.java?rev=1491307&r1=1491306&r2=1491307&view=diff
==============================================================================
--- pig/branches/branch-0.11/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MapReduceOper.java (original)
+++ pig/branches/branch-0.11/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MapReduceOper.java Sun Jun  9 22:09:47 2013
@@ -24,14 +24,15 @@ import java.util.Iterator;
 import java.util.List;
 import java.util.Set;
 
-import org.apache.pig.impl.plan.OperatorKey;
-import org.apache.pig.impl.plan.NodeIdGenerator;
 import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.MROpPlanVisitor;
 import org.apache.pig.backend.hadoop.executionengine.physicalLayer.PhysicalOperator;
 import org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PhysicalPlan;
+import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POCounter;
 import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.PORank;
 import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POUnion;
+import org.apache.pig.impl.plan.NodeIdGenerator;
 import org.apache.pig.impl.plan.Operator;
+import org.apache.pig.impl.plan.OperatorKey;
 import org.apache.pig.impl.plan.PlanException;
 import org.apache.pig.impl.plan.VisitorException;
 import org.apache.pig.impl.util.MultiMap;
@@ -142,14 +143,6 @@ public class MapReduceOper extends Opera
     // That is, this MROper ends due to a POSPlit operator.
     private boolean splitter = false;
 
-    // Indicates that there is a counter operation in the MR job.
-    private boolean isCounterOperation = false;
-
-    // Indicates that there is a rank operation without sorting (row number) in the MR job.
-    private boolean isRowNumber = false;
-
-    private String operationID;
-
 	// Set to true if it is skewed join
 	private boolean skewedJoin = false;
 
@@ -499,14 +492,6 @@ public class MapReduceOper extends Opera
         return combineSmallSplits;
     }
 
-    public void setIsCounterOperation(boolean counter) {
-        this.isCounterOperation = counter;
-    }
-
-    public boolean isCounterOperation() {
-        return isCounterOperation;
-    }
-
     public boolean isRankOperation() {
         return getRankOperationId().size() != 0;
     }
@@ -524,19 +509,38 @@ public class MapReduceOper extends Opera
         return operationIDs;
     }
 
-    public void setIsRowNumber(boolean isRowNumber) {
-        this.isRowNumber = isRowNumber;
+    public boolean isCounterOperation() {
+        return (getCounterOperation() != null);
     }
 
     public boolean isRowNumber() {
-        return isRowNumber;
+        POCounter counter = getCounterOperation();
+        return (counter != null)?counter.isRowNumber():false;
     }
 
-    public void setOperationID(String operationID) {
-        this.operationID = operationID;
+    public String getOperationID() {
+        POCounter counter = getCounterOperation();
+        return (counter != null)?counter.getOperationID():null;
     }
 
-    public String getOperationID() {
-        return operationID;
+    private POCounter getCounterOperation() {
+        PhysicalOperator operator;
+        Iterator<PhysicalOperator> it =  this.mapPlan.getLeaves().iterator();
+
+        while(it.hasNext()) {
+            operator = it.next();
+            if(operator instanceof POCounter)
+                return (POCounter) operator;
+        }
+
+        it =  this.reducePlan.getLeaves().iterator();
+
+        while(it.hasNext()) {
+            operator = it.next();
+            if(operator instanceof POCounter)
+                return (POCounter) operator;
+        }
+
+        return null;
     }
 }

Modified: pig/branches/branch-0.11/test/e2e/pig/tests/nightly.conf
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.11/test/e2e/pig/tests/nightly.conf?rev=1491307&r1=1491306&r2=1491307&view=diff
==============================================================================
--- pig/branches/branch-0.11/test/e2e/pig/tests/nightly.conf (original)
+++ pig/branches/branch-0.11/test/e2e/pig/tests/nightly.conf Sun Jun  9 22:09:47 2013
@@ -5170,7 +5170,23 @@ store a into ':OUTPATH:';\,
 									C = order B by rownumber;
 									store C into ':OUTPATH:';
 								\,
-            }
+            		}, {
+						'num' => 11,
+						'execonly' => 'mapred',
+						'pig' => q\
+									A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+									split A into M if rownumber > 15, N if rownumber < 25;
+									C = rank N;
+									D = foreach C generate $0, a, b, c;
+									store D into ':OUTPATH:';
+								\,
+						'verify_pig_script' => q\
+									A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+									B = filter A by rownumber < 25;
+									D = foreach B generate rownumber, a, b, c;
+									store D into ':OUTPATH:';
+								\,
+                    }
                 ]
             }
         ],

Modified: pig/branches/branch-0.11/test/e2e/pig/tools/generate/generate_data.pl
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.11/test/e2e/pig/tools/generate/generate_data.pl?rev=1491307&r1=1491306&r2=1491307&view=diff
==============================================================================
--- pig/branches/branch-0.11/test/e2e/pig/tools/generate/generate_data.pl (original)
+++ pig/branches/branch-0.11/test/e2e/pig/tools/generate/generate_data.pl Sun Jun  9 22:09:47 2013
@@ -32,6 +32,17 @@ our @lastName = ("allen", "brown", "cars
     "nixon", "ovid", "polk", "quirinius", "robinson", "steinbeck", "thompson",
     "underhill", "van buren", "white", "xylophone", "young", "zipper");
 
+############################################################################
+# Explanation of rankedTuples: a pre-ranked set of tuples; the columns are:
+#   rownumber: simple RANK, sequential number
+#	rankcabd: RANK BY c ASC , b DESC
+#	rankbdaa: RANK BY b DESC, a ASC
+#	rankbdca: RANK BY b DESC, c ASC
+#	rankaacd: RANK BY a ASC , c DESC
+#	rankaaba: RANK BY a ASC , b ASC
+#	a,b,c:    values
+#	tail:     long value, included in order to create multiple mappers
+############################################################################
 our @rankedTuples = (
 	"1,21,5,7,1,1,0,8,8","2,26,2,3,2,5,1,9,10","3,30,24,21,2,3,1,3,10","4,6,10,8,3,4,1,7,2",
 	"5,8,28,25,3,2,1,0,2","6,28,11,12,4,6,2,7,10","7,9,26,22,5,7,3,2,3","8,5,6,5,6,8,3,8,1",