You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ch...@apache.org on 2013/06/10 00:09:25 UTC

svn commit: r1491306 - in /pig/trunk: ./ src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/ test/e2e/pig/tests/ test/e2e/pig/tools/generate/

Author: cheolsoo
Date: Sun Jun  9 22:09:24 2013
New Revision: 1491306

URL: http://svn.apache.org/r1491306
Log:
PIG-3329: RANK operator failed when working with SPLIT (xalan via cheolsoo)

Modified:
    pig/trunk/CHANGES.txt
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MapReduceOper.java
    pig/trunk/test/e2e/pig/tests/nightly.conf
    pig/trunk/test/e2e/pig/tools/generate/generate_data.pl

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1491306&r1=1491305&r2=1491306&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Sun Jun  9 22:09:24 2013
@@ -198,8 +198,6 @@ BUG FIXES
 
 PIG-3331: Default values not stored in avro file when using specific schemas during store in AvroStorage (viraj via rohini)
 
-PIG-3345: Handle null in DateTime functions (rohini)
-
 PIG-3322: AvroStorage give NPE on reading file with union as top level schema (viraj via rohini)
 
 PIG-2828: Handle nulls in DataType.compare (aniket486)
@@ -274,10 +272,6 @@ PIG-3060: FLATTEN in nested foreach fail
 
 PIG-2247: Pig parser does not detect multiple arguments with the same name passed to macro (dreambird via daijy)
 
-PIG-3264: mvn signanddeploy target broken for pigunit, pigsmoke and piggybank (billgraham)
-
-PIG-3262: Pig contrib 0.11 doesn't compile on certain rpm systems (mgrover via cheolsoo)
-
 PIG-3249: Pig startup script prints out a wrong version of hadoop when using fat jar (prkommireddi via daijy)
 
 PIG-3110: pig corrupts chararrays with trailing whitespace when converting them to long (prkommireddi via daijy)
@@ -389,10 +383,18 @@ PIG-2769: a simple logic causes very lon
 
 BUG FIXES
 
+PIG-3329: RANK operator failed when working with SPLIT (xalan via cheolsoo)
+
+PIG-3345: Handle null in DateTime functions (rohini)
+
 PIG-3223: AvroStorage does not handle comma separated input paths (dreambird via rohini)
 
+PIG-3262: Pig contrib 0.11 doesn't compile on certain rpm systems (mgrover via cheolsoo)
+
+PIG-3264: mvn signanddeploy target broken for pigunit, pigsmoke and piggybank (billgraham)
+
 
-Release 0.11.1 (unreleased)
+Release 0.11.1
 
 INCOMPATIBLE CHANGES
 

Modified: pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java?rev=1491306&r1=1491305&r2=1491306&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java (original)
+++ pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java Sun Jun  9 22:09:24 2013
@@ -2002,24 +2002,15 @@ public class MRCompiler extends PhyPlanV
                 PhysicalOperator leaf = mpLeaves.get(0);
                 if ( !curMROp.isMapDone() && !curMROp.isRankOperation() )
                 {
-                    curMROp.setIsCounterOperation(true);
-                    curMROp.setIsRowNumber(true);
-                    curMROp.setOperationID(op.getOperationID());
                     curMROp.mapPlan.addAsLeaf(op);
                 } else {
                     FileSpec fSpec = getTempFileSpec();
                     MapReduceOper prevMROper = endSingleInputPlanWithStr(fSpec);
                     MapReduceOper mrCounter = startNew(fSpec, prevMROper);
                     mrCounter.mapPlan.addAsLeaf(op);
-                    mrCounter.setIsCounterOperation(true);
-                    mrCounter.setIsRowNumber(true);
-                    mrCounter.setOperationID(op.getOperationID());
                     curMROp = mrCounter;
                 }
             } else {
-                curMROp.setIsCounterOperation(true);
-                curMROp.setIsRowNumber(false);
-                curMROp.setOperationID(op.getOperationID());
                 curMROp.reducePlan.addAsLeaf(op);
             }
 
@@ -2042,7 +2033,6 @@ public class MRCompiler extends PhyPlanV
             MapReduceOper prevMROper = endSingleInputPlanWithStr(fSpec);
 
             curMROp = startNew(fSpec, prevMROper);
-            curMROp.setOperationID(op.getOperationID());
             curMROp.mapPlan.addAsLeaf(op);
 
             phyToMROpMap.put(op, curMROp);

Modified: pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MapReduceOper.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MapReduceOper.java?rev=1491306&r1=1491305&r2=1491306&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MapReduceOper.java (original)
+++ pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MapReduceOper.java Sun Jun  9 22:09:24 2013
@@ -24,14 +24,15 @@ import java.util.Iterator;
 import java.util.List;
 import java.util.Set;
 
-import org.apache.pig.impl.plan.OperatorKey;
-import org.apache.pig.impl.plan.NodeIdGenerator;
 import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.MROpPlanVisitor;
 import org.apache.pig.backend.hadoop.executionengine.physicalLayer.PhysicalOperator;
 import org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PhysicalPlan;
+import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POCounter;
 import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.PORank;
 import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POUnion;
+import org.apache.pig.impl.plan.NodeIdGenerator;
 import org.apache.pig.impl.plan.Operator;
+import org.apache.pig.impl.plan.OperatorKey;
 import org.apache.pig.impl.plan.PlanException;
 import org.apache.pig.impl.plan.VisitorException;
 import org.apache.pig.impl.util.MultiMap;
@@ -142,14 +143,6 @@ public class MapReduceOper extends Opera
     // That is, this MROper ends due to a POSPlit operator.
     private boolean splitter = false;
 
-    // Indicates that there is a counter operation in the MR job.
-    private boolean isCounterOperation = false;
-
-    // Indicates that there is a rank operation without sorting (row number) in the MR job.
-    private boolean isRowNumber = false;
-
-    private String operationID;
-
 	// Set to true if it is skewed join
 	private boolean skewedJoin = false;
 
@@ -499,14 +492,6 @@ public class MapReduceOper extends Opera
         return combineSmallSplits;
     }
 
-    public void setIsCounterOperation(boolean counter) {
-        this.isCounterOperation = counter;
-    }
-
-    public boolean isCounterOperation() {
-        return isCounterOperation;
-    }
-
     public boolean isRankOperation() {
         return getRankOperationId().size() != 0;
     }
@@ -524,19 +509,38 @@ public class MapReduceOper extends Opera
         return operationIDs;
     }
 
-    public void setIsRowNumber(boolean isRowNumber) {
-        this.isRowNumber = isRowNumber;
+    public boolean isCounterOperation() {
+        return (getCounterOperation() != null);
     }
 
     public boolean isRowNumber() {
-        return isRowNumber;
+        POCounter counter = getCounterOperation();
+        return (counter != null)?counter.isRowNumber():false;
     }
 
-    public void setOperationID(String operationID) {
-        this.operationID = operationID;
+    public String getOperationID() {
+        POCounter counter = getCounterOperation();
+        return (counter != null)?counter.getOperationID():null;
     }
 
-    public String getOperationID() {
-        return operationID;
+    private POCounter getCounterOperation() {
+        PhysicalOperator operator;
+        Iterator<PhysicalOperator> it =  this.mapPlan.getLeaves().iterator();
+
+        while(it.hasNext()) {
+            operator = it.next();
+            if(operator instanceof POCounter)
+                return (POCounter) operator;
+        }
+
+        it =  this.reducePlan.getLeaves().iterator();
+
+        while(it.hasNext()) {
+            operator = it.next();
+            if(operator instanceof POCounter)
+                return (POCounter) operator;
+        }
+
+        return null;
     }
 }

Modified: pig/trunk/test/e2e/pig/tests/nightly.conf
URL: http://svn.apache.org/viewvc/pig/trunk/test/e2e/pig/tests/nightly.conf?rev=1491306&r1=1491305&r2=1491306&view=diff
==============================================================================
--- pig/trunk/test/e2e/pig/tests/nightly.conf (original)
+++ pig/trunk/test/e2e/pig/tests/nightly.conf Sun Jun  9 22:09:24 2013
@@ -5181,7 +5181,23 @@ store a into ':OUTPATH:';\,
 									C = order B by rownumber;
 									store C into ':OUTPATH:';
 								\,
-            }
+            		}, {
+						'num' => 11,
+						'execonly' => 'mapred',
+						'pig' => q\
+									A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+									split A into M if rownumber > 15, N if rownumber < 25;
+									C = rank N;
+									D = foreach C generate $0, a, b, c;
+									store D into ':OUTPATH:';
+								\,
+						'verify_pig_script' => q\
+									A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+									B = filter A by rownumber < 25;
+									D = foreach B generate rownumber, a, b, c;
+									store D into ':OUTPATH:';
+								\,
+                    }
                 ]
             }
         ],

Modified: pig/trunk/test/e2e/pig/tools/generate/generate_data.pl
URL: http://svn.apache.org/viewvc/pig/trunk/test/e2e/pig/tools/generate/generate_data.pl?rev=1491306&r1=1491305&r2=1491306&view=diff
==============================================================================
--- pig/trunk/test/e2e/pig/tools/generate/generate_data.pl (original)
+++ pig/trunk/test/e2e/pig/tools/generate/generate_data.pl Sun Jun  9 22:09:24 2013
@@ -32,6 +32,17 @@ our @lastName = ("allen", "brown", "cars
     "nixon", "ovid", "polk", "quirinius", "robinson", "steinbeck", "thompson",
     "underhill", "van buren", "white", "xylophone", "young", "zipper");
 
+############################################################################
+# Explanation of rankedTuples: a pre-ranked set of tuples. Meaning of each column:
+#   rownumber: simple RANK (no BY clause), i.e. a sequential row number
+#   rankcabd:  RANK BY c ASC, b DESC
+#   rankbdaa:  RANK BY b DESC, a ASC
+#   rankbdca:  RANK BY b DESC, c ASC
+#   rankaacd:  RANK BY a ASC, c DESC
+#   rankaaba:  RANK BY a ASC, b ASC
+#   a,b,c:     values
+#   tail:      a long (lengthy) value, used to enlarge the data so that multiple mappers are created
+############################################################################
 our @rankedTuples = (
 	"1,21,5,7,1,1,0,8,8","2,26,2,3,2,5,1,9,10","3,30,24,21,2,3,1,3,10","4,6,10,8,3,4,1,7,2",
 	"5,8,28,25,3,2,1,0,2","6,28,11,12,4,6,2,7,10","7,9,26,22,5,7,3,2,3","8,5,6,5,6,8,3,8,1",