You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ro...@apache.org on 2016/07/26 12:22:26 UTC

svn commit: r1754119 - in /pig/branches/branch-0.16: ./ src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/

Author: rohini
Date: Tue Jul 26 12:22:26 2016
New Revision: 1754119

URL: http://svn.apache.org/viewvc?rev=1754119&view=rev
Log:
PIG-4960: Split followed by order by/skewed join is skewed in Tez (rohini)

Modified:
    pig/branches/branch-0.16/CHANGES.txt
    pig/branches/branch-0.16/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POPoissonSample.java
    pig/branches/branch-0.16/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POReservoirSample.java

Modified: pig/branches/branch-0.16/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.16/CHANGES.txt?rev=1754119&r1=1754118&r2=1754119&view=diff
==============================================================================
--- pig/branches/branch-0.16/CHANGES.txt (original)
+++ pig/branches/branch-0.16/CHANGES.txt Tue Jul 26 12:22:26 2016
@@ -30,6 +30,8 @@ OPTIMIZATIONS
 
 BUG FIXES
 
+PIG-4960: Split followed by order by/skewed join is skewed in Tez (rohini)
+
 PIG-4947: LOAD with HBaseStorage using a mix of pure wildcards and prefixed wildcards results in
   empty maps for the pure wildcarded column families (daijy)
 

Modified: pig/branches/branch-0.16/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POPoissonSample.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.16/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POPoissonSample.java?rev=1754119&r1=1754118&r2=1754119&view=diff
==============================================================================
--- pig/branches/branch-0.16/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POPoissonSample.java (original)
+++ pig/branches/branch-0.16/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POPoissonSample.java Tue Jul 26 12:22:26 2016
@@ -44,6 +44,9 @@ public class POPoissonSample extends Phy
 
     private transient boolean initialized;
 
+    // num of rows skipped since the last sample; a field so the count survives early returns from getNextTuple()
+    private transient int numSkipped;
+
     // num of rows sampled so far
     private transient int numRowsSampled;
 
@@ -89,6 +92,7 @@ public class POPoissonSample extends Phy
     @Override
     public Result getNextTuple() throws ExecException {
         if (!initialized) {
+            numSkipped = 0;
             numRowsSampled = 0;
             avgTupleMemSz = 0;
             rowNum = 0;
@@ -134,7 +138,7 @@ public class POPoissonSample extends Phy
         }
 
         // skip tuples
-        for (long numSkipped  = 0; numSkipped < skipInterval; numSkipped++) {
+        while (numSkipped < skipInterval) {
             res = processInput();
             if (res.returnStatus == POStatus.STATUS_NULL) {
                 continue;
@@ -148,6 +152,7 @@ public class POPoissonSample extends Phy
                 return res;
             }
             rowNum++;
+            numSkipped++;
         }
 
         // skipped enough, get new sample
@@ -173,6 +178,8 @@ public class POPoissonSample extends Phy
 
             rowNum++;
             newSample = res;
+            // reset skipped
+            numSkipped = 0;
             return currentSample;
         }
     }

Modified: pig/branches/branch-0.16/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POReservoirSample.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.16/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POReservoirSample.java?rev=1754119&r1=1754118&r2=1754119&view=diff
==============================================================================
--- pig/branches/branch-0.16/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POReservoirSample.java (original)
+++ pig/branches/branch-0.16/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POReservoirSample.java Tue Jul 26 12:22:26 2016
@@ -125,7 +125,7 @@ public class POReservoirSample extends P
                 }
 
                 // collect samples until input is exhausted
-                int rand = randGen.nextInt(rowProcessed);
+                int rand = randGen.nextInt(rowProcessed + 1);
                 if (rand < numSamples) {
                     samples[rand] = res;
                 }
@@ -133,8 +133,13 @@ public class POReservoirSample extends P
             }
         }
 
-        if (this.parentPlan.endOfAllInput && res.returnStatus == POStatus.STATUS_EOP) {
-            sampleCollectionDone = true;
+        if (res.returnStatus == POStatus.STATUS_EOP) {
+            if (this.parentPlan.endOfAllInput) {
+                sampleCollectionDone = true;
+            } else {
+                // With Split, an EOP can arrive before the end of all input; pass it through and keep collecting on later calls.
+                return res;
+            }
         }
 
         return getSample();