You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ro...@apache.org on 2016/07/26 12:15:55 UTC
svn commit: r1754118 - in /pig/trunk: ./
src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/
Author: rohini
Date: Tue Jul 26 12:15:55 2016
New Revision: 1754118
URL: http://svn.apache.org/viewvc?rev=1754118&view=rev
Log:
PIG-4960: Split followed by order by/skewed join is skewed in Tez (rohini)
Modified:
pig/trunk/CHANGES.txt
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POPoissonSample.java
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POReservoirSample.java
Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1754118&r1=1754117&r2=1754118&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Tue Jul 26 12:15:55 2016
@@ -38,6 +38,8 @@ OPTIMIZATIONS
BUG FIXES
+PIG-4960: Split followed by order by/skewed join is skewed in Tez (rohini)
+
PIG-4957: See "Received kill signal" message for a normal run after PIG-4921 (rohini)
PIG-4953: Predicate push-down will not run filters for single unary expressions (rdblue via daijy)
Modified: pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POPoissonSample.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POPoissonSample.java?rev=1754118&r1=1754117&r2=1754118&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POPoissonSample.java (original)
+++ pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POPoissonSample.java Tue Jul 26 12:15:55 2016
@@ -44,6 +44,9 @@ public class POPoissonSample extends Phy
private transient boolean initialized;
+ // num of rows skipped so far
+ private transient int numSkipped;
+
// num of rows sampled so far
private transient int numRowsSampled;
@@ -89,6 +92,7 @@ public class POPoissonSample extends Phy
@Override
public Result getNextTuple() throws ExecException {
if (!initialized) {
+ numSkipped = 0;
numRowsSampled = 0;
avgTupleMemSz = 0;
rowNum = 0;
@@ -134,7 +138,7 @@ public class POPoissonSample extends Phy
}
// skip tuples
- for (long numSkipped = 0; numSkipped < skipInterval; numSkipped++) {
+ while (numSkipped < skipInterval) {
res = processInput();
if (res.returnStatus == POStatus.STATUS_NULL) {
continue;
@@ -148,6 +152,7 @@ public class POPoissonSample extends Phy
return res;
}
rowNum++;
+ numSkipped++;
}
// skipped enough, get new sample
@@ -173,6 +178,8 @@ public class POPoissonSample extends Phy
rowNum++;
newSample = res;
+ // reset skipped
+ numSkipped = 0;
return currentSample;
}
}
Modified: pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POReservoirSample.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POReservoirSample.java?rev=1754118&r1=1754117&r2=1754118&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POReservoirSample.java (original)
+++ pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POReservoirSample.java Tue Jul 26 12:15:55 2016
@@ -125,7 +125,7 @@ public class POReservoirSample extends P
}
// collect samples until input is exhausted
- int rand = randGen.nextInt(rowProcessed);
+ int rand = randGen.nextInt(rowProcessed + 1);
if (rand < numSamples) {
samples[rand] = res;
}
@@ -133,8 +133,13 @@ public class POReservoirSample extends P
}
}
- if (this.parentPlan.endOfAllInput && res.returnStatus == POStatus.STATUS_EOP) {
- sampleCollectionDone = true;
+ if (res.returnStatus == POStatus.STATUS_EOP) {
+ if (this.parentPlan.endOfAllInput) {
+ sampleCollectionDone = true;
+ } else {
+ // In case of Split can get EOP in between.
+ return res;
+ }
}
return getSample();