You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by kn...@apache.org on 2015/02/25 00:13:09 UTC
svn commit: r1662138 - in /pig/trunk: ./ src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/ test/org/apache/pig/test/
Author: knoguchi
Date: Tue Feb 24 23:13:08 2015
New Revision: 1662138
URL: http://svn.apache.org/r1662138
Log:
PIG-4426: RowNumber(simple) Rank not producing correct results (knoguchi)
Modified:
pig/trunk/CHANGES.txt
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java
pig/trunk/test/org/apache/pig/test/TestRank1.java
pig/trunk/test/org/apache/pig/test/TestRank2.java
pig/trunk/test/org/apache/pig/test/TestRank3.java
Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1662138&r1=1662137&r2=1662138&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Tue Feb 24 23:13:08 2015
@@ -48,6 +48,8 @@ PIG-4333: Split BigData tests into multi
BUG FIXES
+PIG-4426: RowNumber(simple) Rank not producing correct results (knoguchi)
+
PIG-4433: Loading bigdecimal in nested tuple does not work (kpriceyahoo via daijy)
PIG-4410: Fix testRankWithEmptyReduce in tez mode (daijy)
Modified: pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java?rev=1662138&r1=1662137&r2=1662138&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java (original)
+++ pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java Tue Feb 24 23:13:08 2015
@@ -360,7 +360,7 @@ public class JobControlCompiler{
{
MapReduceOper mro = jobMroMap.get(job);
if (!pigContext.inIllustrator && mro.isCounterOperation())
- saveCounters(job,mro.getOperationID());
+ saveCounters(job,mro.getOperationID(), mro.isRowNumber());
plan.remove(mro);
}
}
@@ -378,7 +378,7 @@ public class JobControlCompiler{
* these values are passed via configuration file to PORank, by using the unique
* operation identifier
*/
- private void saveCounters(Job job, String operationID) {
+ private void saveCounters(Job job, String operationID, boolean isRowNumber ) {
Counters counters;
Group groupCounters;
@@ -409,7 +409,8 @@ public class JobControlCompiler{
HashMap<Integer,Long> counterList = new HashMap<Integer, Long>();
- for (int i=0;i<job.getJob().getNumReduceTasks();i++) {
+ int numTasks = isRowNumber ? job.getJobConf().getNumMapTasks() : job.getJobConf().getNumReduceTasks();
+ for ( int i=0; i < numTasks; i++ ) {
Long value = groupCounters.getCounter(Integer.toString(i));
counterList.put(i, value);
}
Modified: pig/trunk/test/org/apache/pig/test/TestRank1.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestRank1.java?rev=1662138&r1=1662137&r2=1662138&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/TestRank1.java (original)
+++ pig/trunk/test/org/apache/pig/test/TestRank1.java Tue Feb 24 23:13:08 2015
@@ -33,7 +33,10 @@ import org.junit.Before;
import org.junit.Test;
import com.google.common.collect.ImmutableList;
-import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.ImmutableMultiset;
+import com.google.common.collect.ImmutableSortedSet;
+import com.google.common.collect.TreeMultiset;
+import com.google.common.collect.Multiset;
public class TestRank1 {
private static TupleFactory tf = TupleFactory.getInstance();
@@ -76,7 +79,7 @@ public class TestRank1 {
Util.registerMultiLineQuery(pigServer, query);
- Set<Tuple> expected = ImmutableSet.of(
+ Multiset<Tuple> expected = ImmutableMultiset.of(
tf.newTuple(ImmutableList.of((long) 1, "A", 1, "N")),
tf.newTuple(ImmutableList.of((long) 2, "B", 2, "N")),
tf.newTuple(ImmutableList.of((long) 3, "C", 3, "M")),
@@ -100,7 +103,7 @@ public class TestRank1 {
Util.registerMultiLineQuery(pigServer, query);
- Set<Tuple> expected = ImmutableSet.of(
+ Multiset<Tuple> expected = ImmutableMultiset.of(
tf.newTuple(ImmutableList.of((long) 1, "Michael", "Blythe", 1,1, 1, 1, 4557045.046, 98027)),
tf.newTuple(ImmutableList.of((long) 2, "Linda","Mitchell", 2, 1, 1, 1, 5200475.231, 98027)),
tf.newTuple(ImmutableList.of((long) 3, "Jillian", "Carson", 3,1, 1, 1, 3857163.633, 98027)),
@@ -126,7 +129,7 @@ public class TestRank1 {
Util.registerMultiLineQuery(pigServer, query);
- Set<Tuple> expected = ImmutableSet.of(
+ Multiset<Tuple> expected = ImmutableMultiset.of(
tf.newTuple(ImmutableList.of((long) 1, "C", 3, "M")),
tf.newTuple(ImmutableList.of((long) 2, "A", 1, "N")),
tf.newTuple(ImmutableList.of((long) 2, "B", 2, "N")),
@@ -150,7 +153,7 @@ public class TestRank1 {
Util.registerMultiLineQuery(pigServer, query);
- Set<Tuple> expected = ImmutableSet.of(
+ Multiset<Tuple> expected = ImmutableMultiset.of(
tf.newTuple(ImmutableList.of((long) 1, "A", 1, "N")),
tf.newTuple(ImmutableList.of((long) 2, "B", 2, "N")),
tf.newTuple(ImmutableList.of((long) 3, "C", 3, "M")),
@@ -174,7 +177,7 @@ public class TestRank1 {
Util.registerMultiLineQuery(pigServer, query);
- Set<Tuple> expected = ImmutableSet.of(
+ Multiset<Tuple> expected = ImmutableMultiset.of(
tf.newTuple(ImmutableList.of((long) 1, "G", 10, "V")),
tf.newTuple(ImmutableList.of((long) 2, "F", 8, "T")),
tf.newTuple(ImmutableList.of((long) 2, "F", 8, "Q")),
@@ -198,7 +201,7 @@ public class TestRank1 {
Util.registerMultiLineQuery(pigServer, query);
- Set<Tuple> expected = ImmutableSet.of(
+ Multiset<Tuple> expected = ImmutableMultiset.of(
tf.newTuple(ImmutableList.of((long) 1, "Michael", "Blythe", 1,1, 1, 1, 4557045.046, 98027)),
tf.newTuple(ImmutableList.of((long) 1, "Linda","Mitchell", 2, 1, 1, 1, 5200475.231, 98027)),
tf.newTuple(ImmutableList.of((long) 1, "Jillian", "Carson", 3,1, 1, 1, 3857163.633, 98027)),
@@ -224,7 +227,7 @@ public class TestRank1 {
Util.registerMultiLineQuery(pigServer, query);
- Set<Tuple> expected = ImmutableSet.of(
+ Multiset<Tuple> expected = ImmutableMultiset.of(
tf.newTuple(ImmutableList.of((long) 1, "David", "Campbell", 8,6, 2, 3, 3587378.426, 98055)),
tf.newTuple(ImmutableList.of((long) 2, "Garrett","Vargas", 4, 1, 1, 1, 1764938.986, 98027)),
tf.newTuple(ImmutableList.of((long) 3, "Jae", "Pak", 12,6, 2, 4, 5015682.375, 98055)),
@@ -250,7 +253,7 @@ public class TestRank1 {
Util.registerMultiLineQuery(pigServer, query);
- Set<Tuple> expected = ImmutableSet.of(
+ Multiset<Tuple> expected = ImmutableMultiset.of(
tf.newTuple(ImmutableList.of((long) 1, "David", "Campbell", 8, 6, 2, 3, 3587378.426, 98055)),
tf.newTuple(ImmutableList.of((long) 2, "Garrett","Vargas", 4, 1, 1, 1, 1764938.986, 98027)),
tf.newTuple(ImmutableList.of((long) 3, "Jae", "Pak", 12,6, 2, 4, 5015682.375, 98055)),
@@ -276,7 +279,7 @@ public class TestRank1 {
Util.registerMultiLineQuery(pigServer, query);
- Set<Tuple> expected = ImmutableSet.of(
+ Multiset<Tuple> expected = ImmutableMultiset.of(
tf.newTuple(ImmutableList.of((long) 1, "A", 1, "N")),
tf.newTuple(ImmutableList.of((long) 2, "B", 2, "N")),
tf.newTuple(ImmutableList.of((long) 3, "C", 3, "M")),
@@ -292,10 +295,21 @@ public class TestRank1 {
verifyExpected(data.get("result"), expected);
}
- public void verifyExpected(List<Tuple> out, Set<Tuple> expected) {
+ public void verifyExpected(List<Tuple> out, Multiset<Tuple> expected) {
+ Multiset<Tuple> resultMultiset = TreeMultiset.create();
for (Tuple tup : out) {
- assertTrue(expected + " contains " + tup, expected.contains(tup));
+ resultMultiset.add(tup);
}
- }
+ StringBuilder error = new StringBuilder("Result does not match.\nActual result:\n");
+ for (Tuple tup : resultMultiset.elementSet() ) {
+ error.append(tup).append(" x ").append(resultMultiset.count(tup)).append("\n");
+ }
+ error.append("Expceted result:\n");
+ for (Tuple tup : ImmutableSortedSet.copyOf(expected) ) {
+ error.append(tup).append(" x ").append(expected.count(tup)).append("\n");
+ }
+
+ assertTrue(error.toString(), resultMultiset.equals(expected));
+ }
}
Modified: pig/trunk/test/org/apache/pig/test/TestRank2.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestRank2.java?rev=1662138&r1=1662137&r2=1662138&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/TestRank2.java (original)
+++ pig/trunk/test/org/apache/pig/test/TestRank2.java Tue Feb 24 23:13:08 2015
@@ -33,7 +33,10 @@ import org.junit.Before;
import org.junit.Test;
import com.google.common.collect.ImmutableList;
-import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.ImmutableMultiset;
+import com.google.common.collect.ImmutableSortedSet;
+import com.google.common.collect.TreeMultiset;
+import com.google.common.collect.Multiset;
public class TestRank2 {
private static PigServer pigServer;
@@ -76,7 +79,7 @@ public class TestRank2 {
Util.registerMultiLineQuery(pigServer, query);
- Set<Tuple> expected = ImmutableSet.of(
+ Multiset<Tuple> expected = ImmutableMultiset.of(
tf.newTuple(ImmutableList.of((long) 1, "C", 3, "M")),
tf.newTuple(ImmutableList.of((long) 2, "A", 1, "N")),
tf.newTuple(ImmutableList.of((long) 2, "B", 2, "N")),
@@ -100,7 +103,7 @@ public class TestRank2 {
Util.registerMultiLineQuery(pigServer, query);
- Set<Tuple> expected = ImmutableSet.of(
+ Multiset<Tuple> expected = ImmutableMultiset.of(
tf.newTuple(ImmutableList.of((long) 1, "A", 1, "N")),
tf.newTuple(ImmutableList.of((long) 2, "B", 2, "N")),
tf.newTuple(ImmutableList.of((long) 3, "C", 3, "M")),
@@ -124,7 +127,7 @@ public class TestRank2 {
Util.registerMultiLineQuery(pigServer, query);
- Set<Tuple> expected = ImmutableSet.of(
+ Multiset<Tuple> expected = ImmutableMultiset.of(
tf.newTuple(ImmutableList.of((long) 1, "G", 10, "V")),
tf.newTuple(ImmutableList.of((long) 2, "F", 8, "T")),
tf.newTuple(ImmutableList.of((long) 2, "F", 8, "Q")),
@@ -148,7 +151,7 @@ public class TestRank2 {
Util.registerMultiLineQuery(pigServer, query);
- Set<Tuple> expected = ImmutableSet.of(
+ Multiset<Tuple> expected = ImmutableMultiset.of(
tf.newTuple(ImmutableList.of((long) 1, "A", 1, "N")),
tf.newTuple(ImmutableList.of((long) 2, "B", 2, "N")),
tf.newTuple(ImmutableList.of((long) 3, "C", 3, "M")),
@@ -164,9 +167,23 @@ public class TestRank2 {
verifyExpected(data.get("result"), expected);
}
- public void verifyExpected(List<Tuple> out, Set<Tuple> expected) {
+ public void verifyExpected(List<Tuple> out, Multiset<Tuple> expected) {
+ Multiset<Tuple> resultMultiset = TreeMultiset.create();
for (Tuple tup : out) {
- assertTrue(expected + " contains " + tup, expected.contains(tup));
+ resultMultiset.add(tup);
}
+
+ StringBuilder error = new StringBuilder("Result does not match.\nActual result:\n");
+ for (Tuple tup : resultMultiset.elementSet() ) {
+ error.append(tup).append(" x ").append(resultMultiset.count(tup)).append("\n");
+ }
+ error.append("Expceted result:\n");
+ for (Tuple tup : ImmutableSortedSet.copyOf(expected) ) {
+ error.append(tup).append(" x ").append(expected.count(tup)).append("\n");
+ }
+
+ //This one line test should be sufficient but adding the above
+ //for-loop for better error messages
+ assertTrue(error.toString(), resultMultiset.equals(expected));
}
-}
\ No newline at end of file
+}
Modified: pig/trunk/test/org/apache/pig/test/TestRank3.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestRank3.java?rev=1662138&r1=1662137&r2=1662138&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/TestRank3.java (original)
+++ pig/trunk/test/org/apache/pig/test/TestRank3.java Tue Feb 24 23:13:08 2015
@@ -35,7 +35,10 @@ import org.junit.BeforeClass;
import org.junit.Test;
import com.google.common.collect.ImmutableList;
-import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.ImmutableMultiset;
+import com.google.common.collect.ImmutableSortedSet;
+import com.google.common.collect.TreeMultiset;
+import com.google.common.collect.Multiset;
public class TestRank3 {
private static PigServer pigServer;
@@ -113,7 +116,7 @@ public class TestRank3 {
Util.registerMultiLineQuery(pigServer, query);
- Set<Tuple> expected = ImmutableSet.of(
+ Multiset<Tuple> expected = ImmutableMultiset.of(
tf.newTuple(ImmutableList.of(1L,21L,5L,7L,1L,1L,0L,8L,8L)),
tf.newTuple(ImmutableList.of(2L,26L,2L,3L,2L,5L,1L,9L,10L)),
tf.newTuple(ImmutableList.of(3L,30L,24L,21L,2L,3L,1L,3L,10L)),
@@ -158,7 +161,7 @@ public class TestRank3 {
Util.registerMultiLineQuery(pigServer, query);
- Set<Tuple> expected = ImmutableSet.of();
+ Multiset<Tuple> expected = ImmutableMultiset.of();
verifyExpected(data.get("empty_result"), expected);
}
@@ -195,10 +198,24 @@ public class TestRank3 {
Util.checkQueryOutputsAfterSort(data.get("R4"), expectedResults);
}
- public void verifyExpected(List<Tuple> out, Set<Tuple> expected) {
+ public void verifyExpected(List<Tuple> out, Multiset<Tuple> expected) {
+ Multiset<Tuple> resultMultiset = TreeMultiset.create();
for (Tuple tup : out) {
- assertTrue(expected + " contains " + tup, expected.contains(tup));
+ resultMultiset.add(tup);
}
+
+ StringBuilder error = new StringBuilder("Result does not match.\nActual result:\n");
+ for (Tuple tup : resultMultiset.elementSet() ) {
+ error.append(tup).append(" x ").append(resultMultiset.count(tup)).append("\n");
+ }
+ error.append("Expceted result:\n");
+ for (Tuple tup : ImmutableSortedSet.copyOf(expected) ) {
+ error.append(tup).append(" x ").append(expected.count(tup)).append("\n");
+ }
+
+ //This one line test should be sufficient but adding the above
+ //for-loop for better error messages
+ assertTrue(error.toString(), resultMultiset.equals(expected));
}
}