You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by an...@apache.org on 2013/06/18 07:41:56 UTC
svn commit: r1494026 - in /pig/trunk: CHANGES.txt
src/org/apache/pig/newplan/logical/rules/ColumnPruneHelper.java
test/org/apache/pig/test/TestNewPlanColumnPrune.java
Author: aniket486
Date: Tue Jun 18 05:41:56 2013
New Revision: 1494026
URL: http://svn.apache.org/r1494026
Log:
PIG-3355: ColumnMapKeyPrune bug with distinct operator (jeremykarn via aniket486)
Modified:
pig/trunk/CHANGES.txt
pig/trunk/src/org/apache/pig/newplan/logical/rules/ColumnPruneHelper.java
pig/trunk/test/org/apache/pig/test/TestNewPlanColumnPrune.java
Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1494026&r1=1494025&r2=1494026&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Tue Jun 18 05:41:56 2013
@@ -196,6 +196,8 @@ PIG-3013: BinInterSedes improve chararra
BUG FIXES
+PIG-3355: ColumnMapKeyPrune bug with distinct operator (jeremykarn via aniket486)
+
PIG-3318: AVRO: 'default value' not honored when merging schemas on load with AvroStorage (viraj via rohini)
PIG-3250: Pig dryrun generates wrong output in .expanded file for 'SPLIT....OTHERWISE...' command (dreambird via cheolsoo)
Modified: pig/trunk/src/org/apache/pig/newplan/logical/rules/ColumnPruneHelper.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/newplan/logical/rules/ColumnPruneHelper.java?rev=1494026&r1=1494025&r2=1494026&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/newplan/logical/rules/ColumnPruneHelper.java (original)
+++ pig/trunk/src/org/apache/pig/newplan/logical/rules/ColumnPruneHelper.java Tue Jun 18 05:41:56 2013
@@ -330,6 +330,8 @@ public class ColumnPruneHelper {
@Override
public void visit(LODistinct distinct) throws FrontendException {
+ setOutputUids(distinct);
+
Set<Long> input = new HashSet<Long>();
// Every field is required
Modified: pig/trunk/test/org/apache/pig/test/TestNewPlanColumnPrune.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestNewPlanColumnPrune.java?rev=1494026&r1=1494025&r2=1494026&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/TestNewPlanColumnPrune.java (original)
+++ pig/trunk/test/org/apache/pig/test/TestNewPlanColumnPrune.java Tue Jun 18 05:41:56 2013
@@ -20,11 +20,13 @@ package org.apache.pig.test;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.util.ArrayList;
import java.util.HashSet;
+import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
@@ -418,6 +420,36 @@ public class TestNewPlanColumnPrune {
}
}
+ @Test
+ public void testDistinct() throws Exception {
+ //Regression test for PIG-3355: the distinct relation was not pruned properly,
+ //which left it with a schema incompatible with the other relation in the
+ //union, so the union failed to get a schema.
+
+ String testQuery =
+ "a = load 'd.txt' as (id, v1, v2);" +
+ "b = load 'd.txt' as (id, v1, v2);" +
+ "c = distinct a;" +
+ "d = union b, c;" +
+ "e = foreach d generate id, v1;" +
+ "store e into 'empty';";
+
+ //Build the logical plan for the query and run the optimizer over it.
+ LogicalPlan newLogicalPlan = buildPlan(testQuery);
+ PlanOptimizer optimizer = new MyPlanOptimizer(newLogicalPlan, 3);
+ optimizer.optimize();
+
+ Iterator<Operator> iter = newLogicalPlan.getOperators();
+ while (iter.hasNext()) {
+ Operator o = iter.next();
+ LogicalRelationalOperator lro = (LogicalRelationalOperator)o;
+ if (lro == null || lro.getAlias() == null) continue; // skip operators that have no alias
+ if (lro.getAlias().equals("d")) { // NOTE(review): nothing is asserted if no operator is aliased "d"
+ assertNotNull(lro.getSchema()); // the union (alias d) must still have a schema after optimization
+ }
+ }
+ }
+
public class MyPlanOptimizer extends LogicalPlanOptimizer {
protected MyPlanOptimizer(OperatorPlan p, int iterations) {