You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ha...@apache.org on 2010/02/11 21:16:28 UTC
svn commit: r909129 - in /hadoop/pig/trunk: CHANGES.txt
src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/CombinerOptimizer.java
test/org/apache/pig/test/TestCombiner.java
Author: hashutosh
Date: Thu Feb 11 20:16:28 2010
New Revision: 909129
URL: http://svn.apache.org/viewvc?rev=909129&view=rev
Log:
PIG-834: incorrect plan when algebraic functions are nested
Modified:
hadoop/pig/trunk/CHANGES.txt
hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/CombinerOptimizer.java
hadoop/pig/trunk/test/org/apache/pig/test/TestCombiner.java
Modified: hadoop/pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=909129&r1=909128&r2=909129&view=diff
==============================================================================
--- hadoop/pig/trunk/CHANGES.txt (original)
+++ hadoop/pig/trunk/CHANGES.txt Thu Feb 11 20:16:28 2010
@@ -93,6 +93,8 @@
BUG FIXES
+PIG-834: incorrect plan when algebraic functions are nested (ashutoshc)
+
PIG-1217: Fix argToFuncMapping in Piggybank Top function (dvryaboy via gates)
PIG-1154: Local Mode fails when hadoop config directory is specified in
Modified: hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/CombinerOptimizer.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/CombinerOptimizer.java?rev=909129&r1=909128&r2=909129&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/CombinerOptimizer.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/CombinerOptimizer.java Thu Feb 11 20:16:28 2010
@@ -76,7 +76,7 @@
* works for things like generate group, SUM(A.$1 + 1). But it fails for
* things like the above. Certain types of inner plans will never be
* movable (like filters). But distinct or order by in the inner plan
- * should be moble. And, things like:
+ * should be mobile. And, things like:
* C = cogroup A by $0, B by $0;
* D = foreach C {
* D1 = distinct A;
@@ -427,13 +427,51 @@
}
return ExprType.SIMPLE_PROJECT;
} else if (leaf instanceof POUserFunc) {
- return ((POUserFunc)leaf).combinable() ? ExprType.ALGEBRAIC :
- ExprType.NOT_ALGEBRAIC;
+
+ POUserFunc userFunc = (POUserFunc)leaf;
+ if(!userFunc.combinable() ){
+ return ExprType.NOT_ALGEBRAIC;
+ }
+ // The leaf userFunc may be combinable, but there might be other
+ // algebraic userFuncs in the predecessors, if there are
+ // we choose not to fire combiner.
+ CheckCombinableUserFunc ccuf = new CheckCombinableUserFunc(pp);
+ ccuf.visit();
+ return ccuf.exprType;
} else {
return ExprType.NOT_ALGEBRAIC;
}
}
+ private static class CheckCombinableUserFunc extends PhyPlanVisitor{
+
+ private ExprType exprType = ExprType.ALGEBRAIC;
+
+ public CheckCombinableUserFunc(PhysicalPlan plan) {
+ super(plan, new DependencyOrderWalker<PhysicalOperator, PhysicalPlan>(plan));
+ }
+
+ @Override
+ public void visit() throws VisitorException {
+ super.visit();
+ }
+
+ @Override
+ public void visitUserFunc(POUserFunc userFunc) throws VisitorException {
+
+ /* We already know there is one combinable POUserFunc and its a leaf. So,
+ * successor of that userFunc is null. We are interested to find
+ * if there is another combinable userFunc somewhere in plan (that
+ * is a userFunc with successors and is Combinable).
+ */
+ List<PhysicalOperator> succs = this.mPlan.getSuccessors(userFunc);
+
+ if(succs != null && !succs.isEmpty() && userFunc.combinable()){
+ this.exprType = ExprType.NOT_ALGEBRAIC;
+ }
+ }
+ }
+
// Returns number of fields that this will project, including the added
// key field if that is necessary
private void fixUpForeachs(
Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestCombiner.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestCombiner.java?rev=909129&r1=909128&r2=909129&view=diff
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/test/TestCombiner.java (original)
+++ hadoop/pig/trunk/test/org/apache/pig/test/TestCombiner.java Thu Feb 11 20:16:28 2010
@@ -39,13 +39,40 @@
import org.apache.pig.builtin.PigStorage;
import org.apache.pig.data.Tuple;
+import org.apache.pig.impl.PigContext;
import org.apache.pig.impl.io.FileLocalizer;
+import org.apache.pig.impl.logicalLayer.LogicalPlan;
+import org.apache.pig.test.utils.LogicalPlanTester;
public class TestCombiner extends TestCase {
-
-
MiniCluster cluster = MiniCluster.buildCluster();
+
+ @Test
+ public void testSuccessiveUserFuncs1() throws Exception{
+
+ LogicalPlanTester tester = new LogicalPlanTester();
+ tester.buildPlan( "a = load 'students.txt' as (c1,c2,c3,c4); ");
+ tester.buildPlan("c = group a by c2; ");
+ tester.buildPlan("f = foreach c generate COUNT(org.apache.pig.builtin.Distinct($1.$2)); ");
+ LogicalPlan lp = tester.buildPlan("store f into 'out';");
+ PigContext pc = new PigServer(ExecType.MAPREDUCE, cluster.getProperties()).getPigContext();
+ assertTrue((Util.buildMRPlan(Util.buildPhysicalPlan(lp,pc),pc).getRoots().get(0).combinePlan.isEmpty()));
+ }
+
+ @Test
+ public void testSuccessiveUserFuncs2() throws Exception{
+
+ LogicalPlanTester tester = new LogicalPlanTester();
+ tester.buildPlan( "a = load 'students.txt' as (c1,c2,c3,c4); ");
+ tester.buildPlan("c = group a by c2; ");
+ String dummyUDF = JiraPig1030.class.getName();
+ tester.buildPlan("f = foreach c generate COUNT("+dummyUDF+"" +
+ "(org.apache.pig.builtin.Distinct($1.$2),"+dummyUDF+"())); ");
+ LogicalPlan lp = tester.buildPlan("store f into 'out';");
+ PigContext pc = new PigServer(ExecType.MAPREDUCE, cluster.getProperties()).getPigContext();
+ assertTrue((Util.buildMRPlan(Util.buildPhysicalPlan(lp,pc),pc).getRoots().get(0).combinePlan.isEmpty()));
+ }
@Test
public void testOnCluster() throws Exception {
@@ -92,7 +119,6 @@
+ PigStorage.class.getName() + "(',');");
}
-
@Test
public void testNoCombinerUse() {
// To simulate this, we will have two input files
@@ -348,7 +374,7 @@
return "";
}
}
-
+
@Test
public void testJiraPig1030() {
// test that combiner is NOT invoked when
@@ -391,4 +417,5 @@
}
}
}
+
}