You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by da...@apache.org on 2010/01/22 21:20:59 UTC
svn commit: r902253 - in /hadoop/pig/trunk: CHANGES.txt
src/org/apache/pig/impl/logicalLayer/optimizer/PruneColumns.java
test/org/apache/pig/test/TestPruneColumn.java
Author: daijy
Date: Fri Jan 22 20:20:58 2010
New Revision: 902253
URL: http://svn.apache.org/viewvc?rev=902253&view=rev
Log:
Temporarily roll back PIG-1184-1; will commit again shortly
Modified:
hadoop/pig/trunk/CHANGES.txt
hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/optimizer/PruneColumns.java
hadoop/pig/trunk/test/org/apache/pig/test/TestPruneColumn.java
Modified: hadoop/pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=902253&r1=902252&r2=902253&view=diff
==============================================================================
--- hadoop/pig/trunk/CHANGES.txt (original)
+++ hadoop/pig/trunk/CHANGES.txt Fri Jan 22 20:20:58 2010
@@ -144,9 +144,6 @@
PIG-1176: Column Pruner issues in union of loader with and without schema
(daijy)
-PIG-1184: PruneColumns optimization does not handle the case of foreach
-flatten correctly if flattened bag is not used later (daijy)
-
Release 0.6.0 - Unreleased
INCOMPATIBLE CHANGES
Modified: hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/optimizer/PruneColumns.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/optimizer/PruneColumns.java?rev=902253&r1=902252&r2=902253&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/optimizer/PruneColumns.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/optimizer/PruneColumns.java Fri Jan 22 20:20:58 2010
@@ -53,7 +53,6 @@
import org.apache.pig.impl.logicalLayer.LogicalPlan;
import org.apache.pig.impl.logicalLayer.RelationalOperator;
import org.apache.pig.impl.logicalLayer.TopLevelProjectFinder;
-import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.apache.pig.impl.plan.MapKeysInfo;
import org.apache.pig.impl.plan.NodeIdGenerator;
import org.apache.pig.impl.plan.OperatorKey;
@@ -362,40 +361,20 @@
}
}
}
-
+
+
// Merge with required input fields of this logical operator.
// RequiredInputFields come from two sources, one is mapping from required output to input,
// the other is from the operator itself. Here we use getRequiredFields to get the second part,
// and merge with the first part
List<RequiredFields> requiredFieldsListOfLOOp;
- // For LOForEach, the required fields are all the flattened fields. Even if a flattened field's output is pruned,
- // flattening may still expand the number of rows in the result, so flattened fields shall not be pruned.
- // LOForEach.getRequiredFields does not give the required fields. RequiredFields means that field
- // is required by all the outputs. The pipeline does not work correctly without that field.
- // LOForEach.getRequiredFields give all the input fields referred in the LOForEach statement, but those
- // fields can still be pruned (which means, not required)
- // Eg:
- // B = foreach A generate a0, a1, a2+a3;
- // LOForEach.getRequiredFields gives (a0, a1, a2, a3);
- // However, input column a2 and a3 can be pruned if we do not need output a2+a3 for LOForEach.
- // So here, we do not use LOForEach.getRequiredFields, instead, any flattened fields are required fields
- if (rlo instanceof LOForEach) {
- List<Pair<Integer, Integer>> flattenedInputs = new ArrayList<Pair<Integer, Integer>>();
- for (int i=0;i<rlo.getSchema().size();i++) {
- if (((LOForEach)rlo).isInputFlattened(i)) {
- flattenedInputs.add(new Pair<Integer, Integer>(0, i));
- }
- }
- if (!flattenedInputs.isEmpty()) {
- requiredFieldsListOfLOOp = new ArrayList<RequiredFields>();
- requiredFieldsListOfLOOp.add(new RequiredFields(flattenedInputs));
- }
- else
- requiredFieldsListOfLOOp = null;
- }
+ // For LOForEach, requiredFields are not really required fields. Here, "required fields" means the input
+ // fields required by all of the output columns, such as the filter condition in LOFilter or the group columns in LOCoGroup.
+ // For LOForEach, each output column is generated by the foreach plan it belongs to, so nothing is globally required.
+ // So we need to bridge the semantic gap here: if the operator is LOForEach, requiredFields is null.
// For LOCross/LOUnion, actually we do not require any field here
- else if (rlo instanceof LOCross || rlo instanceof LOUnion)
+ if (rlo instanceof LOForEach || rlo instanceof LOCross || rlo instanceof LOUnion)
requiredFieldsListOfLOOp = null;
else
requiredFieldsListOfLOOp = rlo.getRequiredFields();
@@ -806,5 +785,5 @@
String msg = "Unable to prune plan";
throw new OptimizerException(msg, errCode, PigException.BUG, e);
}
- }
+ }
}
Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestPruneColumn.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestPruneColumn.java?rev=902253&r1=902252&r2=902253&view=diff
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/test/TestPruneColumn.java (original)
+++ hadoop/pig/trunk/test/org/apache/pig/test/TestPruneColumn.java Fri Jan 22 20:20:58 2010
@@ -1767,36 +1767,4 @@
assertTrue(emptyLogFileMessage());
}
-
- // See PIG-1184
- @Test
- public void testForEachFlatten() throws Exception {
- File inputFile = Util.createInputFile("table_testForEachFlatten", "", new String[]{"oiue\tM\t{(3),(4)}\t{(toronto),(montreal)}"});
-
- pigServer.registerQuery("A = load '"+inputFile.toString()+"' as (a0:chararray, a1:chararray, a2:bag{t:tuple(id:chararray)}, a3:bag{t:tuple(loc:chararray)});");
- pigServer.registerQuery("B = foreach A generate a0, a1, flatten(a2), flatten(a3), 10;");
- pigServer.registerQuery("C = foreach B generate a0, $4;");
- Iterator<Tuple> iter = pigServer.openIterator("C");
-
- assertTrue(iter.hasNext());
- Tuple t = iter.next();
- assertTrue(t.toString().equals("(oiue,10)"));
-
- assertTrue(iter.hasNext());
- t = iter.next();
- assertTrue(t.toString().equals("(oiue,10)"));
-
- assertTrue(iter.hasNext());
- t = iter.next();
- assertTrue(t.toString().equals("(oiue,10)"));
-
- assertTrue(iter.hasNext());
- t = iter.next();
- assertTrue(t.toString().equals("(oiue,10)"));
-
- assertFalse(iter.hasNext());
-
- assertTrue(checkLogFileMessage(new String[]{"Columns pruned for A: $1",
- "No map keys pruned for A"}));
- }
}