You are viewing a plain text version of this content. The hyperlink to the canonical version was not preserved in this plain-text copy.
Posted to commits@pig.apache.org by da...@apache.org on 2010/01/07 19:18:59 UTC
svn commit: r896951 - in /hadoop/pig/trunk: CHANGES.txt
src/org/apache/pig/impl/logicalLayer/ColumnPruner.java
src/org/apache/pig/impl/logicalLayer/optimizer/PruneColumns.java
test/org/apache/pig/test/TestPruneColumn.java
Author: daijy
Date: Thu Jan 7 18:18:58 2010
New Revision: 896951
URL: http://svn.apache.org/viewvc?rev=896951&view=rev
Log:
PIG-1176: Column Pruner issues in union of loader with and without schema
Modified:
hadoop/pig/trunk/CHANGES.txt
hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/ColumnPruner.java
hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/optimizer/PruneColumns.java
hadoop/pig/trunk/test/org/apache/pig/test/TestPruneColumn.java
Modified: hadoop/pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=896951&r1=896950&r2=896951&view=diff
==============================================================================
--- hadoop/pig/trunk/CHANGES.txt (original)
+++ hadoop/pig/trunk/CHANGES.txt Thu Jan 7 18:18:58 2010
@@ -136,6 +136,9 @@
PIG-1146: Inconsistent column pruning in LOUnion (daijy)
+PIG-1176: Column Pruner issues in union of loader with and without schema
+(daijy)
+
Release 0.6.0 - Unreleased
INCOMPATIBLE CHANGES
Modified: hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/ColumnPruner.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/ColumnPruner.java?rev=896951&r1=896950&r2=896951&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/ColumnPruner.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/ColumnPruner.java Thu Jan 7 18:18:58 2010
@@ -169,7 +169,7 @@
currentOp = lOp.insertPlainForEachAfter(columnsToProject);
}
- if (lOp.pruneColumns(columnsPruned)) {
+ if (!columnsPruned.isEmpty()&&lOp.pruneColumns(columnsPruned)) {
prunedColumnsMap.put(currentOp, columnsToPrune);
}
} catch (FrontendException e) {
Modified: hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/optimizer/PruneColumns.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/optimizer/PruneColumns.java?rev=896951&r1=896950&r2=896951&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/optimizer/PruneColumns.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/optimizer/PruneColumns.java Thu Jan 7 18:18:58 2010
@@ -73,9 +73,10 @@
}
public class PruneColumns extends LogicalTransformer {
-
+ private boolean safeToPrune = true;
private static Log log = LogFactory.getLog(PruneColumns.class);
Map<RelationalOperator, RequiredInfo> cachedRequiredInfo = new HashMap<RelationalOperator, RequiredInfo>();
+ private Map<LOLoad, RequiredFields> prunedLoaderColumnsMap = new HashMap<LOLoad, RequiredFields>();
ColumnPruner pruner;
public PruneColumns(LogicalPlan plan) {
super(plan);
@@ -175,6 +176,8 @@
{
try
{
+ if (!safeToPrune)
+ return;
if (!(lo instanceof RelationalOperator))
{
int errCode = 2182;
@@ -183,6 +186,7 @@
}
if (lo.getSchema()==null)
{
+ safeToPrune = false;
return;
}
RelationalOperator rlo = (RelationalOperator)lo;
@@ -195,7 +199,7 @@
{
// LOLoad has only one output
RequiredFields loaderRequiredFields = requiredOutputInfo.requiredFieldsList.get(0);
- pruneLoader((LOLoad)rlo, loaderRequiredFields);
+ prunedLoaderColumnsMap.put((LOLoad)rlo, loaderRequiredFields);
return;
}
@@ -767,6 +771,12 @@
public void prune() throws OptimizerException {
try {
+ if (!safeToPrune)
+ return;
+
+ for (LOLoad load : prunedLoaderColumnsMap.keySet())
+ pruneLoader(load, prunedLoaderColumnsMap.get(load));
+
if (!pruner.isEmpty())
pruner.visit();
}
Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestPruneColumn.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestPruneColumn.java?rev=896951&r1=896950&r2=896951&view=diff
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/test/TestPruneColumn.java (original)
+++ hadoop/pig/trunk/test/org/apache/pig/test/TestPruneColumn.java Thu Jan 7 18:18:58 2010
@@ -1723,4 +1723,48 @@
"No map keys pruned for A", "No column pruned for B",
"No map keys pruned for B"}));
}
+
+ // See PIG-1176
+ @Test
+ public void testUnionMixedSchemaPruning() throws Exception {
+ pigServer.registerQuery("A = load '"+ Util.generateURI(tmpFile1.toString()) + "' AS (a0, a1, a2);");
+ pigServer.registerQuery("B = foreach A generate a0;;");
+ pigServer.registerQuery("C = load '"+ Util.generateURI(tmpFile2.toString()) + "';");
+ pigServer.registerQuery("D = foreach C generate $0;");
+ pigServer.registerQuery("E = union B, D;");
+ Iterator<Tuple> iter = pigServer.openIterator("E");
+ Collection<String> results = new HashSet<String>();
+ results.add("(1)");
+ results.add("(2)");
+ results.add("(1)");
+ results.add("(2)");
+
+ assertTrue(iter.hasNext());
+ Tuple t = iter.next();
+
+ assertTrue(t.size()==1);
+ assertTrue(results.contains(t.toString()));
+
+ assertTrue(iter.hasNext());
+ t = iter.next();
+
+ assertTrue(t.size()==1);
+ assertTrue(results.contains(t.toString()));
+
+ assertTrue(iter.hasNext());
+ t = iter.next();
+
+ assertTrue(t.size()==1);
+ assertTrue(results.contains(t.toString()));
+
+ assertTrue(iter.hasNext());
+ t = iter.next();
+
+ assertTrue(t.size()==1);
+ assertTrue(results.contains(t.toString()));
+
+ assertFalse(iter.hasNext());
+
+ assertTrue(emptyLogFileMessage());
+ }
}