You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by da...@apache.org on 2010/01/07 19:18:59 UTC

svn commit: r896951 - in /hadoop/pig/trunk: CHANGES.txt src/org/apache/pig/impl/logicalLayer/ColumnPruner.java src/org/apache/pig/impl/logicalLayer/optimizer/PruneColumns.java test/org/apache/pig/test/TestPruneColumn.java

Author: daijy
Date: Thu Jan  7 18:18:58 2010
New Revision: 896951

URL: http://svn.apache.org/viewvc?rev=896951&view=rev
Log:
PIG-1176: Column Pruner issues in union of loader with and without schema

Modified:
    hadoop/pig/trunk/CHANGES.txt
    hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/ColumnPruner.java
    hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/optimizer/PruneColumns.java
    hadoop/pig/trunk/test/org/apache/pig/test/TestPruneColumn.java

Modified: hadoop/pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=896951&r1=896950&r2=896951&view=diff
==============================================================================
--- hadoop/pig/trunk/CHANGES.txt (original)
+++ hadoop/pig/trunk/CHANGES.txt Thu Jan  7 18:18:58 2010
@@ -136,6 +136,9 @@
 
 PIG-1146: Inconsistent column pruning in LOUnion (daijy)
 
+PIG-1176: Column Pruner issues in union of loader with and without schema
+(daijy)
+
 Release 0.6.0 - Unreleased
 
 INCOMPATIBLE CHANGES

Modified: hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/ColumnPruner.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/ColumnPruner.java?rev=896951&r1=896950&r2=896951&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/ColumnPruner.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/ColumnPruner.java Thu Jan  7 18:18:58 2010
@@ -169,7 +169,7 @@
                 currentOp = lOp.insertPlainForEachAfter(columnsToProject);
             }
             
-            if (lOp.pruneColumns(columnsPruned)) {
+            if (!columnsPruned.isEmpty()&&lOp.pruneColumns(columnsPruned)) {
                 prunedColumnsMap.put(currentOp, columnsToPrune);
             }
         } catch (FrontendException e) {

Modified: hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/optimizer/PruneColumns.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/optimizer/PruneColumns.java?rev=896951&r1=896950&r2=896951&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/optimizer/PruneColumns.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/optimizer/PruneColumns.java Thu Jan  7 18:18:58 2010
@@ -73,9 +73,10 @@
 }
 
 public class PruneColumns extends LogicalTransformer {
-
+    private boolean safeToPrune = true;
     private static Log log = LogFactory.getLog(PruneColumns.class);
     Map<RelationalOperator, RequiredInfo> cachedRequiredInfo = new HashMap<RelationalOperator, RequiredInfo>();
+    private Map<LOLoad, RequiredFields> prunedLoaderColumnsMap = new HashMap<LOLoad, RequiredFields>();
     ColumnPruner pruner;
     public PruneColumns(LogicalPlan plan) {
         super(plan);
@@ -175,6 +176,8 @@
     {
         try
         {
+            if (!safeToPrune)
+                return;
             if (!(lo instanceof RelationalOperator))
             {
                 int errCode = 2182;
@@ -183,6 +186,7 @@
             }
             if (lo.getSchema()==null)
             {
+                safeToPrune = false;
                 return;
             }
             RelationalOperator rlo = (RelationalOperator)lo;
@@ -195,7 +199,7 @@
             {
                 // LOLoad has only one output
                 RequiredFields loaderRequiredFields = requiredOutputInfo.requiredFieldsList.get(0);
-                pruneLoader((LOLoad)rlo, loaderRequiredFields);
+                prunedLoaderColumnsMap.put((LOLoad)rlo, loaderRequiredFields);
                 return;
             }
             
@@ -767,6 +771,12 @@
     
     public void prune() throws OptimizerException {
         try {
+            if (!safeToPrune)
+                return;
+            
+            for (LOLoad load : prunedLoaderColumnsMap.keySet())
+                pruneLoader(load, prunedLoaderColumnsMap.get(load));
+            
             if (!pruner.isEmpty())
                 pruner.visit();
         }

Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestPruneColumn.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestPruneColumn.java?rev=896951&r1=896950&r2=896951&view=diff
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/test/TestPruneColumn.java (original)
+++ hadoop/pig/trunk/test/org/apache/pig/test/TestPruneColumn.java Thu Jan  7 18:18:58 2010
@@ -1723,4 +1723,48 @@
             "No map keys pruned for A", "No column pruned for B",
             "No map keys pruned for B"}));
     }
+    
+    // See PIG-1176
+    @Test
+    public void testUnionMixedSchemaPruning() throws Exception {
+        pigServer.registerQuery("A = load '"+ Util.generateURI(tmpFile1.toString()) + "' AS (a0, a1, a2);");
+        pigServer.registerQuery("B = foreach A generate a0;;");
+        pigServer.registerQuery("C = load '"+ Util.generateURI(tmpFile2.toString()) + "';");
+        pigServer.registerQuery("D = foreach C generate $0;");
+        pigServer.registerQuery("E = union B, D;");
+        Iterator<Tuple> iter = pigServer.openIterator("E");
+        Collection<String> results = new HashSet<String>();
+        results.add("(1)");
+        results.add("(2)");
+        results.add("(1)");
+        results.add("(2)");
+
+        assertTrue(iter.hasNext());
+        Tuple t = iter.next();
+
+        assertTrue(t.size()==1);
+        assertTrue(results.contains(t.toString()));
+
+        assertTrue(iter.hasNext());
+        t = iter.next();
+
+        assertTrue(t.size()==1);
+        assertTrue(results.contains(t.toString()));
+
+        assertTrue(iter.hasNext());
+        t = iter.next();
+
+        assertTrue(t.size()==1);
+        assertTrue(results.contains(t.toString()));
+
+        assertTrue(iter.hasNext());
+        t = iter.next();
+
+        assertTrue(t.size()==1);
+        assertTrue(results.contains(t.toString()));
+
+        assertFalse(iter.hasNext());
+
+        assertTrue(emptyLogFileMessage());
+    }
 }