You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by pr...@apache.org on 2010/01/26 01:02:25 UTC

svn commit: r903018 [5/5] - in /hadoop/pig/branches/load-store-redesign: ./ src/docs/src/documentation/content/xdocs/ src/org/apache/pig/backend/hadoop/executionengine/ src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/ src/org/apache/pi...

Added: hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestExperimentalRule.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestExperimentalRule.java?rev=903018&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestExperimentalRule.java (added)
+++ hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestExperimentalRule.java Tue Jan 26 00:02:23 2010
@@ -0,0 +1,316 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pig.test;
+
+import java.util.List;
+
+import org.apache.pig.experimental.plan.BaseOperatorPlan;
+import org.apache.pig.experimental.plan.Operator;
+import org.apache.pig.experimental.plan.OperatorPlan;
+import org.apache.pig.experimental.plan.PlanVisitor;
+import org.apache.pig.experimental.plan.optimizer.Rule;
+import org.apache.pig.experimental.plan.optimizer.Transformer;
+
+import junit.framework.TestCase;
+
+public class TestExperimentalRule extends TestCase {
+
+    private static class SillyRule extends Rule {
+        // Test-only Rule: forwards name and plan to the Rule constructor; both overrides return null.
+        public SillyRule(String n, OperatorPlan p) {
+            super(n, p);            
+        }
+        
+        @Override
+        public Transformer getNewTransformer() {			
+            return null;
+        }
+
+        @Override
+        protected OperatorPlan buildPattern() {
+            // TODO Auto-generated method stub
+            return null;
+        }
+        
+    }
+    
+    private static class SillyPlan extends BaseOperatorPlan {
+        // Concrete BaseOperatorPlan used for both the test plan and the patterns; adds nothing of its own.
+        SillyPlan() {
+            super();
+        }
+
+    }
+    /** Minimal Operator for these tests: forwards name and plan to Operator, accept() does nothing. */
+    private static class OP extends Operator {
+        OP(String n, OperatorPlan p) {
+            super(n, p);           
+        }
+
+        public void accept(PlanVisitor v) {
+            // no-op: the visitor argument is ignored
+        }
+    }
+    // Subclasses of OP with no members of their own, one per operator kind used in the plans and patterns.
+    private static class OP_Load extends OP {
+        OP_Load(String n, OperatorPlan p) {
+            super(n, p);            
+        }
+    }
+    
+    private static class OP_Filter extends OP {
+        OP_Filter(String n, OperatorPlan p) {
+            super(n, p);            
+        }
+    }
+    
+    private static class OP_Split extends OP {
+        OP_Split(String n, OperatorPlan p) {
+            super(n, p);            
+        }
+    }
+    
+    private static class OP_Store extends OP {
+        OP_Store(String n, OperatorPlan p) {
+            super(n, p);            
+        }
+    }
+    
+    private static class OP_Join extends OP {
+        OP_Join(String n, OperatorPlan p) {
+            super(n, p);            
+        }
+    }
+
+    
+    OperatorPlan plan = null;
+    /** Rebuilds the shared ten-operator plan that the test methods match against. */
+    public void setUp() {
+        plan = new SillyPlan();
+        Operator loadA = new OP_Load("p1", plan);
+        plan.add(loadA);
+        Operator loadB = new OP_Load("p2", plan);
+        plan.add(loadB);
+        Operator join = new OP_Join("j1", plan);
+        plan.add(join);
+        Operator filterA = new OP_Filter("f1", plan);
+        plan.add(filterA);
+        Operator filterB = new OP_Filter("f2", plan);
+        plan.add(filterB);
+        Operator split = new OP_Split("t1", plan);
+        plan.add(split);
+        Operator filterC = new OP_Filter("f3", plan);
+        plan.add(filterC);
+        Operator filterD = new OP_Filter("f4", plan);
+        plan.add(filterD);
+        Operator storeA = new OP_Store("s1", plan);
+        plan.add(storeA);
+        Operator storeB = new OP_Store("s2", plan);
+        plan.add(storeB);
+        
+        // Topology: p1, p2 -> j1 -> f1 -> f2 -> t1; then t1 -> f3 -> s1 and t1 -> f4 -> s2
+        // (two loads feed the join; the split fans out into two filter/store branches)
+        plan.connect(loadA, join);
+        plan.connect(loadB, join);
+        plan.connect(join, filterA);
+        plan.connect(filterA, filterB);
+        plan.connect(filterB, split);
+        plan.connect(split, filterC);
+        plan.connect(split, filterD);
+        plan.connect(filterC, storeA);
+        plan.connect(filterD, storeB);
+    }
+    /** Matches one-operator patterns of every operator type against the plan built in setUp(). */
+    public void testSingleNodeMatch() {
+        // search for Load: expect one single-node match per load operator (p1, p2)
+        OperatorPlan pattern = new SillyPlan();
+        pattern.add(new OP_Load("mmm", pattern));
+        
+        Rule rule = new SillyRule("basic", pattern);
+        List<OperatorPlan> matches = rule.match(plan);
+        assertEquals(2, matches.size());
+        
+        Operator m1 = matches.get(0).getRoots().get(0);
+        assertTrue(m1.getName().equals("p1") || m1.getName().equals("p2"));
+        assertEquals(1, matches.get(0).size());
+        
+        Operator m2 = matches.get(1).getRoots().get(0);
+        assertTrue(m2.getName().equals("p1") || m2.getName().equals("p2"));
+        assertEquals(1, matches.get(1).size());
+        assertFalse(m1.getName().equals(m2.getName())); // value comparison, not reference identity
+        
+        // search for filter: four matches expected; the first two are checked, each on its own root
+        pattern = new SillyPlan();
+        pattern.add(new OP_Filter("mmm",pattern));
+        rule = new SillyRule("basic", pattern);
+        matches = rule.match(plan);
+        assertEquals(4, matches.size());
+        
+        m1 = matches.get(0).getRoots().get(0);
+        assertTrue(m1.getName().equals("f1") || m1.getName().equals("f2") 
+                || m1.getName().equals("f3") || m1.getName().equals("f4"));
+        assertEquals(1, matches.get(0).size());
+        
+        m2 = matches.get(1).getRoots().get(0);
+        assertTrue(m2.getName().equals("f1") || m2.getName().equals("f2") 
+                || m2.getName().equals("f3") || m2.getName().equals("f4"));
+        assertEquals(1, matches.get(1).size());
+        assertFalse(m1.getName().equals(m2.getName()));
+        
+        // search for store: expect one single-node match per store operator (s1, s2)
+        pattern = new SillyPlan();
+        pattern.add(new OP_Store("mmm",pattern));
+        rule = new SillyRule("basic", pattern);
+        matches = rule.match(plan);
+        assertEquals(2, matches.size());
+        
+        m1 = matches.get(0).getRoots().get(0);
+        assertTrue(m1.getName().equals("s1") || m1.getName().equals("s2"));
+        assertEquals(1, matches.get(0).size());
+        
+        m2 = matches.get(1).getRoots().get(0);
+        assertTrue(m2.getName().equals("s1") || m2.getName().equals("s2"));
+        assertEquals(1, matches.get(1).size());
+        assertFalse(m1.getName().equals(m2.getName()));
+        
+        // search for split: the plan holds exactly one (t1)
+        pattern = new SillyPlan();
+        pattern.add(new OP_Split("mmm",pattern));
+        rule = new SillyRule("basic", pattern);
+        matches = rule.match(plan);
+        assertEquals(1, matches.size());
+        
+        m1 = matches.get(0).getRoots().get(0);
+        assertEquals("t1", m1.getName());
+        assertEquals(1, matches.get(0).size());
+        
+        // search for join: the plan holds exactly one (j1)
+        pattern = new SillyPlan();
+        pattern.add(new OP_Join("mmm",pattern));
+        rule = new SillyRule("basic", pattern);
+        matches = rule.match(plan);
+        assertEquals(1, matches.size());
+        
+        m1 = matches.get(0).getRoots().get(0);
+        assertEquals("j1", m1.getName());
+        assertEquals(1, matches.get(0).size());
+        
+    }
+    /** Matches multi-operator patterns (disconnected and connected) against the plan built in setUp(). */
+    public void testTwoNodeMatch() {
+        // search for 2 Loads at the same time: a single match holding both loads as roots and leaves
+        OperatorPlan pattern = new SillyPlan();
+        pattern.add(new OP_Load("mmm1", pattern));
+        pattern.add(new OP_Load("mmm2", pattern));
+        
+        Rule rule = new SillyRule("basic", pattern);
+        List<OperatorPlan> matches = rule.match(plan);
+        assertEquals(1, matches.size());
+        
+        assertEquals(2, matches.get(0).getRoots().size());
+        assertEquals(2, matches.get(0).getLeaves().size());
+        assertEquals(2, matches.get(0).size());
+        
+        Operator m1 = matches.get(0).getRoots().get(0);
+        assertTrue(m1.getName().equals("p1") || m1.getName().equals("p2"));
+        Operator m2 = matches.get(0).getRoots().get(1);
+        assertTrue(m2.getName().equals("p1") || m2.getName().equals("p2"));       
+        assertFalse(m1.getName().equals(m2.getName())); // value comparison, not reference identity
+       
+        
+        // search for join then filter: only j1 -> f1 qualifies
+        pattern = new SillyPlan();
+        Operator s1 = new OP_Join("mmm1", pattern);
+        Operator s2 = new OP_Filter("mmm2", pattern);
+        pattern.add(s1);
+        pattern.add(s2);        
+        pattern.connect(s1, s2);
+        
+        rule = new SillyRule("basic", pattern);
+        matches = rule.match(plan);
+        assertEquals(1, matches.size());
+        
+        assertEquals(1, matches.get(0).getRoots().size());
+        assertEquals(1, matches.get(0).getLeaves().size());
+        assertEquals(2, matches.get(0).size());
+        
+        m1 = matches.get(0).getRoots().get(0);
+        assertEquals("j1", m1.getName());
+        m2 = matches.get(0).getLeaves().get(0);
+        assertEquals("f1", m2.getName());
+       
+  
+        // search for filter, then store: two matches expected; only the first one's shape is checked
+        pattern = new SillyPlan();
+        s1 = new OP_Filter("mmm1", pattern);
+        s2 = new OP_Store("mmm2", pattern);        
+        pattern.add(s1);
+        pattern.add(s2);           
+        pattern.connect(s1, s2);        
+        
+        rule = new SillyRule("basic", pattern);
+        matches = rule.match(plan);
+        assertEquals(2, matches.size());
+        
+        assertEquals(1, matches.get(0).getRoots().size());
+        assertEquals(1, matches.get(0).getLeaves().size());
+        
+        // search for 2 loads, then join: only the match count is checked
+        pattern = new SillyPlan();
+        s1 = new OP_Load("mmm1", pattern);
+        s2 = new OP_Load("mmm2", pattern);
+        Operator s3 = new OP_Join("jjj", pattern);
+        pattern.add(s1);
+        pattern.add(s2);
+        pattern.add(s3);
+        pattern.connect(s1, s3);
+        pattern.connect(s2, s3);
+        
+        rule = new SillyRule("basic", pattern);
+        matches = rule.match(plan);
+        assertEquals(1, matches.size());
+        
+        // search for split then 2 filters: t1 as root with f3 and f4 as leaves
+        pattern = new SillyPlan();
+        s1 = new OP_Split("mmm1", pattern);
+        s2 = new OP_Filter("mmm2", pattern);
+        s3 = new OP_Filter("mmm3", pattern);
+        pattern.add(s1);
+        pattern.add(s2);        
+        pattern.add(s3);
+        pattern.connect(s1, s2);
+        pattern.connect(s1, s3);
+        
+        rule = new SillyRule("basic", pattern);
+        matches = rule.match(plan);
+        assertEquals(1, matches.size());
+        
+        assertEquals(1, matches.get(0).getRoots().size());
+        assertEquals(2, matches.get(0).getLeaves().size());
+        assertEquals(3, matches.get(0).size());
+        
+        m1 = matches.get(0).getRoots().get(0);
+        assertEquals("t1", m1.getName());
+        m2 = matches.get(0).getLeaves().get(0);
+        assertTrue(m2.getName().equals("f3") || m2.getName().equals("f4"));    
+        m2 = matches.get(0).getLeaves().get(1);
+        assertTrue(m2.getName().equals("f3") || m2.getName().equals("f4"));    
+    }
+   
+}

Modified: hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestSecondarySort.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestSecondarySort.java?rev=903018&r1=903017&r2=903018&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestSecondarySort.java (original)
+++ hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestSecondarySort.java Tue Jan 26 00:02:23 2010
@@ -366,6 +366,26 @@
         assertTrue(so.getDistinctChanged()==0);
     }
     
+    // See PIG-1193
+    public void testSortOptimization8() throws Exception{
+        // Sort desc, used in UDF twice
+        LogicalPlanTester planTester = new LogicalPlanTester() ;
+        planTester.buildPlan("A = LOAD 'input1' AS (a0);");
+        planTester.buildPlan("B = group A all;");
+        planTester.buildPlan("C = foreach B { D = order A by $0 desc; generate DIFF(D, D);};");
+        // the plan returned for the store statement is turned into a physical plan, then an MR plan
+        LogicalPlan lp = planTester.buildPlan("store C into '/tmp';");
+        PhysicalPlan pp = Util.buildPhysicalPlan(lp, pc);
+        MROperPlan mrPlan = Util.buildMRPlan(pp, pc);
+        // run the secondary key optimizer over the MR plan
+        SecondaryKeyOptimizer so = new SecondaryKeyOptimizer(mrPlan);
+        so.visit();
+        // expected optimizer counters: NumMRUseSecondaryKey 1, NumSortRemoved 2, DistinctChanged 0
+        assertTrue(so.getNumMRUseSecondaryKey()==1);
+        assertTrue(so.getNumSortRemoved()==2);
+        assertTrue(so.getDistinctChanged()==0);
+    }
+    
     public void testNestedDistinctEndToEnd1() throws Exception{
         File tmpFile1 = File.createTempFile("test", "txt");
         PrintStream ps1 = new PrintStream(new FileOutputStream(tmpFile1));

Modified: hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestTypeCheckingValidator.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestTypeCheckingValidator.java?rev=903018&r1=903017&r2=903018&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestTypeCheckingValidator.java (original)
+++ hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestTypeCheckingValidator.java Tue Jan 26 00:02:23 2010
@@ -5485,6 +5485,79 @@
       LogicalPlan plan = planTester.buildPlan("c = foreach b generate (chararray)viewinfo#'pos' as position;") ;
 
         // validate
+        runTypeCheckingValidator(plan);
+        
+        checkLoaderInCasts(plan, "org.apache.pig.builtin.PigStorage");
+    }
+    
+    /**
+     * Type-checks five scripts casting a two-level map lookup; each cast found must name the PigStorage loader.
+     */
+    @Test
+    public void testTwolevelMapLookupLineage() throws Exception {
+        List<String[]> queries = new ArrayList<String[]>();
+        // CASE 1: LOAD -> FILTER -> FOREACH -> LIMIT -> STORE
+        queries.add(new String[] {"sds = LOAD '/my/data/location' " +
+                "AS (simpleFields:map[], mapFields:map[], listMapFields:map[]);",
+                "queries = FILTER sds BY mapFields#'page_params'#'query' " +
+                "is NOT NULL;",
+                "queries_rand = FOREACH queries GENERATE " +
+                "(CHARARRAY) (mapFields#'page_params'#'query') AS query_string;",
+                "queries_limit = LIMIT queries_rand 100;",
+                "STORE queries_limit INTO 'out';"});
+        // CASE 2: LOAD -> FOREACH -> FILTER -> LIMIT -> STORE
+        queries.add(new String[]{"sds = LOAD '/my/data/location'  " +
+                "AS (simpleFields:map[], mapFields:map[], listMapFields:map[]);",
+                "queries_rand = FOREACH sds GENERATE " +
+                "(CHARARRAY) (mapFields#'page_params'#'query') AS query_string;",
+                "queries = FILTER queries_rand BY query_string IS NOT null;",
+                "queries_limit = LIMIT queries 100;",
+                "STORE queries_limit INTO 'out';"});
+        // CASE 3: LOAD -> FOREACH -> FOREACH -> FILTER -> LIMIT -> STORE
+        queries.add(new String[]{"sds = LOAD '/my/data/location'  " +
+                "AS (simpleFields:map[], mapFields:map[], listMapFields:map[]);",
+                "params = FOREACH sds GENERATE " +
+                "(map[]) (mapFields#'page_params') AS params;",
+                "queries = FOREACH params " +
+                "GENERATE (CHARARRAY) (params#'query') AS query_string;",
+                "queries_filtered = FILTER queries BY query_string IS NOT null;",
+                "queries_limit = LIMIT queries_filtered 100;",
+                "STORE queries_limit INTO 'out';"});
+        // CASE 4: LOAD -> FOREACH -> FOREACH -> LIMIT -> STORE
+        queries.add(new String[]{"sds = LOAD '/my/data/location'  " +
+                "AS (simpleFields:map[], mapFields:map[], listMapFields:map[]);",
+                "params = FOREACH sds GENERATE" +
+                " (map[]) (mapFields#'page_params') AS params;",
+                "queries = FOREACH params GENERATE " +
+                "(CHARARRAY) (params#'query') AS query_string;",
+                "queries_limit = LIMIT queries 100;",
+                "STORE queries_limit INTO 'out';"});
+        // CASE 5: LOAD -> FOREACH -> FOREACH -> FOREACH -> LIMIT -> STORE
+        queries.add(new String[]{"sds = LOAD '/my/data/location'  " +
+                "AS (simpleFields:map[], mapFields:map[], listMapFields:map[]);",
+                "params = FOREACH sds GENERATE " +
+                "(map[]) (mapFields#'page_params') AS params;",
+                "queries = FOREACH params GENERATE " +
+                "(CHARARRAY) (params#'query') AS query_string;",
+                "rand_queries = FOREACH queries GENERATE query_string as query;",
+                "queries_limit = LIMIT rand_queries 100;",
+                "STORE queries_limit INTO 'out';"}); // store the LIMIT output, as in cases 1-4
+        // each script is built line by line; only the plan returned for its last line is validated
+        for (String[] query: queries) {
+            LogicalPlan lp = null;
+            for (String queryLine : query) {
+                lp = planTester.buildPlan(queryLine);    
+            }
+            
+            // validate
+            runTypeCheckingValidator(lp);
+            checkLoaderInCasts(lp, "org.apache.pig.builtin.PigStorage");
+            
+        }
+    }
+    
+    private void runTypeCheckingValidator(LogicalPlan plan) throws 
+    PlanValidationException {
         CompilationMessageCollector collector = new CompilationMessageCollector() ;
         TypeCheckingValidator typeValidator = new TypeCheckingValidator() ;
         typeValidator.validate(plan, collector) ;
@@ -5496,12 +5569,16 @@
         if (collector.hasError()) {
             throw new AssertionError("Expect no  error") ;
         }
-
+    }
+    
+    private void checkLoaderInCasts(LogicalPlan plan, String loaderClassName) 
+    throws VisitorException {
         CastFinder cf = new CastFinder(plan);
         cf.visit();
         List<LOCast> casts = cf.casts;
         for (LOCast cast : casts) {
-            assertTrue(cast.getLoadFuncSpec().getClassName().startsWith("org.apache.pig.builtin.PigStorage"));    
+            assertTrue(cast.getLoadFuncSpec().getClassName().startsWith(
+                    loaderClassName));    
         }
     }
     
@@ -5656,6 +5733,7 @@
      */
     public static class TestBinCondFieldSchema extends EvalFunc<DataBag> {
         //no-op exec method
+        @Override
         public DataBag exec(Tuple input) {
             return null;
         }