You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by pr...@apache.org on 2010/01/26 01:02:25 UTC
svn commit: r903018 [5/5] - in /hadoop/pig/branches/load-store-redesign: ./
src/docs/src/documentation/content/xdocs/
src/org/apache/pig/backend/hadoop/executionengine/
src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/
src/org/apache/pi...
Added: hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestExperimentalRule.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestExperimentalRule.java?rev=903018&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestExperimentalRule.java (added)
+++ hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestExperimentalRule.java Tue Jan 26 00:02:23 2010
@@ -0,0 +1,316 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pig.test;
+
+import java.util.List;
+
+import org.apache.pig.experimental.plan.BaseOperatorPlan;
+import org.apache.pig.experimental.plan.Operator;
+import org.apache.pig.experimental.plan.OperatorPlan;
+import org.apache.pig.experimental.plan.PlanVisitor;
+import org.apache.pig.experimental.plan.optimizer.Rule;
+import org.apache.pig.experimental.plan.optimizer.Transformer;
+
+import junit.framework.TestCase;
+
+public class TestExperimentalRule extends TestCase {
+
+ private static class SillyRule extends Rule {
+
+ public SillyRule(String n, OperatorPlan p) {
+ super(n, p);
+ }
+
+ @Override
+ public Transformer getNewTransformer() {
+ return null;
+ }
+
+ @Override
+ protected OperatorPlan buildPattern() {
+ // TODO Auto-generated method stub
+ return null;
+ }
+
+ }
+
+ private static class SillyPlan extends BaseOperatorPlan {
+
+ SillyPlan() {
+ super();
+ }
+
+ }
+
+ private static class OP extends Operator {
+ OP(String n, OperatorPlan p) {
+ super(n, p);
+ }
+
+ public void accept(PlanVisitor v) {
+
+ }
+ }
+
+ private static class OP_Load extends OP {
+ OP_Load(String n, OperatorPlan p) {
+ super(n, p);
+ }
+ }
+
+ private static class OP_Filter extends OP {
+ OP_Filter(String n, OperatorPlan p) {
+ super(n, p);
+ }
+ }
+
+ private static class OP_Split extends OP {
+ OP_Split(String n, OperatorPlan p) {
+ super(n, p);
+ }
+ }
+
+ private static class OP_Store extends OP {
+ OP_Store(String n, OperatorPlan p) {
+ super(n, p);
+ }
+ }
+
+ private static class OP_Join extends OP {
+ OP_Join(String n, OperatorPlan p) {
+ super(n, p);
+ }
+ }
+
+
+ OperatorPlan plan = null;
+
+ public void setUp() {
+ plan = new SillyPlan();
+ Operator l1 = new OP_Load("p1", plan);
+ plan.add(l1);
+ Operator l2 = new OP_Load("p2", plan);
+ plan.add(l2);
+ Operator j1 = new OP_Join("j1", plan);
+ plan.add(j1);
+ Operator f1 = new OP_Filter("f1", plan);
+ plan.add(f1);
+ Operator f2 = new OP_Filter("f2", plan);
+ plan.add(f2);
+ Operator t1 = new OP_Split("t1",plan);
+ plan.add(t1);
+ Operator f3 = new OP_Filter("f3", plan);
+ plan.add(f3);
+ Operator f4 = new OP_Filter("f4", plan);
+ plan.add(f4);
+ Operator s1 = new OP_Store("s1", plan);
+ plan.add(s1);
+ Operator s2 = new OP_Store("s2", plan);
+ plan.add(s2);
+
+ // load --|-join - filter - filter - split |- filter - store
+ // load --| |- filter - store
+ plan.connect(l1, j1);
+ plan.connect(l2, j1);
+ plan.connect(j1, f1);
+ plan.connect(f1, f2);
+ plan.connect(f2, t1);
+ plan.connect(t1, f3);
+ plan.connect(t1, f4);
+ plan.connect(f3, s1);
+ plan.connect(f4, s2);
+ }
+
+ public void testSingleNodeMatch() {
+ // search for Load
+ OperatorPlan pattern = new SillyPlan();
+ pattern.add(new OP_Load("mmm", pattern));
+
+ Rule r = new SillyRule("basic", pattern);
+ List<OperatorPlan> l = r.match(plan);
+ assertEquals(l.size(), 2);
+
+ Operator m1 = l.get(0).getRoots().get(0);
+ assertTrue(m1.getName().equals("p1") || m1.getName().equals("p2"));
+ assertEquals(l.get(0).size(), 1);
+
+ Operator m2 = l.get(1).getRoots().get(0);
+ assertTrue(m2.getName().equals("p1") || m2.getName().equals("p2"));
+ assertEquals(l.get(1).size(), 1);
+ assertNotSame(m1.getName(), m2.getName());
+
+ // search for filter
+ pattern = new SillyPlan();
+ pattern.add(new OP_Filter("mmm",pattern));
+ r = new SillyRule("basic", pattern);
+ l = r.match(plan);
+ assertEquals(l.size(), 4);
+
+ m1 = l.get(0).getRoots().get(0);
+ assertTrue(m1.getName().equals("f1") || m1.getName().equals("f2")
+ || m1.getName().equals("f3") || m1.getName().equals("f4"));
+ assertEquals(l.get(0).size(), 1);
+
+ m2 = l.get(1).getRoots().get(0);
+ assertTrue(m1.getName().equals("f1") || m1.getName().equals("f2")
+ || m1.getName().equals("f3") || m1.getName().equals("f4"));
+ assertEquals(l.get(1).size(), 1);
+ assertNotSame(m1.getName(), m2.getName());
+
+ // search for store
+ pattern = new SillyPlan();
+ pattern.add(new OP_Store("mmm",pattern));
+ r = new SillyRule("basic", pattern);
+ l = r.match(plan);
+ assertEquals(l.size(), 2);
+
+ m1 = l.get(0).getRoots().get(0);
+ assertTrue(m1.getName().equals("s1") || m1.getName().equals("s2"));
+ assertEquals(l.get(0).size(), 1);
+
+ m2 = l.get(1).getRoots().get(0);
+ assertTrue(m2.getName().equals("s1") || m2.getName().equals("s2"));
+ assertEquals(l.get(1).size(), 1);
+ assertNotSame(m1.getName(), m2.getName());
+
+ // search for split
+ pattern = new SillyPlan();
+ pattern.add(new OP_Split("mmm",pattern));
+ r = new SillyRule("basic", pattern);
+ l = r.match(plan);
+ assertEquals(l.size(), 1);
+
+ m1 = l.get(0).getRoots().get(0);
+ assertTrue(m1.getName().equals("t1"));
+ assertEquals(l.get(0).size(), 1);
+
+ // search for join
+ pattern = new SillyPlan();
+ pattern.add(new OP_Join("mmm",pattern));
+ r = new SillyRule("basic", pattern);
+ l = r.match(plan);
+ assertEquals(l.size(), 1);
+
+ m1 = l.get(0).getRoots().get(0);
+ assertTrue(m1.getName().equals("j1"));
+ assertEquals(l.get(0).size(), 1);
+
+ }
+
+ public void testTwoNodeMatch() {
+ // search for 2 Loads at the same time
+ OperatorPlan pattern = new SillyPlan();
+ pattern.add(new OP_Load("mmm1", pattern));
+ pattern.add(new OP_Load("mmm2", pattern));
+
+ Rule r = new SillyRule("basic", pattern);
+ List<OperatorPlan> l = r.match(plan);
+ assertEquals(l.size(), 1);
+
+ assertEquals(l.get(0).getRoots().size(), 2);
+ assertEquals(l.get(0).getLeaves().size(), 2);
+ assertEquals(l.get(0).size(), 2);
+
+ Operator m1 = l.get(0).getRoots().get(0);
+ assertTrue(m1.getName().equals("p1") || m1.getName().equals("p2"));
+ Operator m2 = l.get(0).getRoots().get(1);
+ assertTrue(m2.getName().equals("p1") || m2.getName().equals("p2"));
+ assertNotSame(m1.getName(), m2.getName());
+
+
+ // search for join then filter
+ pattern = new SillyPlan();
+ Operator s1 = new OP_Join("mmm1", pattern);
+ Operator s2 = new OP_Filter("mmm2", pattern);
+ pattern.add(s1);
+ pattern.add(s2);
+ pattern.connect(s1, s2);
+
+ r = new SillyRule("basic", pattern);
+ l = r.match(plan);
+ assertEquals(l.size(), 1);
+
+ assertEquals(l.get(0).getRoots().size(), 1);
+ assertEquals(l.get(0).getLeaves().size(), 1);
+ assertEquals(l.get(0).size(), 2);
+
+ m1 = l.get(0).getRoots().get(0);
+ assertTrue(m1.getName().equals("j1"));
+ m2 = l.get(0).getLeaves().get(0);
+ assertTrue(m2.getName().equals("f1"));
+
+
+ // search for filter, then store
+ pattern = new SillyPlan();
+ s1 = new OP_Filter("mmm1", pattern);
+ s2 = new OP_Store("mmm2", pattern);
+ pattern.add(s1);
+ pattern.add(s2);
+ pattern.connect(s1, s2);
+
+ r = new SillyRule("basic", pattern);
+ l = r.match(plan);
+ assertEquals(2, l.size());
+
+ assertEquals(l.get(0).getRoots().size(), 1);
+ assertEquals(l.get(0).getLeaves().size(), 1);
+
+ // search for 2 loads, then join
+ pattern = new SillyPlan();
+ s1 = new OP_Load("mmm1", pattern);
+ s2 = new OP_Load("mmm2", pattern);
+ Operator s3 = new OP_Join("jjj", pattern);
+ pattern.add(s1);
+ pattern.add(s2);
+ pattern.add(s3);
+ pattern.connect(s1, s3);
+ pattern.connect(s2, s3);
+
+ r = new SillyRule("basic", pattern);
+ l = r.match(plan);
+ assertEquals(l.size(), 1);
+
+ // search for split then 2 filters
+ pattern = new SillyPlan();
+ s1 = new OP_Split("mmm1", pattern);
+ s2 = new OP_Filter("mmm2", pattern);
+ s3 = new OP_Filter("mmm3", pattern);
+ pattern.add(s1);
+ pattern.add(s2);
+ pattern.add(s3);
+ pattern.connect(s1, s2);
+ pattern.connect(s1, s3);
+
+ r = new SillyRule("basic", pattern);
+ l = r.match(plan);
+ assertEquals(1, l.size());
+
+ assertEquals(l.get(0).getRoots().size(), 1);
+ assertEquals(l.get(0).getLeaves().size(), 2);
+ assertEquals(l.get(0).size(), 3);
+
+ m1 = l.get(0).getRoots().get(0);
+ assertTrue(m1.getName().equals("t1"));
+ m2 = l.get(0).getLeaves().get(0);
+ assertTrue(m2.getName().equals("f3") || m2.getName().equals("f4"));
+ m2 = l.get(0).getLeaves().get(1);
+ assertTrue(m2.getName().equals("f3") || m2.getName().equals("f4"));
+ }
+
+}
Modified: hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestSecondarySort.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestSecondarySort.java?rev=903018&r1=903017&r2=903018&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestSecondarySort.java (original)
+++ hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestSecondarySort.java Tue Jan 26 00:02:23 2010
@@ -366,6 +366,26 @@
assertTrue(so.getDistinctChanged()==0);
}
+ // See PIG-1193
+ public void testSortOptimization8() throws Exception{
+ // Sort desc, used in UDF twice
+ LogicalPlanTester planTester = new LogicalPlanTester() ;
+ planTester.buildPlan("A = LOAD 'input1' AS (a0);");
+ planTester.buildPlan("B = group A all;");
+ planTester.buildPlan("C = foreach B { D = order A by $0 desc; generate DIFF(D, D);};");
+
+ LogicalPlan lp = planTester.buildPlan("store C into '/tmp';");
+ PhysicalPlan pp = Util.buildPhysicalPlan(lp, pc);
+ MROperPlan mrPlan = Util.buildMRPlan(pp, pc);
+
+ SecondaryKeyOptimizer so = new SecondaryKeyOptimizer(mrPlan);
+ so.visit();
+
+ // D is referenced twice by DIFF(D, D): expect one MR job to use the
+ // secondary sort key and both nested sorts to be removed, with no
+ // distinct rewritten (see PIG-1193).
+ assertTrue(so.getNumMRUseSecondaryKey()==1);
+ assertTrue(so.getNumSortRemoved()==2);
+ assertTrue(so.getDistinctChanged()==0);
+ }
+
public void testNestedDistinctEndToEnd1() throws Exception{
File tmpFile1 = File.createTempFile("test", "txt");
PrintStream ps1 = new PrintStream(new FileOutputStream(tmpFile1));
Modified: hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestTypeCheckingValidator.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestTypeCheckingValidator.java?rev=903018&r1=903017&r2=903018&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestTypeCheckingValidator.java (original)
+++ hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestTypeCheckingValidator.java Tue Jan 26 00:02:23 2010
@@ -5485,6 +5485,79 @@
LogicalPlan plan = planTester.buildPlan("c = foreach b generate (chararray)viewinfo#'pos' as position;") ;
// validate
+ runTypeCheckingValidator(plan);
+
+ checkLoaderInCasts(plan, "org.apache.pig.builtin.PigStorage");
+ }
+
+ /**
+ * test various scenarios with two level map lookup
+ */
+ @Test
+ public void testTwolevelMapLookupLineage() throws Exception {
+ List<String[]> queries = new ArrayList<String[]>();
+ // CASE 1: LOAD -> FILTER -> FOREACH -> LIMIT -> STORE
+ queries.add(new String[] {"sds = LOAD '/my/data/location' " +
+ "AS (simpleFields:map[], mapFields:map[], listMapFields:map[]);",
+ "queries = FILTER sds BY mapFields#'page_params'#'query' " +
+ "is NOT NULL;",
+ "queries_rand = FOREACH queries GENERATE " +
+ "(CHARARRAY) (mapFields#'page_params'#'query') AS query_string;",
+ "queries_limit = LIMIT queries_rand 100;",
+ "STORE queries_limit INTO 'out';"});
+ // CASE 2: LOAD -> FOREACH -> FILTER -> LIMIT -> STORE
+ queries.add(new String[]{"sds = LOAD '/my/data/location' " +
+ "AS (simpleFields:map[], mapFields:map[], listMapFields:map[]);",
+ "queries_rand = FOREACH sds GENERATE " +
+ "(CHARARRAY) (mapFields#'page_params'#'query') AS query_string;",
+ "queries = FILTER queries_rand BY query_string IS NOT null;",
+ "queries_limit = LIMIT queries 100;",
+ "STORE queries_limit INTO 'out';"});
+ // CASE 3: LOAD -> FOREACH -> FOREACH -> FILTER -> LIMIT -> STORE
+ queries.add(new String[]{"sds = LOAD '/my/data/location' " +
+ "AS (simpleFields:map[], mapFields:map[], listMapFields:map[]);",
+ "params = FOREACH sds GENERATE " +
+ "(map[]) (mapFields#'page_params') AS params;",
+ "queries = FOREACH params " +
+ "GENERATE (CHARARRAY) (params#'query') AS query_string;",
+ "queries_filtered = FILTER queries BY query_string IS NOT null;",
+ "queries_limit = LIMIT queries_filtered 100;",
+ "STORE queries_limit INTO 'out';"});
+ // CASE 4: LOAD -> FOREACH -> FOREACH -> LIMIT -> STORE
+ queries.add(new String[]{"sds = LOAD '/my/data/location' " +
+ "AS (simpleFields:map[], mapFields:map[], listMapFields:map[]);",
+ "params = FOREACH sds GENERATE" +
+ " (map[]) (mapFields#'page_params') AS params;",
+ "queries = FOREACH params GENERATE " +
+ "(CHARARRAY) (params#'query') AS query_string;",
+ "queries_limit = LIMIT queries 100;",
+ "STORE queries_limit INTO 'out';"});
+ // CASE 5: LOAD -> FOREACH -> FOREACH -> FOREACH -> LIMIT -> STORE
+ queries.add(new String[]{"sds = LOAD '/my/data/location' " +
+ "AS (simpleFields:map[], mapFields:map[], listMapFields:map[]);",
+ "params = FOREACH sds GENERATE " +
+ "(map[]) (mapFields#'page_params') AS params;",
+ "queries = FOREACH params GENERATE " +
+ "(CHARARRAY) (params#'query') AS query_string;",
+ "rand_queries = FOREACH queries GENERATE query_string as query;",
+ "queries_limit = LIMIT rand_queries 100;",
+ // NOTE(review): stores rand_queries, so queries_limit above is never
+ // consumed — confirm whether this STORE was meant to use queries_limit.
+ "STORE rand_queries INTO 'out';"});
+
+ // Build each multi-statement script line by line; only the final plan
+ // (from the last statement) is validated.
+ for (String[] query: queries) {
+ LogicalPlan lp = null;
+ for (String queryLine : query) {
+ lp = planTester.buildPlan(queryLine);
+ }
+
+ // validate
+ runTypeCheckingValidator(lp);
+ checkLoaderInCasts(lp, "org.apache.pig.builtin.PigStorage");
+
+ }
+ }
+
+ private void runTypeCheckingValidator(LogicalPlan plan) throws
+ PlanValidationException {
CompilationMessageCollector collector = new CompilationMessageCollector() ;
TypeCheckingValidator typeValidator = new TypeCheckingValidator() ;
typeValidator.validate(plan, collector) ;
@@ -5496,12 +5569,16 @@
if (collector.hasError()) {
throw new AssertionError("Expect no error") ;
}
-
+ }
+
+ /**
+ * Asserts that every cast found in the plan carries a load func spec
+ * whose class name starts with the given loader class name.
+ */
+ private void checkLoaderInCasts(LogicalPlan plan, String loaderClassName)
+ throws VisitorException {
CastFinder cf = new CastFinder(plan);
cf.visit();
List<LOCast> casts = cf.casts;
for (LOCast cast : casts) {
- assertTrue(cast.getLoadFuncSpec().getClassName().startsWith("org.apache.pig.builtin.PigStorage"));
+ assertTrue(cast.getLoadFuncSpec().getClassName().startsWith(
+ loaderClassName));
}
}
@@ -5656,6 +5733,7 @@
*/
public static class TestBinCondFieldSchema extends EvalFunc<DataBag> {
//no-op exec method
+ @Override
public DataBag exec(Tuple input) {
return null;
}