You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ch...@apache.org on 2014/02/27 00:57:58 UTC
svn commit: r1572357 - in /pig/branches/tez:
shims/test/hadoop23/org/apache/pig/test/ test/org/apache/pig/test/
test/org/apache/pig/test/data/GoldenFiles/ test/org/apache/pig/tez/
Author: cheolsoo
Date: Wed Feb 26 23:57:57 2014
New Revision: 1572357
URL: http://svn.apache.org/r1572357
Log:
PIG-3780: Tez mini cluster tests run for a very long time with TezSession reuse on (cheolsoo)
Modified:
pig/branches/tez/shims/test/hadoop23/org/apache/pig/test/TezMiniCluster.java
pig/branches/tez/test/org/apache/pig/test/TestAccumulator.java
pig/branches/tez/test/org/apache/pig/test/TestCombiner.java
pig/branches/tez/test/org/apache/pig/test/TestCustomPartitioner.java
pig/branches/tez/test/org/apache/pig/test/TestSkewedJoin.java
pig/branches/tez/test/org/apache/pig/test/TestSplitStore.java
pig/branches/tez/test/org/apache/pig/test/Util.java
pig/branches/tez/test/org/apache/pig/test/data/GoldenFiles/TEZC1.gld
pig/branches/tez/test/org/apache/pig/test/data/GoldenFiles/TEZC12.gld
pig/branches/tez/test/org/apache/pig/test/data/GoldenFiles/TEZC14.gld
pig/branches/tez/test/org/apache/pig/test/data/GoldenFiles/TEZC15.gld
pig/branches/tez/test/org/apache/pig/test/data/GoldenFiles/TEZC16.gld
pig/branches/tez/test/org/apache/pig/test/data/GoldenFiles/TEZC17.gld
pig/branches/tez/test/org/apache/pig/test/data/GoldenFiles/TEZC2.gld
pig/branches/tez/test/org/apache/pig/test/data/GoldenFiles/TEZC3.gld
pig/branches/tez/test/org/apache/pig/test/data/GoldenFiles/TEZC4.gld
pig/branches/tez/test/org/apache/pig/test/data/GoldenFiles/TEZC5.gld
pig/branches/tez/test/org/apache/pig/test/data/GoldenFiles/TEZC6.gld
pig/branches/tez/test/org/apache/pig/tez/TestTezCompiler.java
Modified: pig/branches/tez/shims/test/hadoop23/org/apache/pig/test/TezMiniCluster.java
URL: http://svn.apache.org/viewvc/pig/branches/tez/shims/test/hadoop23/org/apache/pig/test/TezMiniCluster.java?rev=1572357&r1=1572356&r2=1572357&view=diff
==============================================================================
--- pig/branches/tez/shims/test/hadoop23/org/apache/pig/test/TezMiniCluster.java (original)
+++ pig/branches/tez/shims/test/hadoop23/org/apache/pig/test/TezMiniCluster.java Wed Feb 26 23:57:57 2014
@@ -31,6 +31,7 @@ import org.apache.hadoop.mapreduce.MRJob
import org.apache.hadoop.mapreduce.v2.MiniMRYarnCluster;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.pig.ExecType;
+import org.apache.pig.PigConfiguration;
import org.apache.pig.backend.hadoop.executionengine.tez.TezExecType;
import org.apache.pig.backend.hadoop.executionengine.tez.TezSessionManager;
import org.apache.tez.common.TezJobConfig;
@@ -106,6 +107,8 @@ public class TezMiniCluster extends Mini
// Write tez-site.xml
Configuration tez_conf = new Configuration(false);
+ // TODO Remove this once TezSession reuse in mini cluster is fixed
+ tez_conf.set(PigConfiguration.TEZ_SESSION_REUSE, "false");
// TODO PIG-3659 - Remove this once memory management is fixed
tez_conf.set(TezJobConfig.TEZ_RUNTIME_IO_SORT_MB, "20");
tez_conf.set("tez.lib.uris", "hdfs:///tez,hdfs:///tez/lib");
Modified: pig/branches/tez/test/org/apache/pig/test/TestAccumulator.java
URL: http://svn.apache.org/viewvc/pig/branches/tez/test/org/apache/pig/test/TestAccumulator.java?rev=1572357&r1=1572356&r2=1572357&view=diff
==============================================================================
--- pig/branches/tez/test/org/apache/pig/test/TestAccumulator.java (original)
+++ pig/branches/tez/test/org/apache/pig/test/TestAccumulator.java Wed Feb 26 23:57:57 2014
@@ -28,6 +28,7 @@ import java.io.PrintWriter;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
+import java.util.Properties;
import org.apache.pig.PigConfiguration;
import org.apache.pig.PigServer;
@@ -46,20 +47,21 @@ public class TestAccumulator {
private static final String INPUT_FILE4 = "AccumulatorInput4.txt";
private static final String INPUT_DIR = "build/test/data";
- private PigServer pigServer;
- private static MiniGenericCluster cluster = MiniGenericCluster.buildCluster();
+ private static PigServer pigServer;
+ private static Properties properties;
+ private static MiniGenericCluster cluster;
@BeforeClass
public static void oneTimeSetUp() throws Exception {
- createFiles();
- cluster.getProperties().setProperty("pig.accumulative.batchsize", "2");
- cluster.getProperties().setProperty("pig.exec.batchsize", "2");
- cluster.getProperties().setProperty("pig.exec.nocombiner", "true");
+ cluster = MiniGenericCluster.buildCluster();
+ properties = cluster.getProperties();
+ properties.setProperty("pig.accumulative.batchsize", "2");
+ properties.setProperty("pig.exec.batchsize", "2");
+ properties.setProperty("pig.exec.nocombiner", "true");
// Reducing the number of retry attempts to speed up test completion
- cluster.getProperties().setProperty("mapred.map.max.attempts","1");
- cluster.getProperties().setProperty("mapred.reduce.max.attempts","1");
- // Disable tez session reuse to ensure each test case runs fresh
- cluster.getProperties().setProperty(PigConfiguration.TEZ_SESSION_REUSE, "false");
+ properties.setProperty("mapred.map.max.attempts","1");
+ properties.setProperty("mapred.reduce.max.attempts","1");
+ createFiles();
}
@AfterClass
@@ -71,8 +73,8 @@ public class TestAccumulator {
@Before
public void setUp() throws Exception {
// Drop stale configuration from previous test run
- cluster.getProperties().remove(PigConfiguration.OPT_ACCUMULATOR);
- pigServer = new PigServer(cluster.getExecType(), cluster.getProperties());
+ properties.remove(PigConfiguration.OPT_ACCUMULATOR);
+ pigServer = new PigServer(cluster.getExecType(), properties);
}
@After
Modified: pig/branches/tez/test/org/apache/pig/test/TestCombiner.java
URL: http://svn.apache.org/viewvc/pig/branches/tez/test/org/apache/pig/test/TestCombiner.java?rev=1572357&r1=1572356&r2=1572357&view=diff
==============================================================================
--- pig/branches/tez/test/org/apache/pig/test/TestCombiner.java (original)
+++ pig/branches/tez/test/org/apache/pig/test/TestCombiner.java Wed Feb 26 23:57:57 2014
@@ -49,12 +49,13 @@ import org.junit.Test;
public class TestCombiner {
- static MiniGenericCluster cluster = MiniGenericCluster.buildCluster();
+ private static MiniGenericCluster cluster;
+ private static Properties properties;
@BeforeClass
public static void oneTimeSetUp() throws Exception {
- // Disable tez session reuse to ensure each test case runs fresh
- cluster.getProperties().setProperty(PigConfiguration.TEZ_SESSION_REUSE, "false");
+ cluster = MiniGenericCluster.buildCluster();
+ properties = cluster.getProperties();
}
@AfterClass
@@ -87,7 +88,7 @@ public class TestCombiner {
"c = group a by c2; " +
"f = foreach c generate COUNT(org.apache.pig.builtin.Distinct($1.$2)); " +
"store f into 'out';";
- PigServer pigServer = new PigServer(cluster.getExecType(), cluster.getProperties());
+ PigServer pigServer = new PigServer(cluster.getExecType(), properties);
PigContext pc = pigServer.getPigContext();
assertTrue((Util.buildMRPlan(Util.buildPp(pigServer, query), pc).getRoots().get(0).combinePlan
.isEmpty()));
@@ -102,7 +103,7 @@ public class TestCombiner {
"f = foreach c generate COUNT(" + dummyUDF + "" +
"(org.apache.pig.builtin.Distinct($1.$2)," + dummyUDF + "())); " +
"store f into 'out';";
- PigServer pigServer = new PigServer(cluster.getExecType(), cluster.getProperties());
+ PigServer pigServer = new PigServer(cluster.getExecType(), properties);
PigContext pc = pigServer.getPigContext();
assertTrue((Util.buildMRPlan(Util.buildPp(pigServer, query), pc).getRoots().get(0).combinePlan
.isEmpty()));
@@ -112,7 +113,7 @@ public class TestCombiner {
@Test
public void testOnCluster() throws Exception {
// run the test on cluster
- PigServer pigServer = new PigServer(cluster.getExecType(), cluster.getProperties());
+ PigServer pigServer = new PigServer(cluster.getExecType(), properties);
String inputFileName = runTest(pigServer);
Util.deleteFile(cluster, inputFileName);
pigServer.shutdown();
@@ -187,9 +188,10 @@ public class TestCombiner {
}
}
Util.createInputFile(cluster, "MultiCombinerUseInput.txt", input);
- Properties props = cluster.getProperties();
- props.setProperty("io.sort.mb", "1");
- PigServer pigServer = new PigServer(cluster.getExecType(), props);
+ String oldValue = properties.getProperty("io.sort.mb");
+ properties.setProperty("io.sort.mb", "1");
+
+ PigServer pigServer = new PigServer(cluster.getExecType(), properties);
pigServer.registerQuery("a = load 'MultiCombinerUseInput.txt' as (x:int);");
pigServer.registerQuery("b = group a all;");
pigServer.registerQuery("c = foreach b generate COUNT(a), SUM(a.$0), " +
@@ -214,6 +216,8 @@ public class TestCombiner {
assertFalse(it.hasNext());
Util.deleteFile(cluster, "MultiCombinerUseInput.txt");
+ // Reset io.sort.mb to the original value before exit
+ properties.setProperty("io.sort.mb", oldValue);
pigServer.shutdown();
}
@@ -230,7 +234,7 @@ public class TestCombiner {
"pig1\t20\t3.1" };
Util.createInputFile(cluster, "distinctAggs1Input.txt", input);
- PigServer pigServer = new PigServer(cluster.getExecType(), cluster.getProperties());
+ PigServer pigServer = new PigServer(cluster.getExecType(), properties);
pigServer.registerQuery("a = load 'distinctAggs1Input.txt' as (name:chararray, age:int, gpa:double);");
pigServer.registerQuery("b = group a by name;");
pigServer.registerQuery("c = foreach b {" +
@@ -277,7 +281,7 @@ public class TestCombiner {
};
Util.createInputFile(cluster, "testGroupElements.txt", input);
- PigServer pigServer = new PigServer(cluster.getExecType(), cluster.getProperties());
+ PigServer pigServer = new PigServer(cluster.getExecType(), properties);
pigServer.registerQuery("a = load 'testGroupElements.txt' as (str:chararray, num1:int, alph : chararray, num2 : int);");
pigServer.registerQuery("b = group a by (str, num1);");
@@ -338,7 +342,7 @@ public class TestCombiner {
};
Util.createInputFile(cluster, "testGroupLimit.txt", input);
- PigServer pigServer = new PigServer(cluster.getExecType(), cluster.getProperties());
+ PigServer pigServer = new PigServer(cluster.getExecType(), properties);
pigServer.registerQuery("a = load 'testGroupLimit.txt' using PigStorage(' ') " +
"as (str:chararray, num1:int) ;");
pigServer.registerQuery("b = group a by str;");
@@ -387,7 +391,7 @@ public class TestCombiner {
"pig1\t20\t3.1" };
Util.createInputFile(cluster, "distinctNoCombinerInput.txt", input);
- PigServer pigServer = new PigServer(cluster.getExecType(), cluster.getProperties());
+ PigServer pigServer = new PigServer(cluster.getExecType(), properties);
pigServer.registerQuery("a = load 'distinctNoCombinerInput.txt' as (name:chararray, age:int, gpa:double);");
pigServer.registerQuery("b = group a by name;");
pigServer.registerQuery("c = foreach b {" +
@@ -438,7 +442,7 @@ public class TestCombiner {
"pig1\t20\t3.1" };
Util.createInputFile(cluster, "forEachNoCombinerInput.txt", input);
- PigServer pigServer = new PigServer(cluster.getExecType(), cluster.getProperties());
+ PigServer pigServer = new PigServer(cluster.getExecType(), properties);
pigServer.registerQuery("a = load 'forEachNoCombinerInput.txt' as (name:chararray, age:int, gpa:double);");
pigServer.registerQuery("b = group a by name;");
pigServer.registerQuery("c = foreach b {" +
@@ -497,7 +501,7 @@ public class TestCombiner {
try {
Util.createInputFile(cluster, "forEachNoCombinerInput.txt", input);
- PigServer pigServer = new PigServer(cluster.getExecType(), cluster.getProperties());
+ PigServer pigServer = new PigServer(cluster.getExecType(), properties);
pigServer.registerQuery("a = load 'forEachNoCombinerInput.txt' as (name:chararray, age:int, gpa:double);");
pigServer.registerQuery("b = group a by name;");
pigServer.registerQuery("c = foreach b generate group, SUM(a.age), a;");
@@ -543,7 +547,7 @@ public class TestCombiner {
try {
Util.createInputFile(cluster, "forEachNoCombinerInput.txt", input);
- PigServer pigServer = new PigServer(cluster.getExecType(), cluster.getProperties());
+ PigServer pigServer = new PigServer(cluster.getExecType(), properties);
pigServer.registerQuery("a = load 'forEachNoCombinerInput.txt' as (name:chararray, age:int, gpa:double);");
pigServer.registerQuery("b = group a all;");
pigServer.registerQuery("c = foreach b {" +
Modified: pig/branches/tez/test/org/apache/pig/test/TestCustomPartitioner.java
URL: http://svn.apache.org/viewvc/pig/branches/tez/test/org/apache/pig/test/TestCustomPartitioner.java?rev=1572357&r1=1572356&r2=1572357&view=diff
==============================================================================
--- pig/branches/tez/test/org/apache/pig/test/TestCustomPartitioner.java (original)
+++ pig/branches/tez/test/org/apache/pig/test/TestCustomPartitioner.java Wed Feb 26 23:57:57 2014
@@ -26,8 +26,13 @@ import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
+import java.util.Properties;
import java.util.Random;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.pig.PigConfiguration;
import org.apache.pig.PigServer;
import org.apache.pig.data.BagFactory;
import org.apache.pig.data.Tuple;
@@ -37,24 +42,31 @@ import org.apache.pig.impl.logicalLayer.
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.Before;
+import org.junit.BeforeClass;
import org.junit.Ignore;
import org.junit.Test;
-import org.junit.runner.RunWith;
-import org.junit.runners.JUnit4;
-@RunWith(JUnit4.class)
public class TestCustomPartitioner {
- private static MiniGenericCluster cluster = MiniGenericCluster.buildCluster();
- private PigServer pigServer;
+ private static MiniGenericCluster cluster;
+ private static Properties properties;
+ private static PigServer pigServer;
+ private static FileSystem fs;
TupleFactory mTf = TupleFactory.getInstance();
BagFactory mBf = BagFactory.getInstance();
@Before
- public void setUp() throws Exception{
+ public void setUp() throws Exception {
FileLocalizer.setR(new Random());
- pigServer = new PigServer(cluster.getExecType(), cluster.getProperties());
+ pigServer = new PigServer(cluster.getExecType(), properties);
+ }
+
+ @BeforeClass
+ public static void oneTimeSetUp() throws Exception {
+ cluster = MiniGenericCluster.buildCluster();
+ properties = cluster.getProperties();
+ fs = cluster.getFileSystem();
}
@AfterClass
@@ -67,7 +79,7 @@ public class TestCustomPartitioner {
@Ignore
// Fails with Tez - DefaultSorter.java Illegal partition for Null: false index: 0 1 (-1), TotalPartitions: 0
public void testCustomPartitionerParseJoins() throws Exception{
- String[] input = {
+ String[] input = {
"1\t3",
"1\t2"
};
@@ -77,12 +89,12 @@ public class TestCustomPartitioner {
try {
pigServer.registerQuery("A = LOAD 'table_testCustomPartitionerParseJoins' as (a0:int, a1:int);");
pigServer.registerQuery("B = ORDER A by $0;");
- pigServer.registerQuery("skewed = JOIN A by $0, B by $0 USING 'skewed' PARTITION BY org.apache.pig.test.utils.SimpleCustomPartitioner;");
- //control should not reach here
- Assert.fail("Skewed join cannot accept a custom partitioner");
+ pigServer.registerQuery("skewed = JOIN A by $0, B by $0 USING 'skewed' PARTITION BY org.apache.pig.test.utils.SimpleCustomPartitioner;");
+ //control should not reach here
+ Assert.fail("Skewed join cannot accept a custom partitioner");
} catch(FrontendException e) {
- Assert.assertTrue( e.getMessage().contains( "Custom Partitioner is not supported for skewed join" ) );
- }
+ Assert.assertTrue( e.getMessage().contains( "Custom Partitioner is not supported for skewed join" ) );
+ }
pigServer.registerQuery("hash = JOIN A by $0, B by $0 USING 'hash' PARTITION BY org.apache.pig.test.utils.SimpleCustomPartitioner;");
Iterator<Tuple> iter = pigServer.openIterator("hash");
@@ -120,8 +132,8 @@ public class TestCustomPartitioner {
};
Util.createInputFile(cluster, "table_testCustomPartitionerGroups", input);
+ String outputDir = "tmp_testCustomPartitionerGroup";
pigServer.registerQuery("A = LOAD 'table_testCustomPartitionerGroups' as (a0:int, a1:int);");
-
// It should be noted that for a map reduce job, the total number of partitions
// is the same as the number of reduce tasks for the job. Hence we need to find a case wherein
// we will get more than one reduce job so that we can use the partitioner.
@@ -130,28 +142,30 @@ public class TestCustomPartitioner {
// partition number is bigger than 1.
//
pigServer.registerQuery("B = group A by $0 PARTITION BY org.apache.pig.test.utils.SimpleCustomPartitioner3 parallel 2;");
+ pigServer.store("B", outputDir);
- pigServer.store("B", "tmp_testCustomPartitionerGroups");
-
- new File("tmp_testCustomPartitionerGroups").mkdir();
+ new File(outputDir).mkdir();
+ FileStatus[] outputFiles = fs.listStatus(new Path(outputDir), Util.getSuccessMarkerPathFilter());
- Util.copyFromClusterToLocal(cluster, "tmp_testCustomPartitionerGroups/part-r-00000", "tmp_testCustomPartitionerGroups/part-r-00000");
- BufferedReader reader = new BufferedReader(new FileReader("tmp_testCustomPartitionerGroups/part-r-00000"));
- String line = null;
- while((line = reader.readLine()) != null) {
+ Util.copyFromClusterToLocal(cluster, outputFiles[0].getPath().toString(), outputDir + "/" + 0);
+ BufferedReader reader = new BufferedReader(new FileReader(outputDir + "/" + 0));
+ while(reader.readLine() != null) {
Assert.fail("Partition 0 should be empty. Most likely Custom Partitioner was not used.");
}
- Util.copyFromClusterToLocal(cluster, "tmp_testCustomPartitionerGroups/part-r-00001", "tmp_testCustomPartitionerGroups/part-r-00001");
- reader = new BufferedReader(new FileReader("tmp_testCustomPartitionerGroups/part-r-00001"));
- line = null;
+ reader.close();
+
+ Util.copyFromClusterToLocal(cluster, outputFiles[1].getPath().toString(), outputDir + "/" + 1);
+ reader = new BufferedReader(new FileReader(outputDir + "/" + 1));
int count=0;
- while((line = reader.readLine()) != null) {
+ while(reader.readLine() != null) {
//all outputs should come to partition 1 (with SimpleCustomPartitioner3)
count++;
}
+ reader.close();
Assert.assertEquals(4, count);
- Util.deleteDirectory(new File("tmp_testCustomPartitionerGroups"));
- Util.deleteFile(cluster, "tmp_testCustomPartitionerGroups");
+
+ Util.deleteDirectory(new File(outputDir));
+ Util.deleteFile(cluster, outputDir);
Util.deleteFile(cluster, "table_testCustomPartitionerGroups");
}
@@ -167,32 +181,34 @@ public class TestCustomPartitioner {
};
Util.createInputFile(cluster, "table_testCustomPartitionerDistinct", input);
+ String outputDir = "tmp_testCustomPartitionerDistinct";
pigServer.registerQuery("A = LOAD 'table_testCustomPartitionerDistinct' as (a0:int, a1:int);");
pigServer.registerQuery("B = distinct A PARTITION BY org.apache.pig.test.utils.SimpleCustomPartitioner3 parallel 2;");
- pigServer.store("B", "tmp_testCustomPartitionerDistinct");
+ pigServer.store("B", outputDir);
- new File("tmp_testCustomPartitionerDistinct").mkdir();
+ new File(outputDir).mkdir();
+ FileStatus[] outputFiles = fs.listStatus(new Path(outputDir), Util.getSuccessMarkerPathFilter());
// SimpleCustomPartitioner3 simply partitions all inputs to the *second* reducer
- Util.copyFromClusterToLocal(cluster, "tmp_testCustomPartitionerDistinct/part-r-00000", "tmp_testCustomPartitionerDistinct/part-r-00000");
- BufferedReader reader = new BufferedReader(new FileReader("tmp_testCustomPartitionerDistinct/part-r-00000"));
- String line = null;
- while((line = reader.readLine()) != null) {
+ Util.copyFromClusterToLocal(cluster, outputFiles[0].getPath().toString(), outputDir + "/" + 0);
+ BufferedReader reader = new BufferedReader(new FileReader(outputDir + "/" + 0));
+ while (reader.readLine() != null) {
Assert.fail("Partition 0 should be empty. Most likely Custom Partitioner was not used.");
}
reader.close();
- Util.copyFromClusterToLocal(cluster, "tmp_testCustomPartitionerDistinct/part-r-00001", "tmp_testCustomPartitionerDistinct/part-r-00001");
- reader = new BufferedReader(new FileReader("tmp_testCustomPartitionerDistinct/part-r-00001"));
- line = null;
+
+ Util.copyFromClusterToLocal(cluster, outputFiles[1].getPath().toString(), outputDir + "/" + 1);
+ reader = new BufferedReader(new FileReader(outputDir + "/" + 1));
int count=0;
- while((line = reader.readLine()) != null) {
+ while (reader.readLine() != null) {
//all outputs should come to partition 1 (with SimpleCustomPartitioner3)
count++;
}
reader.close();
Assert.assertEquals(4, count);
- Util.deleteDirectory(new File("tmp_testCustomPartitionerDistinct"));
- Util.deleteFile(cluster, "tmp_testCustomPartitionerDistinct");
+
+ Util.deleteDirectory(new File(outputDir));
+ Util.deleteFile(cluster, outputDir);
Util.deleteFile(cluster, "table_testCustomPartitionerDistinct");
}
Modified: pig/branches/tez/test/org/apache/pig/test/TestSkewedJoin.java
URL: http://svn.apache.org/viewvc/pig/branches/tez/test/org/apache/pig/test/TestSkewedJoin.java?rev=1572357&r1=1572356&r2=1572357&view=diff
==============================================================================
--- pig/branches/tez/test/org/apache/pig/test/TestSkewedJoin.java (original)
+++ pig/branches/tez/test/org/apache/pig/test/TestSkewedJoin.java Wed Feb 26 23:57:57 2014
@@ -30,11 +30,12 @@ import java.io.IOException;
import java.io.PrintWriter;
import java.util.Iterator;
import java.util.Map;
+import java.util.Properties;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.PathFilter;
+import org.apache.pig.PigConfiguration;
import org.apache.pig.PigServer;
import org.apache.pig.data.BagFactory;
import org.apache.pig.data.DataBag;
@@ -60,20 +61,22 @@ public class TestSkewedJoin {
private static final String INPUT_DIR = "build/test/data";
private static final String OUTPUT_DIR = "build/test/output";
- private PigServer pigServer;
private static FileSystem fs;
+ private static PigServer pigServer;
+ private static Properties properties;
private static MiniGenericCluster cluster;
@Before
public void setUp() throws Exception {
- pigServer = new PigServer(cluster.getExecType(), cluster.getProperties());
- pigServer.getPigContext().getProperties().setProperty("pig.skewedjoin.reduce.maxtuple", "5");
- pigServer.getPigContext().getProperties().setProperty("pig.skewedjoin.reduce.memusage", "0.01");
+ pigServer = new PigServer(cluster.getExecType(), properties);
}
@BeforeClass
public static void oneTimeSetUp() throws Exception {
cluster = MiniGenericCluster.buildCluster();
+ properties = cluster.getProperties();
+ properties.setProperty("pig.skewedjoin.reduce.maxtuple", "5");
+ properties.setProperty("pig.skewedjoin.reduce.memusage", "0.01");
fs = cluster.getFileSystem();
createFiles();
}
@@ -206,7 +209,7 @@ public class TestSkewedJoin {
@Test
public void testSkewedJoinWithNoProperties() throws IOException{
- pigServer = new PigServer(cluster.getExecType(), cluster.getProperties());
+ pigServer = new PigServer(cluster.getExecType(), properties);
pigServer.registerQuery("A = LOAD '" + INPUT_FILE1 + "' as (id, name, n);");
pigServer.registerQuery("B = LOAD '" + INPUT_FILE2 + "' as (id, name);");
@@ -288,12 +291,7 @@ public class TestSkewedJoin {
int[][] lineCount = new int[3][7];
- FileStatus[] outputFiles = fs.listStatus(new Path(outputDir), new PathFilter() {
- @Override
- public boolean accept(Path p) {
- return !p.getName().startsWith("_");
- }
- });
+ FileStatus[] outputFiles = fs.listStatus(new Path(outputDir), Util.getSuccessMarkerPathFilter());
// check how many times a key appear in each part- file
for (int i=0; i<7; i++) {
String filename = outputFiles[i].getPath().toString();
Modified: pig/branches/tez/test/org/apache/pig/test/TestSplitStore.java
URL: http://svn.apache.org/viewvc/pig/branches/tez/test/org/apache/pig/test/TestSplitStore.java?rev=1572357&r1=1572356&r2=1572357&view=diff
==============================================================================
--- pig/branches/tez/test/org/apache/pig/test/TestSplitStore.java (original)
+++ pig/branches/tez/test/org/apache/pig/test/TestSplitStore.java Wed Feb 26 23:57:57 2014
@@ -21,23 +21,27 @@ package org.apache.pig.test;
import java.io.File;
import java.io.FileOutputStream;
import java.io.PrintStream;
+import java.util.Properties;
+import org.apache.pig.PigConfiguration;
import org.apache.pig.PigServer;
import org.apache.pig.impl.PigContext;
import org.apache.pig.impl.io.FileLocalizer;
import org.junit.AfterClass;
import org.junit.Before;
+import org.junit.BeforeClass;
import org.junit.Test;
public class TestSplitStore {
- private PigServer pig;
- private PigContext pigContext;
- private File tmpFile;
- private static MiniGenericCluster cluster = MiniGenericCluster.buildCluster();
+ private static File tmpFile;
+ private static PigServer pig;
+ private static PigContext pigContext;
+ private static Properties properties;
+ private static MiniGenericCluster cluster;
@Before
public void setUp() throws Exception {
- pig = new PigServer(cluster.getExecType(), cluster.getProperties());
+ pig = new PigServer(cluster.getExecType(), properties);
pigContext = pig.getPigContext();
int LOOP_SIZE = 20;
tmpFile = File.createTempFile("test", "txt");
@@ -49,6 +53,12 @@ public class TestSplitStore {
ps.close();
}
+ @BeforeClass
+ public static void oneTimeSetUp() throws Exception {
+ cluster = MiniGenericCluster.buildCluster();
+ properties = cluster.getProperties();
+ }
+
@AfterClass
public static void oneTimeTearDown() throws Exception {
cluster.shutDown();
Modified: pig/branches/tez/test/org/apache/pig/test/Util.java
URL: http://svn.apache.org/viewvc/pig/branches/tez/test/org/apache/pig/test/Util.java?rev=1572357&r1=1572356&r2=1572357&view=diff
==============================================================================
--- pig/branches/tez/test/org/apache/pig/test/Util.java (original)
+++ pig/branches/tez/test/org/apache/pig/test/Util.java Wed Feb 26 23:57:57 2014
@@ -1032,6 +1032,7 @@ public class Util {
while ((line = reader.readLine()) != null) {
logMessage = logMessage + line + "\n";
}
+ reader.close();
for (int i = 0; i < messages.length; i++) {
boolean present = logMessage.contains(messages[i]);
if (expected) {
@@ -1212,6 +1213,7 @@ public class Util {
result += line;
result += "\n";
}
+ reader.close();
return result;
}
@@ -1274,4 +1276,17 @@ public class Util {
return true;
return false;
}
+
+ /**
+ * Returns a PathFilter that filters out filenames that start with _.
+ * @return PathFilter
+ */
+ public static PathFilter getSuccessMarkerPathFilter() {
+ return new PathFilter() {
+ @Override
+ public boolean accept(Path p) {
+ return !p.getName().startsWith("_");
+ }
+ };
+ }
}
Modified: pig/branches/tez/test/org/apache/pig/test/data/GoldenFiles/TEZC1.gld
URL: http://svn.apache.org/viewvc/pig/branches/tez/test/org/apache/pig/test/data/GoldenFiles/TEZC1.gld?rev=1572357&r1=1572356&r2=1572357&view=diff
==============================================================================
--- pig/branches/tez/test/org/apache/pig/test/data/GoldenFiles/TEZC1.gld (original)
+++ pig/branches/tez/test/org/apache/pig/test/data/GoldenFiles/TEZC1.gld Wed Feb 26 23:57:57 2014
@@ -32,4 +32,4 @@ c: Store(file:///tmp/output:org.apache.p
| |
| |---Project[bytearray][1] - scope-4
|
- |---a: Load(file:///tmp/input:org.apache.pig.builtin.PigStorage) - scope-0
\ No newline at end of file
+ |---a: Load(file:///tmp/input:org.apache.pig.builtin.PigStorage) - scope-0
Modified: pig/branches/tez/test/org/apache/pig/test/data/GoldenFiles/TEZC12.gld
URL: http://svn.apache.org/viewvc/pig/branches/tez/test/org/apache/pig/test/data/GoldenFiles/TEZC12.gld?rev=1572357&r1=1572356&r2=1572357&view=diff
==============================================================================
--- pig/branches/tez/test/org/apache/pig/test/data/GoldenFiles/TEZC12.gld (original)
+++ pig/branches/tez/test/org/apache/pig/test/data/GoldenFiles/TEZC12.gld Wed Feb 26 23:57:57 2014
@@ -22,4 +22,4 @@ b: Store(file:///tmp/output:org.apache.p
| |
| |---Project[bytearray][1] - scope-4
|
- |---a: Load(file:///tmp/input:PigStorage(',')) - scope-0
\ No newline at end of file
+ |---a: Load(file:///tmp/input:PigStorage(',')) - scope-0
Modified: pig/branches/tez/test/org/apache/pig/test/data/GoldenFiles/TEZC14.gld
URL: http://svn.apache.org/viewvc/pig/branches/tez/test/org/apache/pig/test/data/GoldenFiles/TEZC14.gld?rev=1572357&r1=1572356&r2=1572357&view=diff
==============================================================================
--- pig/branches/tez/test/org/apache/pig/test/data/GoldenFiles/TEZC14.gld (original)
+++ pig/branches/tez/test/org/apache/pig/test/data/GoldenFiles/TEZC14.gld Wed Feb 26 23:57:57 2014
@@ -42,4 +42,4 @@ c: Store(file:///tmp/output:org.apache.p
| |
| |---Project[bag][1] - scope-18
|
- |---b: Package(Packager)[tuple]{int} - scope-12
\ No newline at end of file
+ |---b: Package(Packager)[tuple]{int} - scope-12
Modified: pig/branches/tez/test/org/apache/pig/test/data/GoldenFiles/TEZC15.gld
URL: http://svn.apache.org/viewvc/pig/branches/tez/test/org/apache/pig/test/data/GoldenFiles/TEZC15.gld?rev=1572357&r1=1572356&r2=1572357&view=diff
==============================================================================
--- pig/branches/tez/test/org/apache/pig/test/data/GoldenFiles/TEZC15.gld (original)
+++ pig/branches/tez/test/org/apache/pig/test/data/GoldenFiles/TEZC15.gld Wed Feb 26 23:57:57 2014
@@ -50,4 +50,4 @@ c: Store(file:///tmp/output:org.apache.p
| |
| |---Project[bag][1] - scope-48
|
- |---b: Package(Packager)[tuple]{int} - scope-42
\ No newline at end of file
+ |---b: Package(Packager)[tuple]{int} - scope-42
Modified: pig/branches/tez/test/org/apache/pig/test/data/GoldenFiles/TEZC16.gld
URL: http://svn.apache.org/viewvc/pig/branches/tez/test/org/apache/pig/test/data/GoldenFiles/TEZC16.gld?rev=1572357&r1=1572356&r2=1572357&view=diff
==============================================================================
--- pig/branches/tez/test/org/apache/pig/test/data/GoldenFiles/TEZC16.gld (original)
+++ pig/branches/tez/test/org/apache/pig/test/data/GoldenFiles/TEZC16.gld Wed Feb 26 23:57:57 2014
@@ -66,4 +66,4 @@ b: Store(file:///tmp/output:org.apache.p
| |
| Project[bag][1] - scope-32
|
- |---Package(LitePackager)[tuple]{int} - scope-31
\ No newline at end of file
+ |---Package(LitePackager)[tuple]{int} - scope-31
Modified: pig/branches/tez/test/org/apache/pig/test/data/GoldenFiles/TEZC17.gld
URL: http://svn.apache.org/viewvc/pig/branches/tez/test/org/apache/pig/test/data/GoldenFiles/TEZC17.gld?rev=1572357&r1=1572356&r2=1572357&view=diff
==============================================================================
--- pig/branches/tez/test/org/apache/pig/test/data/GoldenFiles/TEZC17.gld (original)
+++ pig/branches/tez/test/org/apache/pig/test/data/GoldenFiles/TEZC17.gld Wed Feb 26 23:57:57 2014
@@ -98,4 +98,4 @@ d: Store(file:///tmp/output:org.apache.p
| |
| Project[bag][2] - scope-53
|
- |---Package(Packager)[tuple]{int} - scope-51
\ No newline at end of file
+ |---Package(Packager)[tuple]{int} - scope-51
Modified: pig/branches/tez/test/org/apache/pig/test/data/GoldenFiles/TEZC2.gld
URL: http://svn.apache.org/viewvc/pig/branches/tez/test/org/apache/pig/test/data/GoldenFiles/TEZC2.gld?rev=1572357&r1=1572356&r2=1572357&view=diff
==============================================================================
--- pig/branches/tez/test/org/apache/pig/test/data/GoldenFiles/TEZC2.gld (original)
+++ pig/branches/tez/test/org/apache/pig/test/data/GoldenFiles/TEZC2.gld Wed Feb 26 23:57:57 2014
@@ -62,4 +62,4 @@ c: Store(file:///tmp/output:org.apache.p
| |
| |---Project[bag][1] - scope-31
|
- |---b: Package(CombinerPackager)[tuple]{int} - scope-9
\ No newline at end of file
+ |---b: Package(CombinerPackager)[tuple]{int} - scope-9
Modified: pig/branches/tez/test/org/apache/pig/test/data/GoldenFiles/TEZC3.gld
URL: http://svn.apache.org/viewvc/pig/branches/tez/test/org/apache/pig/test/data/GoldenFiles/TEZC3.gld?rev=1572357&r1=1572356&r2=1572357&view=diff
==============================================================================
--- pig/branches/tez/test/org/apache/pig/test/data/GoldenFiles/TEZC3.gld (original)
+++ pig/branches/tez/test/org/apache/pig/test/data/GoldenFiles/TEZC3.gld Wed Feb 26 23:57:57 2014
@@ -60,4 +60,4 @@ d: Store(file:///tmp/output:org.apache.p
| |
| Project[bag][2] - scope-25
|
- |---c: Package(Packager)[tuple]{int} - scope-19
\ No newline at end of file
+ |---c: Package(Packager)[tuple]{int} - scope-19
Modified: pig/branches/tez/test/org/apache/pig/test/data/GoldenFiles/TEZC4.gld
URL: http://svn.apache.org/viewvc/pig/branches/tez/test/org/apache/pig/test/data/GoldenFiles/TEZC4.gld?rev=1572357&r1=1572356&r2=1572357&view=diff
==============================================================================
--- pig/branches/tez/test/org/apache/pig/test/data/GoldenFiles/TEZC4.gld (original)
+++ pig/branches/tez/test/org/apache/pig/test/data/GoldenFiles/TEZC4.gld Wed Feb 26 23:57:57 2014
@@ -36,4 +36,4 @@ c: Store(file:///tmp/output:org.apache.p
| |
| Project[tuple][1] - scope-15
|
- |---b: Package(Packager)[tuple]{tuple} - scope-14
\ No newline at end of file
+ |---b: Package(Packager)[tuple]{tuple} - scope-14
Modified: pig/branches/tez/test/org/apache/pig/test/data/GoldenFiles/TEZC5.gld
URL: http://svn.apache.org/viewvc/pig/branches/tez/test/org/apache/pig/test/data/GoldenFiles/TEZC5.gld?rev=1572357&r1=1572356&r2=1572357&view=diff
==============================================================================
--- pig/branches/tez/test/org/apache/pig/test/data/GoldenFiles/TEZC5.gld (original)
+++ pig/branches/tez/test/org/apache/pig/test/data/GoldenFiles/TEZC5.gld Wed Feb 26 23:57:57 2014
@@ -46,4 +46,4 @@ c: Store(file:///tmp/output:org.apache.p
| |
| Project[tuple][0] - scope-23
|
- |---Package(Packager)[tuple]{tuple} - scope-22
\ No newline at end of file
+ |---Package(Packager)[tuple]{tuple} - scope-22
Modified: pig/branches/tez/test/org/apache/pig/test/data/GoldenFiles/TEZC6.gld
URL: http://svn.apache.org/viewvc/pig/branches/tez/test/org/apache/pig/test/data/GoldenFiles/TEZC6.gld?rev=1572357&r1=1572356&r2=1572357&view=diff
==============================================================================
--- pig/branches/tez/test/org/apache/pig/test/data/GoldenFiles/TEZC6.gld (original)
+++ pig/branches/tez/test/org/apache/pig/test/data/GoldenFiles/TEZC6.gld Wed Feb 26 23:57:57 2014
@@ -2,13 +2,13 @@
# There are 1 DAGs in the session
#--------------------------------------------------
#--------------------------------------------------
-# TEZ DAG plan: scope-30
+# TEZ DAG plan: scope-33
#--------------------------------------------------
Tez vertex scope-24
Tez vertex scope-24
# Plan on vertex
-1-1: Split - scope-8
+Split - scope-32
| |
| b: Store(file:///tmp/output/b:org.apache.pig.builtin.PigStorage) - scope-13
| |
@@ -50,4 +50,4 @@ Tez vertex scope-24
| |
| |---Project[bytearray][1] - scope-4
|
- |---a: Load(file:///tmp/input:org.apache.pig.builtin.PigStorage) - scope-0
\ No newline at end of file
+ |---a: Load(file:///tmp/input:org.apache.pig.builtin.PigStorage) - scope-0
Modified: pig/branches/tez/test/org/apache/pig/tez/TestTezCompiler.java
URL: http://svn.apache.org/viewvc/pig/branches/tez/test/org/apache/pig/tez/TestTezCompiler.java?rev=1572357&r1=1572356&r2=1572357&view=diff
==============================================================================
--- pig/branches/tez/test/org/apache/pig/tez/TestTezCompiler.java (original)
+++ pig/branches/tez/test/org/apache/pig/tez/TestTezCompiler.java Wed Feb 26 23:57:57 2014
@@ -21,6 +21,7 @@ import static org.junit.Assert.assertEqu
import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;
+import java.io.FileOutputStream;
import java.io.PrintStream;
import java.util.Properties;
@@ -55,6 +56,11 @@ public class TestTezCompiler {
private static PigServer pigServer;
private static final int MAX_SIZE = 100000;
+ // If the golden files ever need to be regenerated, set this to true.
+ // WARNING: THIS WILL OVERWRITE THE GOLDEN FILES, so use it with caution
+ // and only for the test cases you need and are sure of.
+ private boolean generate = false;
+
@BeforeClass
public static void setUpBeforeClass() throws Exception {
pc = new PigContext(new TezLocalExecType(), new Properties());
@@ -306,6 +312,12 @@ public class TestTezCompiler {
System.out.println();
System.out.println("<<<" + compiledPlan + ">>>");
+ if (generate) {
+ FileOutputStream fos = new FileOutputStream(expectedFile);
+ fos.write(baos.toByteArray());
+ fos.close();
+ return;
+ }
FileInputStream fis = new FileInputStream(expectedFile);
byte[] b = new byte[MAX_SIZE];
int len = fis.read(b);