You are viewing a plain text version of this content. The canonical link for it was a hyperlink that is not preserved in this plain text copy.
Posted to commits@pig.apache.org by ga...@apache.org on 2008/04/23 00:58:33 UTC

svn commit: r650688 - in /incubator/pig/trunk: CHANGES.txt src/org/apache/pig/pen/DisplayExamples.java src/org/apache/pig/pen/ExGen.java test/org/apache/pig/test/TestExGenCogroup.java test/org/apache/pig/test/TestExGenEval.java

Author: gates
Date: Tue Apr 22 15:58:28 2008
New Revision: 650688

URL: http://svn.apache.org/viewvc?rev=650688&view=rev
Log:
PIG-207 Fix illustrate command to work in mapreduce mode.


Modified:
    incubator/pig/trunk/CHANGES.txt
    incubator/pig/trunk/src/org/apache/pig/pen/DisplayExamples.java
    incubator/pig/trunk/src/org/apache/pig/pen/ExGen.java
    incubator/pig/trunk/test/org/apache/pig/test/TestExGenCogroup.java
    incubator/pig/trunk/test/org/apache/pig/test/TestExGenEval.java

Modified: incubator/pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/incubator/pig/trunk/CHANGES.txt?rev=650688&r1=650687&r2=650688&view=diff
==============================================================================
--- incubator/pig/trunk/CHANGES.txt (original)
+++ incubator/pig/trunk/CHANGES.txt Tue Apr 22 15:58:28 2008
@@ -246,3 +246,7 @@
 
 	PIG-216 Fix streaming to work with commands that use unix pipes (acmurthy
 	via gates).
+
+	PIG-207 Fix illustrate command to work in mapreduce mode (shubhamc via
+	gates).
+

Modified: incubator/pig/trunk/src/org/apache/pig/pen/DisplayExamples.java
URL: http://svn.apache.org/viewvc/incubator/pig/trunk/src/org/apache/pig/pen/DisplayExamples.java?rev=650688&r1=650687&r2=650688&view=diff
==============================================================================
--- incubator/pig/trunk/src/org/apache/pig/pen/DisplayExamples.java (original)
+++ incubator/pig/trunk/src/org/apache/pig/pen/DisplayExamples.java Tue Apr 22 15:58:28 2008
@@ -38,7 +38,7 @@
 public class DisplayExamples {
 	
 	public static StringBuffer Result = new StringBuffer();
-	public static final int MAX_DATAATOM_LENGTH = 10;
+	public static final int MAX_DATAATOM_LENGTH = 15;
 	
     public static String PrintTabular(LogicalPlan lp, Map<LogicalOperator, DataBag> exampleData) {
     	StringBuffer output = new StringBuffer();
@@ -96,13 +96,13 @@
     	}
     	
     	//Display the schema first
-    	output.append(AddSpaces(total + 3*cols + 9, false) + "\n");
+    	output.append(AddSpaces(total + 3*(cols +1) + aliasLength + 1, false) + "\n");
     	output.append("| " + op.getAlias() + AddSpaces(4, true) + " | ");
     	for(int i = 0; i < cols; ++i) {
     		String field = fields.get(i).toString();
     		output.append(field + AddSpaces(maxColSizes[i] - field.length(), true) + " | ");
     	}
-    	output.append("\n" + AddSpaces(total + 3*cols + 9, false) + "\n");
+    	output.append("\n" + AddSpaces(total + 3*(cols +1) + aliasLength + 1, false) + "\n");
     	//now start displaying the data
     	for(int i = 0; i < rows; ++i) {
     		output.append("| " + AddSpaces(aliasLength, true) + " | ");
@@ -113,7 +113,7 @@
     		output.append("\n");
     	}
     	//now display the finish line
-    	output.append(AddSpaces(total + 3*cols + 9, false) + "\n");
+    	output.append(AddSpaces(total + 3*(cols +1) + aliasLength + 1, false) + "\n");
     }
     
     static String[][] MakeArray(LogicalOperator op, DataBag bag) {
@@ -144,7 +144,7 @@
     static String ShortenField(DataAtom da) {
     	int length = da.toString().length();
     	if(length > MAX_DATAATOM_LENGTH) {
-    		return new String(da.toString().substring(0, 3) + " ... " + da.toString().substring(length - 4, length - 1)); 
+    		return new String(da.toString().substring(0, 4) + " ... " + da.toString().substring(length - 4, length - 1)); 
     	}
     	return da.toString();
     }
@@ -159,7 +159,7 @@
     		while(it.hasNext()) {
     			Tuple t = it.next();
     			if(!it.hasNext()) {
-    				str.append(", ... " + ShortenField(t));
+    				str.append(", ..., " + ShortenField(t));
     			}
     		}
     	} else {
@@ -182,7 +182,7 @@
     	if(noFields > 3) {
     		Datum d = t.getField(0);
 			
-			str.append(ShortenField(d) + ", ...");
+			str.append(ShortenField(d) + ", ..., ");
 			d = t.getField(noFields - 1);
 			
 			str.append(ShortenField(d));

Modified: incubator/pig/trunk/src/org/apache/pig/pen/ExGen.java
URL: http://svn.apache.org/viewvc/incubator/pig/trunk/src/org/apache/pig/pen/ExGen.java?rev=650688&r1=650687&r2=650688&view=diff
==============================================================================
--- incubator/pig/trunk/src/org/apache/pig/pen/ExGen.java (original)
+++ incubator/pig/trunk/src/org/apache/pig/pen/ExGen.java Tue Apr 22 15:58:28 2008
@@ -24,6 +24,7 @@
 import java.util.LinkedList;
 import java.util.Map;
 
+import org.apache.pig.PigServer.ExecType;
 import org.apache.pig.backend.executionengine.ExecException;
 import org.apache.pig.backend.executionengine.ExecPhysicalOperator;
 import org.apache.pig.backend.executionengine.ExecPhysicalPlan;
@@ -45,14 +46,16 @@
     
     static Map<LOLoad, DataBag> GlobalBaseData = new HashMap<LOLoad, DataBag>();
     
-    public static Map<LogicalOperator, DataBag> GenerateExamples(LogicalPlan plan, PigContext pigContext) throws IOException {
+    public static Map<LogicalOperator, DataBag> GenerateExamples(LogicalPlan plan, PigContext hadoopPigContext) throws IOException {
         
     	long time = System.currentTimeMillis();
     	String Result;
+    	PigContext pigContext = new PigContext(ExecType.LOCAL, hadoopPigContext.getProperties());
     	
     	//compile the logical plan to get the physical plan once and for all
     	ExecPhysicalPlan PhyPlan = null;
     	try {
+    		pigContext.connect();
 			PhyPlan = pigContext.getExecutionEngine().compile(plan, null);
 		} catch (ExecException e1) {
 			// TODO Auto-generated catch block
@@ -62,7 +65,7 @@
 		Map<OperatorKey, ExecPhysicalOperator> physicalOpTable = PhyPlan.getOpTable();
     	    	
         // Acquire initial base data by sampling from input relations (this is idempotent)
-    	FetchBaseData.ReadBaseData(plan.getRootOperator(), GlobalBaseData, SAMPLE_SIZE, pigContext);
+    	FetchBaseData.ReadBaseData(plan.getRootOperator(), GlobalBaseData, SAMPLE_SIZE, hadoopPigContext);
     	    	
         /////// PASS 1: push data sample through query plan
         

Modified: incubator/pig/trunk/test/org/apache/pig/test/TestExGenCogroup.java
URL: http://svn.apache.org/viewvc/incubator/pig/trunk/test/org/apache/pig/test/TestExGenCogroup.java?rev=650688&r1=650687&r2=650688&view=diff
==============================================================================
--- incubator/pig/trunk/test/org/apache/pig/test/TestExGenCogroup.java (original)
+++ incubator/pig/trunk/test/org/apache/pig/test/TestExGenCogroup.java Tue Apr 22 15:58:28 2008
@@ -5,6 +5,8 @@
 import java.util.Random;
 
 import org.apache.pig.PigServer;
+import org.apache.pig.PigServer.ExecType;
+import org.apache.pig.impl.io.FileLocalizer;
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
@@ -12,21 +14,29 @@
 import junit.framework.TestCase;
 
 public class TestExGenCogroup extends TestCase{
-	File A, B;
+	String A, B;
 	private int MAX = 10;
 	
-	String initString = "local";
+	String initString = "mapreduce";
 	PigServer pig;
 	
+	MiniCluster cluster = MiniCluster.buildCluster();
+	
 	@Override
 	@Before
 	protected void setUp() throws Exception{
+		File fileA, fileB;
+		pig = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
 		System.out.println("Generating test data...");
-		A = File.createTempFile("dataA", ".dat");
-		B = File.createTempFile("dataB", ".dat");
+		fileA = File.createTempFile("dataA", ".dat");
+		fileB = File.createTempFile("dataB", ".dat");
+		
+		writeData(fileA);
+		writeData(fileB);
 		
-		writeData(A);
-		writeData(B);
+		A = "'" + FileLocalizer.hadoopify(fileA.toString(), pig.getPigContext()) + "'";
+		B = "'" + FileLocalizer.hadoopify(fileB.toString(), pig.getPigContext()) + "'";
+		System.out.println("A : " + A  + "\n" + "B : " + B);
 		System.out.println("Test data created.");
 		
 	}
@@ -39,39 +49,39 @@
 		
 		for(int i = 0; i < MAX; i++) 
 			dat.write((rand.nextInt(10) + "\t" + rand.nextInt(10) + "\n").getBytes());
+		dat.close();
 			
 	}
 	
 	@Override
 	@After
 	protected void tearDown() throws Exception {
-		A.delete();
-		B.delete();
+		
 	
 	}
 	
 	@Test
 	public void testCogroupMultipleCols() throws Exception {
-		pig = new PigServer(initString);
-		pig.registerQuery("A = load '" + A.toString() + "' as (x, y);");
-		pig.registerQuery("B = load '" + B.toString() + "' as (x, y);");
+		//pig = new PigServer(initString);
+		pig.registerQuery("A = load " + A + " as (x, y);");
+		pig.registerQuery("B = load " + B + " as (x, y);");
 		pig.registerQuery("C = cogroup A by (x, y), B by (x, y);");
 		pig.showExamples("C");
 	}
 	
 	@Test
 	public void testCogroup() throws Exception {
-		pig = new PigServer(initString);
-		pig.registerQuery("A = load '" + A.toString() + "' as (x, y);");
-		pig.registerQuery("B = load '" + B.toString() + "' as (x, y);");
+		//pig = new PigServer(initString);
+		pig.registerQuery("A = load " + A + " as (x, y);");
+		pig.registerQuery("B = load " + B + " as (x, y);");
 		pig.registerQuery("C = cogroup A by x, B by x;");
 		pig.showExamples("C");
 	}
 	
 	@Test
 	public void testGroup() throws Exception {
-		pig = new PigServer(initString);
-		pig.registerQuery("A = load '" + A.toString() + "' as (x, y);");
+		//pig = new PigServer(initString);
+		pig.registerQuery("A = load " + A.toString() + " as (x, y);");
 		pig.registerQuery("B = group A by x;");
 		pig.showExamples("B");
 		
@@ -79,9 +89,9 @@
 	
 	@Test
 	public void testComplexGroup() throws Exception {
-		pig = new PigServer(initString);
-		pig.registerQuery("A = load '" + A.toString() + "' as (x, y);");
-		pig.registerQuery("B = load '" + B.toString() + "' as (x, y);");
+		//pig = new PigServer(initString);
+		pig.registerQuery("A = load " + A.toString() + " as (x, y);");
+		pig.registerQuery("B = load " + B.toString() + " as (x, y);");
 		pig.registerQuery("C = cogroup A by x, B by x;");
 		pig.registerQuery("D = cogroup A by y, B by y;");
 		pig.registerQuery("E = cogroup C by $0, D by $0;");

Modified: incubator/pig/trunk/test/org/apache/pig/test/TestExGenEval.java
URL: http://svn.apache.org/viewvc/incubator/pig/trunk/test/org/apache/pig/test/TestExGenEval.java?rev=650688&r1=650687&r2=650688&view=diff
==============================================================================
--- incubator/pig/trunk/test/org/apache/pig/test/TestExGenEval.java (original)
+++ incubator/pig/trunk/test/org/apache/pig/test/TestExGenEval.java Tue Apr 22 15:58:28 2008
@@ -2,9 +2,14 @@
 
 import java.io.File;
 import java.io.FileOutputStream;
+import java.io.InputStream;
+import java.io.OutputStream;
 import java.util.Random;
 
 import org.apache.pig.PigServer;
+import org.apache.pig.PigServer.ExecType;
+import org.apache.pig.impl.PigContext;
+import org.apache.pig.impl.io.FileLocalizer;
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
@@ -13,26 +18,45 @@
 
 public class TestExGenEval extends TestCase {
 	
-	File A, B, C, D;
+	String A, B, C, D;
 	private int MAX = 10;
 	
-	String initString = "local";
+	String initString = "mapreduce";
 	PigServer pig;
+	PigContext pigContext;
+	
+	MiniCluster cluster = MiniCluster.buildCluster();
 	
 	@Override
 	@Before
 	protected void setUp() throws Exception{
 		System.out.println("Generating test data...");
-		A = File.createTempFile("dataA", ".dat");
-		B = File.createTempFile("dataB", ".dat");
-		C = File.createTempFile("dataC", ".dat");
-		D = File.createTempFile("dataD", ".dat");
-		
-		writeData(A);
-		writeData(B);
-		writeData(C);
-		writeData(D);
+		File fileA, fileB, fileC, fileD;
+		
+		pig = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
+		pigContext = pig.getPigContext();
+		fileA = File.createTempFile("dataA", ".dat");
+		fileB = File.createTempFile("dataB", ".dat");
+		fileC = File.createTempFile("dataC", ".dat");
+		fileD = File.createTempFile("dataD", ".dat");
+		
+		
+		writeData(fileA);
+		writeData(fileB);
+		writeData(fileC);
+		writeData(fileD);
+		
+		
+		A = "'" + FileLocalizer.hadoopify(fileA.toString(), pig.getPigContext()) + "'";
+		B = "'" + FileLocalizer.hadoopify(fileB.toString(), pig.getPigContext()) + "'";
+		C = "'" + FileLocalizer.hadoopify(fileC.toString(), pig.getPigContext()) + "'";
+		D = "'" + FileLocalizer.hadoopify(fileD.toString(), pig.getPigContext()) + "'";
+		
 		System.out.println("Test data created.");
+		fileA.delete();
+		fileB.delete();
+		fileC.delete();
+		fileD.delete();
 		
 	}
 	
@@ -50,17 +74,14 @@
 	@Override
 	@After
 	protected void tearDown() throws Exception {
-		A.delete();
-		B.delete();
-		C.delete();
-		D.delete();
+		
 	}
 	
 	@Test
 	public void testForeach() throws Exception {
-		pig = new PigServer(initString);
+		//pig = new PigServer(initString);
 		System.out.println("Testing Foreach statement...");
-		pig.registerQuery("A = load '" + A.toString() + "' as (x, y);");
+		pig.registerQuery("A = load " + A + " as (x, y);");
 		pig.registerQuery("B = foreach A generate x+y as sum;");
 		pig.showExamples("B");
 		assertEquals(1, 1);
@@ -68,8 +89,8 @@
 	
 	@Test 
 	public void testFilter() throws Exception {
-		pig = new PigServer(initString);
-		pig.registerQuery("A = load '" + A.toString() + "' as (x, y);");
+		//pig = new PigServer(initString);
+		pig.registerQuery("A = load " + A + " as (x, y);");
 		pig.registerQuery("B = filter A by x < 10.0;");
 		pig.showExamples("B");
 		assertEquals(1, 1);
@@ -77,11 +98,11 @@
 	
 	@Test
 	public void testFlatten() throws Exception {
-		pig = new PigServer(initString);
-		pig.registerQuery("A1 = load '" + A.toString() + "' as (x, y);");
-		pig.registerQuery("B1 = load '" + B.toString() + "' as (x, y);");
-		pig.registerQuery("C1 = load '" + C.toString() + "' as (x, y);");
-		pig.registerQuery("D1 = load '" + D.toString() + "' as (x, y);");
+		//pig = new PigServer(initString);
+		pig.registerQuery("A1 = load " + A + " as (x, y);");
+		pig.registerQuery("B1 = load " + B + " as (x, y);");
+		pig.registerQuery("C1 = load " + C + " as (x, y);");
+		pig.registerQuery("D1 = load " + D + " as (x, y);");
 		pig.registerQuery("E = join A1 by x, B1 by x;");
 		pig.registerQuery("F = join C1 by x, D1 by x;");
 		pig.registerQuery("G = join E by $0, F by $0;");