You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by kn...@apache.org on 2016/08/11 15:27:32 UTC
svn commit: r1756002 - in /pig/trunk: ./
src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/
src/org/apache/pig/newplan/logical/visitor/ test/org/apache/pig/test/
Author: knoguchi
Date: Thu Aug 11 15:27:32 2016
New Revision: 1756002
URL: http://svn.apache.org/viewvc?rev=1756002&view=rev
Log:
PIG-4933: TestDataBagAccess.testBagConstantFlatten1/TestLogicalPlanBuilder.testQuery90 broken after PIG-2315 (knoguchi)
Modified:
pig/trunk/CHANGES.txt
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POCast.java
pig/trunk/src/org/apache/pig/newplan/logical/visitor/ForEachUserSchemaVisitor.java
pig/trunk/test/org/apache/pig/test/TestLogicalPlanBuilder.java
pig/trunk/test/org/apache/pig/test/TestPlanGeneration.java
Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1756002&r1=1756001&r2=1756002&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Thu Aug 11 15:27:32 2016
@@ -40,6 +40,8 @@ OPTIMIZATIONS
BUG FIXES
+PIG-4933: TestDataBagAccess.testBagConstantFlatten1/TestLogicalPlanBuilder.testQuery90 broken after PIG-2315 (knoguchi)
+
PIG-4965: Refactor test/perf/pigmix/bin/runpigmix.pl to delete the output of single test case
if we enable cleanup_after_test (kellyzly via daijy)
Modified: pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POCast.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POCast.java?rev=1756002&r1=1756001&r2=1756002&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POCast.java (original)
+++ pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POCast.java Thu Aug 11 15:27:32 2016
@@ -1795,6 +1795,10 @@ public class POCast extends ExpressionOp
default:
throw new ExecException("Cannot convert "+ obj + " to " + fs, 1120, PigException.INPUT);
}
+ case DataType.BYTEARRAY:
+ //no-op (PIG-4933)
+ result = obj;
+ break;
default:
throw new ExecException("Don't know how to convert "+ obj + " to " + fs, 1120, PigException.INPUT);
}
Modified: pig/trunk/src/org/apache/pig/newplan/logical/visitor/ForEachUserSchemaVisitor.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/newplan/logical/visitor/ForEachUserSchemaVisitor.java?rev=1756002&r1=1756001&r2=1756002&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/newplan/logical/visitor/ForEachUserSchemaVisitor.java (original)
+++ pig/trunk/src/org/apache/pig/newplan/logical/visitor/ForEachUserSchemaVisitor.java Thu Aug 11 15:27:32 2016
@@ -41,6 +41,67 @@ public class ForEachUserSchemaVisitor ex
super(plan, new DependencyOrderWalker(plan));
}
+ private static LogicalSchema replaceNullByteArraySchema(
+ LogicalSchema originalSchema,
+ LogicalSchema userSchema) throws FrontendException {
+ if( originalSchema == null && userSchema == null ) {
+ return null;
+ } else if ( originalSchema == null ) {
+ return userSchema.deepCopy();
+ } else if ( userSchema == null ) {
+ return originalSchema.deepCopy();
+ }
+
+ LogicalSchema replacedSchema = new LogicalSchema();
+ for (int i=0;i<originalSchema.size();i++) {
+ LogicalFieldSchema replacedFS = replaceNullByteArrayFieldSchema(originalSchema.getField(i), userSchema.getField(i));
+ replacedSchema.addField(replacedFS);
+ }
+ return replacedSchema;
+ }
+
+ private static LogicalFieldSchema replaceNullByteArrayFieldSchema(
+ LogicalFieldSchema originalFS,
+ LogicalFieldSchema userFS) throws FrontendException {
+ if( originalFS == null && userFS == null ) {
+ return null;
+ } else if ( originalFS == null ) {
+ return userFS.deepCopy();
+ } else if ( userFS == null ) {
+ return originalFS.deepCopy();
+ }
+ if ( originalFS.type==DataType.NULL
+ || originalFS.type==DataType.BYTEARRAY ) {
+ return userFS.deepCopy();
+ } else if ( userFS.type==DataType.NULL
+ || userFS.type==DataType.BYTEARRAY ) {
+ // Use originalFS schema but keep the alias from userFS
+ return new LogicalFieldSchema(userFS.alias, originalFS.schema, originalFS.type);
+ }
+
+ if ( !DataType.isSchemaType(originalFS.type) ) {
+ return userFS.deepCopy();
+ } else {
+ LogicalSchema replacedSchema = replaceNullByteArraySchema(originalFS.schema, userFS.schema);
+ return new LogicalFieldSchema(userFS.alias, replacedSchema, userFS.type);
+ }
+ }
+
+ private static boolean hasOnlyNullOrByteArraySchema (LogicalFieldSchema fs) {
+ if( DataType.isSchemaType(fs.type) ) {
+ if( fs.schema != null ) {
+ for (LogicalFieldSchema sub_fs : fs.schema.getFields() ) {
+ if( !hasOnlyNullOrByteArraySchema(sub_fs) ) {
+ return false;
+ }
+ }
+ }
+ } else if( fs.type != DataType.NULL && fs.type != DataType.BYTEARRAY ) {
+ return false;
+ }
+ return true;
+ }
+
@Override
public void visit(LOForEach foreach) throws FrontendException {
LOGenerate generate = (LOGenerate)foreach.getInnerPlan().getSinks().get(0);
@@ -93,7 +154,7 @@ public class ForEachUserSchemaVisitor ex
// Use user defined schema to cast, this is the prevailing use case
if (mExpSchema==null) {
for (LogicalFieldSchema fs : mUserDefinedSchema.getFields()) {
- if (fs.type==DataType.NULL||fs.type==DataType.BYTEARRAY) {
+ if (hasOnlyNullOrByteArraySchema(fs)) {
addToExps(casterForEach, innerPlan, gen, exps, index, false, null);
} else {
addToExps(casterForEach, innerPlan, gen, exps, index, true, fs);
@@ -120,11 +181,12 @@ public class ForEachUserSchemaVisitor ex
" fields to " + mUserDefinedSchema.size(), 0, foreach.getLocation());
}
+ LogicalSchema replacedSchema = replaceNullByteArraySchema(mExpSchema,mUserDefinedSchema);
for (int j=0;j<mExpSchema.size();j++) {
LogicalFieldSchema mExpFieldSchema = mExpSchema.getField(j);
- LogicalFieldSchema mUserDefinedFieldSchema = mUserDefinedSchema.getField(j);
- if (mUserDefinedFieldSchema.type==DataType.NULL ||
- mUserDefinedFieldSchema.type==DataType.BYTEARRAY ||
+ LogicalFieldSchema mUserDefinedFieldSchema = replacedSchema.getField(j);
+
+ if (hasOnlyNullOrByteArraySchema(mUserDefinedFieldSchema) ||
LogicalFieldSchema.typeMatch(mExpFieldSchema, mUserDefinedFieldSchema)) {
addToExps(casterForEach, innerPlan, gen, exps, index, false, null);
} else {
@@ -147,10 +209,18 @@ public class ForEachUserSchemaVisitor ex
// 'generate' (LOGenerate) still holds the reference to this
// mUserDefinedSchemas
for( LogicalSchema mUserDefinedSchema : mUserDefinedSchemas ) {
- if( mUserDefinedSchema != null ) {
- for (LogicalFieldSchema fs : mUserDefinedSchema.getFields()) {
- fs.type = DataType.NULL;
- }
+ resetTypeToNull( mUserDefinedSchema );
+ }
+ }
+ }
+
+ private void resetTypeToNull (LogicalSchema s1) {
+ if( s1 != null ) {
+ for (LogicalFieldSchema fs : s1.getFields()) {
+ if( DataType.isSchemaType(fs.type) ) {
+ resetTypeToNull(fs.schema);
+ } else {
+ fs.type = DataType.NULL;
}
}
}
Modified: pig/trunk/test/org/apache/pig/test/TestLogicalPlanBuilder.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestLogicalPlanBuilder.java?rev=1756002&r1=1756001&r2=1756002&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/TestLogicalPlanBuilder.java (original)
+++ pig/trunk/test/org/apache/pig/test/TestLogicalPlanBuilder.java Thu Aug 11 15:27:32 2016
@@ -1135,8 +1135,7 @@ public class TestLogicalPlanBuilder {
"store c into 'output';");
store = lp.getSinks().get(0);
foreach = (LOForEach)lp.getPredecessors(store).get(0);
- Assert.assertTrue(foreach.getSchema().toString( false ).equals("mygroup:tuple(myname:chararray,myage:int),mycount:long"));
- /*
+ Assert.assertEquals(foreach.getSchema().toString( false ),"mygroup:tuple(myname:chararray,myage:int),mycount:long");
//setting the schema of flattened bag that has no schema with the user defined schema
String q = "a = load 'myfile' as (name:Chararray, age:Int, gpa:Float);" +
"c = load 'another_file';" +
@@ -1144,38 +1143,40 @@ public class TestLogicalPlanBuilder {
lp = buildPlan( q + "e = foreach d generate flatten(DIFF(a, c)) as (x, y, z), COUNT(a) as mycount;" + "store e into 'output';" );
store = lp.getSinks().get(0);
foreach = (LOForEach)lp.getPredecessors(store).get(0);
- Assert.assertTrue(foreach.getSchema().equals(Util.getSchemaFromString("x: bytearray, y: bytearray, z: bytearray, mycount: long")));
+ Assert.assertEquals(foreach.getSchema().toString(false),"x:bytearray,y:bytearray,z:bytearray,mycount:long");
//setting the schema of flattened bag that has no schema with the user defined schema
q = query +
"c = load 'another_file';" +
"d = cogroup a by $0, c by $0;" +
- "e = foreach d generate flatten(DIFF(a, c)) as (x: int, y: float, z), COUNT(a) as mycount;";
+ "e = foreach d generate flatten(DIFF(a, c)) as (x: int, y: float, z), COUNT(a) as mycount;" +
+ "store e into 'output';";
lp = buildPlan(q);
store = lp.getSinks().get(0);
foreach = (LOForEach)lp.getPredecessors(store).get(0);
- Assert.assertTrue(foreach.getSchema().equals(Util.getSchemaFromString("x: int, y: float, z: bytearray, mycount: long")));
+ Assert.assertEquals(foreach.getSchema().toString(false),"x:int,y:float,z:bytearray,mycount:long");
//setting the schema of flattened bag that has no schema with the user defined schema
q = query +
"c = load 'another_file';" +
"d = cogroup a by $0, c by $0;" +
- "e = foreach d generate flatten(DIFF(a, c)) as x, COUNT(a) as mycount;";
+ "e = foreach d generate flatten(DIFF(a, c)) as x, COUNT(a) as mycount;" +
+ "store e into 'output';";
lp = buildPlan(q);
store = lp.getSinks().get(0);
foreach = (LOForEach)lp.getPredecessors(store).get(0);
- Assert.assertTrue(foreach.getSchema().equals(Util.getSchemaFromString("x: bytearray, mycount: long")));
+ Assert.assertEquals(foreach.getSchema().toString(false),"x:bytearray,mycount:long");
//setting the schema of flattened bag that has no schema with the user defined schema
q = query +
"c = load 'another_file';" +
"d = cogroup a by $0, c by $0;" +
- "e = foreach d generate flatten(DIFF(a, c)) as x: int, COUNT(a) as mycount;";
+ "e = foreach d generate flatten(DIFF(a, c)) as x: int, COUNT(a) as mycount;" +
+ "store e into 'output';";
lp = buildPlan(q);
store = lp.getSinks().get(0);
foreach = (LOForEach)lp.getPredecessors(store).get(0);
- Assert.assertTrue(foreach.getSchema().equals(Util.getSchemaFromString("x: int, mycount: long")));
- */
+ Assert.assertEquals(foreach.getSchema().toString(false),"x:int,mycount:long");
}
@Test
Modified: pig/trunk/test/org/apache/pig/test/TestPlanGeneration.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestPlanGeneration.java?rev=1756002&r1=1756001&r2=1756002&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/TestPlanGeneration.java (original)
+++ pig/trunk/test/org/apache/pig/test/TestPlanGeneration.java Thu Aug 11 15:27:32 2016
@@ -41,7 +41,7 @@ import org.apache.pig.backend.hadoop.exe
import org.apache.pig.builtin.PigStorage;
import org.apache.pig.builtin.mock.Storage;
import org.apache.pig.builtin.mock.Storage.Data;
-import static org.apache.pig.builtin.mock.Storage.tuple;
+import static org.apache.pig.builtin.mock.Storage.*;
import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.PigContext;
@@ -71,8 +71,8 @@ public class TestPlanGeneration {
private static PigServer ps;
@BeforeClass
- public static void setUp() throws ExecException {
- ps = new PigServer(ExecType.LOCAL);
+ public static void setUp() throws Exception {
+ ps = new PigServer(Util.getLocalTestMode());
pc = ps.getPigContext();
pc.connect();
}
@@ -466,8 +466,7 @@ public class TestPlanGeneration {
@Test
// See PIG-2315
public void testAsType1() throws Exception {
- PigServer pig = new PigServer(Util.getLocalTestMode());
- Data data = Storage.resetData(pig);
+ Data data = Storage.resetData(ps);
data.set("input", tuple(0.1), tuple(1.2), tuple(2.3));
String query =
@@ -475,7 +474,7 @@ public class TestPlanGeneration {
+ "B = FOREACH A GENERATE a1 as (a2:int);\n"
+ "store B into 'out' using mock.Storage;" ;
- Util.registerMultiLineQuery(pig, query);
+ Util.registerMultiLineQuery(ps, query);
List<Tuple> list = data.get("out");
// Without PIG-2315, this failed with (0.1), (1.2), (2.3)
List<Tuple> expectedRes =
@@ -487,8 +486,7 @@ public class TestPlanGeneration {
@Test
// See PIG-2315
public void testAsType2() throws Exception {
- PigServer pig = new PigServer(Util.getLocalTestMode());
- Data data = Storage.resetData(pig);
+ Data data = Storage.resetData(ps);
data.set("input", tuple("a"), tuple("b"), tuple("c"));
String query =
@@ -500,7 +498,7 @@ public class TestPlanGeneration {
+ "D = distinct C;\n"
+ "store D into 'out' using mock.Storage;" ;
- Util.registerMultiLineQuery(pig, query);
+ Util.registerMultiLineQuery(ps, query);
List<Tuple> list = data.get("out");
// Without PIG-2315, this produced TWO 12345.
// One by chararray and another by int.
@@ -509,4 +507,32 @@ public class TestPlanGeneration {
new String[] {"('12345')"});
Util.checkQueryOutputsAfterSort(list, expectedRes);
}
+
+ @Test
+ // See PIG-4933
+ public void testAsWithByteArrayCast() throws Exception {
+ Data data = Storage.resetData(ps);
+ data.set("input_testAsWithByteArrayCast", "t1:(f1:bytearray, f2:bytearray), f3:chararray",
+ tuple(tuple(1,5), "a"),
+ tuple(tuple(2,4), "b"),
+ tuple(tuple(3,3), "c") );
+
+ String query =
+ "A = load 'input_testAsWithByteArrayCast' USING mock.Storage();\n"
+ + "B = FOREACH A GENERATE t1 as (t2:(newf1, newf2:float)), f3;"
+ + "store B into 'out' using mock.Storage;" ;
+
+ // This will call typecast of (bytearray,float) on a tuple
+ // bytearray2bytearray should be no-op.
+ // Without pig-4933 patch on POCast,
+ // this typecast was producing empty results
+
+ Util.registerMultiLineQuery(ps, query);
+ List<Tuple> list = data.get("out");
+ String[] expectedRes =
+ new String[] {"((1,5.0),a)","((2,4.0),b)","((3,3.0),c)"};
+ for( int i=0; i < list.size(); i++ ) {
+ Assert.assertEquals(expectedRes[i], list.get(i).toString());
+ }
+ }
}