You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by kn...@apache.org on 2016/08/11 15:27:32 UTC

svn commit: r1756002 - in /pig/trunk: ./ src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/ src/org/apache/pig/newplan/logical/visitor/ test/org/apache/pig/test/

Author: knoguchi
Date: Thu Aug 11 15:27:32 2016
New Revision: 1756002

URL: http://svn.apache.org/viewvc?rev=1756002&view=rev
Log:
PIG-4933: TestDataBagAccess.testBagConstantFlatten1/TestLogicalPlanBuilder.testQuery90 broken after PIG-2315 (knoguchi)

Modified:
    pig/trunk/CHANGES.txt
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POCast.java
    pig/trunk/src/org/apache/pig/newplan/logical/visitor/ForEachUserSchemaVisitor.java
    pig/trunk/test/org/apache/pig/test/TestLogicalPlanBuilder.java
    pig/trunk/test/org/apache/pig/test/TestPlanGeneration.java

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1756002&r1=1756001&r2=1756002&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Thu Aug 11 15:27:32 2016
@@ -40,6 +40,8 @@ OPTIMIZATIONS
  
 BUG FIXES
 
+PIG-4933: TestDataBagAccess.testBagConstantFlatten1/TestLogicalPlanBuilder.testQuery90 broken after PIG-2315 (knoguchi)
+
 PIG-4965: Refactor test/perf/pigmix/bin/runpigmix.pl to delete the output of single test case
   if we enable cleanup_after_test (kellyzly via daijy)
 

Modified: pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POCast.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POCast.java?rev=1756002&r1=1756001&r2=1756002&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POCast.java (original)
+++ pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POCast.java Thu Aug 11 15:27:32 2016
@@ -1795,6 +1795,10 @@ public class POCast extends ExpressionOp
             default:
                 throw new ExecException("Cannot convert "+ obj + " to " + fs, 1120, PigException.INPUT);
             }
+        case DataType.BYTEARRAY:
+            //no-op (PIG-4933)
+            result = obj;
+            break;
         default:
             throw new ExecException("Don't know how to convert "+ obj + " to " + fs, 1120, PigException.INPUT);
         }

Modified: pig/trunk/src/org/apache/pig/newplan/logical/visitor/ForEachUserSchemaVisitor.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/newplan/logical/visitor/ForEachUserSchemaVisitor.java?rev=1756002&r1=1756001&r2=1756002&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/newplan/logical/visitor/ForEachUserSchemaVisitor.java (original)
+++ pig/trunk/src/org/apache/pig/newplan/logical/visitor/ForEachUserSchemaVisitor.java Thu Aug 11 15:27:32 2016
@@ -41,6 +41,67 @@ public class ForEachUserSchemaVisitor ex
         super(plan, new DependencyOrderWalker(plan));
     }
 
+    private static LogicalSchema replaceNullByteArraySchema(
+                         LogicalSchema originalSchema,
+                         LogicalSchema userSchema) throws FrontendException {
+        if( originalSchema == null && userSchema == null ) {
+            return null;
+        } else if ( originalSchema == null ) {
+            return userSchema.deepCopy();
+        } else if ( userSchema == null ) {
+            return originalSchema.deepCopy();
+        }
+
+        LogicalSchema replacedSchema = new LogicalSchema();
+        for (int i=0;i<originalSchema.size();i++) {
+            LogicalFieldSchema replacedFS = replaceNullByteArrayFieldSchema(originalSchema.getField(i), userSchema.getField(i));
+            replacedSchema.addField(replacedFS);
+        }
+        return replacedSchema;
+    }
+
+    private static LogicalFieldSchema replaceNullByteArrayFieldSchema(
+                         LogicalFieldSchema originalFS,
+                         LogicalFieldSchema userFS) throws FrontendException {
+        if( originalFS == null && userFS == null ) {
+            return null;
+        } else if ( originalFS == null ) {
+            return userFS.deepCopy();
+        } else if ( userFS == null ) {
+            return originalFS.deepCopy();
+        }
+        if ( originalFS.type==DataType.NULL
+            || originalFS.type==DataType.BYTEARRAY ) {
+            return userFS.deepCopy();
+        } else if ( userFS.type==DataType.NULL
+            || userFS.type==DataType.BYTEARRAY ) {
+            // Use originalFS schema but keep the alias from userFS
+            return new LogicalFieldSchema(userFS.alias, originalFS.schema,  originalFS.type);
+        }
+
+        if ( !DataType.isSchemaType(originalFS.type) ) {
+            return userFS.deepCopy();
+        } else {
+            LogicalSchema replacedSchema = replaceNullByteArraySchema(originalFS.schema, userFS.schema);
+            return new LogicalFieldSchema(userFS.alias, replacedSchema, userFS.type);
+        }
+    }
+
+    private static boolean hasOnlyNullOrByteArraySchema (LogicalFieldSchema fs) {
+        if( DataType.isSchemaType(fs.type) ) {
+            if( fs.schema != null ) {
+                for (LogicalFieldSchema sub_fs : fs.schema.getFields() ) {
+                    if( !hasOnlyNullOrByteArraySchema(sub_fs)  ) {
+                        return false;
+                    }
+                }
+            }
+        } else if( fs.type != DataType.NULL && fs.type != DataType.BYTEARRAY )  {
+            return false;
+        }
+        return true;
+    }
+
     @Override
     public void visit(LOForEach foreach) throws FrontendException {
         LOGenerate generate = (LOGenerate)foreach.getInnerPlan().getSinks().get(0);
@@ -93,7 +154,7 @@ public class ForEachUserSchemaVisitor ex
             // Use user defined schema to cast, this is the prevailing use case
             if (mExpSchema==null) {
                 for (LogicalFieldSchema fs : mUserDefinedSchema.getFields()) {
-                    if (fs.type==DataType.NULL||fs.type==DataType.BYTEARRAY) {
+                    if (hasOnlyNullOrByteArraySchema(fs)) {
                         addToExps(casterForEach, innerPlan, gen, exps, index, false, null);
                     } else {
                         addToExps(casterForEach, innerPlan, gen, exps, index, true, fs);
@@ -120,11 +181,12 @@ public class ForEachUserSchemaVisitor ex
                         " fields to " + mUserDefinedSchema.size(), 0, foreach.getLocation());
             }
 
+            LogicalSchema replacedSchema = replaceNullByteArraySchema(mExpSchema,mUserDefinedSchema);
             for (int j=0;j<mExpSchema.size();j++) {
                 LogicalFieldSchema mExpFieldSchema = mExpSchema.getField(j);
-                LogicalFieldSchema mUserDefinedFieldSchema = mUserDefinedSchema.getField(j);
-                if (mUserDefinedFieldSchema.type==DataType.NULL ||
-                    mUserDefinedFieldSchema.type==DataType.BYTEARRAY ||
+                LogicalFieldSchema mUserDefinedFieldSchema = replacedSchema.getField(j);
+
+                if (hasOnlyNullOrByteArraySchema(mUserDefinedFieldSchema) ||
                     LogicalFieldSchema.typeMatch(mExpFieldSchema, mUserDefinedFieldSchema)) {
                     addToExps(casterForEach, innerPlan, gen, exps, index, false, null);
                 } else {
@@ -147,10 +209,18 @@ public class ForEachUserSchemaVisitor ex
             // 'generate' (LOGenerate) still holds the reference to this
             // mUserDefinedSchemas
             for( LogicalSchema mUserDefinedSchema : mUserDefinedSchemas ) {
-                if( mUserDefinedSchema != null ) {
-                    for (LogicalFieldSchema fs : mUserDefinedSchema.getFields()) {
-                        fs.type = DataType.NULL;
-                    }
+                resetTypeToNull( mUserDefinedSchema );
+            }
+        }
+    }
+
+    private void resetTypeToNull (LogicalSchema s1) {
+        if( s1 != null ) {
+            for (LogicalFieldSchema fs : s1.getFields()) {
+                if( DataType.isSchemaType(fs.type) ) {
+                    resetTypeToNull(fs.schema);
+                } else {
+                    fs.type = DataType.NULL;
                 }
             }
         }

Modified: pig/trunk/test/org/apache/pig/test/TestLogicalPlanBuilder.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestLogicalPlanBuilder.java?rev=1756002&r1=1756001&r2=1756002&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/TestLogicalPlanBuilder.java (original)
+++ pig/trunk/test/org/apache/pig/test/TestLogicalPlanBuilder.java Thu Aug 11 15:27:32 2016
@@ -1135,8 +1135,7 @@ public class TestLogicalPlanBuilder {
         	    "store c into 'output';");
         store = lp.getSinks().get(0);
         foreach = (LOForEach)lp.getPredecessors(store).get(0);
-        Assert.assertTrue(foreach.getSchema().toString( false ).equals("mygroup:tuple(myname:chararray,myage:int),mycount:long"));
-        /*
+        Assert.assertEquals(foreach.getSchema().toString( false ),"mygroup:tuple(myname:chararray,myage:int),mycount:long");
         //setting the schema of flattened bag that has no schema with the user defined schema
         String q = "a = load 'myfile' as (name:Chararray, age:Int, gpa:Float);" +
                    "c = load 'another_file';" +
@@ -1144,38 +1143,40 @@ public class TestLogicalPlanBuilder {
         lp = buildPlan( q + "e = foreach d generate flatten(DIFF(a, c)) as (x, y, z), COUNT(a) as mycount;" + "store e into 'output';" );
         store = lp.getSinks().get(0);
         foreach = (LOForEach)lp.getPredecessors(store).get(0);
-        Assert.assertTrue(foreach.getSchema().equals(Util.getSchemaFromString("x: bytearray, y: bytearray, z: bytearray, mycount: long")));
+        Assert.assertEquals(foreach.getSchema().toString(false),"x:bytearray,y:bytearray,z:bytearray,mycount:long");
 
         //setting the schema of flattened bag that has no schema with the user defined schema
         q = query +
                   "c = load 'another_file';" +
                   "d = cogroup a by $0, c by $0;" +
-                  "e = foreach d generate flatten(DIFF(a, c)) as (x: int, y: float, z), COUNT(a) as mycount;";
+                  "e = foreach d generate flatten(DIFF(a, c)) as (x: int, y: float, z), COUNT(a) as mycount;" +
+                  "store e into 'output';";
         lp = buildPlan(q);
         store = lp.getSinks().get(0);
         foreach = (LOForEach)lp.getPredecessors(store).get(0);
-        Assert.assertTrue(foreach.getSchema().equals(Util.getSchemaFromString("x: int, y: float, z: bytearray, mycount: long")));
+        Assert.assertEquals(foreach.getSchema().toString(false),"x:int,y:float,z:bytearray,mycount:long");
 
         //setting the schema of flattened bag that has no schema with the user defined schema
         q = query +
             "c = load 'another_file';" +
             "d = cogroup a by $0, c by $0;" +
-            "e = foreach d generate flatten(DIFF(a, c)) as x, COUNT(a) as mycount;";
+            "e = foreach d generate flatten(DIFF(a, c)) as x, COUNT(a) as mycount;" +
+            "store e into 'output';";
         lp = buildPlan(q);
         store = lp.getSinks().get(0);
         foreach = (LOForEach)lp.getPredecessors(store).get(0);
-        Assert.assertTrue(foreach.getSchema().equals(Util.getSchemaFromString("x: bytearray, mycount: long")));
+        Assert.assertEquals(foreach.getSchema().toString(false),"x:bytearray,mycount:long");
 
         //setting the schema of flattened bag that has no schema with the user defined schema
         q = query + 
             "c = load 'another_file';" +
             "d = cogroup a by $0, c by $0;" +
-            "e = foreach d generate flatten(DIFF(a, c)) as x: int, COUNT(a) as mycount;";
+            "e = foreach d generate flatten(DIFF(a, c)) as x: int, COUNT(a) as mycount;" +
+            "store e into 'output';";
         lp = buildPlan(q);
         store = lp.getSinks().get(0);
         foreach = (LOForEach)lp.getPredecessors(store).get(0);
-        Assert.assertTrue(foreach.getSchema().equals(Util.getSchemaFromString("x: int, mycount: long")));
-         */
+        Assert.assertEquals(foreach.getSchema().toString(false),"x:int,mycount:long");
     }
 
     @Test

Modified: pig/trunk/test/org/apache/pig/test/TestPlanGeneration.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestPlanGeneration.java?rev=1756002&r1=1756001&r2=1756002&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/TestPlanGeneration.java (original)
+++ pig/trunk/test/org/apache/pig/test/TestPlanGeneration.java Thu Aug 11 15:27:32 2016
@@ -41,7 +41,7 @@ import org.apache.pig.backend.hadoop.exe
 import org.apache.pig.builtin.PigStorage;
 import org.apache.pig.builtin.mock.Storage;
 import org.apache.pig.builtin.mock.Storage.Data;
-import static org.apache.pig.builtin.mock.Storage.tuple;
+import static org.apache.pig.builtin.mock.Storage.*;
 import org.apache.pig.data.DataType;
 import org.apache.pig.data.Tuple;
 import org.apache.pig.impl.PigContext;
@@ -71,8 +71,8 @@ public class TestPlanGeneration {
     private static PigServer ps;
 
     @BeforeClass
-    public static void setUp() throws ExecException {
-        ps = new PigServer(ExecType.LOCAL);
+    public static void setUp() throws Exception {
+        ps = new PigServer(Util.getLocalTestMode());
         pc = ps.getPigContext();
         pc.connect();
     }
@@ -466,8 +466,7 @@ public class TestPlanGeneration {
     @Test
     // See PIG-2315
     public void testAsType1() throws Exception {
-        PigServer pig = new PigServer(Util.getLocalTestMode());
-        Data data = Storage.resetData(pig);
+        Data data = Storage.resetData(ps);
         data.set("input", tuple(0.1), tuple(1.2), tuple(2.3));
 
         String query =
@@ -475,7 +474,7 @@ public class TestPlanGeneration {
             + "B = FOREACH A GENERATE a1 as (a2:int);\n"
             + "store B into 'out' using mock.Storage;" ;
 
-        Util.registerMultiLineQuery(pig, query);
+        Util.registerMultiLineQuery(ps, query);
         List<Tuple> list = data.get("out");
         // Without PIG-2315, this failed with (0.1), (1.2), (2.3)
         List<Tuple> expectedRes =
@@ -487,8 +486,7 @@ public class TestPlanGeneration {
     @Test
     // See PIG-2315
     public void testAsType2() throws Exception {
-        PigServer pig = new PigServer(Util.getLocalTestMode());
-        Data data = Storage.resetData(pig);
+        Data data = Storage.resetData(ps);
         data.set("input", tuple("a"), tuple("b"), tuple("c"));
 
         String query =
@@ -500,7 +498,7 @@ public class TestPlanGeneration {
             + "D = distinct C;\n"
             + "store D into 'out' using mock.Storage;" ;
 
-        Util.registerMultiLineQuery(pig, query);
+        Util.registerMultiLineQuery(ps, query);
         List<Tuple> list = data.get("out");
         // Without PIG-2315, this produced TWO 12345.
         // One by chararray and another by int.
@@ -509,4 +507,32 @@ public class TestPlanGeneration {
                         new String[] {"('12345')"});
         Util.checkQueryOutputsAfterSort(list, expectedRes);
     }
+
+    @Test
+    // See PIG-4933
+    public void testAsWithByteArrayCast() throws Exception {
+        Data data = Storage.resetData(ps);
+	    data.set("input_testAsWithByteArrayCast", "t1:(f1:bytearray, f2:bytearray), f3:chararray",
+				tuple(tuple(1,5), "a"),
+				tuple(tuple(2,4), "b"),
+				tuple(tuple(3,3), "c") );
+
+        String query =
+            "A = load 'input_testAsWithByteArrayCast' USING mock.Storage();\n"
+            + "B = FOREACH A GENERATE t1 as (t2:(newf1, newf2:float)), f3;"
+            + "store B into 'out' using mock.Storage;" ;
+
+        // This will call typecast of (bytearray,float) on a tuple
+        // bytearray2bytearray should be no-op.
+        // Without pig-4933 patch on POCast,
+        // this typecast was producing empty results
+
+        Util.registerMultiLineQuery(ps, query);
+        List<Tuple> list = data.get("out");
+        String[] expectedRes =
+                        new String[] {"((1,5.0),a)","((2,4.0),b)","((3,3.0),c)"};
+        for( int i=0; i < list.size(); i++ ) {
+            Assert.assertEquals(expectedRes[i], list.get(i).toString());
+        }
+    }
 }