You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by da...@apache.org on 2012/03/28 21:49:02 UTC

svn commit: r1306557 - in /pig/trunk/test/e2e/pig: tests/nightly.conf tests/turing_jython.conf udfs/java/org/apache/pig/test/udf/evalfunc/TOBAG2.java

Author: daijy
Date: Wed Mar 28 19:49:02 2012
New Revision: 1306557

URL: http://svn.apache.org/viewvc?rev=1306557&view=rev
Log:
Fix several e2e tests

Added:
    pig/trunk/test/e2e/pig/udfs/java/org/apache/pig/test/udf/evalfunc/TOBAG2.java
Modified:
    pig/trunk/test/e2e/pig/tests/nightly.conf
    pig/trunk/test/e2e/pig/tests/turing_jython.conf

Modified: pig/trunk/test/e2e/pig/tests/nightly.conf
URL: http://svn.apache.org/viewvc/pig/trunk/test/e2e/pig/tests/nightly.conf?rev=1306557&r1=1306556&r2=1306557&view=diff
==============================================================================
--- pig/trunk/test/e2e/pig/tests/nightly.conf (original)
+++ pig/trunk/test/e2e/pig/tests/nightly.conf Wed Mar 28 19:49:02 2012
@@ -3766,8 +3766,8 @@ store T4 into ':OUTPATH:.8';
                         }, {
 # TEST : cast for TOTUPLE/TOBAG
 						 'num' => 4
-                        ,'pig' => q?
                         ,'ignore' => 1 # different error message for different version of hadoop
+                        ,'pig' => q?
 A  = load ':INPATH:/types/numbers.txt' using PigStorage(':') as (intnum1000: int,id: int,intnum5: int,intnum100: int,intnum: int,longnum: long,floatnum: float,doublenum: double);
 B= limit A 10;
 C = foreach B generate $0,  TOTUPLE((int) $0, (long) $0, (double) $0), TOBAG( (float) $0, (chararray) $0), $0;

Modified: pig/trunk/test/e2e/pig/tests/turing_jython.conf
URL: http://svn.apache.org/viewvc/pig/trunk/test/e2e/pig/tests/turing_jython.conf?rev=1306557&r1=1306556&r2=1306557&view=diff
==============================================================================
--- pig/trunk/test/e2e/pig/tests/turing_jython.conf (original)
+++ pig/trunk/test/e2e/pig/tests/turing_jython.conf Wed Mar 28 19:49:02 2012
@@ -997,8 +997,8 @@ from org.apache.pig.scripting import Pig
 
 #create pig script
 
-Pig.sql("""drop table if exists pig_script_hcat_ddl_1;""")
-ret = Pig.sql("""create table pig_script_hcat_ddl_1(name string,
+Pig.sql("""sql drop table if exists pig_script_hcat_ddl_1;""")
+ret = Pig.sql("""sql create table pig_script_hcat_ddl_1(name string,
 age int,
 gpa double)
 stored as textfile;

Added: pig/trunk/test/e2e/pig/udfs/java/org/apache/pig/test/udf/evalfunc/TOBAG2.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/e2e/pig/udfs/java/org/apache/pig/test/udf/evalfunc/TOBAG2.java?rev=1306557&view=auto
==============================================================================
--- pig/trunk/test/e2e/pig/udfs/java/org/apache/pig/test/udf/evalfunc/TOBAG2.java (added)
+++ pig/trunk/test/e2e/pig/udfs/java/org/apache/pig/test/udf/evalfunc/TOBAG2.java Wed Mar 28 19:49:02 2012
@@ -0,0 +1,154 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pig.test.udf.evalfunc;
+
+
+import java.io.IOException;
+
+import org.apache.pig.EvalFunc;
+import org.apache.pig.data.BagFactory;
+import org.apache.pig.data.DataBag;
+import org.apache.pig.data.DataType;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.data.TupleFactory;
+import org.apache.pig.impl.logicalLayer.FrontendException;
+import org.apache.pig.impl.logicalLayer.schema.Schema;
+import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema;
+
+/**
+ * This class takes a list of items and puts them into a bag
+ * T = foreach U generate TOBAG($0, $1, $2);
+ * It's like saying this:
+ * T = foreach U generate {($0), ($1), ($2)}
+ * 
+ * All arguments that are not of tuple type are inserted into a tuple before
+ * being added to the bag. This is because bag is always a bag of tuples.
+ * 
+ * Output schema:
+ * The output schema for this udf depends on the schema of its arguments.
+ * If all the arguments have same type and same inner 
+ * schema (for bags/tuple columns), then the udf output schema would be a bag 
+ * of tuples having a column of the type and inner-schema (if any) of the 
+ * arguments. 
+ * If the arguments are of type tuple/bag, then their inner schema, including
+ * the alias names, should match.
+ * If these conditions are not met the output schema will be a bag with null 
+ * inner schema.
+ *  
+ *  example 1 
+ *  grunt> describe a;
+ *  a: {a0: int,a1: int}
+ *  grunt> b = foreach a generate TOBAG(a0,a1);
+ *  grunt> describe b;
+ *  b: {{int}}
+ *  
+ *  example 2
+ *  grunt> describe a;
+ *  a: {a0: (x: int),a1: (x: int)}
+ *  grunt> b = foreach a generate TOBAG(a0,a1);                                    
+ *  grunt> describe b;                                                             
+ *  b: {{(x: int)}}
+ *  
+ *  example 3
+ *  grunt> describe a;                                                             
+ *  a: {a0: (x: int),a1: (y: int)}
+ * -- note that the inner schema is different because the aliases (x & y) are different
+ *  grunt> b = foreach a generate TOBAG(a0,a1);                                    
+ *  grunt> describe b;                                                             
+ *  b: {{NULL}}
+ *  
+ *  
+ * 
+ */
+public class TOBAG2 extends EvalFunc<DataBag> {
+    /** Collects every field of {@code input} into a new default bag; any failure is rethrown as a RuntimeException. */
+    @Override
+    public DataBag exec(Tuple input) throws IOException {
+        try {
+            DataBag bag = BagFactory.getInstance().newDefaultBag();
+            // Tuple fields are added as-is; any other value is first wrapped in a one-field tuple.
+            for (int i = 0; i < input.size(); ++i) {
+                final Object object = input.get(i);
+                if (object instanceof Tuple) {
+                    bag.add( (Tuple) object);
+                } else {
+                    Tuple tp2 = TupleFactory.getInstance().newTuple(1);
+                    tp2.set(0, object);
+                    bag.add(tp2);
+                }
+            }
+
+            return bag;
+        } catch (Exception ee) {
+            throw new RuntimeException("Error while creating a bag", ee);
+        }
+    }
+
+    /* (non-Javadoc)
+     * @see org.apache.pig.EvalFunc#outputSchema(org.apache.pig.impl.logicalLayer.schema.Schema)
+     * Returns a bag-of-tuple schema whose single column has the type and inner schema
+     * shared by all input fields. If the fields differ, or inputSch is null or has no
+     * fields, returns Schema.generateNestedSchema(DataType.BAG, DataType.NULL).
+     */
+    @Override
+    public Schema outputSchema(Schema inputSch) {
+        byte type = DataType.ERROR; // ERROR marks "no field seen yet" and, after the loop, "no common type"
+        Schema innerSchema = null;
+        if(inputSch != null){
+            for(FieldSchema fs : inputSch.getFields()){
+                if(type == DataType.ERROR){
+                    type = fs.type;
+                    innerSchema = fs.schema;
+                }else{
+                    if( type != fs.type || !nullEquals(innerSchema, fs.schema)){
+                        // mismatch with the earlier fields: reset to ERROR so the fallback schema is returned
+                        type = DataType.ERROR;
+                        break;
+                    }
+                }
+            }
+        }
+        try {
+            if(type == DataType.ERROR){
+                return Schema.generateNestedSchema(DataType.BAG, DataType.NULL);
+            }
+            FieldSchema innerFs = new Schema.FieldSchema(null, innerSchema, type);
+            Schema innerSch = new Schema(innerFs);
+            Schema bagSchema = new Schema(new FieldSchema(null, innerSch, DataType.BAG));
+            return bagSchema;
+        } catch (FrontendException e) {
+            // This should not happen; rethrown unchecked because outputSchema declares no checked exception
+            throw new RuntimeException("Bug : exception thrown while " +
+                    "creating output schema for TOBAG udf", e);
+        }
+
+    }
+    /** Null-safe schema equality: true when both are null, otherwise currentSchema.equals(newSchema). */
+    private boolean nullEquals(Schema currentSchema, Schema newSchema) {
+        if(currentSchema == null){
+            if(newSchema != null){
+                return false;
+            }
+            return true;
+        }
+        return currentSchema.equals(newSchema);
+    }
+    
+
+}
+