You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by da...@apache.org on 2012/03/28 04:33:50 UTC
svn commit: r1306124 - in /pig/branches/branch-0.10/test/e2e/pig:
tests/macro.conf tests/negative.conf tests/nightly.conf
udfs/java/org/apache/pig/test/udf/evalfunc/TOBAG2.java
Author: daijy
Date: Wed Mar 28 02:33:49 2012
New Revision: 1306124
URL: http://svn.apache.org/viewvc?rev=1306124&view=rev
Log:
Fix several e2e tests
Added:
pig/branches/branch-0.10/test/e2e/pig/udfs/java/org/apache/pig/test/udf/evalfunc/TOBAG2.java
Modified:
pig/branches/branch-0.10/test/e2e/pig/tests/macro.conf
pig/branches/branch-0.10/test/e2e/pig/tests/negative.conf
pig/branches/branch-0.10/test/e2e/pig/tests/nightly.conf
Modified: pig/branches/branch-0.10/test/e2e/pig/tests/macro.conf
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.10/test/e2e/pig/tests/macro.conf?rev=1306124&r1=1306123&r2=1306124&view=diff
==============================================================================
--- pig/branches/branch-0.10/test/e2e/pig/tests/macro.conf (original)
+++ pig/branches/branch-0.10/test/e2e/pig/tests/macro.conf Wed Mar 28 02:33:49 2012
@@ -556,6 +556,7 @@ $cfg = {
{
# import non-existent file
'num' => 1,
+ 'ignore' => 1, # different error message for different version of hadoop
'pig' => q#import 'nosuchfile';
a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
Modified: pig/branches/branch-0.10/test/e2e/pig/tests/negative.conf
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.10/test/e2e/pig/tests/negative.conf?rev=1306124&r1=1306123&r2=1306124&view=diff
==============================================================================
--- pig/branches/branch-0.10/test/e2e/pig/tests/negative.conf (original)
+++ pig/branches/branch-0.10/test/e2e/pig/tests/negative.conf Wed Mar 28 02:33:49 2012
@@ -174,6 +174,7 @@ store a into ':INPATH:/singlefile/fileex
# currently (as of 09/18/2008), the following are not allowed
#a = b;
'num' => 1,
+ 'ignore' => 1, # different error message for different version of hadoop
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = a;\,
'expected_err_regex' => "mismatched input ';' expecting LEFT_PAREN",
@@ -182,6 +183,7 @@ store a into ':INPATH:/singlefile/fileex
# currently (as of 09/18/2008), the following are not allowed
#a = b as (x,y,z);
'num' => 2,
+ 'ignore' => 1, # different error message for different version of hadoop
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = a as (x,y,z);\,
'expected_err_regex' => "mismatched input 'as' expecting LEFT_PAREN",
Modified: pig/branches/branch-0.10/test/e2e/pig/tests/nightly.conf
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.10/test/e2e/pig/tests/nightly.conf?rev=1306124&r1=1306123&r2=1306124&view=diff
==============================================================================
--- pig/branches/branch-0.10/test/e2e/pig/tests/nightly.conf (original)
+++ pig/branches/branch-0.10/test/e2e/pig/tests/nightly.conf Wed Mar 28 02:33:49 2012
@@ -270,7 +270,7 @@ store b into ':OUTPATH:' using PigStorag
},
{
'num' => 21,
- 'ignore' => 1, # TODO Need to file a JIRA-2
+ 'ignore' => 1, # PIG-2593
'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name, age, gpa, instate);
b = filter a by instate == true;
store b into ':OUTPATH:' using PigStorage;\,
@@ -280,7 +280,7 @@ store b into ':OUTPATH:' using PigStorag
},
{
'num' => 22,
- 'ignore' => 1, # TODO Need to file a JIRA-2
+ 'ignore' => 1, # PIG-2593
'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name, age, gpa, instate);
b = filter a by instate == false;
store b into ':OUTPATH:' using PigStorage;\,
@@ -290,7 +290,7 @@ store b into ':OUTPATH:' using PigStorag
},
{
'num' => 23,
- 'ignore' => 1, # TODO Need to file a JIRA-1
+ 'ignore' => 1, # PIG-2593
'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
b = filter a by instate;
store b into ':OUTPATH:' using PigStorage;\,
@@ -300,7 +300,7 @@ store b into ':OUTPATH:' using PigStorag
},
{
'num' => 24,
- 'ignore' => 1, # TODO Need to file a JIRA-1
+ 'ignore' => 1, # PIG-2593
'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' as (name:chararray, age:int, gpa:double, instate:boolean);
b = filter a by not instate;
store b into ':OUTPATH:' using PigStorage;\,
@@ -3659,6 +3659,7 @@ store T4 into ':OUTPATH:.8';
# TEST : cast for TOTUPLE/TOBAG
'num' => 4
,'pig' => q?
+ ,'ignore' => 1 # different error message for different version of hadoop
A = load ':INPATH:/types/numbers.txt' using PigStorage(':') as (intnum1000: int,id: int,intnum5: int,intnum100: int,intnum: int,longnum: long,floatnum: float,doublenum: double);
B= limit A 10;
C = foreach B generate $0, TOTUPLE((int) $0, (long) $0, (double) $0), TOBAG( (float) $0, (chararray) $0), $0;
@@ -3752,11 +3753,24 @@ store T1 into ':OUTPATH:.3';
store T2 into ':OUTPATH:.4';
store T3 into ':OUTPATH:.5';
?
-
+ ,'verify_pig_script' => q?register :FUNCPATH:/testudf.jar;
+A = load ':INPATH:/types/numbers.txt' using PigStorage(':') as (intnum1000: int,id: int,intnum5: int,intnum100: int,intnum: int,longnum: long,floatnum: float,doublenum: double);
+B = limit A 10;
+C = foreach B generate TOBAG( $0, $1, $2);
+T1= foreach B generate TOTUPLE( TOBAG( $1, $2, $3),TOTUPLE($3, $4, $5) );
+T2= foreach B generate TOTUPLE( TOBAG( $1, $2, $3),TOBAG($3, $4, $5) );
+T3= foreach B generate org.apache.pig.test.udf.evalfunc.TOBAG2( TOTUPLE( $1, $2, $3), TOTUPLE($4,$5), TOTUPLE($6,$7));
+store B into ':OUTPATH:.1';
+store C into ':OUTPATH:.2';
+store T1 into ':OUTPATH:.3';
+store T2 into ':OUTPATH:.4';
+store T3 into ':OUTPATH:.5';
+?
}, {
#TEST negative test case: out of bounds positional parameter
# EVERYTHING IS CORRECT
'num' => 9
+ ,'ignore' => 1 # different error message for different version of hadoop
,'pig' => q?
A = load ':INPATH:/types/numbers.txt' using PigStorage(':') as (intnum1000: int,id: int,intnum5: int,intnum100: int,intnum: int,longnum: long,floatnum: float,doublenum: double);
B = limit A 10;
@@ -3767,6 +3781,7 @@ C = foreach B generate $0, $1, TOTUPLE($
#TEST negative test case: out of bounds positional parameter
# EVERYTHING IS CORRECT
'num' => 10
+ ,'ignore' => 1 # different error message for different version of hadoop
,'pig' => q?
A = load ':INPATH:/types/numbers.txt' using PigStorage(':') as (intnum1000: int,id: int,intnum5: int,intnum100: int,intnum: int,longnum: long,floatnum: float,doublenum: double);
B = limit A 10;
Added: pig/branches/branch-0.10/test/e2e/pig/udfs/java/org/apache/pig/test/udf/evalfunc/TOBAG2.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.10/test/e2e/pig/udfs/java/org/apache/pig/test/udf/evalfunc/TOBAG2.java?rev=1306124&view=auto
==============================================================================
--- pig/branches/branch-0.10/test/e2e/pig/udfs/java/org/apache/pig/test/udf/evalfunc/TOBAG2.java (added)
+++ pig/branches/branch-0.10/test/e2e/pig/udfs/java/org/apache/pig/test/udf/evalfunc/TOBAG2.java Wed Mar 28 02:33:49 2012
@@ -0,0 +1,154 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pig.test.udf.evalfunc;
+
+
+import java.io.IOException;
+
+import org.apache.pig.EvalFunc;
+import org.apache.pig.data.BagFactory;
+import org.apache.pig.data.DataBag;
+import org.apache.pig.data.DataType;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.data.TupleFactory;
+import org.apache.pig.impl.logicalLayer.FrontendException;
+import org.apache.pig.impl.logicalLayer.schema.Schema;
+import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema;
+
+/**
+ * This class takes a list of items and puts them into a bag
+ * T = foreach U generate TOBAG($0, $1, $2);
+ * It's like saying this:
+ * T = foreach U generate {($0), ($1), ($2)}
+ *
+ * All arguments that are not of tuple type are inserted into a tuple before
+ * being added to the bag, because a bag is always a bag of tuples. Arguments
+ * that are already tuples are added to the bag unchanged.
+ *
+ * Output schema:
+ * The output schema for this udf depends on the schema of its arguments.
+ * If all the arguments have the same type and the same inner schema (for
+ * bag/tuple columns), then the udf output schema is a bag of tuples having
+ * a column of the type and inner schema (if any) of the arguments.
+ * If the arguments are of type tuple/bag, then their inner schemas,
+ * including the alias names, must match.
+ * If these conditions are not met, or if no input schema is available,
+ * the output schema is a bag with a null inner schema.
+ *
+ * example 1
+ * grunt> describe a;
+ * a: {a0: int,a1: int}
+ * grunt> b = foreach a generate TOBAG(a0,a1);
+ * grunt> describe b;
+ * b: {{int}}
+ *
+ * example 2
+ * grunt> describe a;
+ * a: {a0: (x: int),a1: (x: int)}
+ * grunt> b = foreach a generate TOBAG(a0,a1);
+ * grunt> describe b;
+ * b: {{(x: int)}}
+ *
+ * example 3
+ * grunt> describe a;
+ * a: {a0: (x: int),a1: (y: int)}
+ * -- note that the inner schema is different because the aliases (x & y) are different
+ * grunt> b = foreach a generate TOBAG(a0,a1);
+ * grunt> describe b;
+ * b: {{NULL}}
+ *
+ * The examples use the short name TOBAG; the e2e test scripts reference this
+ * UDF by its fully-qualified name org.apache.pig.test.udf.evalfunc.TOBAG2.
+ */
+public class TOBAG2 extends EvalFunc<DataBag> {
+ // exec: returns a new default bag holding one tuple per field of the input tuple.
+ @Override
+ public DataBag exec(Tuple input) throws IOException {
+ try {
+ DataBag bag = BagFactory.getInstance().newDefaultBag();
+ // Tuples are added as they are; any other value is wrapped in a one-field tuple.
+ for (int i = 0; i < input.size(); ++i) {
+ final Object object = input.get(i);
+ if (object instanceof Tuple) {
+ bag.add( (Tuple) object);
+ } else {
+ Tuple tp2 = TupleFactory.getInstance().newTuple(1);
+ tp2.set(0, object);
+ bag.add(tp2);
+ }
+ }
+
+ return bag;
+ } catch (Exception ee) { // any failure in the loop above is rethrown unchecked, with its cause
+ throw new RuntimeException("Error while creating a bag", ee);
+ }
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.pig.EvalFunc#outputSchema(org.apache.pig.impl.logicalLayer.schema.Schema)
+ * If all the input fields have the same type and inner schema, the result
+ * is a bag schema with one column of that type; otherwise (or when inputSch
+ * is null or has no fields) it is a bag with a NULL inner schema.
+ */
+ @Override
+ public Schema outputSchema(Schema inputSch) {
+ byte type = DataType.ERROR; // ERROR also serves as the nothing-seen-yet sentinel
+ Schema innerSchema = null;
+ if(inputSch != null){
+ for(FieldSchema fs : inputSch.getFields()){
+ if(type == DataType.ERROR){
+ type = fs.type;
+ innerSchema = fs.schema;
+ }else{
+ if( type != fs.type || !nullEquals(innerSchema, fs.schema)){
+ // mismatch: invalidate the type so the NULL-schema fallback below is used
+ type = DataType.ERROR;
+ break;
+ }
+ }
+ }
+ }
+ try {
+ if(type == DataType.ERROR){
+ return Schema.generateNestedSchema(DataType.BAG, DataType.NULL);
+ }
+ FieldSchema innerFs = new Schema.FieldSchema(null, innerSchema, type);
+ Schema innerSch = new Schema(innerFs);
+ Schema bagSchema = new Schema(new FieldSchema(null, innerSch, DataType.BAG));
+ return bagSchema;
+ } catch (FrontendException e) {
+ // This should not happen
+ throw new RuntimeException("Bug : exception thrown while " +
+ "creating output schema for TOBAG udf", e);
+ }
+
+ }
+ // Null-safe schema comparison: two nulls are equal, null vs. non-null is not; otherwise defers to Schema.equals.
+ private boolean nullEquals(Schema currentSchema, Schema newSchema) {
+ if(currentSchema == null){
+ if(newSchema != null){
+ return false;
+ }
+ return true;
+ }
+ return currentSchema.equals(newSchema);
+ }
+
+
+}
+