You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by da...@apache.org on 2012/02/03 01:36:29 UTC
svn commit: r1239955 - in /pig/trunk: CHANGES.txt
test/e2e/pig/tests/nightly.conf test/e2e/pig/udfs/python/morepythonudfs.py
test/e2e/pig/udfs/python/scriptingudf.py
test/org/apache/pig/test/TestScriptUDF.java
Author: daijy
Date: Fri Feb 3 00:36:28 2012
New Revision: 1239955
URL: http://svn.apache.org/viewvc?rev=1239955&view=rev
Log:
PIG-2488: Move Python unit tests to e2e tests
Added:
pig/trunk/test/e2e/pig/udfs/python/morepythonudfs.py
Modified:
pig/trunk/CHANGES.txt
pig/trunk/test/e2e/pig/tests/nightly.conf
pig/trunk/test/e2e/pig/udfs/python/scriptingudf.py
pig/trunk/test/org/apache/pig/test/TestScriptUDF.java
Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1239955&r1=1239954&r2=1239955&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Fri Feb 3 00:36:28 2012
@@ -24,6 +24,8 @@ INCOMPATIBLE CHANGES
IMPROVEMENTS
+PIG-2488: Move Python unit tests to e2e tests (alangates via daijy)
+
PIG-2456: Pig should have a pigrc to specify default script cache (prkommireddi via daijy)
PIG-2496: Cache resolved classes in PigContext (dvryaboy)
Modified: pig/trunk/test/e2e/pig/tests/nightly.conf
URL: http://svn.apache.org/viewvc/pig/trunk/test/e2e/pig/tests/nightly.conf?rev=1239955&r1=1239954&r2=1239955&view=diff
==============================================================================
--- pig/trunk/test/e2e/pig/tests/nightly.conf (original)
+++ pig/trunk/test/e2e/pig/tests/nightly.conf Fri Feb 3 00:36:28 2012
@@ -3038,7 +3038,7 @@ store y into ':OUTPATH:';\,
'name' => 'Scripting',
'tests' => [
{
- # test common
+ # test integer square
'num' => 1,
'ignore23' => 'MAPREDUCE-3700',
'pig' => q\
@@ -3046,21 +3046,129 @@ register ':SCRIPTHOMEPATH:/python/script
a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age:int, gpa:double);
b = foreach a generate myfuncs.square(age);
store b into ':OUTPATH:';\,
+ 'verify_pig_script' => q\
+a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age:int, gpa:double);
+b = foreach a generate age * age;
+store b into ':OUTPATH:';\,
},
{
- # test common
+ # test string concat and referencing function without a namespace
'num' => 2,
'ignore23' => 'MAPREDUCE-3700',
'pig' => q\
+register ':SCRIPTHOMEPATH:/python/scriptingudf.py' using jython;
+a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name:chararray, age, gpa);
+b = foreach a generate concat(name) as name;
+store b into ':OUTPATH:';\,
+ 'verify_pig_script' => q\
+a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name:chararray, age:int, gpa:double);
+b = foreach a generate CONCAT(name, name);
+store b into ':OUTPATH:';\,
+ },
+ {
+ # test long and double square, plus two references to the same UDF with different schemas
+ 'num' => 3,
+ 'ignore23' => 'MAPREDUCE-3700',
+ 'pig' => q\
+register ':SCRIPTHOMEPATH:/python/scriptingudf.py' using jython as myfuncs;
+a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age:long, gpa:double);
+b = foreach a generate myfuncs.square(age), myfuncs.square(gpa);
+store b into ':OUTPATH:';\,
+ 'verify_pig_script' => q\
+a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age:int, gpa:double);
+b = foreach a generate age * age, gpa * gpa;
+store b into ':OUTPATH:';\,
+ 'floatpostprocess' => 1,
+ 'delimiter' => ' ',
+ },
+ {
+ # test method with no schema decorator (ie, returns bytearray)
+ 'num' => 4,
+ 'ignore23' => 'MAPREDUCE-3700',
+ 'pig' => q\
register ':SCRIPTHOMEPATH:/python/scriptingudf.py' using jython as myfuncs;
+a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name:chararray, age, gpa);
+b = foreach a generate myfuncs.byteconcat(name);
+store b into ':OUTPATH:';\,
+ 'verify_pig_script' => q\
a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
-b = foreach a generate (chararray)name;
-c = distinct b;
-d = foreach c generate myfuncs.concat(name) as name;
-e = order d by name;
-store e into ':OUTPATH:';\,
- 'sortArgs' => ['-t', ' '],
+b = foreach a generate CONCAT(name, name);
+store b into ':OUTPATH:';\,
+ },
+ {
+ # test complex types (map, tuple, bag) as UDF input and output
+ 'num' => 5,
+ 'ignore23' => 'MAPREDUCE-3700',
+ 'pig' => q\
+register ':SCRIPTHOMEPATH:/python/scriptingudf.py' using jython as myfuncs;
+a = load ':INPATH:/singlefile/studentcomplextab10k' using PigStorage() as (m:[], t:(name:chararray, age:int, gpa:double), b:{t:(name:chararray, age:int, gpa:double)});
+b = foreach a generate flatten(myfuncs.complexTypes(m, t, b)) as (mm, mt, mb);
+c = foreach b generate mm#'name', mt.$0, mb.$0;
+store c into ':OUTPATH:';\,
+ 'verify_pig_script' => q\
+a = load ':INPATH:/singlefile/studentcomplextab10k' using PigStorage() as (m:[], t:(name:chararray, age:int, gpa:double), b:{t:(name:chararray, age:int, gpa:double)});
+b = foreach a generate SIZE(m#'name'), t.$2, b.$2;
+store b into ':OUTPATH:';\,
},
+ {
+ # test null input and output
+ 'num' => 6,
+ 'ignore23' => 'MAPREDUCE-3700',
+ 'pig' => q\
+register ':SCRIPTHOMEPATH:/python/scriptingudf.py' using jython as myfuncs;
+a = load ':INPATH:/singlefile/studentnulltab10k' using PigStorage() as (name, age:int, gpa:double);
+b = foreach a generate myfuncs.square(age);
+store b into ':OUTPATH:';\,
+ 'verify_pig_script' => q\
+a = load ':INPATH:/singlefile/studentnulltab10k' using PigStorage() as (name, age:int, gpa:double);
+b = foreach a generate age * age;
+store b into ':OUTPATH:';\,
+ },
+ {
+ # test functions that call other functions and include other files
+ 'num' => 7,
+ 'ignore23' => 'MAPREDUCE-3700',
+ 'pig' => q\
+register ':SCRIPTHOMEPATH:/python/scriptingudf.py' using jython as myfuncs;
+a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age:int, gpa:double);
+b = foreach a generate myfuncs.redirect(age);
+store b into ':OUTPATH:';\,
+ 'verify_pig_script' => q\
+a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age:int, gpa:double);
+b = foreach a generate age * age;
+store b into ':OUTPATH:';\,
+ },
+ {
+ # test that functions with same names resolve correctly across name spaces
+ 'num' => 8,
+ 'ignore23' => 'MAPREDUCE-3700',
+ 'pig' => q\
+register ':SCRIPTHOMEPATH:/python/scriptingudf.py' using jython as myfuncs;
+register ':SCRIPTHOMEPATH:/python/morepythonudfs.py' using jython as morefuncs;
+a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age:int, gpa:double);
+b = foreach a generate myfuncs.square(age), morefuncs.square(age);
+store b into ':OUTPATH:';\,
+ 'verify_pig_script' => q\
+a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age:int, gpa:double);
+b = foreach a generate age * age, age * age * age;
+store b into ':OUTPATH:';\,
+ },
+ {
+ # test a UDF that takes a bag as input (count over a grouped relation)
+ 'num' => 9,
+ 'ignore23' => 'MAPREDUCE-3700',
+ 'pig' => q\
+register ':SCRIPTHOMEPATH:/python/scriptingudf.py' using jython as myfuncs;
+a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age:int, gpa:double);
+b = group a by name;
+c = foreach b generate group, myfuncs.count(a);
+store c into ':OUTPATH:';\,
+ 'verify_pig_script' => q\
+a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age:int, gpa:double);
+b = group a by name;
+c = foreach b generate group, COUNT(a);
+store c into ':OUTPATH:';\,
+ }
]
},
{
Added: pig/trunk/test/e2e/pig/udfs/python/morepythonudfs.py
URL: http://svn.apache.org/viewvc/pig/trunk/test/e2e/pig/udfs/python/morepythonudfs.py?rev=1239955&view=auto
==============================================================================
--- pig/trunk/test/e2e/pig/udfs/python/morepythonudfs.py (added)
+++ pig/trunk/test/e2e/pig/udfs/python/morepythonudfs.py Fri Feb 3 00:36:28 2012
@@ -0,0 +1,25 @@
+#!/usr/bin/python
+
+############################################################################
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from org.apache.hadoop.fs import * # Test for PIG-1824
+p = Path('foo')
+
+@outputSchema("notsquare:int")
+def square(num):
+ return num * num * num
+
Modified: pig/trunk/test/e2e/pig/udfs/python/scriptingudf.py
URL: http://svn.apache.org/viewvc/pig/trunk/test/e2e/pig/udfs/python/scriptingudf.py?rev=1239955&r1=1239954&r2=1239955&view=diff
==============================================================================
--- pig/trunk/test/e2e/pig/udfs/python/scriptingudf.py (original)
+++ pig/trunk/test/e2e/pig/udfs/python/scriptingudf.py Fri Feb 3 00:36:28 2012
@@ -1,23 +1,27 @@
#!/usr/bin/python
-############################################################################
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
+############################################################################
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from org.apache.hadoop.fs import Path # Test for PIG-1824
+p = Path('foo')
+
@outputSchemaFunction("squareSchema")
def square(num):
+ if num == None:
+ return None
return ((num)*(num))
@schemaFunction("squareSchema")
@@ -27,3 +31,41 @@ def squareSchema(input):
@outputSchema("word:chararray")
def concat(word):
return word + word
+
+def byteconcat(word):
+ return word + word
+
+@outputSchema("outm:[], outt:(name:chararray, age:int, gpa:double), outb:{t:(name:chararray, age:int, gpa:double)}")
+def complexTypes(m, t, b):
+ outm = {}
+ if m == None:
+ outm = None
+ else:
+ for k, v in m.iteritems():
+ outm[k] = len(v)
+
+ outb = []
+ if b == None:
+ outb = None
+ else:
+ for r in b:
+ tup = (r[2], r[1], r[0])
+ outb.append(tup)
+
+ if t == None:
+ outt = None
+ else:
+ outt = (t[2], t[1], t[0])
+
+ return (outm, outt, outb)
+
+@outputSchemaFunction("squareSchema")
+def redirect(num):
+ return square(num)
+
+@outputSchema("cnt:long")
+def count(bag):
+ cnt = 0
+ for r in bag:
+ cnt += 1
+ return cnt
Modified: pig/trunk/test/org/apache/pig/test/TestScriptUDF.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestScriptUDF.java?rev=1239955&r1=1239954&r2=1239955&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/TestScriptUDF.java (original)
+++ pig/trunk/test/org/apache/pig/test/TestScriptUDF.java Fri Feb 3 00:36:28 2012
@@ -23,7 +23,6 @@ import java.util.Map;
import java.util.Random;
import junit.framework.Assert;
-import junit.framework.TestCase;
import org.apache.pig.ExecType;
import org.apache.pig.PigServer;
@@ -57,64 +56,6 @@ public class TestScriptUDF{
// See PIG-928
@Test
- public void testPythonStandardScript() throws Exception{
- String[] script = {
- "#!/usr/bin/python",
- "@outputSchema(\"x:{t:(num:long)}\")",
- "def square(number):" ,
- "\treturn (number * number)"
- };
- String[] input = {
- "1\t3",
- "2\t4",
- "3\t5"
- };
-
- Util.createInputFile(cluster, "table_testPythonStandardScript", input);
- Util.createLocalInputFile( "testPythonStandardScript.py", script);
-
- // Test the namespace
- pigServer.registerCode("testPythonStandardScript.py", "jython", "pig");
- pigServer.registerQuery("A = LOAD 'table_testPythonStandardScript' as (a0:long, a1:long);");
- pigServer.registerQuery("B = foreach A generate pig.square(a0);");
-
- pigServer.registerCode("testPythonStandardScript.py", "jython", null);
- pigServer.registerQuery("C = foreach A generate square(a0);");
-
- Iterator<Tuple> iter = pigServer.openIterator("B");
- Assert.assertTrue(iter.hasNext());
- Tuple t = iter.next();
-
- Assert.assertTrue(t.toString().equals("(1)"));
-
- Assert.assertTrue(iter.hasNext());
- t = iter.next();
-
- Assert.assertTrue(t.toString().equals("(4)"));
-
- Assert.assertTrue(iter.hasNext());
- t = iter.next();
-
- Assert.assertTrue(t.toString().equals("(9)"));
-
- iter = pigServer.openIterator("C");
- Assert.assertTrue(iter.hasNext());
- t = iter.next();
-
- Assert.assertTrue(t.toString().equals("(1)"));
-
- Assert.assertTrue(iter.hasNext());
- t = iter.next();
-
- Assert.assertTrue(t.toString().equals("(4)"));
-
- Assert.assertTrue(iter.hasNext());
- t = iter.next();
-
- Assert.assertTrue(t.toString().equals("(9)"));
- }
-
- @Test
public void testJavascriptExampleScript() throws Exception{
String[] script = {
"helloworld.outputSchema = \"word:chararray\";",
@@ -158,288 +99,7 @@ public class TestScriptUDF{
}
- // See PIG-928
- @Test
- public void testPythonScriptWithSchemaFunction() throws Exception{
- String[] script = {
- "#!/usr/bin/python",
- "@outputSchemaFunction(\"squareSchema\")",
- "def square(number):" ,
- "\treturn (number * number)\n",
- "@schemaFunction(\"square\")",
- "def squareSchema(input):",
- "\treturn input "
- };
- String[] input = {
- "1\t3.0",
- "2\t4.0",
- "3\t5.0"
- };
-
- Util.createInputFile(cluster, "table_testPythonScriptWithSchemaFunction", input);
- Util.createLocalInputFile( "testPythonScriptWithSchemaFunction.py", script);
-
- // Test the namespace
- pigServer.registerCode("testPythonScriptWithSchemaFunction.py", "jython", "pig");
- pigServer.registerQuery("A = LOAD 'table_testPythonScriptWithSchemaFunction' as (a0:int, a1:double);");
- pigServer.registerQuery("B = foreach A generate pig.square(a0);");
-
- pigServer.registerCode("testPythonScriptWithSchemaFunction.py", "jython", null);
- pigServer.registerQuery("C = foreach A generate square(a1);");
-
- Iterator<Tuple> iter = pigServer.openIterator("B");
- Assert.assertTrue(iter.hasNext());
- Tuple t = iter.next();
-
- Assert.assertTrue(t.toString().equals("(1)"));
-
- Assert.assertTrue(iter.hasNext());
- t = iter.next();
-
- Assert.assertTrue(t.toString().equals("(4)"));
-
- Assert.assertTrue(iter.hasNext());
- t = iter.next();
-
- Assert.assertTrue(t.toString().equals("(9)"));
-
- // The same python function will operate on double and try to get square of double
- // Since these are small double numbers we do not need to use delta to test the results
- iter = pigServer.openIterator("C");
- Assert.assertTrue(iter.hasNext());
- t = iter.next();
-
- Assert.assertTrue(t.toString().equals("(9.0)"));
-
- Assert.assertTrue(iter.hasNext());
- t = iter.next();
-
- Assert.assertTrue(t.toString().equals("(16.0)"));
-
- Assert.assertTrue(iter.hasNext());
- t = iter.next();
-
- Assert.assertTrue(t.toString().equals("(25.0)"));
- }
-
- // See PIG-928
- @Test
- public void testPythonScriptUDFNoDecorator() throws Exception{
- String[] script = {
- "#!/usr/bin/python",
- // No decorator means schema is null - bytearray...
- "def concat(word):" ,
- "\treturn word + word"
- };
- String[] input = {
- "hello\t1",
- "pig\t2",
- "world\t3"
- };
-
- Util.createInputFile(cluster, "table_testPythonScriptUDFNoDecorator", input);
- Util.createLocalInputFile( "testPythonScriptUDFNoDecorator.py", script);
-
- pigServer.registerCode("testPythonScriptUDFNoDecorator.py", "jython", "pig");
- pigServer.registerQuery("A = LOAD 'table_testPythonScriptUDFNoDecorator' as (a0, a1:int);");
- pigServer.registerQuery("B = foreach A generate pig.concat(a0);");
-
- Iterator<Tuple> iter = pigServer.openIterator("B");
- Assert.assertTrue(iter.hasNext());
- Tuple t = iter.next();
-
- // We need to check whether this is a DataByteArray or fail otherwise
- if(!(t.get(0) instanceof DataByteArray)) {
- Assert.fail("Default return type should be bytearray");
- }
-
- Assert.assertTrue(t.get(0).toString().trim().equals("hellohello"));
-
- Assert.assertTrue(iter.hasNext());
- t = iter.next();
-
- Assert.assertTrue(t.get(0).toString().trim().equals("pigpig"));
-
- Assert.assertTrue(iter.hasNext());
- t = iter.next();
-
- Assert.assertTrue(t.get(0).toString().trim().equals("worldworld"));
- }
-
- @Test
- public void testPythonScriptUDFBagInput() throws Exception{
- String[] script = {
- "#!/usr/bin/python",
- "@outputSchema(\"bag:{(y:{t:(len:int,word:chararray)})}\")",
- "def collect(bag):" ,
- "\toutBag = []",
- "\tfor word in bag:",
- // We need to wrap word inside a tuple for pig
- "\t\ttup=(len(bag), word[1])",
- "\t\toutBag.append(tup)",
- "\treturn outBag"
- };
- String[] input = {
- "1\thello",
- "2\tpig",
- "1\tworld",
- "1\tprogram",
- "2\thadoop"
- };
-
- Util.createInputFile(cluster, "table_testPythonScriptUDFBagInput", input);
- Util.createLocalInputFile( "testPythonScriptUDFBagInput.py", script);
-
- pigServer.registerCode("testPythonScriptUDFBagInput.py", "jython", "pig");
- pigServer.registerQuery("A = LOAD 'table_testPythonScriptUDFBagInput' as (a0:int, a1:chararray);");
- pigServer.registerQuery("B = group A by a0;");
- pigServer.registerQuery("C = foreach B generate pig.collect(A);");
-
- Iterator<Tuple> iter = pigServer.openIterator("C");
-
- String[] expected = new String[] {
- "({(3,hello),(3,world),(3,program)})",
- "({(2,hadoop),(2,pig)})"
- };
- Util.checkQueryOutputsAfterSortRecursive(iter, expected, "y: {(len:int, word:chararray)}");
- }
-
- @Test
- public void testPythonScriptUDFMapInput() throws Exception{
- String[] script = {
- "#!/usr/bin/python",
- "@outputSchema(\"bag:{(y:{t:(word:chararray)})}\")",
- "def maptobag(map):" ,
- "\toutBag = []",
- "\tfor k, v in map.iteritems():",
- // We need to wrap word inside a tuple for pig
- "\t\ttup = (k, v)",
- "\t\toutBag.append(tup)",
- "\treturn outBag"
- };
- String[] input = {
- "[1#hello,2#world]",
- "[3#pig,4#rocks]",
- };
-
- Util.createInputFile(cluster, "table_testPythonScriptUDFMapInput", input);
- Util.createLocalInputFile( "testPythonScriptUDFMapInput.py", script);
-
- pigServer.registerCode("testPythonScriptUDFMapInput.py", "jython", "pig");
- pigServer.registerQuery("A = LOAD 'table_testPythonScriptUDFMapInput' as (a0:map[]);");
- pigServer.registerQuery("B = foreach A generate pig.maptobag(a0);");
-
- Iterator<Tuple> iter = pigServer.openIterator("B");
- Assert.assertTrue(iter.hasNext());
- Tuple t = iter.next();
-
- DataBag bag;
- Tuple tup;
- bag = BagFactory.getInstance().newDefaultBag();
- tup = TupleFactory.getInstance().newTuple();
- tup.append(1);
- tup.append("hello");
- bag.add(tup);
- tup = TupleFactory.getInstance().newTuple();
- tup.append(2);
- tup.append("world");
- bag.add(tup);
- Assert.assertTrue(t.get(0).toString().equals(bag.toString()));
-
- Assert.assertTrue(iter.hasNext());
- t = iter.next();
- tup = TupleFactory.getInstance().newTuple();
- tup.append(3);
- tup.append("pig");
- Assert.assertTrue(t.toString().contains(tup.toString()));
-
- tup = TupleFactory.getInstance().newTuple();
- tup.append(4);
- tup.append("rocks");
- Assert.assertTrue(t.toString().contains(tup.toString()));
-
- Assert.assertFalse(iter.hasNext());
-
- }
-
- @Test
- public void testPythonScriptUDFMapOutput() throws Exception{
- String[] script = {
- "#!/usr/bin/python",
- "@outputSchema(\"mapint:[]\")",
- "def maptomapint(map):" ,
- "\toutMap = {}",
- "\tfor k, v in map.iteritems():",
- "\t\toutMap[k] = len(v)",
- "\treturn outMap"
- };
- String[] input = {
- "[1#hello,2#world]",
- "[3#pig,4#rocks]",
- };
-
- Util.createInputFile(cluster, "table_testPythonScriptUDFMapOutput", input);
- Util.createLocalInputFile( "testPythonScriptUDFMapOutput.py", script);
-
- pigServer.registerCode("testPythonScriptUDFMapOutput.py", "jython", "pig");
- pigServer.registerQuery("A = LOAD 'table_testPythonScriptUDFMapOutput' as (a0:map[]);");
- pigServer.registerQuery("B = foreach A generate pig.maptomapint(a0);");
-
- Iterator<Tuple> iter = pigServer.openIterator("B");
- Assert.assertTrue(iter.hasNext());
- Tuple t = iter.next();
-
- Assert.assertEquals(5, ((Map<?,?>)t.get(0)).get("1"));
- Assert.assertEquals(5, ((Map<?,?>)t.get(0)).get("2"));
-
- Assert.assertTrue(iter.hasNext());
- t = iter.next();
- Assert.assertEquals(3, ((Map<?,?>)t.get(0)).get("3"));
- Assert.assertEquals(5, ((Map<?,?>)t.get(0)).get("4"));
-
- Assert.assertFalse(iter.hasNext());
-
- }
-
- @Test
- public void testPythonScriptUDFNullInputOutput() throws Exception {
- String[] script = {
- "#!/usr/bin/python",
- "@outputSchema(\"bag:{(y:{t:(word:chararray)})}\")",
- "def multStr(cnt, str):" ,
- "\tif cnt != None and str != None:",
- "\t\treturn cnt * str",
- "\telse:",
- "\t\treturn None"
- };
- String[] input = {
- "3\thello",
- // Null input
- "\tworld",
- };
-
- Util.createInputFile(cluster, "table_testPythonScriptUDFNullInputOutput", input);
- Util.createLocalInputFile( "testPythonScriptUDFNullInputOutput.py", script);
-
- pigServer.registerCode("testPythonScriptUDFNullInputOutput.py", "jython", "pig");
- pigServer.registerQuery("A = LOAD 'table_testPythonScriptUDFNullInputOutput' as (a0:int, a1:chararray);");
- pigServer.registerQuery("B = foreach A generate pig.multStr(a0, a1);");
-
- Iterator<Tuple> iter = pigServer.openIterator("B");
- Assert.assertTrue(iter.hasNext());
- Tuple t = iter.next();
-
- Assert.assertTrue(t.get(0).toString().equals("hellohellohello"));
-
- Assert.assertTrue(iter.hasNext());
- t = iter.next();
-
- // UDF takes null and returns null
- Assert.assertTrue(t.get(0) == null);
-
- }
-
- // See Pig-1653
+ // See Pig-1653 -- left here because we can't force absolute paths in e2e harness
@Test
public void testPythonAbsolutePath() throws Exception{
String[] script = {
@@ -482,49 +142,6 @@ public class TestScriptUDF{
}
/** See Pig-1824
- * test import of wildcarded java classes, this will not work unless
- * jython is configured with a valid cachedir, which is what this tests.
- * @throws Exception
- */
- @Test
- public void testPythonWilcardImport() throws Exception {
- // hadoop.fs.Path is in the classpath (always)
- String[] script = {
- "#!/usr/bin/python",
- "from org.apache.hadoop.fs import *",
- "p = Path('foo')",
- "@outputSchema(\"word:chararray\")",
- "def first(content):",
- " return content.split(' ')[0]"
- };
- String[] input = {
- "words words words",
- "talk talk talk"
- };
-
- Util.createInputFile(cluster, "table_testPythonWildcardImport", input);
- File scriptFile = Util.createLocalInputFile( "script.py", script);
-
- // Test the namespace
- pigServer.registerCode(scriptFile.getAbsolutePath(), "jython", "pig");
- pigServer.registerQuery("A = LOAD 'table_testPythonWildcardImport' as (a:chararray);");
- pigServer.registerQuery("B = foreach A generate pig.first(a);");
-
- Iterator<Tuple> iter = pigServer.openIterator("B");
- Assert.assertTrue(iter.hasNext());
- Tuple t = iter.next();
-
- Assert.assertTrue(t.toString().equals("(words)"));
-
- Assert.assertTrue(iter.hasNext());
- t = iter.next();
-
- Assert.assertTrue(t.toString().equals("(talk)"));
-
- Assert.assertFalse(iter.hasNext());
- }
-
- /** See Pig-1824
* test importing a second module/file from the local fs from within
* the first module.
*
@@ -533,6 +150,9 @@ public class TestScriptUDF{
* for "import re".
* to use a jython install, the Lib dir must be in the jython search path
* via env variable JYTHON_HOME=jy_home or JYTHON_PATH=jy_home/Lib:...
+ *
+ * Left in for now as we don't have paths to include other scripts in a
+ * script in the e2e harness.
*
* @throws Exception
*/