You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by da...@apache.org on 2016/11/09 18:01:33 UTC

svn commit: r1769005 - in /pig/branches/branch-0.16: ./ src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/ src/org/apache/pig/data/ test/org/apache/pig/impl/builtin/

Author: daijy
Date: Wed Nov  9 18:01:33 2016
New Revision: 1769005

URL: http://svn.apache.org/viewvc?rev=1769005&view=rev
Log:
PIG-5048: HiveUDTF fails if it is the first expression in projection

Added:
    pig/branches/branch-0.16/test/org/apache/pig/impl/builtin/TestHiveUDTF.java
Modified:
    pig/branches/branch-0.16/ivy.xml
    pig/branches/branch-0.16/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POForEach.java
    pig/branches/branch-0.16/src/org/apache/pig/data/UnlimitedNullTuple.java

Modified: pig/branches/branch-0.16/ivy.xml
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.16/ivy.xml?rev=1769005&r1=1769004&r2=1769005&view=diff
==============================================================================
--- pig/branches/branch-0.16/ivy.xml (original)
+++ pig/branches/branch-0.16/ivy.xml Wed Nov  9 18:01:33 2016
@@ -427,6 +427,8 @@
       conf="compile->master" />
     <dependency org="org.apache.hive.shims" name="hive-shims-common" rev="${hive.version}" changing="true"
       conf="compile->master" />
+    <dependency org="org.apache.hive" name="hive-contrib" rev="${hive.version}" changing="true"
+                conf="test->master" />
     <dependency org="org.apache.hive.shims" name="hive-shims-0.23" rev="${hive.version}" changing="true"
       conf="hadoop23->master" />
     <dependency org="org.apache.hive.shims" name="hive-shims-0.20S" rev="${hive.version}" changing="true"

Modified: pig/branches/branch-0.16/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POForEach.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.16/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POForEach.java?rev=1769005&r1=1769004&r2=1769005&view=diff
==============================================================================
--- pig/branches/branch-0.16/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POForEach.java (original)
+++ pig/branches/branch-0.16/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POForEach.java Wed Nov  9 18:01:33 2016
@@ -55,6 +55,7 @@ import org.apache.pig.pen.util.LineageTr
 @SuppressWarnings("unchecked")
 public class POForEach extends PhysicalOperator {
     private static final long serialVersionUID = 1L;
+    private static final Result UNLIMITED_NULL_RESULT = new Result(POStatus.STATUS_OK, new UnlimitedNullTuple());
 
     protected List<PhysicalPlan> inputPlans;
 
@@ -264,7 +265,7 @@ public class POForEach extends PhysicalO
                 if (inp.returnStatus == POStatus.STATUS_EOP) {
                     if (parentPlan!=null && parentPlan.endOfAllInput && !endOfAllInputProcessed && endOfAllInputProcessing) {
                         // continue pull one more output
-                        inp = new Result(POStatus.STATUS_OK, new UnlimitedNullTuple());
+                        inp = UNLIMITED_NULL_RESULT;
                     } else {
                         return inp;
                     }

Modified: pig/branches/branch-0.16/src/org/apache/pig/data/UnlimitedNullTuple.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.16/src/org/apache/pig/data/UnlimitedNullTuple.java?rev=1769005&r1=1769004&r2=1769005&view=diff
==============================================================================
--- pig/branches/branch-0.16/src/org/apache/pig/data/UnlimitedNullTuple.java (original)
+++ pig/branches/branch-0.16/src/org/apache/pig/data/UnlimitedNullTuple.java Wed Nov  9 18:01:33 2016
@@ -28,7 +28,7 @@ public class UnlimitedNullTuple extends
 
     @Override
     public int size() {
-        throw new RuntimeException("Unimplemented");
+        return Integer.MAX_VALUE; // report the largest possible field count instead of throwing, so callers that ask for the size no longer fail
     }
 
     @Override

Added: pig/branches/branch-0.16/test/org/apache/pig/impl/builtin/TestHiveUDTF.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.16/test/org/apache/pig/impl/builtin/TestHiveUDTF.java?rev=1769005&view=auto
==============================================================================
--- pig/branches/branch-0.16/test/org/apache/pig/impl/builtin/TestHiveUDTF.java (added)
+++ pig/branches/branch-0.16/test/org/apache/pig/impl/builtin/TestHiveUDTF.java Wed Nov  9 18:01:33 2016
@@ -0,0 +1,117 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pig.impl.builtin;
+
+import org.apache.commons.collections4.IteratorUtils;
+import org.apache.pig.ExecType;
+import org.apache.pig.PigServer;
+import org.apache.pig.backend.executionengine.ExecException;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.test.MiniGenericCluster;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.List;
+
+import static org.apache.pig.builtin.mock.Storage.*;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+public class TestHiveUDTF { // tests for Hive UDTFs invoked from Pig scripts via "define ... HiveUDTF(...)"
+    private static PigServer pigServer = null; // shared by all tests; created once in oneTimeSetup()
+    private static MiniGenericCluster cluster = MiniGenericCluster.buildCluster(); // NOTE(review): built at class load and only shut down below; the tests run on ExecType.LOCAL and never reference it — confirm it is needed
+
+    @BeforeClass
+    public static void oneTimeSetup() throws ExecException { // runs once before any test in this class
+        pigServer = new PigServer(ExecType.LOCAL); // all queries in this class execute in local mode
+    }
+
+    @AfterClass
+    public static void oneTimeTearDown() throws Exception { // runs once after all tests in this class
+        cluster.shutDown();
+    }
+
+    @Test
+    public void testHiveUDTFOnBagInput() throws IOException { // UDTF is the only (and therefore first) expression in the projection
+        Data data = resetData(pigServer); // presumably clears mock.Storage state left by earlier tests — TODO confirm
+
+        Tuple tuple = tuple(bag(tuple("a"), tuple("b"), tuple("c"))); // a single input row whose only field is a bag of three one-field tuples
+
+        data.set("TestHiveUDTF", tuple); // stored under the same location name the load statement below reads
+
+        pigServer.registerQuery("define posexplode HiveUDTF('posexplode');");
+        pigServer.registerQuery("A = load 'TestHiveUDTF' USING mock.Storage() as (a0:{(b0:chararray)});");
+        pigServer.registerQuery("B = foreach A generate posexplode(a0);");
+
+        Iterator<Tuple> result = pigServer.openIterator("B");
+        List<Tuple> out = IteratorUtils.toList(result); // materialize so the rows can be counted and searched
+
+        assertEquals(2, out.size()); // one input row yields two output rows; presumably the second comes from the end-of-input pull — TODO confirm
+        assertTrue("Result doesn't contain the HiveUDTF output",
+                out.contains(tuple(bag(tuple(0, "a"), tuple(1, "b"), tuple(2, "c"))))); // each element paired with its 0-based position
+        assertTrue("Result doesn't contain an empty bag",
+                out.contains(tuple(bag()))); // the extra row holds an empty bag
+    }
+
+    @Test
+    public void testHiveUDTFOnBagInputWithTwoProjection() throws IOException { // same input as above, but a plain field is projected before the UDTF
+        Data data = resetData(pigServer); // presumably clears mock.Storage state left by earlier tests — TODO confirm
+
+        Tuple tuple = tuple(bag(tuple("a"), tuple("b"), tuple("c"))); // a single input row whose only field is a bag of three one-field tuples
+
+        data.set("TestHiveUDTF", tuple); // stored under the same location name the load statement below reads
+
+        pigServer.registerQuery("define posexplode HiveUDTF('posexplode');");
+        pigServer.registerQuery("A = load 'TestHiveUDTF' USING mock.Storage() as (a0:{(b0:chararray)});");
+        pigServer.registerQuery("B = foreach A generate a0, posexplode(a0);");
+
+        Iterator<Tuple> result = pigServer.openIterator("B");
+        List<Tuple> out = IteratorUtils.toList(result); // materialize so the rows can be counted and searched
+
+        assertEquals(2, out.size()); // again two rows for one input row
+        assertTrue("Result doesn't contain the HiveUDTF output",
+                out.contains(tuple(bag(tuple("a"), tuple("b"), tuple("c")), bag(tuple(0, "a"), tuple(1, "b"), tuple(2, "c"))))); // original bag followed by the UDTF output
+        assertTrue("Result doesn't contain an empty bag",
+                out.contains(tuple(null, bag()))); // in the extra row the plain projection a0 is null and the UDTF column is an empty bag
+    }
+
+    @Test
+    public void testHiveUDTFOnClose() throws IOException { // uses a UDTF from the hive-contrib examples package
+        Data data = resetData(pigServer); // presumably clears mock.Storage state left by earlier tests — TODO confirm
+
+        List<Tuple> tuples = Arrays.asList(tuple("a", 1), tuple("a", 2), tuple("a", 3)); // three rows sharing the same name
+
+        data.set("TestHiveUDTF", tuples); // stored under the same location name the load statement below reads
+
+        pigServer.registerQuery("define COUNT2 HiveUDTF('org.apache.hadoop.hive.contrib.udtf.example.GenericUDTFCount2');");
+        pigServer.registerQuery("a = load 'TestHiveUDTF' USING mock.Storage() as (name:chararray, id:int);");
+        pigServer.registerQuery("b = foreach a generate flatten(COUNT2(name));");
+
+        Iterator<Tuple> result = pigServer.openIterator("b");
+        List<Tuple> out = IteratorUtils.toList(result); // materialize so rows can be accessed by index
+
+        assertEquals(2, out.size()); // two output rows for three input rows
+        assertEquals(tuple(3), out.get(0)); // each row carries 3, matching the number of input rows; presumably emitted when the UDTF is closed — TODO confirm
+        assertEquals(tuple(3), out.get(1));
+    }
+
+}