You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by da...@apache.org on 2015/11/02 21:07:11 UTC

svn commit: r1712138 - in /pig/branches/branch-0.15: CHANGES.txt src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POUserFunc.java test/e2e/pig/tests/nightly.conf

Author: daijy
Date: Mon Nov  2 20:07:11 2015
New Revision: 1712138

URL: http://svn.apache.org/viewvc?rev=1712138&view=rev
Log:
PIG-4712: [Pig on Tez] NPE in Bloom UDF after Union

Modified:
    pig/branches/branch-0.15/CHANGES.txt
    pig/branches/branch-0.15/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POUserFunc.java
    pig/branches/branch-0.15/test/e2e/pig/tests/nightly.conf

Modified: pig/branches/branch-0.15/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.15/CHANGES.txt?rev=1712138&r1=1712137&r2=1712138&view=diff
==============================================================================
--- pig/branches/branch-0.15/CHANGES.txt (original)
+++ pig/branches/branch-0.15/CHANGES.txt Mon Nov  2 20:07:11 2015
@@ -28,6 +28,8 @@ OPTIMIZATIONS
 
 BUG FIXES
 
+PIG-4712: [Pig on Tez] NPE in Bloom UDF after Union (rohini)
+
 PIG-4707: [Pig on Tez] Streaming job hangs with pig.exec.mapPartAgg=true (rohini)
 
 PIG-4679: Performance degradation due to InputSizeReducerEstimator since PIG-3754 (daijy)

Modified: pig/branches/branch-0.15/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POUserFunc.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.15/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POUserFunc.java?rev=1712138&r1=1712137&r2=1712138&view=diff
==============================================================================
--- pig/branches/branch-0.15/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POUserFunc.java (original)
+++ pig/branches/branch-0.15/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POUserFunc.java Mon Nov  2 20:07:11 2015
@@ -566,6 +566,8 @@ public class POUserFunc extends Expressi
             requestedParallelism, null, funcSpec.clone());
         clone.setResultType(resultType);
         clone.signature = signature;
+        clone.cacheFiles = cacheFiles;
+        clone.shipFiles = shipFiles;
         return clone;
     }
 

Modified: pig/branches/branch-0.15/test/e2e/pig/tests/nightly.conf
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.15/test/e2e/pig/tests/nightly.conf?rev=1712138&r1=1712137&r2=1712138&view=diff
==============================================================================
--- pig/branches/branch-0.15/test/e2e/pig/tests/nightly.conf (original)
+++ pig/branches/branch-0.15/test/e2e/pig/tests/nightly.conf Mon Nov  2 20:07:11 2015
@@ -5139,6 +5139,30 @@ store C into ':OUTPATH:';\,
                                 C = load ':INPATH:/singlefile/votertab10k'as (name:chararray, age:int, reg:chararray, contrib:float);
                                 D = join C by name, B by name;
                                 store D into ':OUTPATH:';",
+                    },{
+                        'num' => 4,
+                        'pig' => "set pig.optimizer.rules.disabled PushUpFilter;
+                                define bb BuildBloom('Hash.JENKINS_HASH', 'fixed', '128', '3');
+                                A = LOAD ':INPATH:/singlefile/studenttab10k' AS (name:chararray, age:int, gpa:double);
+                                B = filter A by name == 'alice allen';
+                                C = group B all;
+                                D = foreach C generate bb(B.name);
+                                store D into ':HDFSTMP:/mybloom_4';
+                                exec;
+                                define bloom Bloom(':HDFSTMP:/mybloom_4');
+                                E = LOAD ':INPATH:/singlefile/studenttab10k' AS (name:chararray, age:int, gpa:double);
+                                F = LOAD ':INPATH:/singlefile/studenttab10k' AS (name:chararray, age:int, gpa:double);
+                                G = union E, F;
+                                -- PushUpFilter is disabled to avoid filter being pushed before union
+                                H = filter G by bloom(name);
+                                store H into ':OUTPATH:';",
+                        'notmq' => 1,
+                        'verify_pig_script' => "
+                                A = LOAD ':INPATH:/singlefile/studenttab10k' AS (name, age:int ,gpa:double);
+                                B = LOAD ':INPATH:/singlefile/studenttab10k' AS (name, age:int ,gpa:double);
+                                C = UNION A,B;
+                                D = filter C by name == 'alice allen';
+                                store D into ':OUTPATH:';",
                     }
                 ],
             },{