Posted to commits@hive.apache.org by he...@apache.org on 2010/10/07 19:01:38 UTC

svn commit: r1005527 - in /hadoop/hive/trunk: CHANGES.txt ql/src/java/org/apache/hadoop/hive/ql/udf/UDAFPercentile.java ql/src/test/queries/clientpositive/udf_percentile.q ql/src/test/results/clientpositive/udf_percentile.q.out

Author: heyongqiang
Date: Thu Oct  7 17:01:37 2010
New Revision: 1005527

URL: http://svn.apache.org/viewvc?rev=1005527&view=rev
Log:
HIVE-1376 Simple UDAFs with more than 1 parameter crash on empty row query. (Ning Zhang via He Yongqiang)
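
For readers following the change, here is a minimal sketch of the defensive pattern this patch applies, shown on a hypothetical two-parameter UDAF written against Hive's old-style bridge (org.apache.hadoop.hive.ql.exec.UDAF), which is what UDAFPercentile extends. The class, field, and method bodies below are illustrative only, not Hive's actual UDAFPercentile code: the constant second argument is boxed so the framework can pass null for a global aggregation over zero rows, and iterate()/merge() bail out instead of dereferencing missing state.

    import org.apache.hadoop.hive.ql.exec.UDAF;
    import org.apache.hadoop.hive.ql.exec.UDAFEvaluator;
    import org.apache.hadoop.io.LongWritable;

    // Hypothetical two-parameter UDAF; names are illustrative only.
    public class UDAFExampleCountAbove extends UDAF {

      public static class State {
        long count;
        Double threshold;   // boxed so it can stay null when there is no input
      }

      public static class Evaluator implements UDAFEvaluator {
        private final State state = new State();

        public void init() {
          state.count = 0;
          state.threshold = null;
        }

        // For "SELECT f(col, 0.5) FROM t WHERE false" the old-style UDAF
        // bridge calls iterate(null, null) once; a primitive 'double' second
        // parameter would throw a NullPointerException on auto-unboxing.
        public boolean iterate(LongWritable value, Double threshold) {
          if (value == null && threshold == null) {
            return false;                       // empty input: nothing to do
          }
          if (state.threshold == null) {
            state.threshold = threshold;
          }
          if (value != null && state.threshold != null
              && value.get() > state.threshold.doubleValue()) {
            state.count++;
          }
          return true;
        }

        public State terminatePartial() {
          return state;
        }

        public boolean merge(State other) {
          // A partial aggregation coming from an empty split carries no data.
          if (other == null || other.threshold == null) {
            return true;
          }
          if (state.threshold == null) {
            state.threshold = other.threshold;
          }
          state.count += other.count;
          return true;
        }

        public LongWritable terminate() {
          return state.threshold == null ? null : new LongWritable(state.count);
        }
      }
    }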

Modified:
    hadoop/hive/trunk/CHANGES.txt
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDAFPercentile.java
    hadoop/hive/trunk/ql/src/test/queries/clientpositive/udf_percentile.q
    hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_percentile.q.out

Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=1005527&r1=1005526&r2=1005527&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Thu Oct  7 17:01:37 2010
@@ -339,6 +339,9 @@ Trunk -  Unreleased
     HIVE-1674 count(*) returns wrong result when a mapper returns empty results
     (Ning Zhang via He Yongqiang)
 
+    HIVE-1376 Simple UDAFs with more than 1 parameter crash on empty row query
+    (Ning Zhang via He Yongqiang)
+
   TESTS
 
     HIVE-1464. improve  test query performance

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDAFPercentile.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDAFPercentile.java?rev=1005527&r1=1005526&r2=1005527&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDAFPercentile.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDAFPercentile.java Thu Oct  7 17:01:37 2010
@@ -138,13 +138,20 @@ public class UDAFPercentile extends UDAF
       }
     }
 
-    public boolean iterate(LongWritable o, double percentile) {
+    /** Note that percentile can be null in a global aggregation with
+     *  0 input rows:  "select percentile(col, 0.5) from t where false"
+     *  In that case, iterate(null, null) will be called once.
+     */
+    public boolean iterate(LongWritable o, Double percentile) {
+      if (o == null && percentile == null) {
+        return false;
+      }
       if (state.percentiles == null) {
         if (percentile < 0.0 || percentile > 1.0) {
           throw new RuntimeException("Percentile value must be wihin the range of 0 to 1.");
         }
         state.percentiles = new ArrayList<DoubleWritable>(1);
-        state.percentiles.add(new DoubleWritable(percentile));
+        state.percentiles.add(new DoubleWritable(percentile.doubleValue()));
       }
       if (o != null) {
         increment(state, o, 1);
@@ -157,13 +164,16 @@ public class UDAFPercentile extends UDAF
     }
 
     public boolean merge(State other) {
+      if (other == null || other.counts == null || other.percentiles == null) {
+        return false;
+      }
+
       if (state.percentiles == null) {
         state.percentiles = new ArrayList<DoubleWritable>(other.percentiles);
       }
-      if (other.counts != null) {
-        for (Map.Entry<LongWritable, LongWritable> e: other.counts.entrySet()) {
-          increment(state, e.getKey(), e.getValue().get());
-        }
+
+      for (Map.Entry<LongWritable, LongWritable> e: other.counts.entrySet()) {
+        increment(state, e.getKey(), e.getValue().get());
       }
       return true;
     }
@@ -242,13 +252,16 @@ public class UDAFPercentile extends UDAF
     }
 
     public boolean merge(State other) {
+      if (other == null || other.counts == null || other.percentiles == null) {
+        return true;
+      }
+
       if (state.percentiles == null) {
         state.percentiles = new ArrayList<DoubleWritable>(other.percentiles);
       }
-      if (other.counts != null) {
-        for (Map.Entry<LongWritable, LongWritable> e: other.counts.entrySet()) {
-          increment(state, e.getKey(), e.getValue().get());
-        }
+
+      for (Map.Entry<LongWritable, LongWritable> e: other.counts.entrySet()) {
+        increment(state, e.getKey(), e.getValue().get());
       }
       return true;
     }
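
As a rough usage check of that pattern (again driving the hypothetical evaluator sketched above, not Hive's real UDAFPercentile evaluators), an empty global aggregation reduces to a single iterate(null, null) call followed by terminate(), which now yields null rather than throwing:

    // Hypothetical driver for the sketch above; not part of this commit.
    public class EmptyInputCheck {
      public static void main(String[] args) {
        UDAFExampleCountAbove.Evaluator eval = new UDAFExampleCountAbove.Evaluator();
        eval.init();
        eval.iterate(null, null);              // what Hive passes for zero input rows
        System.out.println(eval.terminate());  // prints "null" instead of throwing
      }
    }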

Modified: hadoop/hive/trunk/ql/src/test/queries/clientpositive/udf_percentile.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/udf_percentile.q?rev=1005527&r1=1005526&r2=1005527&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/udf_percentile.q (original)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/udf_percentile.q Thu Oct  7 17:01:37 2010
@@ -68,3 +68,5 @@ SELECT CAST(key AS INT) DIV 10,
        percentile(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), array(0.0, 0.5, 0.99, 1.0))
 FROM src
 GROUP BY CAST(key AS INT) DIV 10;
+
+select percentile(cast(key as bigint), 0.5) from src where false;

Modified: hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_percentile.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_percentile.q.out?rev=1005527&r1=1005526&r2=1005527&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_percentile.q.out (original)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_percentile.q.out Thu Oct  7 17:01:37 2010
@@ -17,7 +17,7 @@ FROM src
 GROUP BY CAST(key AS INT) DIV 10
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
-PREHOOK: Output: file:/data/users/zshao/hadoop_hive_trunk2/build/ql/scratchdir/hive_2010-04-05_14-36-18_516_8402838973000771943/10000
+PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-30_14-31-04_657_7695062961081758326/-mr-10000
 POSTHOOK: query: SELECT CAST(key AS INT) DIV 10,
        percentile(CAST(substr(value, 5) AS INT), 0.0),
        percentile(CAST(substr(value, 5) AS INT), 0.5),
@@ -27,7 +27,7 @@ FROM src
 GROUP BY CAST(key AS INT) DIV 10
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
-POSTHOOK: Output: file:/data/users/zshao/hadoop_hive_trunk2/build/ql/scratchdir/hive_2010-04-05_14-36-18_516_8402838973000771943/10000
+POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-30_14-31-04_657_7695062961081758326/-mr-10000
 0	0.0	4.5	9.0	[0.0,4.5,8.91,9.0]
 1	10.0	15.0	19.0	[10.0,15.0,18.91,19.0]
 2	20.0	26.0	28.0	[20.0,26.0,27.939999999999998,28.0]
@@ -87,7 +87,7 @@ FROM src
 GROUP BY CAST(key AS INT) DIV 10
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
-PREHOOK: Output: file:/data/users/zshao/hadoop_hive_trunk2/build/ql/scratchdir/hive_2010-04-05_14-36-22_731_2926350437853517241/10000
+PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-30_14-31-08_328_1503104614300611608/-mr-10000
 POSTHOOK: query: SELECT CAST(key AS INT) DIV 10,
        percentile(CAST(substr(value, 5) AS INT), 0.0),
        percentile(CAST(substr(value, 5) AS INT), 0.5),
@@ -97,7 +97,7 @@ FROM src
 GROUP BY CAST(key AS INT) DIV 10
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
-POSTHOOK: Output: file:/data/users/zshao/hadoop_hive_trunk2/build/ql/scratchdir/hive_2010-04-05_14-36-22_731_2926350437853517241/10000
+POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-30_14-31-08_328_1503104614300611608/-mr-10000
 0	0.0	4.5	9.0	[0.0,4.5,8.91,9.0]
 1	10.0	15.0	19.0	[10.0,15.0,18.91,19.0]
 2	20.0	26.0	28.0	[20.0,26.0,27.939999999999998,28.0]
@@ -157,7 +157,7 @@ FROM src
 GROUP BY CAST(key AS INT) DIV 10
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
-PREHOOK: Output: file:/data/users/zshao/hadoop_hive_trunk2/build/ql/scratchdir/hive_2010-04-05_14-36-26_847_6402513687371445286/10000
+PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-30_14-31-11_970_2555122074334450746/-mr-10000
 POSTHOOK: query: SELECT CAST(key AS INT) DIV 10,
        percentile(CAST(substr(value, 5) AS INT), 0.0),
        percentile(CAST(substr(value, 5) AS INT), 0.5),
@@ -167,7 +167,7 @@ FROM src
 GROUP BY CAST(key AS INT) DIV 10
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
-POSTHOOK: Output: file:/data/users/zshao/hadoop_hive_trunk2/build/ql/scratchdir/hive_2010-04-05_14-36-26_847_6402513687371445286/10000
+POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-30_14-31-11_970_2555122074334450746/-mr-10000
 0	0.0	4.5	9.0	[0.0,4.5,8.91,9.0]
 1	10.0	15.0	19.0	[10.0,15.0,18.91,19.0]
 2	20.0	26.0	28.0	[20.0,26.0,27.939999999999998,28.0]
@@ -227,7 +227,7 @@ FROM src
 GROUP BY CAST(key AS INT) DIV 10
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
-PREHOOK: Output: file:/data/users/zshao/hadoop_hive_trunk2/build/ql/scratchdir/hive_2010-04-05_14-36-34_869_4193114220624274575/10000
+PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-30_14-31-18_605_6781388888873576931/-mr-10000
 POSTHOOK: query: SELECT CAST(key AS INT) DIV 10,
        percentile(CAST(substr(value, 5) AS INT), 0.0),
        percentile(CAST(substr(value, 5) AS INT), 0.5),
@@ -237,7 +237,7 @@ FROM src
 GROUP BY CAST(key AS INT) DIV 10
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
-POSTHOOK: Output: file:/data/users/zshao/hadoop_hive_trunk2/build/ql/scratchdir/hive_2010-04-05_14-36-34_869_4193114220624274575/10000
+POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-30_14-31-18_605_6781388888873576931/-mr-10000
 0	0.0	4.5	9.0	[0.0,4.5,8.91,9.0]
 1	10.0	15.0	19.0	[10.0,15.0,18.91,19.0]
 2	20.0	26.0	28.0	[20.0,26.0,27.939999999999998,28.0]
@@ -296,7 +296,7 @@ FROM src
 GROUP BY CAST(key AS INT) DIV 10
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
-PREHOOK: Output: file:/data/users/zshao/hadoop_hive_trunk2/build/ql/scratchdir/hive_2010-04-05_14-36-42_772_2730295615232304539/10000
+PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-30_14-31-25_414_6485719353317968460/-mr-10000
 POSTHOOK: query: -- test null handling
 SELECT CAST(key AS INT) DIV 10,
        percentile(NULL, 0.0),
@@ -305,7 +305,7 @@ FROM src
 GROUP BY CAST(key AS INT) DIV 10
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
-POSTHOOK: Output: file:/data/users/zshao/hadoop_hive_trunk2/build/ql/scratchdir/hive_2010-04-05_14-36-42_772_2730295615232304539/10000
+POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-30_14-31-25_414_6485719353317968460/-mr-10000
 0	NULL	null
 1	NULL	null
 2	NULL	null
@@ -364,7 +364,7 @@ FROM src
 GROUP BY CAST(key AS INT) DIV 10
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
-PREHOOK: Output: file:/data/users/zshao/hadoop_hive_trunk2/build/ql/scratchdir/hive_2010-04-05_14-36-46_476_1889061834871373611/10000
+PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-30_14-31-29_005_976735399684431468/-mr-10000
 POSTHOOK: query: -- test empty array handling
 SELECT CAST(key AS INT) DIV 10,
        percentile(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), 0.5),
@@ -373,7 +373,7 @@ FROM src
 GROUP BY CAST(key AS INT) DIV 10
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
-POSTHOOK: Output: file:/data/users/zshao/hadoop_hive_trunk2/build/ql/scratchdir/hive_2010-04-05_14-36-46_476_1889061834871373611/10000
+POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-30_14-31-29_005_976735399684431468/-mr-10000
 0	1.0	[1.0,1.0,1.0,1.0]
 1	1.0	[1.0,1.0,1.0,1.0]
 2	1.0	[1.0,1.0,1.0,1.0]
@@ -424,3 +424,12 @@ POSTHOOK: Output: file:/data/users/zshao
 47	NULL	null
 48	NULL	null
 49	NULL	null
+PREHOOK: query: select percentile(cast(key as bigint), 0.5) from src where false
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-30_14-31-32_978_7090615707538391094/-mr-10000
+POSTHOOK: query: select percentile(cast(key as bigint), 0.5) from src where false
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-30_14-31-32_978_7090615707538391094/-mr-10000
+NULL