You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by he...@apache.org on 2010/10/07 19:01:38 UTC
svn commit: r1005527 - in /hadoop/hive/trunk: CHANGES.txt
ql/src/java/org/apache/hadoop/hive/ql/udf/UDAFPercentile.java
ql/src/test/queries/clientpositive/udf_percentile.q
ql/src/test/results/clientpositive/udf_percentile.q.out
Author: heyongqiang
Date: Thu Oct 7 17:01:37 2010
New Revision: 1005527
URL: http://svn.apache.org/viewvc?rev=1005527&view=rev
Log:
HIVE-1376 Simple UDAFs with more than 1 parameter crash on empty row query. (Ning Zhang via He Yongqiang)
Modified:
hadoop/hive/trunk/CHANGES.txt
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDAFPercentile.java
hadoop/hive/trunk/ql/src/test/queries/clientpositive/udf_percentile.q
hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_percentile.q.out
Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=1005527&r1=1005526&r2=1005527&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Thu Oct 7 17:01:37 2010
@@ -339,6 +339,9 @@ Trunk - Unreleased
HIVE-1674 count(*) returns wrong result when a mapper returns empty results
(Ning Zhang via He Yongqiang)
+ HIVE-1376 Simple UDAFs with more than 1 parameter crash on empty row query
+ (Ning Zhang via He Yongqiang)
+
TESTS
HIVE-1464. improve test query performance
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDAFPercentile.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDAFPercentile.java?rev=1005527&r1=1005526&r2=1005527&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDAFPercentile.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDAFPercentile.java Thu Oct 7 17:01:37 2010
@@ -138,13 +138,20 @@ public class UDAFPercentile extends UDAF
}
}
- public boolean iterate(LongWritable o, double percentile) {
+ /** Note that percentile can be null in a global aggregation over
+ * 0 input rows, e.g. "select percentile(col, 0.5) from t where false".
+ * In that case iterate(null, null) is called once; return without updating state.
+ */
+ public boolean iterate(LongWritable o, Double percentile) {
+ if (o == null && percentile == null) {
+ return false;
+ }
if (state.percentiles == null) {
if (percentile < 0.0 || percentile > 1.0) {
throw new RuntimeException("Percentile value must be wihin the range of 0 to 1.");
}
state.percentiles = new ArrayList<DoubleWritable>(1);
- state.percentiles.add(new DoubleWritable(percentile));
+ state.percentiles.add(new DoubleWritable(percentile.doubleValue()));
}
if (o != null) {
increment(state, o, 1);
@@ -157,13 +164,16 @@ public class UDAFPercentile extends UDAF
}
public boolean merge(State other) {
+ if (other == null || other.counts == null || other.percentiles == null) {
+ return false;
+ }
+
if (state.percentiles == null) {
state.percentiles = new ArrayList<DoubleWritable>(other.percentiles);
}
- if (other.counts != null) {
- for (Map.Entry<LongWritable, LongWritable> e: other.counts.entrySet()) {
- increment(state, e.getKey(), e.getValue().get());
- }
+
+ for (Map.Entry<LongWritable, LongWritable> e: other.counts.entrySet()) {
+ increment(state, e.getKey(), e.getValue().get());
}
return true;
}
@@ -242,13 +252,16 @@ public class UDAFPercentile extends UDAF
}
public boolean merge(State other) {
+ if (other == null || other.counts == null || other.percentiles == null) {
+ return true;
+ }
+
if (state.percentiles == null) {
state.percentiles = new ArrayList<DoubleWritable>(other.percentiles);
}
- if (other.counts != null) {
- for (Map.Entry<LongWritable, LongWritable> e: other.counts.entrySet()) {
- increment(state, e.getKey(), e.getValue().get());
- }
+
+ for (Map.Entry<LongWritable, LongWritable> e: other.counts.entrySet()) {
+ increment(state, e.getKey(), e.getValue().get());
}
return true;
}
Modified: hadoop/hive/trunk/ql/src/test/queries/clientpositive/udf_percentile.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/udf_percentile.q?rev=1005527&r1=1005526&r2=1005527&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/udf_percentile.q (original)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/udf_percentile.q Thu Oct 7 17:01:37 2010
@@ -68,3 +68,5 @@ SELECT CAST(key AS INT) DIV 10,
percentile(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), array(0.0, 0.5, 0.99, 1.0))
FROM src
GROUP BY CAST(key AS INT) DIV 10;
+
+select percentile(cast(key as bigint), 0.5) from src where false;
Modified: hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_percentile.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_percentile.q.out?rev=1005527&r1=1005526&r2=1005527&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_percentile.q.out (original)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_percentile.q.out Thu Oct 7 17:01:37 2010
@@ -17,7 +17,7 @@ FROM src
GROUP BY CAST(key AS INT) DIV 10
PREHOOK: type: QUERY
PREHOOK: Input: default@src
-PREHOOK: Output: file:/data/users/zshao/hadoop_hive_trunk2/build/ql/scratchdir/hive_2010-04-05_14-36-18_516_8402838973000771943/10000
+PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-30_14-31-04_657_7695062961081758326/-mr-10000
POSTHOOK: query: SELECT CAST(key AS INT) DIV 10,
percentile(CAST(substr(value, 5) AS INT), 0.0),
percentile(CAST(substr(value, 5) AS INT), 0.5),
@@ -27,7 +27,7 @@ FROM src
GROUP BY CAST(key AS INT) DIV 10
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
-POSTHOOK: Output: file:/data/users/zshao/hadoop_hive_trunk2/build/ql/scratchdir/hive_2010-04-05_14-36-18_516_8402838973000771943/10000
+POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-30_14-31-04_657_7695062961081758326/-mr-10000
0 0.0 4.5 9.0 [0.0,4.5,8.91,9.0]
1 10.0 15.0 19.0 [10.0,15.0,18.91,19.0]
2 20.0 26.0 28.0 [20.0,26.0,27.939999999999998,28.0]
@@ -87,7 +87,7 @@ FROM src
GROUP BY CAST(key AS INT) DIV 10
PREHOOK: type: QUERY
PREHOOK: Input: default@src
-PREHOOK: Output: file:/data/users/zshao/hadoop_hive_trunk2/build/ql/scratchdir/hive_2010-04-05_14-36-22_731_2926350437853517241/10000
+PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-30_14-31-08_328_1503104614300611608/-mr-10000
POSTHOOK: query: SELECT CAST(key AS INT) DIV 10,
percentile(CAST(substr(value, 5) AS INT), 0.0),
percentile(CAST(substr(value, 5) AS INT), 0.5),
@@ -97,7 +97,7 @@ FROM src
GROUP BY CAST(key AS INT) DIV 10
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
-POSTHOOK: Output: file:/data/users/zshao/hadoop_hive_trunk2/build/ql/scratchdir/hive_2010-04-05_14-36-22_731_2926350437853517241/10000
+POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-30_14-31-08_328_1503104614300611608/-mr-10000
0 0.0 4.5 9.0 [0.0,4.5,8.91,9.0]
1 10.0 15.0 19.0 [10.0,15.0,18.91,19.0]
2 20.0 26.0 28.0 [20.0,26.0,27.939999999999998,28.0]
@@ -157,7 +157,7 @@ FROM src
GROUP BY CAST(key AS INT) DIV 10
PREHOOK: type: QUERY
PREHOOK: Input: default@src
-PREHOOK: Output: file:/data/users/zshao/hadoop_hive_trunk2/build/ql/scratchdir/hive_2010-04-05_14-36-26_847_6402513687371445286/10000
+PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-30_14-31-11_970_2555122074334450746/-mr-10000
POSTHOOK: query: SELECT CAST(key AS INT) DIV 10,
percentile(CAST(substr(value, 5) AS INT), 0.0),
percentile(CAST(substr(value, 5) AS INT), 0.5),
@@ -167,7 +167,7 @@ FROM src
GROUP BY CAST(key AS INT) DIV 10
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
-POSTHOOK: Output: file:/data/users/zshao/hadoop_hive_trunk2/build/ql/scratchdir/hive_2010-04-05_14-36-26_847_6402513687371445286/10000
+POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-30_14-31-11_970_2555122074334450746/-mr-10000
0 0.0 4.5 9.0 [0.0,4.5,8.91,9.0]
1 10.0 15.0 19.0 [10.0,15.0,18.91,19.0]
2 20.0 26.0 28.0 [20.0,26.0,27.939999999999998,28.0]
@@ -227,7 +227,7 @@ FROM src
GROUP BY CAST(key AS INT) DIV 10
PREHOOK: type: QUERY
PREHOOK: Input: default@src
-PREHOOK: Output: file:/data/users/zshao/hadoop_hive_trunk2/build/ql/scratchdir/hive_2010-04-05_14-36-34_869_4193114220624274575/10000
+PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-30_14-31-18_605_6781388888873576931/-mr-10000
POSTHOOK: query: SELECT CAST(key AS INT) DIV 10,
percentile(CAST(substr(value, 5) AS INT), 0.0),
percentile(CAST(substr(value, 5) AS INT), 0.5),
@@ -237,7 +237,7 @@ FROM src
GROUP BY CAST(key AS INT) DIV 10
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
-POSTHOOK: Output: file:/data/users/zshao/hadoop_hive_trunk2/build/ql/scratchdir/hive_2010-04-05_14-36-34_869_4193114220624274575/10000
+POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-30_14-31-18_605_6781388888873576931/-mr-10000
0 0.0 4.5 9.0 [0.0,4.5,8.91,9.0]
1 10.0 15.0 19.0 [10.0,15.0,18.91,19.0]
2 20.0 26.0 28.0 [20.0,26.0,27.939999999999998,28.0]
@@ -296,7 +296,7 @@ FROM src
GROUP BY CAST(key AS INT) DIV 10
PREHOOK: type: QUERY
PREHOOK: Input: default@src
-PREHOOK: Output: file:/data/users/zshao/hadoop_hive_trunk2/build/ql/scratchdir/hive_2010-04-05_14-36-42_772_2730295615232304539/10000
+PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-30_14-31-25_414_6485719353317968460/-mr-10000
POSTHOOK: query: -- test null handling
SELECT CAST(key AS INT) DIV 10,
percentile(NULL, 0.0),
@@ -305,7 +305,7 @@ FROM src
GROUP BY CAST(key AS INT) DIV 10
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
-POSTHOOK: Output: file:/data/users/zshao/hadoop_hive_trunk2/build/ql/scratchdir/hive_2010-04-05_14-36-42_772_2730295615232304539/10000
+POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-30_14-31-25_414_6485719353317968460/-mr-10000
0 NULL null
1 NULL null
2 NULL null
@@ -364,7 +364,7 @@ FROM src
GROUP BY CAST(key AS INT) DIV 10
PREHOOK: type: QUERY
PREHOOK: Input: default@src
-PREHOOK: Output: file:/data/users/zshao/hadoop_hive_trunk2/build/ql/scratchdir/hive_2010-04-05_14-36-46_476_1889061834871373611/10000
+PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-30_14-31-29_005_976735399684431468/-mr-10000
POSTHOOK: query: -- test empty array handling
SELECT CAST(key AS INT) DIV 10,
percentile(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), 0.5),
@@ -373,7 +373,7 @@ FROM src
GROUP BY CAST(key AS INT) DIV 10
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
-POSTHOOK: Output: file:/data/users/zshao/hadoop_hive_trunk2/build/ql/scratchdir/hive_2010-04-05_14-36-46_476_1889061834871373611/10000
+POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-30_14-31-29_005_976735399684431468/-mr-10000
0 1.0 [1.0,1.0,1.0,1.0]
1 1.0 [1.0,1.0,1.0,1.0]
2 1.0 [1.0,1.0,1.0,1.0]
@@ -424,3 +424,12 @@ POSTHOOK: Output: file:/data/users/zshao
47 NULL null
48 NULL null
49 NULL null
+PREHOOK: query: select percentile(cast(key as bigint), 0.5) from src where false
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-30_14-31-32_978_7090615707538391094/-mr-10000
+POSTHOOK: query: select percentile(cast(key as bigint), 0.5) from src where false
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-30_14-31-32_978_7090615707538391094/-mr-10000
+NULL