You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by sz...@apache.org on 2014/12/23 20:25:43 UTC
svn commit: r1647653 - in /hive/trunk/ql/src:
java/org/apache/hadoop/hive/ql/udf/generic/NumericHistogram.java
test/queries/clientpositive/udaf_percentile_approx_23.q
test/results/clientpositive/udaf_percentile_approx_23.q.out
Author: szehon
Date: Tue Dec 23 19:25:43 2014
New Revision: 1647653
URL: http://svn.apache.org/r1647653
Log:
HIVE-8613 : percentile_approx raises a comparator error (Nicolas Lalevée and Navis, via Szehon)
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumericHistogram.java
hive/trunk/ql/src/test/queries/clientpositive/udaf_percentile_approx_23.q
hive/trunk/ql/src/test/results/clientpositive/udaf_percentile_approx_23.q.out
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumericHistogram.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumericHistogram.java?rev=1647653&r1=1647652&r2=1647653&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumericHistogram.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumericHistogram.java Tue Dec 23 19:25:43 2014
@@ -19,7 +19,6 @@ package org.apache.hadoop.hive.ql.udf.ge
import java.util.ArrayList;
import java.util.List;
-import java.util.Arrays;
import java.util.Collections;
import java.util.Random;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
@@ -42,14 +41,7 @@ public class NumericHistogram {
double y;
public int compareTo(Object other) {
- Coord o = (Coord) other;
- if(x < o.x) {
- return -1;
- }
- if(x > o.x) {
- return 1;
- }
- return 0;
+ return Double.compare(x, ((Coord) other).x);
}
};
Modified: hive/trunk/ql/src/test/queries/clientpositive/udaf_percentile_approx_23.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/udaf_percentile_approx_23.q?rev=1647653&r1=1647652&r2=1647653&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/udaf_percentile_approx_23.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/udaf_percentile_approx_23.q Tue Dec 23 19:25:43 2014
@@ -84,3 +84,10 @@ select * from t9;
select * from t10;
select * from t11;
select * from t12;
+
+set hive.cbo.enable=false;
+
+-- NaN
+explain
+select percentile_approx(case when key < 100 then cast('NaN' as double) else key end, 0.5) from bucket;
+select percentile_approx(case when key < 100 then cast('NaN' as double) else key end, 0.5) from bucket;
Modified: hive/trunk/ql/src/test/results/clientpositive/udaf_percentile_approx_23.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/udaf_percentile_approx_23.q.out?rev=1647653&r1=1647652&r2=1647653&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/udaf_percentile_approx_23.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/udaf_percentile_approx_23.q.out Tue Dec 23 19:25:43 2014
@@ -504,3 +504,68 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@t12
#### A masked pattern was here ####
[26.0,255.5,479.0,491.0]
+PREHOOK: query: -- NaN
+explain
+select percentile_approx(case when key < 100 then cast('NaN' as double) else key end, 0.5) from bucket
+PREHOOK: type: QUERY
+POSTHOOK: query: -- NaN
+explain
+select percentile_approx(case when key < 100 then cast('NaN' as double) else key end, 0.5) from bucket
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: bucket
+ Statistics: Num rows: 726 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: double)
+ outputColumnNames: key
+ Statistics: Num rows: 726 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: percentile_approx(CASE WHEN ((key < 100.0)) THEN (UDFToDouble('NaN')) ELSE (key) END, 0.5)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ value expressions: _col0 (type: array<double>)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: percentile_approx(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: double)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select percentile_approx(case when key < 100 then cast('NaN' as double) else key end, 0.5) from bucket
+PREHOOK: type: QUERY
+PREHOOK: Input: default@bucket
+#### A masked pattern was here ####
+POSTHOOK: query: select percentile_approx(case when key < 100 then cast('NaN' as double) else key end, 0.5) from bucket
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@bucket
+#### A masked pattern was here ####
+341.5