You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by sz...@apache.org on 2014/12/23 20:25:43 UTC

svn commit: r1647653 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/udf/generic/NumericHistogram.java test/queries/clientpositive/udaf_percentile_approx_23.q test/results/clientpositive/udaf_percentile_approx_23.q.out

Author: szehon
Date: Tue Dec 23 19:25:43 2014
New Revision: 1647653

URL: http://svn.apache.org/r1647653
Log:
HIVE-8613 : percentile_approx raise a comparator error (Nicolas Lalevée and Navis, via Szehon)

Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumericHistogram.java
    hive/trunk/ql/src/test/queries/clientpositive/udaf_percentile_approx_23.q
    hive/trunk/ql/src/test/results/clientpositive/udaf_percentile_approx_23.q.out

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumericHistogram.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumericHistogram.java?rev=1647653&r1=1647652&r2=1647653&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumericHistogram.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumericHistogram.java Tue Dec 23 19:25:43 2014
@@ -19,7 +19,6 @@ package org.apache.hadoop.hive.ql.udf.ge
 
 import java.util.ArrayList;
 import java.util.List;
-import java.util.Arrays;
 import java.util.Collections;
 import java.util.Random;
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
@@ -42,14 +41,7 @@ public class NumericHistogram {
     double y;
 
     public int compareTo(Object other) {
-      Coord o = (Coord) other;
-      if(x < o.x) {
-        return -1;
-      }
-      if(x > o.x) {
-        return 1;
-      }
-      return 0;
+      return Double.compare(x, ((Coord) other).x);
     }
   };
 

Modified: hive/trunk/ql/src/test/queries/clientpositive/udaf_percentile_approx_23.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/udaf_percentile_approx_23.q?rev=1647653&r1=1647652&r2=1647653&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/udaf_percentile_approx_23.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/udaf_percentile_approx_23.q Tue Dec 23 19:25:43 2014
@@ -84,3 +84,10 @@ select * from t9;
 select * from t10;
 select * from t11;
 select * from t12;
+
+set hive.cbo.enable=false;
+
+-- NaN
+explain 
+select percentile_approx(case when key < 100 then cast('NaN' as double) else key end, 0.5) from bucket;
+select percentile_approx(case when key < 100 then cast('NaN' as double) else key end, 0.5) from bucket;

Modified: hive/trunk/ql/src/test/results/clientpositive/udaf_percentile_approx_23.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/udaf_percentile_approx_23.q.out?rev=1647653&r1=1647652&r2=1647653&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/udaf_percentile_approx_23.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/udaf_percentile_approx_23.q.out Tue Dec 23 19:25:43 2014
@@ -504,3 +504,68 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t12
 #### A masked pattern was here ####
 [26.0,255.5,479.0,491.0]
+PREHOOK: query: -- NaN
+explain 
+select percentile_approx(case when key < 100 then cast('NaN' as double) else key end, 0.5) from bucket
+PREHOOK: type: QUERY
+POSTHOOK: query: -- NaN
+explain 
+select percentile_approx(case when key < 100 then cast('NaN' as double) else key end, 0.5) from bucket
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: bucket
+            Statistics: Num rows: 726 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: double)
+              outputColumnNames: key
+              Statistics: Num rows: 726 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: percentile_approx(CASE WHEN ((key < 100.0)) THEN (UDFToDouble('NaN')) ELSE (key) END, 0.5)
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                  value expressions: _col0 (type: array<double>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: percentile_approx(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: double)
+            outputColumnNames: _col0
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select percentile_approx(case when key < 100 then cast('NaN' as double) else key end, 0.5) from bucket
+PREHOOK: type: QUERY
+PREHOOK: Input: default@bucket
+#### A masked pattern was here ####
+POSTHOOK: query: select percentile_approx(case when key < 100 then cast('NaN' as double) else key end, 0.5) from bucket
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@bucket
+#### A masked pattern was here ####
+341.5