You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by GitBox <gi...@apache.org> on 2020/10/12 06:41:43 UTC

[GitHub] [spark] leanken commented on a change in pull request #29983: [SPARK-13860][SQL] Change statistical aggregate function to return null instead of Double.NaN when divideByZero

leanken commented on a change in pull request #29983:
URL: https://github.com/apache/spark/pull/29983#discussion_r503069104



##########
File path: sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/WindowQuerySuite.scala
##########
@@ -59,56 +60,115 @@ class WindowQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleto
   }
 
   test("windowing.q -- 15. testExpressions") {
-    // Moved because:
-    // - Spark uses a different default stddev (sample instead of pop)
-    // - Tiny numerical differences in stddev results.
-    // - Different StdDev behavior when n=1 (NaN instead of 0)
-    checkAnswer(sql(s"""
-      |select  p_mfgr,p_name, p_size,
-      |rank() over(distribute by p_mfgr sort by p_name) as r,
-      |dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
-      |cume_dist() over(distribute by p_mfgr sort by p_name) as cud,
-      |percent_rank() over(distribute by p_mfgr sort by p_name) as pr,
-      |ntile(3) over(distribute by p_mfgr sort by p_name) as nt,
-      |count(p_size) over(distribute by p_mfgr sort by p_name) as ca,
-      |avg(p_size) over(distribute by p_mfgr sort by p_name) as avg,
-      |stddev(p_size) over(distribute by p_mfgr sort by p_name) as st,
-      |first_value(p_size % 5) over(distribute by p_mfgr sort by p_name) as fv,
-      |last_value(p_size) over(distribute by p_mfgr sort by p_name) as lv,
-      |first_value(p_size) over w1  as fvW1
-      |from part
-      |window w1 as (distribute by p_mfgr sort by p_mfgr, p_name
-      |             rows between 2 preceding and 2 following)
+    withSQLConf(SQLConf.LEGACY_CENTRAL_MOMENT_AGG.key -> "true") {

Review comment:
       done.

##########
File path: sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/WindowQuerySuite.scala
##########
@@ -59,56 +60,115 @@ class WindowQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleto
   }
 
   test("windowing.q -- 15. testExpressions") {
-    // Moved because:
-    // - Spark uses a different default stddev (sample instead of pop)
-    // - Tiny numerical differences in stddev results.
-    // - Different StdDev behavior when n=1 (NaN instead of 0)
-    checkAnswer(sql(s"""
-      |select  p_mfgr,p_name, p_size,
-      |rank() over(distribute by p_mfgr sort by p_name) as r,
-      |dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
-      |cume_dist() over(distribute by p_mfgr sort by p_name) as cud,
-      |percent_rank() over(distribute by p_mfgr sort by p_name) as pr,
-      |ntile(3) over(distribute by p_mfgr sort by p_name) as nt,
-      |count(p_size) over(distribute by p_mfgr sort by p_name) as ca,
-      |avg(p_size) over(distribute by p_mfgr sort by p_name) as avg,
-      |stddev(p_size) over(distribute by p_mfgr sort by p_name) as st,
-      |first_value(p_size % 5) over(distribute by p_mfgr sort by p_name) as fv,
-      |last_value(p_size) over(distribute by p_mfgr sort by p_name) as lv,
-      |first_value(p_size) over w1  as fvW1
-      |from part
-      |window w1 as (distribute by p_mfgr sort by p_mfgr, p_name
-      |             rows between 2 preceding and 2 following)
+    withSQLConf(SQLConf.LEGACY_CENTRAL_MOMENT_AGG.key -> "true") {
+      // Moved because:
+      // - Spark uses a different default stddev (sample instead of pop)
+      // - Tiny numerical differences in stddev results.
+      // - Different StdDev behavior when n=1 (NaN instead of 0)

Review comment:
       done.

##########
File path: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CentralMomentAgg.scala
##########
@@ -174,7 +175,9 @@ case class StddevSamp(child: Expression) extends CentralMomentAgg(child) {
 
   override val evaluateExpression: Expression = {
     If(n === 0.0, Literal.create(null, DoubleType),
-      If(n === 1.0, Double.NaN, sqrt(m2 / (n - 1.0))))
+      If(n === 1.0,
+        if (SQLConf.get.legacyCentralMomentAgg) Double.NaN else Literal.create(null, DoubleType),

Review comment:
       done.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org