Posted to commits@spark.apache.org by do...@apache.org on 2018/12/18 18:10:09 UTC

[spark] branch master updated: [SPARK-26382][CORE] prefix comparator should handle -0.0

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new befca98  [SPARK-26382][CORE] prefix comparator should handle -0.0
befca98 is described below

commit befca983d2da4f7828aa7a7cd7345d17c4f291dd
Author: Wenchen Fan <we...@databricks.com>
AuthorDate: Tue Dec 18 10:09:56 2018 -0800

    [SPARK-26382][CORE] prefix comparator should handle -0.0
    
    ## What changes were proposed in this pull request?
    
    This is kind of a follow-up to https://github.com/apache/spark/pull/23239
    
    `UnsafeProjection` normalizes the special float/double values (NaN and -0.0), so the sorter doesn't have to handle them.
    
    However, for consistency and future-proofing, this PR proposes to also normalize `-0.0` in the prefix comparator, so that it matches the normal ordering. Note that the prefix comparator handles NaN as well.
    
    This is not a bug fix, but a safeguard.
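    
    As a minimal illustration of why this matters (this snippet is not part of the patch; it only shows standard `java.lang.Double` behavior): `0.0` and `-0.0` compare equal as doubles, but their raw bit patterns differ, so without normalization they would map to different sort prefixes.
    
    ```java
    // 0.0 and -0.0 are equal under the normal double ordering...
    assert 0.0 == -0.0;
    // ...but their bit patterns, which the sort prefix is derived from, differ.
    long zeroBits = Double.doubleToLongBits(0.0);      // 0x0000000000000000
    long negZeroBits = Double.doubleToLongBits(-0.0);  // 0x8000000000000000
    assert zeroBits != negZeroBits;
    ```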
    
    ## How was this patch tested?
    
    existing tests
    
    Closes #23334 from cloud-fan/sort.
    
    Authored-by: Wenchen Fan <we...@databricks.com>
    Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
 .../util/collection/unsafe/sort/PrefixComparators.java  |  2 ++
 .../collection/unsafe/sort/PrefixComparatorsSuite.scala | 17 +++++++++++++++--
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparators.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparators.java
index 0910db2..bef1bda 100644
--- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparators.java
+++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparators.java
@@ -69,6 +69,8 @@ public class PrefixComparators {
      * details see http://stereopsis.com/radix.html.
      */
     public static long computePrefix(double value) {
+      // normalize -0.0 to 0.0, as they should be equal
+      value = value == -0.0 ? 0.0 : value;
       // Java's doubleToLongBits already canonicalizes all NaN values to the smallest possible
       // positive NaN, so there's nothing special we need to do for NaNs.
       long bits = Double.doubleToLongBits(value);
diff --git a/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/PrefixComparatorsSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/PrefixComparatorsSuite.scala
index 73546ef..38cb37c 100644
--- a/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/PrefixComparatorsSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/PrefixComparatorsSuite.scala
@@ -125,6 +125,7 @@ class PrefixComparatorsSuite extends SparkFunSuite with PropertyChecks {
     val nan2Prefix = PrefixComparators.DoublePrefixComparator.computePrefix(nan2)
     assert(nan1Prefix === nan2Prefix)
     val doubleMaxPrefix = PrefixComparators.DoublePrefixComparator.computePrefix(Double.MaxValue)
+    // NaN is greater than the max double value.
     assert(PrefixComparators.DOUBLE.compare(nan1Prefix, doubleMaxPrefix) === 1)
   }
 
@@ -134,22 +135,34 @@ class PrefixComparatorsSuite extends SparkFunSuite with PropertyChecks {
     assert(java.lang.Double.doubleToRawLongBits(negativeNan) < 0)
     val prefix = PrefixComparators.DoublePrefixComparator.computePrefix(negativeNan)
     val doubleMaxPrefix = PrefixComparators.DoublePrefixComparator.computePrefix(Double.MaxValue)
+    // -NaN is greater than the max double value.
     assert(PrefixComparators.DOUBLE.compare(prefix, doubleMaxPrefix) === 1)
   }
 
   test("double prefix comparator handles other special values properly") {
-    val nullValue = 0L
+    // See `SortPrefix.nullValue` for how we deal with nulls for float/double type
+    val smallestNullPrefix = 0L
+    val largestNullPrefix = -1L
     val nan = PrefixComparators.DoublePrefixComparator.computePrefix(Double.NaN)
     val posInf = PrefixComparators.DoublePrefixComparator.computePrefix(Double.PositiveInfinity)
     val negInf = PrefixComparators.DoublePrefixComparator.computePrefix(Double.NegativeInfinity)
     val minValue = PrefixComparators.DoublePrefixComparator.computePrefix(Double.MinValue)
     val maxValue = PrefixComparators.DoublePrefixComparator.computePrefix(Double.MaxValue)
     val zero = PrefixComparators.DoublePrefixComparator.computePrefix(0.0)
+    val minusZero = PrefixComparators.DoublePrefixComparator.computePrefix(-0.0)
+
+    // null is greater than everything including NaN, when we need to treat it as the largest value.
+    assert(PrefixComparators.DOUBLE.compare(largestNullPrefix, nan) === 1)
+    // NaN is greater than the positive infinity.
     assert(PrefixComparators.DOUBLE.compare(nan, posInf) === 1)
     assert(PrefixComparators.DOUBLE.compare(posInf, maxValue) === 1)
     assert(PrefixComparators.DOUBLE.compare(maxValue, zero) === 1)
     assert(PrefixComparators.DOUBLE.compare(zero, minValue) === 1)
     assert(PrefixComparators.DOUBLE.compare(minValue, negInf) === 1)
-    assert(PrefixComparators.DOUBLE.compare(negInf, nullValue) === 1)
+    // null is smaller than everything including negative infinity, when we need to treat it as
+    // the smallest value.
+    assert(PrefixComparators.DOUBLE.compare(negInf, smallestNullPrefix) === 1)
+    // 0.0 should be equal to -0.0.
+    assert(PrefixComparators.DOUBLE.compare(zero, minusZero) === 0)
   }
 }
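
For context beyond the two added lines: the encoding in `computePrefix` follows the bit-flipping trick cited in its comment (http://stereopsis.com/radix.html), and the resulting longs sort correctly under an unsigned comparison, which is how the sketch below compares them. The following is a self-contained sketch of that technique, not a verbatim copy of `PrefixComparators.java`; the class name and the `main` driver are purely illustrative.

public class DoublePrefixSketch {

  // Encode a double as a long whose unsigned ordering matches the double ordering.
  static long computePrefix(double value) {
    // Normalize -0.0 to 0.0 so both values map to the same prefix (the change in this commit).
    value = value == -0.0 ? 0.0 : value;
    // doubleToLongBits canonicalizes every NaN to a single positive NaN bit pattern.
    long bits = Double.doubleToLongBits(value);
    // Flip all bits for negative values and only the sign bit for non-negative values.
    long mask = -(bits >>> 63) | Long.MIN_VALUE;
    return bits ^ mask;
  }

  public static void main(String[] args) {
    long zero = computePrefix(0.0);
    long minusZero = computePrefix(-0.0);
    long nan = computePrefix(Double.NaN);
    long maxValue = computePrefix(Double.MaxValue);

    // Mirrors the assertions added in PrefixComparatorsSuite:
    System.out.println(Long.compareUnsigned(zero, minusZero) == 0); // true: 0.0 equals -0.0
    System.out.println(Long.compareUnsigned(nan, maxValue) > 0);    // true: NaN sorts above Double.MaxValue
  }
}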


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org