You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2018/12/18 18:10:09 UTC
[spark] branch master updated: [SPARK-26382][CORE] prefix
comparator should handle -0.0
This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new befca98 [SPARK-26382][CORE] prefix comparator should handle -0.0
befca98 is described below
commit befca983d2da4f7828aa7a7cd7345d17c4f291dd
Author: Wenchen Fan <we...@databricks.com>
AuthorDate: Tue Dec 18 10:09:56 2018 -0800
[SPARK-26382][CORE] prefix comparator should handle -0.0
## What changes were proposed in this pull request?
This is kind of a followup of https://github.com/apache/spark/pull/23239
The `UnsafeProject` will normalize special float/double values(NaN and -0.0), so the sorter doesn't have to handle it.
However, for consistency and future-proof, this PR proposes to normalize `-0.0` in the prefix comparator, so that it's same with the normal ordering. Note that prefix comparator handles NaN as well.
This is not a bug fix, but a safe guard.
## How was this patch tested?
existing tests
Closes #23334 from cloud-fan/sort.
Authored-by: Wenchen Fan <we...@databricks.com>
Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
.../util/collection/unsafe/sort/PrefixComparators.java | 2 ++
.../collection/unsafe/sort/PrefixComparatorsSuite.scala | 17 +++++++++++++++--
2 files changed, 17 insertions(+), 2 deletions(-)
diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparators.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparators.java
index 0910db2..bef1bda 100644
--- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparators.java
+++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparators.java
@@ -69,6 +69,8 @@ public class PrefixComparators {
* details see http://stereopsis.com/radix.html.
*/
public static long computePrefix(double value) {
+ // normalize -0.0 to 0.0, as they should be equal
+ value = value == -0.0 ? 0.0 : value;
// Java's doubleToLongBits already canonicalizes all NaN values to the smallest possible
// positive NaN, so there's nothing special we need to do for NaNs.
long bits = Double.doubleToLongBits(value);
diff --git a/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/PrefixComparatorsSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/PrefixComparatorsSuite.scala
index 73546ef..38cb37c 100644
--- a/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/PrefixComparatorsSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/PrefixComparatorsSuite.scala
@@ -125,6 +125,7 @@ class PrefixComparatorsSuite extends SparkFunSuite with PropertyChecks {
val nan2Prefix = PrefixComparators.DoublePrefixComparator.computePrefix(nan2)
assert(nan1Prefix === nan2Prefix)
val doubleMaxPrefix = PrefixComparators.DoublePrefixComparator.computePrefix(Double.MaxValue)
+ // NaN is greater than the max double value.
assert(PrefixComparators.DOUBLE.compare(nan1Prefix, doubleMaxPrefix) === 1)
}
@@ -134,22 +135,34 @@ class PrefixComparatorsSuite extends SparkFunSuite with PropertyChecks {
assert(java.lang.Double.doubleToRawLongBits(negativeNan) < 0)
val prefix = PrefixComparators.DoublePrefixComparator.computePrefix(negativeNan)
val doubleMaxPrefix = PrefixComparators.DoublePrefixComparator.computePrefix(Double.MaxValue)
+ // -NaN is greater than the max double value.
assert(PrefixComparators.DOUBLE.compare(prefix, doubleMaxPrefix) === 1)
}
test("double prefix comparator handles other special values properly") {
- val nullValue = 0L
+ // See `SortPrefix.nullValue` for how we deal with nulls for float/double type
+ val smallestNullPrefix = 0L
+ val largestNullPrefix = -1L
val nan = PrefixComparators.DoublePrefixComparator.computePrefix(Double.NaN)
val posInf = PrefixComparators.DoublePrefixComparator.computePrefix(Double.PositiveInfinity)
val negInf = PrefixComparators.DoublePrefixComparator.computePrefix(Double.NegativeInfinity)
val minValue = PrefixComparators.DoublePrefixComparator.computePrefix(Double.MinValue)
val maxValue = PrefixComparators.DoublePrefixComparator.computePrefix(Double.MaxValue)
val zero = PrefixComparators.DoublePrefixComparator.computePrefix(0.0)
+ val minusZero = PrefixComparators.DoublePrefixComparator.computePrefix(-0.0)
+
+ // null is greater than everything including NaN, when we need to treat it as the largest value.
+ assert(PrefixComparators.DOUBLE.compare(largestNullPrefix, nan) === 1)
+ // NaN is greater than the positive infinity.
assert(PrefixComparators.DOUBLE.compare(nan, posInf) === 1)
assert(PrefixComparators.DOUBLE.compare(posInf, maxValue) === 1)
assert(PrefixComparators.DOUBLE.compare(maxValue, zero) === 1)
assert(PrefixComparators.DOUBLE.compare(zero, minValue) === 1)
assert(PrefixComparators.DOUBLE.compare(minValue, negInf) === 1)
- assert(PrefixComparators.DOUBLE.compare(negInf, nullValue) === 1)
+ // null is smaller than everything including negative infinity, when we need to treat it as
+ // the smallest value.
+ assert(PrefixComparators.DOUBLE.compare(negInf, smallestNullPrefix) === 1)
+ // 0.0 should be equal to -0.0.
+ assert(PrefixComparators.DOUBLE.compare(zero, minusZero) === 0)
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org