You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2018/12/27 19:24:46 UTC
[spark] branch branch-2.4 updated: Revert [SPARK-26021][SQL]
replace minus zero with zero in Platform.putDouble/Float
This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch branch-2.4
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-2.4 by this push:
new fa1abe2 Revert [SPARK-26021][SQL] replace minus zero with zero in Platform.putDouble/Float
fa1abe2 is described below
commit fa1abe24f1ac9c44425c9997d25c787cfb5ecaad
Author: Wenchen Fan <we...@databricks.com>
AuthorDate: Thu Dec 27 11:23:05 2018 -0800
Revert [SPARK-26021][SQL] replace minus zero with zero in Platform.putDouble/Float
This PR reverts https://github.com/apache/spark/pull/23043 and its follow-up https://github.com/apache/spark/pull/23265 from branch-2.4, because they introduce behavior changes.
Tested with the existing tests.
Closes #23389 from cloud-fan/revert.
Authored-by: Wenchen Fan <we...@databricks.com>
Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
.../spark/sql/catalyst/expressions/UnsafeRow.java | 6 +++++
.../catalyst/expressions/codegen/UnsafeWriter.java | 29 ----------------------
.../expressions/codegen/UnsafeRowWriterSuite.scala | 20 ---------------
.../apache/spark/sql/DataFrameAggregateSuite.scala | 14 -----------
.../org/apache/spark/sql/DataFrameJoinSuite.scala | 12 ---------
.../spark/sql/DataFrameWindowFunctionsSuite.scala | 14 -----------
.../scala/org/apache/spark/sql/QueryTest.scala | 5 +---
7 files changed, 7 insertions(+), 93 deletions(-)
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java
index 9bf9452..a76e6ef 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java
@@ -224,6 +224,9 @@ public final class UnsafeRow extends InternalRow implements Externalizable, Kryo
public void setDouble(int ordinal, double value) {
assertIndexIsValid(ordinal);
setNotNullAt(ordinal);
+ if (Double.isNaN(value)) {
+ value = Double.NaN;
+ }
Platform.putDouble(baseObject, getFieldOffset(ordinal), value);
}
@@ -252,6 +255,9 @@ public final class UnsafeRow extends InternalRow implements Externalizable, Kryo
public void setFloat(int ordinal, float value) {
assertIndexIsValid(ordinal);
setNotNullAt(ordinal);
+ if (Float.isNaN(value)) {
+ value = Float.NaN;
+ }
Platform.putFloat(baseObject, getFieldOffset(ordinal), value);
}
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/codegen/UnsafeWriter.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/codegen/UnsafeWriter.java
index 7553ab8..2781655 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/codegen/UnsafeWriter.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/codegen/UnsafeWriter.java
@@ -198,45 +198,16 @@ public abstract class UnsafeWriter {
Platform.putLong(getBuffer(), offset, value);
}
- // We need to take care of NaN and -0.0 in several places:
- // 1. When compare values, different NaNs should be treated as same, `-0.0` and `0.0` should be
- // treated as same.
- // 2. In GROUP BY, different NaNs should belong to the same group, -0.0 and 0.0 should belong
- // to the same group.
- // 3. As join keys, different NaNs should be treated as same, `-0.0` and `0.0` should be
- // treated as same.
- // 4. As window partition keys, different NaNs should be treated as same, `-0.0` and `0.0`
- // should be treated as same.
- //
- // Case 1 is fine, as we handle NaN and -0.0 well during comparison. For complex types, we
- // recursively compare the fields/elements, so it's also fine.
- //
- // Case 2, 3 and 4 are problematic, as they compare `UnsafeRow` binary directly, and different
- // NaNs have different binary representation, and the same thing happens for -0.0 and 0.0.
- //
- // Here we normalize NaN and -0.0, so that `UnsafeProjection` will normalize them when writing
- // float/double columns and nested fields to `UnsafeRow`.
- //
- // Note that, we must do this for all the `UnsafeProjection`s, not only the ones that extract
- // join/grouping/window partition keys. `UnsafeProjection` copies unsafe data directly for complex
- // types, so nested float/double may not be normalized. We need to make sure that all the unsafe
- // data(`UnsafeRow`, `UnsafeArrayData`, `UnsafeMapData`) will have flat/double normalized during
- // creation.
protected final void writeFloat(long offset, float value) {
if (Float.isNaN(value)) {
value = Float.NaN;
- } else if (value == -0.0f) {
- value = 0.0f;
}
Platform.putFloat(getBuffer(), offset, value);
}
- // See comments for `writeFloat`.
protected final void writeDouble(long offset, double value) {
if (Double.isNaN(value)) {
value = Double.NaN;
- } else if (value == -0.0d) {
- value = 0.0d;
}
Platform.putDouble(getBuffer(), offset, value);
}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/UnsafeRowWriterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/UnsafeRowWriterSuite.scala
index 22e1fa6..fb651b7 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/UnsafeRowWriterSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/UnsafeRowWriterSuite.scala
@@ -50,24 +50,4 @@ class UnsafeRowWriterSuite extends SparkFunSuite {
assert(res1 == res2)
}
- test("SPARK-26021: normalize float/double NaN and -0.0") {
- val unsafeRowWriter1 = new UnsafeRowWriter(4)
- unsafeRowWriter1.resetRowWriter()
- unsafeRowWriter1.write(0, Float.NaN)
- unsafeRowWriter1.write(1, Double.NaN)
- unsafeRowWriter1.write(2, 0.0f)
- unsafeRowWriter1.write(3, 0.0)
- val res1 = unsafeRowWriter1.getRow
-
- val unsafeRowWriter2 = new UnsafeRowWriter(4)
- unsafeRowWriter2.resetRowWriter()
- unsafeRowWriter2.write(0, 0.0f/0.0f)
- unsafeRowWriter2.write(1, 0.0/0.0)
- unsafeRowWriter2.write(2, -0.0f)
- unsafeRowWriter2.write(3, -0.0)
- val res2 = unsafeRowWriter2.getRow
-
- // The two rows should be the equal
- assert(res1 == res2)
- }
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
index 41dc72d..d0106c4 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
@@ -727,18 +727,4 @@ class DataFrameAggregateSuite extends QueryTest with SharedSQLContext {
"grouping expressions: [current_date(None)], value: [key: int, value: string], " +
"type: GroupBy]"))
}
-
- test("SPARK-26021: Double and Float 0.0/-0.0 should be equal when grouping") {
- val colName = "i"
- val doubles = Seq(0.0d, -0.0d, 0.0d).toDF(colName).groupBy(colName).count().collect()
- val floats = Seq(0.0f, -0.0f, 0.0f).toDF(colName).groupBy(colName).count().collect()
-
- assert(doubles.length == 1)
- assert(floats.length == 1)
- // using compare since 0.0 == -0.0 is true
- assert(java.lang.Double.compare(doubles(0).getDouble(0), 0.0d) == 0)
- assert(java.lang.Float.compare(floats(0).getFloat(0), 0.0f) == 0)
- assert(doubles(0).getLong(1) == 3)
- assert(floats(0).getLong(1) == 3)
- }
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala
index c9f41ab..e6b30f9 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala
@@ -295,16 +295,4 @@ class DataFrameJoinSuite extends QueryTest with SharedSQLContext {
df.join(df, df("id") <=> df("id")).queryExecution.optimizedPlan
}
}
-
- test("NaN and -0.0 in join keys") {
- val df1 = Seq(Float.NaN -> Double.NaN, 0.0f -> 0.0, -0.0f -> -0.0).toDF("f", "d")
- val df2 = Seq(Float.NaN -> Double.NaN, 0.0f -> 0.0, -0.0f -> -0.0).toDF("f", "d")
- val joined = df1.join(df2, Seq("f", "d"))
- checkAnswer(joined, Seq(
- Row(Float.NaN, Double.NaN),
- Row(0.0f, 0.0),
- Row(0.0f, 0.0),
- Row(0.0f, 0.0),
- Row(0.0f, 0.0)))
- }
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala
index bbeb1d1..97a8439 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala
@@ -658,18 +658,4 @@ class DataFrameWindowFunctionsSuite extends QueryTest with SharedSQLContext {
|GROUP BY a
|HAVING SUM(b) = 5 AND RANK() OVER(ORDER BY a) = 1""".stripMargin))
}
-
- test("NaN and -0.0 in window partition keys") {
- val df = Seq(
- (Float.NaN, Double.NaN, 1),
- (0.0f/0.0f, 0.0/0.0, 1),
- (0.0f, 0.0, 1),
- (-0.0f, -0.0, 1)).toDF("f", "d", "i")
- val result = df.select($"f", count("i").over(Window.partitionBy("f", "d")))
- checkAnswer(result, Seq(
- Row(Float.NaN, 2),
- Row(Float.NaN, 2),
- Row(0.0f, 2),
- Row(0.0f, 2)))
- }
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala
index 8ba6723..baca9c1 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala
@@ -289,7 +289,7 @@ object QueryTest {
def prepareRow(row: Row): Row = {
Row.fromSeq(row.toSeq.map {
case null => null
- case bd: java.math.BigDecimal => BigDecimal(bd)
+ case d: java.math.BigDecimal => BigDecimal(d)
// Equality of WrappedArray differs for AnyVal and AnyRef in Scala 2.12.2+
case seq: Seq[_] => seq.map {
case b: java.lang.Byte => b.byteValue
@@ -303,9 +303,6 @@ object QueryTest {
// Convert array to Seq for easy equality check.
case b: Array[_] => b.toSeq
case r: Row => prepareRow(r)
- // spark treats -0.0 as 0.0
- case d: Double if d == -0.0d => 0.0d
- case f: Float if f == -0.0f => 0.0f
case o => o
})
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org